mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 12:24:38 +03:00
Add Xpath post processing and performer name query (#333)
* Extend xpath configuration. Support concatenation * Add parseDate parsing option * Add regex replacements * Add xpath query performer by name * Fix loading spinner on scrape performer * Change ReplaceAll to Replace
This commit is contained in:
@@ -41,6 +41,9 @@ type scraperTypeConfig struct {
|
|||||||
Script []string `yaml:"script,flow"`
|
Script []string `yaml:"script,flow"`
|
||||||
Scraper string `yaml:"scraper"`
|
Scraper string `yaml:"scraper"`
|
||||||
|
|
||||||
|
// for xpath name scraper only
|
||||||
|
QueryURL string `yaml:"queryURL"`
|
||||||
|
|
||||||
scraperConfig *scraperConfig
|
scraperConfig *scraperConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -56,6 +59,8 @@ func (c *performerByNameConfig) resolveFn() {
|
|||||||
c.performScrape = scrapePerformerNamesScript
|
c.performScrape = scrapePerformerNamesScript
|
||||||
} else if c.Action == scraperActionStash {
|
} else if c.Action == scraperActionStash {
|
||||||
c.performScrape = scrapePerformerNamesStash
|
c.performScrape = scrapePerformerNamesStash
|
||||||
|
} else if c.Action == scraperActionXPath {
|
||||||
|
c.performScrape = scrapePerformerNamesXPath
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,6 +271,11 @@ func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerI
|
|||||||
return c.PerformerByFragment.performScrape(c.PerformerByFragment.scraperTypeConfig, scrapedPerformer)
|
return c.PerformerByFragment.performScrape(c.PerformerByFragment.scraperTypeConfig, scrapedPerformer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// try to match against URL if present
|
||||||
|
if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" {
|
||||||
|
return c.ScrapePerformerURL(*scrapedPerformer.URL)
|
||||||
|
}
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,9 +2,11 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"net/url"
|
||||||
"reflect"
|
"reflect"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/antchfx/htmlquery"
|
"github.com/antchfx/htmlquery"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
@@ -43,35 +45,209 @@ func createXPathScraperConfig(src map[interface{}]interface{}) xpathScraperConfi
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) []xPathResult {
|
type xpathRegexConfig map[interface{}]interface{}
|
||||||
var ret []xPathResult
|
type xpathRegexConfigs []xpathRegexConfig
|
||||||
|
|
||||||
for k, v := range s {
|
func (c xpathRegexConfig) apply(value string) string {
|
||||||
asStr, isStr := v.(string)
|
regex := ""
|
||||||
|
with := ""
|
||||||
|
|
||||||
if isStr {
|
if regexI, _ := c["regex"]; regexI != nil {
|
||||||
// apply common
|
regex, _ = regexI.(string)
|
||||||
if common != nil {
|
}
|
||||||
asStr = common.applyCommon(asStr)
|
if withI, _ := c["with"]; withI != nil {
|
||||||
}
|
with, _ = withI.(string)
|
||||||
|
}
|
||||||
|
|
||||||
found, err := htmlquery.QueryAll(doc, asStr)
|
if regex != "" {
|
||||||
if err != nil {
|
re, err := regexp.Compile(regex)
|
||||||
logger.Warnf("Error parsing xpath expression '%s': %s", asStr, err.Error())
|
if err != nil {
|
||||||
continue
|
logger.Warnf("Error compiling regex '%s': %s", regex, err.Error())
|
||||||
}
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
return re.ReplaceAllString(value, with)
|
||||||
|
}
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathRegexConfigs) apply(value string) string {
|
||||||
|
// apply regex in order
|
||||||
|
for _, config := range c {
|
||||||
|
value = config.apply(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove whitespace again
|
||||||
|
value = commonPostProcess(value)
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// xpathScraperAttrConfig holds the options for a single scraped attribute
// when the attribute is configured as a map rather than a plain xpath
// string. Its methods read the "selector", "concat", "parseDate" and
// "replace" keys.
type xpathScraperAttrConfig map[interface{}]interface{}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) getString(key string) string {
|
||||||
|
ret, _ := c[key]
|
||||||
|
|
||||||
|
if ret == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
asStr, _ := ret.(string)
|
||||||
|
return asStr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) getSelector() string {
|
||||||
|
const selectorKey = "selector"
|
||||||
|
return c.getString(selectorKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) getConcat() string {
|
||||||
|
const concatKey = "concat"
|
||||||
|
return c.getString(concatKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) hasConcat() bool {
|
||||||
|
return c.getConcat() != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) getParseDate() string {
|
||||||
|
const parseDateKey = "parseDate"
|
||||||
|
return c.getString(parseDateKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) getReplace() xpathRegexConfigs {
|
||||||
|
const replaceKey = "replace"
|
||||||
|
val, _ := c[replaceKey]
|
||||||
|
|
||||||
|
var ret xpathRegexConfigs
|
||||||
|
if val == nil {
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
asSlice, _ := val.([]interface{})
|
||||||
|
|
||||||
|
for _, v := range asSlice {
|
||||||
|
asMap, _ := v.(map[interface{}]interface{})
|
||||||
|
ret = append(ret, xpathRegexConfig(asMap))
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) concatenateResults(nodes []*html.Node) string {
|
||||||
|
separator := c.getConcat()
|
||||||
|
result := []string{}
|
||||||
|
|
||||||
|
for _, elem := range nodes {
|
||||||
|
text := htmlquery.InnerText(elem)
|
||||||
|
text = commonPostProcess(text)
|
||||||
|
|
||||||
|
result = append(result, text)
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(result, separator)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) parseDate(value string) string {
|
||||||
|
parseDate := c.getParseDate()
|
||||||
|
|
||||||
|
if parseDate == "" {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// try to parse the date using the pattern
|
||||||
|
// if it fails, then just fall back to the original value
|
||||||
|
parsedValue, err := time.Parse(parseDate, value)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warnf("Error parsing date string '%s' using format '%s': %s", value, parseDate, err.Error())
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert it into our date format
|
||||||
|
const internalDateFormat = "2006-01-02"
|
||||||
|
return parsedValue.Format(internalDateFormat)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) replaceRegex(value string) string {
|
||||||
|
replace := c.getReplace()
|
||||||
|
return replace.apply(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) postProcess(value string) string {
|
||||||
|
// perform regex replacements first
|
||||||
|
value = c.replaceRegex(value)
|
||||||
|
value = c.parseDate(value)
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// multipleSpacesRegex matches a run of one or more space characters. It is
// compiled once here rather than on every commonPostProcess call.
var multipleSpacesRegex = regexp.MustCompile(" +")

// commonPostProcess normalises scraped text. It trims leading and trailing
// whitespace, removes every newline character (nothing is inserted in its
// place), and collapses each run of spaces into a single space.
func commonPostProcess(value string) string {
	value = strings.TrimSpace(value)

	// remove end lines; a plain string replace is enough for a literal "\n"
	value = strings.Replace(value, "\n", "", -1)

	// remove multiple whitespace
	return multipleSpacesRegex.ReplaceAllString(value, " ")
}
|
||||||
|
|
||||||
|
func runXPathQuery(doc *html.Node, xpath string, common commonXPathConfig) []*html.Node {
|
||||||
|
// apply common
|
||||||
|
if common != nil {
|
||||||
|
xpath = common.applyCommon(xpath)
|
||||||
|
}
|
||||||
|
|
||||||
|
found, err := htmlquery.QueryAll(doc, xpath)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warnf("Error parsing xpath expression '%s': %s", xpath, err.Error())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xPathResults {
|
||||||
|
var ret xPathResults
|
||||||
|
|
||||||
|
for k, value := range s {
|
||||||
|
switch v := value.(type) {
|
||||||
|
case string:
|
||||||
|
found := runXPathQuery(doc, v, common)
|
||||||
|
|
||||||
if len(found) > 0 {
|
if len(found) > 0 {
|
||||||
for i, elem := range found {
|
for i, elem := range found {
|
||||||
if i >= len(ret) {
|
text := htmlquery.InnerText(elem)
|
||||||
ret = append(ret, make(xPathResult))
|
text = commonPostProcess(text)
|
||||||
}
|
|
||||||
|
|
||||||
ret[i][k] = elem
|
ret = ret.setKey(i, k, text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case map[interface{}]interface{}:
|
||||||
|
attrConfig := xpathScraperAttrConfig(v)
|
||||||
|
|
||||||
|
found := runXPathQuery(doc, attrConfig.getSelector(), common)
|
||||||
|
|
||||||
|
if len(found) > 0 {
|
||||||
|
// check if we're concatenating the results into a single result
|
||||||
|
if attrConfig.hasConcat() {
|
||||||
|
result := attrConfig.concatenateResults(found)
|
||||||
|
result = attrConfig.postProcess(result)
|
||||||
|
const i = 0
|
||||||
|
ret = ret.setKey(i, k, result)
|
||||||
|
} else {
|
||||||
|
for i, elem := range found {
|
||||||
|
text := htmlquery.InnerText(elem)
|
||||||
|
text = commonPostProcess(text)
|
||||||
|
text = attrConfig.postProcess(text)
|
||||||
|
|
||||||
|
ret = ret.setKey(i, k, text)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO - handle map type
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
@@ -153,6 +329,24 @@ func (s xpathScraper) scrapePerformer(doc *html.Node) (*models.ScrapedPerformer,
|
|||||||
return &ret, nil
|
return &ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s xpathScraper) scrapePerformers(doc *html.Node) ([]*models.ScrapedPerformer, error) {
|
||||||
|
var ret []*models.ScrapedPerformer
|
||||||
|
|
||||||
|
performerMap := s.Performer
|
||||||
|
if performerMap == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
results := performerMap.process(doc, s.Common)
|
||||||
|
for _, r := range results {
|
||||||
|
var p models.ScrapedPerformer
|
||||||
|
r.apply(&p)
|
||||||
|
ret = append(ret, &p)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s xpathScraper) scrapeScene(doc *html.Node) (*models.ScrapedScene, error) {
|
func (s xpathScraper) scrapeScene(doc *html.Node) (*models.ScrapedScene, error) {
|
||||||
var ret models.ScrapedScene
|
var ret models.ScrapedScene
|
||||||
|
|
||||||
@@ -204,7 +398,8 @@ func (s xpathScraper) scrapeScene(doc *html.Node) (*models.ScrapedScene, error)
|
|||||||
return &ret, nil
|
return &ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type xPathResult map[string]*html.Node
|
type xPathResult map[string]string
|
||||||
|
type xPathResults []xPathResult
|
||||||
|
|
||||||
func (r xPathResult) apply(dest interface{}) {
|
func (r xPathResult) apply(dest interface{}) {
|
||||||
destVal := reflect.ValueOf(dest)
|
destVal := reflect.ValueOf(dest)
|
||||||
@@ -212,22 +407,16 @@ func (r xPathResult) apply(dest interface{}) {
|
|||||||
// dest should be a pointer
|
// dest should be a pointer
|
||||||
destVal = destVal.Elem()
|
destVal = destVal.Elem()
|
||||||
|
|
||||||
for key, v := range r {
|
for key, value := range r {
|
||||||
field := destVal.FieldByName(key)
|
field := destVal.FieldByName(key)
|
||||||
|
|
||||||
if field.IsValid() {
|
if field.IsValid() {
|
||||||
value := htmlquery.InnerText(v)
|
|
||||||
value = strings.TrimSpace(value)
|
|
||||||
|
|
||||||
// remove multiple whitespace and end lines
|
|
||||||
re := regexp.MustCompile("\n")
|
|
||||||
value = re.ReplaceAllString(value, "")
|
|
||||||
re = regexp.MustCompile(" +")
|
|
||||||
value = re.ReplaceAllString(value, " ")
|
|
||||||
|
|
||||||
var reflectValue reflect.Value
|
var reflectValue reflect.Value
|
||||||
if field.Kind() == reflect.Ptr {
|
if field.Kind() == reflect.Ptr {
|
||||||
reflectValue = reflect.ValueOf(&value)
|
// need to copy the value, otherwise everything is set to the
|
||||||
|
// same pointer
|
||||||
|
localValue := value
|
||||||
|
reflectValue = reflect.ValueOf(&localValue)
|
||||||
} else {
|
} else {
|
||||||
reflectValue = reflect.ValueOf(value)
|
reflectValue = reflect.ValueOf(value)
|
||||||
}
|
}
|
||||||
@@ -239,6 +428,15 @@ func (r xPathResult) apply(dest interface{}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r xPathResults) setKey(index int, key string, value string) xPathResults {
|
||||||
|
if index >= len(r) {
|
||||||
|
r = append(r, make(xPathResult))
|
||||||
|
}
|
||||||
|
|
||||||
|
r[index][key] = value
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
|
func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
|
||||||
scraper := c.scraperConfig.XPathScrapers[c.Scraper]
|
scraper := c.scraperConfig.XPathScrapers[c.Scraper]
|
||||||
|
|
||||||
@@ -270,3 +468,27 @@ func scrapeSceneURLXPath(c scraperTypeConfig, url string) (*models.ScrapedScene,
|
|||||||
|
|
||||||
return scraper.scrapeScene(doc)
|
return scraper.scrapeScene(doc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func scrapePerformerNamesXPath(c scraperTypeConfig, name string) ([]*models.ScrapedPerformer, error) {
|
||||||
|
scraper := c.scraperConfig.XPathScrapers[c.Scraper]
|
||||||
|
|
||||||
|
if scraper == nil {
|
||||||
|
return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
|
||||||
|
}
|
||||||
|
|
||||||
|
const placeholder = "{}"
|
||||||
|
|
||||||
|
// replace the placeholder string with the URL-escaped name
|
||||||
|
escapedName := url.QueryEscape(name)
|
||||||
|
|
||||||
|
u := c.QueryURL
|
||||||
|
u = strings.Replace(u, placeholder, escapedName, -1)
|
||||||
|
|
||||||
|
doc, err := htmlquery.LoadURL(u)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return scraper.scrapePerformers(doc)
|
||||||
|
}
|
||||||
|
|||||||
@@ -183,23 +183,50 @@ func makeCommonXPath(attr string) string {
|
|||||||
return `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = '` + attr + `']/ancestor::tr/td[@class="paramvalue"]`
|
return `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = '` + attr + `']/ancestor::tr/td[@class="paramvalue"]`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// makeReplaceRegex builds a single replacement entry with the given pattern
// under "regex" and its replacement text under "with".
func makeReplaceRegex(regex string, with string) map[interface{}]interface{} {
	return map[interface{}]interface{}{
		"regex": regex,
		"with":  with,
	}
}
|
||||||
|
|
||||||
func makeXPathConfig() xpathScraperConfig {
|
func makeXPathConfig() xpathScraperConfig {
|
||||||
config := make(xpathScraperConfig)
|
config := make(xpathScraperConfig)
|
||||||
|
|
||||||
config["Name"] = makeCommonXPath("Babe Name:") + `/a`
|
config["Name"] = makeCommonXPath("Babe Name:") + `/a`
|
||||||
config["Ethnicity"] = makeCommonXPath("Ethnicity:")
|
config["Ethnicity"] = makeCommonXPath("Ethnicity:")
|
||||||
config["Country"] = makeCommonXPath("Country of Origin:")
|
config["Country"] = makeCommonXPath("Country of Origin:")
|
||||||
config["Birthdate"] = makeCommonXPath("Date of Birth:")
|
|
||||||
config["Aliases"] = makeCommonXPath("Aliases:")
|
config["Aliases"] = makeCommonXPath("Aliases:")
|
||||||
config["EyeColor"] = makeCommonXPath("Eye Color:")
|
config["EyeColor"] = makeCommonXPath("Eye Color:")
|
||||||
config["Measurements"] = makeCommonXPath("Measurements:")
|
config["Measurements"] = makeCommonXPath("Measurements:")
|
||||||
config["FakeTits"] = makeCommonXPath("Fake boobs:")
|
config["FakeTits"] = makeCommonXPath("Fake boobs:")
|
||||||
config["Height"] = makeCommonXPath("Height:")
|
config["Height"] = makeCommonXPath("Height:")
|
||||||
// no colon in attribute header
|
|
||||||
config["CareerLength"] = makeCommonXPath("Career Start And End")
|
|
||||||
config["Tattoos"] = makeCommonXPath("Tattoos:")
|
config["Tattoos"] = makeCommonXPath("Tattoos:")
|
||||||
config["Piercings"] = makeCommonXPath("Piercings:")
|
config["Piercings"] = makeCommonXPath("Piercings:")
|
||||||
|
|
||||||
|
// special handling for birthdate
|
||||||
|
birthdateAttrConfig := make(map[interface{}]interface{})
|
||||||
|
birthdateAttrConfig["selector"] = makeCommonXPath("Date of Birth:")
|
||||||
|
|
||||||
|
var birthdateReplace []interface{}
|
||||||
|
birthdateReplace = append(birthdateReplace, makeReplaceRegex(` \(.* years old\)`, ""))
|
||||||
|
|
||||||
|
birthdateAttrConfig["replace"] = birthdateReplace
|
||||||
|
birthdateAttrConfig["parseDate"] = "January 2, 2006" // "July 1, 1992 (27 years old) "
|
||||||
|
config["Birthdate"] = birthdateAttrConfig
|
||||||
|
|
||||||
|
// special handling for career length
|
||||||
|
careerLengthAttrConfig := make(map[interface{}]interface{})
|
||||||
|
// no colon in attribute header
|
||||||
|
careerLengthAttrConfig["selector"] = makeCommonXPath("Career Start And End")
|
||||||
|
|
||||||
|
var careerLengthReplace []interface{}
|
||||||
|
careerLengthReplace = append(careerLengthReplace, makeReplaceRegex(`\s+\(.*\)`, ""))
|
||||||
|
careerLengthAttrConfig["replace"] = careerLengthReplace
|
||||||
|
|
||||||
|
config["CareerLength"] = careerLengthAttrConfig
|
||||||
|
|
||||||
return config
|
return config
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -240,7 +267,7 @@ func TestScrapePerformerXPath(t *testing.T) {
|
|||||||
const performerName = "Mia Malkova"
|
const performerName = "Mia Malkova"
|
||||||
const ethnicity = "Caucasian"
|
const ethnicity = "Caucasian"
|
||||||
const country = "United States"
|
const country = "United States"
|
||||||
const birthdate = "July 1, 1992 (27 years old)"
|
const birthdate = "1992-07-01"
|
||||||
const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
|
const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
|
||||||
const eyeColor = "Hazel"
|
const eyeColor = "Hazel"
|
||||||
const measurements = "34C-26-36"
|
const measurements = "34C-26-36"
|
||||||
@@ -251,19 +278,65 @@ func TestScrapePerformerXPath(t *testing.T) {
|
|||||||
verifyField(t, performerName, performer.Name, "Name")
|
verifyField(t, performerName, performer.Name, "Name")
|
||||||
verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
|
verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
|
||||||
verifyField(t, country, performer.Country, "Country")
|
verifyField(t, country, performer.Country, "Country")
|
||||||
|
|
||||||
verifyField(t, birthdate, performer.Birthdate, "Birthdate")
|
verifyField(t, birthdate, performer.Birthdate, "Birthdate")
|
||||||
|
|
||||||
verifyField(t, aliases, performer.Aliases, "Aliases")
|
verifyField(t, aliases, performer.Aliases, "Aliases")
|
||||||
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
|
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
|
||||||
verifyField(t, measurements, performer.Measurements, "Measurements")
|
verifyField(t, measurements, performer.Measurements, "Measurements")
|
||||||
verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
|
verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
|
||||||
|
|
||||||
// TODO - this needs post-processing
|
verifyField(t, careerLength, performer.CareerLength, "CareerLength")
|
||||||
//verifyField(t, careerLength, performer.CareerLength, "CareerLength")
|
|
||||||
|
|
||||||
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
|
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
|
||||||
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
|
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConcatXPath(t *testing.T) {
|
||||||
|
const firstName = "FirstName"
|
||||||
|
const lastName = "LastName"
|
||||||
|
const eyeColor = "EyeColor"
|
||||||
|
const separator = " "
|
||||||
|
const testDoc = `
|
||||||
|
<html>
|
||||||
|
<div>` + firstName + `</div>
|
||||||
|
<div>` + lastName + `</div>
|
||||||
|
<span>` + eyeColor + `</span>
|
||||||
|
</html>
|
||||||
|
`
|
||||||
|
|
||||||
|
reader := strings.NewReader(testDoc)
|
||||||
|
doc, err := htmlquery.Parse(reader)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Error loading document: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
xpathConfig := make(xpathScraperConfig)
|
||||||
|
nameAttrConfig := make(map[interface{}]interface{})
|
||||||
|
nameAttrConfig["selector"] = "//div"
|
||||||
|
nameAttrConfig["concat"] = separator
|
||||||
|
xpathConfig["Name"] = nameAttrConfig
|
||||||
|
xpathConfig["EyeColor"] = "//span"
|
||||||
|
|
||||||
|
scraper := xpathScraper{
|
||||||
|
Performer: xpathConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
performer, err := scraper.scrapePerformer(doc)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Error scraping performer: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const performerName = firstName + separator + lastName
|
||||||
|
|
||||||
|
verifyField(t, performerName, performer.Name, "Name")
|
||||||
|
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
|
||||||
|
}
|
||||||
|
|
||||||
const sceneHTML = `
|
const sceneHTML = `
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
|
||||||
|
|||||||
@@ -174,16 +174,17 @@ export const PerformerDetailsPanel: FunctionComponent<IPerformerDetailsProps> =
|
|||||||
|
|
||||||
async function onScrapePerformer() {
|
async function onScrapePerformer() {
|
||||||
setIsDisplayingScraperDialog(undefined);
|
setIsDisplayingScraperDialog(undefined);
|
||||||
setIsLoading(true);
|
|
||||||
try {
|
try {
|
||||||
if (!scrapePerformerDetails || !isDisplayingScraperDialog) { return; }
|
if (!scrapePerformerDetails || !isDisplayingScraperDialog) { return; }
|
||||||
|
setIsLoading(true);
|
||||||
const result = await StashService.queryScrapePerformer(isDisplayingScraperDialog.id, getQueryScraperPerformerInput());
|
const result = await StashService.queryScrapePerformer(isDisplayingScraperDialog.id, getQueryScraperPerformerInput());
|
||||||
if (!result.data || !result.data.scrapePerformer) { return; }
|
if (!result.data || !result.data.scrapePerformer) { return; }
|
||||||
updatePerformerEditState(result.data.scrapePerformer);
|
updatePerformerEditState(result.data.scrapePerformer);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
ErrorUtils.handle(e);
|
ErrorUtils.handle(e);
|
||||||
|
} finally {
|
||||||
|
setIsLoading(false);
|
||||||
}
|
}
|
||||||
setIsLoading(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function onScrapePerformerURL() {
|
async function onScrapePerformerURL() {
|
||||||
|
|||||||
Reference in New Issue
Block a user