mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 12:24:38 +03:00
Add "split" xpath in post-processing, newlines in replace support (#579)
This commit is contained in:
@@ -75,6 +75,10 @@ func (c xpathRegexConfig) apply(value string) string {
|
||||
}
|
||||
|
||||
ret := re.ReplaceAllString(value, with)
|
||||
// replace lines if needed to protect from commonPostprocess
|
||||
if with == "\n" {
|
||||
ret = replaceLines(ret)
|
||||
}
|
||||
|
||||
logger.Debugf(`Replace: '%s' with '%s'`, regex, with)
|
||||
logger.Debugf("Before: %s", value)
|
||||
@@ -94,6 +98,9 @@ func (c xpathRegexConfigs) apply(value string) string {
|
||||
// remove whitespace again
|
||||
value = commonPostProcess(value)
|
||||
|
||||
// restore replaced lines
|
||||
|
||||
value = restoreLines(value)
|
||||
return value
|
||||
}
|
||||
|
||||
@@ -129,6 +136,15 @@ func (c xpathScraperAttrConfig) getParseDate() string {
|
||||
return c.getString(parseDateKey)
|
||||
}
|
||||
|
||||
func (c xpathScraperAttrConfig) getSplit() string {
|
||||
const splitKey = "split"
|
||||
return c.getString(splitKey)
|
||||
}
|
||||
|
||||
func (c xpathScraperAttrConfig) hasSplit() bool {
|
||||
return c.getSplit() != ""
|
||||
}
|
||||
|
||||
func (c xpathScraperAttrConfig) getReplace() xpathRegexConfigs {
|
||||
const replaceKey = "replace"
|
||||
val, _ := c[replaceKey]
|
||||
@@ -198,6 +214,36 @@ func (c xpathScraperAttrConfig) parseDate(value string) string {
|
||||
return parsedValue.Format(internalDateFormat)
|
||||
}
|
||||
|
||||
func (c xpathScraperAttrConfig) splitString(value string) []string {
|
||||
separator := c.getSplit()
|
||||
var res []string
|
||||
|
||||
if separator == "" {
|
||||
return []string{value}
|
||||
}
|
||||
|
||||
for _, str := range strings.Split(value, separator) {
|
||||
if str != "" {
|
||||
res = append(res, str)
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// setKeyAndSplit sets the key "k" for the results "ret" and splits if needed
|
||||
// "i" is the index starting position
|
||||
func (c xpathScraperAttrConfig) setKeyAndSplit(ret *xPathResults, value string, k string, i int) {
|
||||
if c.hasSplit() {
|
||||
for j, txt := range c.splitString(value) {
|
||||
*ret = ret.setKey(j+i, k, txt)
|
||||
}
|
||||
} else {
|
||||
*ret = ret.setKey(i, k, value)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (c xpathScraperAttrConfig) replaceRegex(value string) string {
|
||||
replace := c.getReplace()
|
||||
return replace.apply(value)
|
||||
@@ -258,6 +304,24 @@ func commonPostProcess(value string) string {
|
||||
return value
|
||||
}
|
||||
|
||||
// replaceLines replaces all newlines ("\n") in value with alert characters
// ("\a") so that they are not stripped by commonPostProcess. Use restoreLines
// to turn the placeholders back into newlines.
//
// Any alert characters already present in value are removed first, since they
// would otherwise be indistinguishable from the placeholders.
func replaceLines(value string) string {
	// Both substitutions are fixed literals, so plain string replacement is
	// used rather than compiling a regular expression on every call.
	value = strings.ReplaceAll(value, "\a", "") // \a shouldn't exist in the string
	return strings.ReplaceAll(value, "\n", "\a")
}
|
||||
|
||||
// restoreLines replaces all alert characters ("\a") in value with newlines
// ("\n"), undoing the substitution made by replaceLines.
func restoreLines(value string) string {
	// A fixed literal needs no regular expression; this avoids compiling one
	// on every call.
	return strings.ReplaceAll(value, "\a", "\n")
}
|
||||
|
||||
func runXPathQuery(doc *html.Node, xpath string, common commonXPathConfig) []*html.Node {
|
||||
// apply common
|
||||
if common != nil {
|
||||
@@ -299,15 +363,13 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
|
||||
if attrConfig.hasConcat() {
|
||||
result := attrConfig.concatenateResults(found)
|
||||
result = attrConfig.postProcess(result)
|
||||
const i = 0
|
||||
ret = ret.setKey(i, k, result)
|
||||
attrConfig.setKeyAndSplit(&ret, result, k, 0)
|
||||
} else {
|
||||
for i, elem := range found {
|
||||
text := NodeText(elem)
|
||||
text = commonPostProcess(text)
|
||||
text = attrConfig.postProcess(text)
|
||||
|
||||
ret = ret.setKey(i, k, text)
|
||||
attrConfig.setKeyAndSplit(&ret, text, k, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -663,6 +663,14 @@ func makeSceneXPathConfig() xpathScraper {
|
||||
studioConfig["URL"] = `$studioElem/@href`
|
||||
config["Studio"] = studioConfig
|
||||
|
||||
const sep = " "
|
||||
moviesNameConfig := make(map[interface{}]interface{})
|
||||
moviesNameConfig["selector"] = `//i[@class="isMe tooltipTrig"]/@data-title`
|
||||
moviesNameConfig["split"] = sep
|
||||
moviesConfig := make(map[interface{}]interface{})
|
||||
moviesConfig["Name"] = moviesNameConfig
|
||||
config["Movies"] = moviesConfig
|
||||
|
||||
scraper := xpathScraper{
|
||||
Scene: config,
|
||||
Common: common,
|
||||
@@ -692,6 +700,27 @@ func verifyTags(t *testing.T, expectedTagNames []string, actualTags []*models.Sc
|
||||
}
|
||||
}
|
||||
|
||||
func verifyMovies(t *testing.T, expectedMovieNames []string, actualMovies []*models.ScrapedSceneMovie) {
|
||||
t.Helper()
|
||||
|
||||
i := 0
|
||||
for i < len(expectedMovieNames) || i < len(actualMovies) {
|
||||
expectedMovie := ""
|
||||
actualMovie := ""
|
||||
if i < len(expectedMovieNames) {
|
||||
expectedMovie = expectedMovieNames[i]
|
||||
}
|
||||
if i < len(actualMovies) {
|
||||
actualMovie = actualMovies[i].Name
|
||||
}
|
||||
|
||||
if expectedMovie != actualMovie {
|
||||
t.Errorf("Expected movie %s, got %s", expectedMovie, actualMovie)
|
||||
}
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedScenePerformer) {
|
||||
t.Helper()
|
||||
|
||||
@@ -761,6 +790,15 @@ func TestApplySceneXPathConfig(t *testing.T) {
|
||||
}
|
||||
verifyTags(t, expectedTags, scene.Tags)
|
||||
|
||||
// verify movies
|
||||
expectedMovies := []string{
|
||||
"Video",
|
||||
"of",
|
||||
"verified",
|
||||
"member",
|
||||
}
|
||||
verifyMovies(t, expectedMovies, scene.Movies)
|
||||
|
||||
expectedPerformerNames := []string{
|
||||
"Alex D",
|
||||
"Mia Malkova",
|
||||
|
||||
@@ -6,6 +6,7 @@ const markup = `
|
||||
* Add support for parent/child studios.
|
||||
|
||||
### 🎨 Improvements
|
||||
* Add split xpath post-processing action.
|
||||
* Improved the layout of the scene page.
|
||||
* Show rating as stars in scene page.
|
||||
* Add reload scrapers button.
|
||||
|
||||
@@ -488,3 +488,7 @@ div.dropdown-menu {
|
||||
text-transform: uppercase;
|
||||
}
|
||||
}
|
||||
|
||||
.pre {
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user