mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 12:24:38 +03:00
Add "split" xpath post-processing action and newline support in replace (#579)
This commit is contained in:
@@ -75,6 +75,10 @@ func (c xpathRegexConfig) apply(value string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ret := re.ReplaceAllString(value, with)
|
ret := re.ReplaceAllString(value, with)
|
||||||
|
// replace lines if needed to protect from commonPostprocess
|
||||||
|
if with == "\n" {
|
||||||
|
ret = replaceLines(ret)
|
||||||
|
}
|
||||||
|
|
||||||
logger.Debugf(`Replace: '%s' with '%s'`, regex, with)
|
logger.Debugf(`Replace: '%s' with '%s'`, regex, with)
|
||||||
logger.Debugf("Before: %s", value)
|
logger.Debugf("Before: %s", value)
|
||||||
@@ -94,6 +98,9 @@ func (c xpathRegexConfigs) apply(value string) string {
|
|||||||
// remove whitespace again
|
// remove whitespace again
|
||||||
value = commonPostProcess(value)
|
value = commonPostProcess(value)
|
||||||
|
|
||||||
|
// restore replaced lines
|
||||||
|
|
||||||
|
value = restoreLines(value)
|
||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -129,6 +136,15 @@ func (c xpathScraperAttrConfig) getParseDate() string {
|
|||||||
return c.getString(parseDateKey)
|
return c.getString(parseDateKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) getSplit() string {
|
||||||
|
const splitKey = "split"
|
||||||
|
return c.getString(splitKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) hasSplit() bool {
|
||||||
|
return c.getSplit() != ""
|
||||||
|
}
|
||||||
|
|
||||||
func (c xpathScraperAttrConfig) getReplace() xpathRegexConfigs {
|
func (c xpathScraperAttrConfig) getReplace() xpathRegexConfigs {
|
||||||
const replaceKey = "replace"
|
const replaceKey = "replace"
|
||||||
val, _ := c[replaceKey]
|
val, _ := c[replaceKey]
|
||||||
@@ -198,6 +214,36 @@ func (c xpathScraperAttrConfig) parseDate(value string) string {
|
|||||||
return parsedValue.Format(internalDateFormat)
|
return parsedValue.Format(internalDateFormat)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c xpathScraperAttrConfig) splitString(value string) []string {
|
||||||
|
separator := c.getSplit()
|
||||||
|
var res []string
|
||||||
|
|
||||||
|
if separator == "" {
|
||||||
|
return []string{value}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, str := range strings.Split(value, separator) {
|
||||||
|
if str != "" {
|
||||||
|
res = append(res, str)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
|
// setKeyAndSplit sets the key "k" for the results "ret" and splits if needed
|
||||||
|
// "i" is the index starting position
|
||||||
|
func (c xpathScraperAttrConfig) setKeyAndSplit(ret *xPathResults, value string, k string, i int) {
|
||||||
|
if c.hasSplit() {
|
||||||
|
for j, txt := range c.splitString(value) {
|
||||||
|
*ret = ret.setKey(j+i, k, txt)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*ret = ret.setKey(i, k, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
func (c xpathScraperAttrConfig) replaceRegex(value string) string {
|
func (c xpathScraperAttrConfig) replaceRegex(value string) string {
|
||||||
replace := c.getReplace()
|
replace := c.getReplace()
|
||||||
return replace.apply(value)
|
return replace.apply(value)
|
||||||
@@ -258,6 +304,24 @@ func commonPostProcess(value string) string {
|
|||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// replaceLines replaces every newline ("\n") in value with the alert
// character ("\a") so that the newlines are not removed by
// commonPostProcess; restoreLines reverses the substitution afterwards.
//
// Any "\a" already present in value is removed first, so that every "\a"
// in the result stands for a newline.
func replaceLines(value string) string {
	// Both substitutions are literal, so plain string replacement is used
	// rather than compiling a regular expression on every call.
	value = strings.ReplaceAll(value, "\a", "") // \a shouldn't exist in the string; remove it
	return strings.ReplaceAll(value, "\n", "\a")
}
|
||||||
|
|
||||||
|
// restoreLines replaces every alert character ("\a") in value with a
// newline ("\n"), undoing the substitution made by replaceLines.
func restoreLines(value string) string {
	// Literal replacement; no regular expression is needed.
	return strings.ReplaceAll(value, "\a", "\n")
}
|
||||||
|
|
||||||
func runXPathQuery(doc *html.Node, xpath string, common commonXPathConfig) []*html.Node {
|
func runXPathQuery(doc *html.Node, xpath string, common commonXPathConfig) []*html.Node {
|
||||||
// apply common
|
// apply common
|
||||||
if common != nil {
|
if common != nil {
|
||||||
@@ -299,15 +363,13 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
|
|||||||
if attrConfig.hasConcat() {
|
if attrConfig.hasConcat() {
|
||||||
result := attrConfig.concatenateResults(found)
|
result := attrConfig.concatenateResults(found)
|
||||||
result = attrConfig.postProcess(result)
|
result = attrConfig.postProcess(result)
|
||||||
const i = 0
|
attrConfig.setKeyAndSplit(&ret, result, k, 0)
|
||||||
ret = ret.setKey(i, k, result)
|
|
||||||
} else {
|
} else {
|
||||||
for i, elem := range found {
|
for i, elem := range found {
|
||||||
text := NodeText(elem)
|
text := NodeText(elem)
|
||||||
text = commonPostProcess(text)
|
text = commonPostProcess(text)
|
||||||
text = attrConfig.postProcess(text)
|
text = attrConfig.postProcess(text)
|
||||||
|
attrConfig.setKeyAndSplit(&ret, text, k, i)
|
||||||
ret = ret.setKey(i, k, text)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -663,6 +663,14 @@ func makeSceneXPathConfig() xpathScraper {
|
|||||||
studioConfig["URL"] = `$studioElem/@href`
|
studioConfig["URL"] = `$studioElem/@href`
|
||||||
config["Studio"] = studioConfig
|
config["Studio"] = studioConfig
|
||||||
|
|
||||||
|
const sep = " "
|
||||||
|
moviesNameConfig := make(map[interface{}]interface{})
|
||||||
|
moviesNameConfig["selector"] = `//i[@class="isMe tooltipTrig"]/@data-title`
|
||||||
|
moviesNameConfig["split"] = sep
|
||||||
|
moviesConfig := make(map[interface{}]interface{})
|
||||||
|
moviesConfig["Name"] = moviesNameConfig
|
||||||
|
config["Movies"] = moviesConfig
|
||||||
|
|
||||||
scraper := xpathScraper{
|
scraper := xpathScraper{
|
||||||
Scene: config,
|
Scene: config,
|
||||||
Common: common,
|
Common: common,
|
||||||
@@ -692,6 +700,27 @@ func verifyTags(t *testing.T, expectedTagNames []string, actualTags []*models.Sc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func verifyMovies(t *testing.T, expectedMovieNames []string, actualMovies []*models.ScrapedSceneMovie) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
for i < len(expectedMovieNames) || i < len(actualMovies) {
|
||||||
|
expectedMovie := ""
|
||||||
|
actualMovie := ""
|
||||||
|
if i < len(expectedMovieNames) {
|
||||||
|
expectedMovie = expectedMovieNames[i]
|
||||||
|
}
|
||||||
|
if i < len(actualMovies) {
|
||||||
|
actualMovie = actualMovies[i].Name
|
||||||
|
}
|
||||||
|
|
||||||
|
if expectedMovie != actualMovie {
|
||||||
|
t.Errorf("Expected movie %s, got %s", expectedMovie, actualMovie)
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedScenePerformer) {
|
func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedScenePerformer) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
@@ -761,6 +790,15 @@ func TestApplySceneXPathConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
verifyTags(t, expectedTags, scene.Tags)
|
verifyTags(t, expectedTags, scene.Tags)
|
||||||
|
|
||||||
|
// verify movies
|
||||||
|
expectedMovies := []string{
|
||||||
|
"Video",
|
||||||
|
"of",
|
||||||
|
"verified",
|
||||||
|
"member",
|
||||||
|
}
|
||||||
|
verifyMovies(t, expectedMovies, scene.Movies)
|
||||||
|
|
||||||
expectedPerformerNames := []string{
|
expectedPerformerNames := []string{
|
||||||
"Alex D",
|
"Alex D",
|
||||||
"Mia Malkova",
|
"Mia Malkova",
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ const markup = `
|
|||||||
* Add support for parent/child studios.
|
* Add support for parent/child studios.
|
||||||
|
|
||||||
### 🎨 Improvements
|
### 🎨 Improvements
|
||||||
|
* Add split xpath post-processing action.
|
||||||
* Improved the layout of the scene page.
|
* Improved the layout of the scene page.
|
||||||
* Show rating as stars in scene page.
|
* Show rating as stars in scene page.
|
||||||
* Add reload scrapers button.
|
* Add reload scrapers button.
|
||||||
|
|||||||
@@ -488,3 +488,7 @@ div.dropdown-menu {
|
|||||||
text-transform: uppercase;
|
text-transform: uppercase;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.pre {
|
||||||
|
white-space: pre-line;
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user