mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 20:34:37 +03:00
Get distinct values from scraper (#1338)
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
@@ -12,6 +12,7 @@ import (
|
|||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
"github.com/stashapp/stash/pkg/models"
|
"github.com/stashapp/stash/pkg/models"
|
||||||
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -73,7 +74,9 @@ func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrCon
|
|||||||
result := attrConfig.concatenateResults(found)
|
result := attrConfig.concatenateResults(found)
|
||||||
result = attrConfig.postProcess(result, q)
|
result = attrConfig.postProcess(result, q)
|
||||||
if attrConfig.hasSplit() {
|
if attrConfig.hasSplit() {
|
||||||
return attrConfig.splitString(result)
|
results := attrConfig.splitString(result)
|
||||||
|
results = attrConfig.distinctResults(results)
|
||||||
|
return results
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = []string{result}
|
ret = []string{result}
|
||||||
@@ -86,6 +89,7 @@ func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrCon
|
|||||||
|
|
||||||
ret = append(ret, text)
|
ret = append(ret, text)
|
||||||
}
|
}
|
||||||
|
ret = attrConfig.distinctResults(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
@@ -639,6 +643,10 @@ func (c mappedScraperAttrConfig) concatenateResults(nodes []string) string {
|
|||||||
return strings.Join(result, separator)
|
return strings.Join(result, separator)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c mappedScraperAttrConfig) distinctResults(nodes []string) []string {
|
||||||
|
return utils.StrUnique(nodes)
|
||||||
|
}
|
||||||
|
|
||||||
func (c mappedScraperAttrConfig) splitString(value string) []string {
|
func (c mappedScraperAttrConfig) splitString(value string) []string {
|
||||||
separator := c.Split
|
separator := c.Split
|
||||||
var res []string
|
var res []string
|
||||||
|
|||||||
@@ -163,6 +163,8 @@ const htmlDoc1 = `
|
|||||||
</td>
|
</td>
|
||||||
<td class="paramvalue">
|
<td class="paramvalue">
|
||||||
<ul id="socialmedia">
|
<ul id="socialmedia">
|
||||||
|
<!-- Adding twitter twice to verify distinct post-processing -->
|
||||||
|
<li class="twitter"><a href="https://twitter.com/MiaMalkova" target="_blank" alt="Mia Malkova Twitter" title="Mia Malkova Twitter">Twitter</a></li>
|
||||||
<li class="twitter"><a href="https://twitter.com/MiaMalkova" target="_blank" alt="Mia Malkova Twitter" title="Mia Malkova Twitter">Twitter</a></li>
|
<li class="twitter"><a href="https://twitter.com/MiaMalkova" target="_blank" alt="Mia Malkova Twitter" title="Mia Malkova Twitter">Twitter</a></li>
|
||||||
<li class="facebook"><a href="https://www.facebook.com/MiaMalcove" target="_blank" alt="Mia Malkova Facebook" title="Mia Malkova Facebook">Facebook</a></li>
|
<li class="facebook"><a href="https://www.facebook.com/MiaMalcove" target="_blank" alt="Mia Malkova Facebook" title="Mia Malkova Facebook">Facebook</a></li>
|
||||||
<li class="youtube"><a href="https://www.youtube.com/channel/UCEPR0sZKa_ScMoyhemfB7nA" target="_blank" alt="Mia Malkova YouTube" title="Mia Malkova YouTube">YouTube</a></li>
|
<li class="youtube"><a href="https://www.youtube.com/channel/UCEPR0sZKa_ScMoyhemfB7nA" target="_blank" alt="Mia Malkova YouTube" title="Mia Malkova YouTube">YouTube</a></li>
|
||||||
@@ -270,6 +272,12 @@ func makeXPathConfig() mappedPerformerScraperConfig {
|
|||||||
}
|
}
|
||||||
config.mappedConfig["Weight"] = weightConfig
|
config.mappedConfig["Weight"] = weightConfig
|
||||||
|
|
||||||
|
tagConfig := mappedScraperAttrConfig{
|
||||||
|
Selector: `//ul[@id="socialmedia"]//a`,
|
||||||
|
}
|
||||||
|
config.Tags = make(mappedConfig)
|
||||||
|
config.Tags["Name"] = tagConfig
|
||||||
|
|
||||||
return config
|
return config
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -348,6 +356,16 @@ func TestScrapePerformerXPath(t *testing.T) {
|
|||||||
verifyField(t, details, performer.Details, "Details")
|
verifyField(t, details, performer.Details, "Details")
|
||||||
verifyField(t, hairColor, performer.HairColor, "HairColor")
|
verifyField(t, hairColor, performer.HairColor, "HairColor")
|
||||||
verifyField(t, weight, performer.Weight, "Weight")
|
verifyField(t, weight, performer.Weight, "Weight")
|
||||||
|
|
||||||
|
expectedTagNames := []string{
|
||||||
|
"Twitter",
|
||||||
|
"Facebook",
|
||||||
|
"YouTube",
|
||||||
|
"Instagram",
|
||||||
|
}
|
||||||
|
for i, expected := range expectedTagNames {
|
||||||
|
verifyField(t, expected, &performer.Tags[i].Name, "TagName")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConcatXPath(t *testing.T) {
|
func TestConcatXPath(t *testing.T) {
|
||||||
|
|||||||
@@ -35,6 +35,40 @@ func StrMap(vs []string, f func(string) string) []string {
|
|||||||
return vsm
|
return vsm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StrAppendUnique appends toAdd to the vs string slice if toAdd does not already
|
||||||
|
// exist in the slice. It returns the new or unchanged string slice.
|
||||||
|
func StrAppendUnique(vs []string, toAdd string) []string {
|
||||||
|
if StrInclude(vs, toAdd) {
|
||||||
|
return vs
|
||||||
|
}
|
||||||
|
|
||||||
|
return append(vs, toAdd)
|
||||||
|
}
|
||||||
|
|
||||||
|
// StrAppendUniques appends a slice of string values to the vs string slice. It only
|
||||||
|
// appends values that do not already exist in the slice. It returns the new or
|
||||||
|
// unchanged string slice.
|
||||||
|
func StrAppendUniques(vs []string, toAdd []string) []string {
|
||||||
|
for _, v := range toAdd {
|
||||||
|
vs = StrAppendUnique(vs, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
return vs
|
||||||
|
}
|
||||||
|
|
||||||
|
// StrUnique returns the distinct values of vs in order of first appearance;
// later repeats of a value are dropped. The result is nil when vs contains
// no elements.
func StrUnique(vs []string) []string {
	seen := make(map[string]bool)
	var unique []string
	for i := range vs {
		value := vs[i]
		if seen[value] {
			// Already emitted on an earlier iteration.
			continue
		}
		seen[value] = true
		unique = append(unique, value)
	}
	return unique
}
|
||||||
|
|
||||||
// StringSliceToIntSlice converts a slice of strings to a slice of ints.
|
// StringSliceToIntSlice converts a slice of strings to a slice of ints.
|
||||||
// Returns an error if any values cannot be parsed.
|
// Returns an error if any values cannot be parsed.
|
||||||
func StringSliceToIntSlice(ss []string) ([]int, error) {
|
func StringSliceToIntSlice(ss []string) ([]int, error) {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
* Added scene queue.
|
* Added scene queue.
|
||||||
|
|
||||||
### 🎨 Improvements
|
### 🎨 Improvements
|
||||||
|
* Remove duplicate values when scraping lists of elements.
|
||||||
* Improved performance of the auto-tagger.
|
* Improved performance of the auto-tagger.
|
||||||
* Clean generation artifacts after generating each scene.
|
* Clean generation artifacts after generating each scene.
|
||||||
* Log message at startup when cleaning the `tmp` and `downloads` generated folders takes more than one second.
|
* Log message at startup when cleaning the `tmp` and `downloads` generated folders takes more than one second.
|
||||||
|
|||||||
@@ -389,7 +389,17 @@ Replaces `2001 to 2003` with `2001-2003`.
|
|||||||
|
|
||||||
Additionally, there are a number of fixed post-processing fields that are specified at the attribute level (not in `postProcess`) that are performed after the `postProcess` operations:
|
Additionally, there are a number of fixed post-processing fields that are specified at the attribute level (not in `postProcess`) that are performed after the `postProcess` operations:
|
||||||
* `concat`: if an xpath matches multiple elements, and `concat` is present, then all of the elements will be concatenated together
|
* `concat`: if an xpath matches multiple elements, and `concat` is present, then all of the elements will be concatenated together
|
||||||
* `split`: Its the inverse of `concat`. Splits a string to more elements using the separator given. For more info and examples have a look at PR [#579](https://github.com/stashapp/stash/pull/579)
|
* `split`: the inverse of `concat`. Splits a string into multiple elements using the given separator. For more information and examples, see PR [#579](https://github.com/stashapp/stash/pull/579)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```yaml
|
||||||
|
Tags:
|
||||||
|
Name:
|
||||||
|
selector: //span[@class="list_attributes"]
|
||||||
|
split: ","
|
||||||
|
```
|
||||||
|
Splits a comma-separated list of tags located in the span and returns the tags.
|
||||||
|
|
||||||
|
|
||||||
For backwards compatibility, `replace`, `subscraper` and `parseDate` are also allowed as keys for the attribute.
|
For backwards compatibility, `replace`, `subscraper` and `parseDate` are also allowed as keys for the attribute.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user