mirror of
https://github.com/stashapp/stash.git
synced 2025-12-18 12:54:38 +03:00
scraper/mapped: Add feetToCm post process. (#711)
This patch adds a feetToCm post process that converts imperial feet and inches to centimeters.
This commit is contained in:
@@ -3,8 +3,10 @@ package scraper
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"reflect"
|
"reflect"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -328,11 +330,36 @@ func (p *postProcessMap) Apply(value string, q mappedQuery) string {
|
|||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// postProcessFeetToCm is the post-process action that converts a height
// written in feet and inches into centimetres (see its Apply method). It is a
// bool because it is created from the boolean feetToCm field of
// mappedPostProcessAction.
type postProcessFeetToCm bool
|
||||||
|
|
||||||
|
func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string {
|
||||||
|
const foot_in_cm = 30.48
|
||||||
|
const inch_in_cm = 2.54
|
||||||
|
|
||||||
|
reg := regexp.MustCompile("[0-9]+")
|
||||||
|
filtered := reg.FindAllString(value, -1)
|
||||||
|
|
||||||
|
var feet float64
|
||||||
|
var inches float64
|
||||||
|
if len(filtered) > 0 {
|
||||||
|
feet, _ = strconv.ParseFloat(filtered[0], 64)
|
||||||
|
}
|
||||||
|
if len(filtered) > 1 {
|
||||||
|
inches, _ = strconv.ParseFloat(filtered[1], 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
var centimeters = feet*foot_in_cm + inches*inch_in_cm
|
||||||
|
|
||||||
|
// Return rounded integer string
|
||||||
|
return strconv.Itoa(int(math.Round(centimeters)))
|
||||||
|
}
|
||||||
|
|
||||||
type mappedPostProcessAction struct {
|
type mappedPostProcessAction struct {
|
||||||
ParseDate string `yaml:"parseDate"`
|
ParseDate string `yaml:"parseDate"`
|
||||||
Replace mappedRegexConfigs `yaml:"replace"`
|
Replace mappedRegexConfigs `yaml:"replace"`
|
||||||
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
||||||
Map map[string]string `yaml:"map"`
|
Map map[string]string `yaml:"map"`
|
||||||
|
FeetToCm bool `yaml:"feetToCm"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error) {
|
func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error) {
|
||||||
@@ -368,6 +395,14 @@ func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error
|
|||||||
action := postProcessMap(a.Map)
|
action := postProcessMap(a.Map)
|
||||||
ret = &action
|
ret = &action
|
||||||
}
|
}
|
||||||
|
if a.FeetToCm {
|
||||||
|
if found != "" {
|
||||||
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "feetToCm")
|
||||||
|
}
|
||||||
|
found = "feetToCm"
|
||||||
|
action := postProcessFeetToCm(a.FeetToCm)
|
||||||
|
ret = &action
|
||||||
|
}
|
||||||
|
|
||||||
if ret == nil {
|
if ret == nil {
|
||||||
return nil, errors.New("invalid post-process action")
|
return nil, errors.New("invalid post-process action")
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package scraper
|
|||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -29,3 +30,31 @@ xPathScrapers:
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// feetToCMTest pairs a raw height string (in) with the centimetre string the
// feetToCm post-process is expected to produce for it (out).
type feetToCMTest struct {
	in, out string
}

// feetToCMTests is the table of cases exercised by TestFeetToCM.
var feetToCMTests = []feetToCMTest{
	// No digits at all: the result is zero.
	{in: "", out: "0"},
	{in: "a", out: "0"},

	// Feet only.
	{in: "6", out: "183"},
	{in: "6 feet", out: "183"},
	{in: "6ft0", out: "183"},

	// Feet and inches with assorted separators.
	{in: "6ft2", out: "188"},
	{in: "6'2\"", out: "188"},

	// "." is a separator, not a decimal point.
	{in: "6.2", out: "188"},

	// Numbers after the second one are ignored.
	{in: "6ft2.99", out: "188"},

	// Surrounding text is skipped.
	{in: "text6other2", out: "188"},
}
|
||||||
|
|
||||||
|
func TestFeetToCM(t *testing.T) {
|
||||||
|
pp := postProcessFeetToCm(true)
|
||||||
|
|
||||||
|
q := &xpathQuery{}
|
||||||
|
|
||||||
|
for _, test := range feetToCMTests {
|
||||||
|
assert.Equal(t, test.out, pp.Apply(test.in, q))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -97,28 +97,7 @@ const htmlDoc1 = `
|
|||||||
<b>Height:</b>
|
<b>Height:</b>
|
||||||
</td>
|
</td>
|
||||||
<td class="paramvalue">
|
<td class="paramvalue">
|
||||||
<script type="text/javascript">
|
5ft7
|
||||||
<!--
|
|
||||||
heightcm = "171";
|
|
||||||
morethenone = 'inch';
|
|
||||||
feet = heightcm / 30.48;
|
|
||||||
inches = (feet - Math.floor(feet)) * 30.48 / 2.54;
|
|
||||||
|
|
||||||
feet = Math.floor(feet);
|
|
||||||
inches = inches.toFixed(0);
|
|
||||||
|
|
||||||
if (inches > 1) {
|
|
||||||
morethenone = 'inches';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (heightcm == 0) {
|
|
||||||
message = 'Unknown';
|
|
||||||
} else {
|
|
||||||
message = '171 cm - ' + feet + ' feet and ' + inches + ' ' + morethenone;
|
|
||||||
}
|
|
||||||
document.write(message);
|
|
||||||
// -->
|
|
||||||
</script>
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
@@ -209,12 +188,10 @@ func makeXPathConfig() mappedPerformerScraperConfig {
|
|||||||
|
|
||||||
config.mappedConfig["Name"] = makeSimpleAttrConfig(makeCommonXPath("Babe Name:") + `/a`)
|
config.mappedConfig["Name"] = makeSimpleAttrConfig(makeCommonXPath("Babe Name:") + `/a`)
|
||||||
config.mappedConfig["Ethnicity"] = makeSimpleAttrConfig(makeCommonXPath("Ethnicity:"))
|
config.mappedConfig["Ethnicity"] = makeSimpleAttrConfig(makeCommonXPath("Ethnicity:"))
|
||||||
config.mappedConfig["Country"] = makeSimpleAttrConfig(makeCommonXPath("Country of Origin:"))
|
|
||||||
config.mappedConfig["Aliases"] = makeSimpleAttrConfig(makeCommonXPath("Aliases:"))
|
config.mappedConfig["Aliases"] = makeSimpleAttrConfig(makeCommonXPath("Aliases:"))
|
||||||
config.mappedConfig["EyeColor"] = makeSimpleAttrConfig(makeCommonXPath("Eye Color:"))
|
config.mappedConfig["EyeColor"] = makeSimpleAttrConfig(makeCommonXPath("Eye Color:"))
|
||||||
config.mappedConfig["Measurements"] = makeSimpleAttrConfig(makeCommonXPath("Measurements:"))
|
config.mappedConfig["Measurements"] = makeSimpleAttrConfig(makeCommonXPath("Measurements:"))
|
||||||
config.mappedConfig["FakeTits"] = makeSimpleAttrConfig(makeCommonXPath("Fake boobs:"))
|
config.mappedConfig["FakeTits"] = makeSimpleAttrConfig(makeCommonXPath("Fake boobs:"))
|
||||||
config.mappedConfig["Height"] = makeSimpleAttrConfig(makeCommonXPath("Height:"))
|
|
||||||
config.mappedConfig["Tattoos"] = makeSimpleAttrConfig(makeCommonXPath("Tattoos:"))
|
config.mappedConfig["Tattoos"] = makeSimpleAttrConfig(makeCommonXPath("Tattoos:"))
|
||||||
config.mappedConfig["Piercings"] = makeSimpleAttrConfig(makeCommonXPath("Piercings:"))
|
config.mappedConfig["Piercings"] = makeSimpleAttrConfig(makeCommonXPath("Piercings:"))
|
||||||
|
|
||||||
@@ -257,10 +234,17 @@ func makeXPathConfig() mappedPerformerScraperConfig {
|
|||||||
config.mappedConfig["Gender"] = genderConfig
|
config.mappedConfig["Gender"] = genderConfig
|
||||||
|
|
||||||
// use fixed for height
|
// use fixed for height
|
||||||
config.mappedConfig["Height"] = mappedScraperAttrConfig{
|
config.mappedConfig["Country"] = mappedScraperAttrConfig{
|
||||||
Fixed: "1234",
|
Fixed: "United States",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
heightConfig := makeSimpleAttrConfig(makeCommonXPath("Height:"))
|
||||||
|
heightConvAction := postProcessFeetToCm(true)
|
||||||
|
heightConfig.postProcessActions = []postProcessAction{
|
||||||
|
&heightConvAction,
|
||||||
|
}
|
||||||
|
config.mappedConfig["Height"] = heightConfig
|
||||||
|
|
||||||
return config
|
return config
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -313,7 +297,7 @@ func TestScrapePerformerXPath(t *testing.T) {
|
|||||||
const careerLength = "2012 - 2019"
|
const careerLength = "2012 - 2019"
|
||||||
const tattoosPiercings = "None"
|
const tattoosPiercings = "None"
|
||||||
const gender = "Female"
|
const gender = "Female"
|
||||||
const height = "1234"
|
const height = "170"
|
||||||
|
|
||||||
verifyField(t, performerName, performer.Name, "Name")
|
verifyField(t, performerName, performer.Name, "Name")
|
||||||
verifyField(t, gender, performer.Gender, "Gender")
|
verifyField(t, gender, performer.Gender, "Gender")
|
||||||
@@ -331,7 +315,7 @@ func TestScrapePerformerXPath(t *testing.T) {
|
|||||||
|
|
||||||
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
|
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
|
||||||
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
|
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
|
||||||
verifyField(t, height, performer.Height, "Piercings")
|
verifyField(t, height, performer.Height, "Height")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConcatXPath(t *testing.T) {
|
func TestConcatXPath(t *testing.T) {
|
||||||
|
|||||||
@@ -289,6 +289,7 @@ The `Measurements` xpath string will replace `$infoPiece` with `//div[@class="in
|
|||||||
##### Post-processing options
|
##### Post-processing options
|
||||||
|
|
||||||
Post-processing operations are contained in the `postProcess` key. Post-processing operations are performed in the order they are specified. The following post-processing operations are available:
|
Post-processing operations are contained in the `postProcess` key. Post-processing operations are performed in the order they are specified. The following post-processing operations are available:
|
||||||
|
* `feetToCm`: converts a string containing a height in feet and inches into centimetres. It looks for up to two separate integers and interprets the first as the number of feet and the second as the number of inches; any further numbers are ignored. The numbers can be separated by any non-numeric character, including the `.` character, so decimal numbers are not supported. For example, `6.3` and `6ft3.3` would both be interpreted as 6 feet, 3 inches before being converted into centimetres. The result is rounded to the nearest whole centimetre.
|
||||||
* `map`: contains a map of input values to output values. Where a value matches one of the input values, it is replaced with the matching output value. If no value is matched, then value is unmodified.
|
* `map`: contains a map of input values to output values. Where a value matches one of the input values, it is replaced with the matching output value. If no value is matched, then value is unmodified.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
@@ -303,6 +304,8 @@ performer:
|
|||||||
```
|
```
|
||||||
Gets the contents of the selected div element, and sets the returned value to `Female` if the scraped value is `F`; `Male` if the scraped value is `M`.
|
Gets the contents of the selected div element, and sets the returned value to `Female` if the scraped value is `F`; `Male` if the scraped value is `M`.
|
||||||
|
|
||||||
|
* `parseDate`: if present, the value is the date format using go's reference date (2006-01-02). For example, if an example date was `14-Mar-2003`, then the date format would be `02-Jan-2006`. See the [time.Parse documentation](https://golang.org/pkg/time/#Parse) for details. When present, the scraper will convert the input string into a date, then convert it to the string format used by stash (`YYYY-MM-DD`).
|
||||||
|
|
||||||
* `replace`: contains an array of sub-objects. Each sub-object must have a `regex` and `with` field. The `regex` field is the regex pattern to replace, and `with` is the string to replace it with. `$` is used to reference capture groups - `$1` is the first capture group, `$2` the second and so on. Replacements are performed in order of the array.
|
* `replace`: contains an array of sub-objects. Each sub-object must have a `regex` and `with` field. The `regex` field is the regex pattern to replace, and `with` is the string to replace it with. `$` is used to reference capture groups - `$1` is the first capture group, `$2` the second and so on. Replacements are performed in order of the array.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
@@ -317,7 +320,6 @@ CareerLength:
|
|||||||
Replaces `2001 to 2003` with `2001-2003`.
|
Replaces `2001 to 2003` with `2001-2003`.
|
||||||
|
|
||||||
* `subScraper`: if present, the sub-scraper will be executed after all other post-processes are complete and before parseDate. It then takes the value and performs an http request, using the value as the URL. Within the `subScraper` config is a nested scraping configuration. This allows you to traverse to other webpages to get the attribute value you are after. For more info and examples have a look at [#370](https://github.com/stashapp/stash/pull/370), [#606](https://github.com/stashapp/stash/pull/606)
|
* `subScraper`: if present, the sub-scraper will be executed after all other post-processes are complete and before parseDate. It then takes the value and performs an http request, using the value as the URL. Within the `subScraper` config is a nested scraping configuration. This allows you to traverse to other webpages to get the attribute value you are after. For more info and examples have a look at [#370](https://github.com/stashapp/stash/pull/370), [#606](https://github.com/stashapp/stash/pull/606)
|
||||||
* `parseDate`: if present, the value is the date format using go's reference date (2006-01-02). For example, if an example date was `14-Mar-2003`, then the date format would be `02-Jan-2006`. See the [time.Parse documentation](https://golang.org/pkg/time/#Parse) for details. When present, the scraper will convert the input string into a date, then convert it to the string format used by stash (`YYYY-MM-DD`).
|
|
||||||
|
|
||||||
Additionally, there are a number of fixed post-processing fields that are specified at the attribute level (not in `postProcess`) that are performed after the `postProcess` operations:
|
Additionally, there are a number of fixed post-processing fields that are specified at the attribute level (not in `postProcess`) that are performed after the `postProcess` operations:
|
||||||
* `concat`: if an xpath matches multiple elements, and `concat` is present, then all of the elements will be concatenated together
|
* `concat`: if an xpath matches multiple elements, and `concat` is present, then all of the elements will be concatenated together
|
||||||
|
|||||||
Reference in New Issue
Block a user