Update freeones scraper (#881)

This commit is contained in:
WithoutPants
2020-10-24 13:12:21 +11:00
committed by GitHub
parent 2987b7f3d2
commit 70f73ecf4a

View File

@@ -14,12 +14,13 @@ const freeonesScraperConfig = `
name: Freeones name: Freeones
performerByName: performerByName:
action: scrapeXPath action: scrapeXPath
queryURL: https://www.freeones.xxx/babes?q={}&v=teasers&s=relevance&l=96&m%5BcanPreviewFeatures%5D=0 queryURL: https://www.freeones.com/babes?q={}&v=teasers&s=relevance&l=96&m%5BcanPreviewFeatures%5D=0
scraper: performerSearch scraper: performerSearch
performerByURL: performerByURL:
- action: scrapeXPath - action: scrapeXPath
url: url:
- https://www.freeones.xxx - freeones.xxx
- freeones.com
scraper: performerScraper scraper: performerScraper
xPathScrapers: xPathScrapers:
@@ -28,80 +29,78 @@ xPathScrapers:
Name: //div[@id="search-result"]//p[@data-test="subject-name"]/text() Name: //div[@id="search-result"]//p[@data-test="subject-name"]/text()
URL: URL:
selector: //div[@id="search-result"]//div[@data-test="teaser-subject"]/a/@href selector: //div[@id="search-result"]//div[@data-test="teaser-subject"]/a/@href
replace: postProcess:
- regex: ^ - replace:
with: https://www.freeones.xxx - regex: ^
- regex: $ with: https://www.freeones.com
with: /profile - regex: $
with: /profile
performerScraper: performerScraper:
performer: performer:
Name: //h1 Name:
selector: //h1
postProcess:
- replace:
- regex: \sBio\s*$
with: ""
URL: URL:
selector: //a[span[text()="Profile"]]/@href selector: //a[span[text()="Profile"]]/@href
replace: postProcess:
- regex: ^ - replace:
with: https://www.freeones.xxx - regex: ^
Twitter: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center m-2 social-icons color-twitter']/@href with: https://www.freeones.com
Instagram: //div[p[text()='Follow On']]//div//a[@class='d-flex align-items-center justify-content-center m-2 social-icons color-telegram']/@href Twitter: //a[contains(@href,'twitter.com/')]/@href
Instagram: //a[contains(@href,'instagram.com/')]/@href
Birthdate: Birthdate:
selector: //div[p[text()='Personal Information']]//div//p/a/span[contains(text(),'Born On')] selector: //div[p[text()='Personal Information']]//span[contains(text(),'Born On')]
replace: postProcess:
- regex: Born On - replace:
with: - regex: Born On
- regex: "," with:
with: - parseDate: January 2, 2006
parseDate: January 2 2006
Ethnicity: Ethnicity:
selector: //div[p[text()='Ethnicity']]//div//p[@class='mb-0 text-center'] selector: //div[p[text()='Ethnicity']]//a[@data-test="link_ethnicity"]
replace: postProcess:
- regex: Asian - map:
with: "asian" Asian: asian
- regex: Caucasian Caucasian: white
with: "white" Black: black
- regex: Black Latin: hispanic
with: "black" Country: //div[p[text()='Personal Information']]//a[@data-test="link-country"]
- regex: Latin EyeColor: //span[text()='Eye Color']/following-sibling::span/a
with: "hispanic"
Country: //div[p[text()='Personal Information']]//div//p//a[@data-test="link-country"]
EyeColor: //span[@data-test="link_span_eye_color"]
Height: Height:
selector: //span[@data-test="link_span_height"] selector: //span[text()='Height']/following-sibling::span/a
replace: postProcess:
- regex: \D+[\s\S]+ - replace:
with: "" - regex: \D+[\s\S]+
with: ""
- map:
Unknown: ""
Measurements: Measurements:
selector: //span[@data-test="p-measurements"]//a/span selector: //span[text()='Measurements']/following-sibling::span/span/a
concat: " - " concat: " - "
replace: postProcess:
- regex: Unknown - map:
with: Unknown: ""
FakeTits: FakeTits:
selector: //span[@data-test='link_span_boobs'] selector: //span[text()='Boobs']/following-sibling::span/a
replace: postProcess:
- regex: Unknown - map:
with: Unknown: ""
- regex: Fake Fake: Yes
with: "Yes" Natural: No
- regex: Natural
with: "No"
CareerLength: CareerLength:
selector: //div[p[text()='career']]//div//div[@class='timeline-horizontal mb-3']//div//p[@class='m-0'] selector: //div[p[text()='career']]//div[contains(@class,'timeline-horizontal')]//p[@class='m-0']
concat: "-" concat: "-"
replace: Aliases: //p[text()='Aliases']/following-sibling::div/p
- regex: -\w+-\w+-\w+-\w+-\w+$ Tattoos: //span[text()='Tattoos']/following-sibling::span/span
with: "" Piercings: //span[text()='Piercings']/following-sibling::span/span
Aliases: //div[p[text()='Aliases']]//div//p[@class='mb-0 text-center']
Tattoos: //span[@data-test="p_has_tattoos"]|//span[@cdata-test="p_has_tattoos"]
Piercings: //span[@data-test="p_has_piercings"]
Image: Image:
selector: //div[@class='profile-image-container']//a/img/@src selector: //div[@class='profile-image-container']//a/img/@src
Gender: Gender:
selector: //meta[@name="language"]/@name fixed: "Female"
replace: # Last updated October 21, 2020
- regex: language
with: "Female"
# Last updated June 15, 2020
` `
func getFreeonesScraper() config { func getFreeonesScraper() config {