Mirror of https://github.com/stashapp/stash.git (synced 2025-12-17 04:14:39 +03:00)
Generic performer scrapers (#203)
* Generalise scraper API
* Add script performer scraper
* Fixes from testing
* Add context to scrapers and generalise
* Add scraping performer from URL
* Add error handling
* Move log to debug
* Add supported scrape types
pkg/api/resolver.go
@@ -8,7 +8,6 @@ import (
 	"github.com/99designs/gqlgen/graphql"
 
 	"github.com/stashapp/stash/pkg/models"
-	"github.com/stashapp/stash/pkg/scraper"
 )
 
 type Resolver struct{}
@@ -161,14 +160,6 @@ func (r *queryResolver) SceneMarkerTags(ctx context.Context, scene_id string) ([
 	return result, nil
 }
 
-func (r *queryResolver) ScrapeFreeones(ctx context.Context, performer_name string) (*models.ScrapedPerformer, error) {
-	return scraper.GetPerformer(performer_name)
-}
-
-func (r *queryResolver) ScrapeFreeonesPerformerList(ctx context.Context, query string) ([]string, error) {
-	return scraper.GetPerformerNames(query)
-}
-
 // wasFieldIncluded returns true if the given field was included in the request.
 // Slices are unmarshalled to empty slices even if the field was omitted. This
 // method determines if it was omitted altogether.
pkg/api/resolver_query_scraper.go (new file, 53 lines)
@@ -0,0 +1,53 @@
+package api
+
+import (
+	"context"
+
+	"github.com/stashapp/stash/pkg/models"
+	"github.com/stashapp/stash/pkg/scraper"
+)
+
+// deprecated
+func (r *queryResolver) ScrapeFreeones(ctx context.Context, performer_name string) (*models.ScrapedPerformer, error) {
+	scrapedPerformer := models.ScrapedPerformerInput{
+		Name: &performer_name,
+	}
+	return scraper.GetFreeonesScraper().ScrapePerformer(scrapedPerformer)
+}
+
+// deprecated
+func (r *queryResolver) ScrapeFreeonesPerformerList(ctx context.Context, query string) ([]string, error) {
+	scrapedPerformers, err := scraper.GetFreeonesScraper().ScrapePerformerNames(query)
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret []string
+	for _, v := range scrapedPerformers {
+		name := v.Name
+		ret = append(ret, *name)
+	}
+
+	return ret, nil
+}
+
+func (r *queryResolver) ListScrapers(ctx context.Context, scraperType models.ScraperType) ([]*models.Scraper, error) {
+	return scraper.ListScrapers(scraperType)
+}
+
+func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
+	if query == "" {
+		return nil, nil
+	}
+
+	return scraper.ScrapePerformerList(scraperID, query)
+}
+
+func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	return scraper.ScrapePerformer(scraperID, scrapedPerformer)
+}
+
+func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
+	return scraper.ScrapePerformerURL(url)
+}
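The deprecated resolvers are now thin shims over the generalised scraper package. A minimal sketch of the indirection from calling code, assuming the signatures shown in this diff (the standalone main harness and the performer name are illustrative; the "builtin_freeones" ID comes from freeones.go below):

package main

import (
	"fmt"
	"log"

	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/scraper"
)

func main() {
	name := "Some Performer" // illustrative input
	input := models.ScrapedPerformerInput{Name: &name}

	// what the deprecated ScrapeFreeones resolver does internally
	p, err := scraper.GetFreeonesScraper().ScrapePerformer(input)
	if err != nil {
		log.Fatal(err)
	}

	// equivalent call through the generic, ID-dispatched entry point
	p2, err := scraper.ScrapePerformer("builtin_freeones", input)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(p, p2)
}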
pkg/manager/config/config.go
@@ -22,6 +22,8 @@ const Password = "password"
 
 const Database = "database"
 
+const ScrapersPath = "scrapers_path"
+
 const MaxTranscodeSize = "max_transcode_size"
 const MaxStreamingTranscodeSize = "max_streaming_transcode_size"
@@ -73,6 +75,20 @@ func GetDatabasePath() string {
 	return viper.GetString(Database)
 }
 
+func GetDefaultScrapersPath() string {
+	// default to the same directory as the config file
+	configFileUsed := viper.ConfigFileUsed()
+	configDir := filepath.Dir(configFileUsed)
+
+	fn := filepath.Join(configDir, "scrapers")
+
+	return fn
+}
+
+func GetScrapersPath() string {
+	return viper.GetString(ScrapersPath)
+}
+
 func GetHost() string {
 	return viper.GetString(Host)
 }
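GetDefaultScrapersPath derives the default from the directory containing the config file in use. A standalone sketch of the same resolution logic (the helper name and paths here are invented for illustration):

package main

import (
	"fmt"
	"path/filepath"
)

// defaultScrapersPath mirrors GetDefaultScrapersPath above: take the
// directory of the config file and append a "scrapers" subdirectory.
func defaultScrapersPath(configFileUsed string) string {
	configDir := filepath.Dir(configFileUsed)
	return filepath.Join(configDir, "scrapers")
}

func main() {
	// hypothetical config file location
	fmt.Println(defaultScrapersPath("/home/user/.stash/config.yml"))
	// prints: /home/user/.stash/scrapers
}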
@@ -71,6 +71,9 @@ func initConfig() {
 	// Set generated to the metadata path for backwards compat
 	viper.SetDefault(config.Generated, viper.GetString(config.Metadata))
 
+	// Set default scrapers path
+	viper.SetDefault(config.ScrapersPath, config.GetDefaultScrapersPath())
+
 	// Disabling config watching due to race condition issue
 	// See: https://github.com/spf13/viper/issues/174
 	// Changes to the config outside the system will require a restart
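viper.SetDefault has the lowest precedence in viper, so the generated default only applies while scrapers_path is absent from the config file; an explicit setting still wins. A minimal sketch (the key name is from the diff; both path values are made up):

package main

import (
	"fmt"

	"github.com/spf13/viper"
)

func main() {
	// the default applies only while the key is otherwise unset
	viper.SetDefault("scrapers_path", "/home/user/.stash/scrapers")
	fmt.Println(viper.GetString("scrapers_path")) // the default

	// an explicit setting (e.g. read from config.yml) takes precedence
	viper.Set("scrapers_path", "/srv/stash/scrapers")
	fmt.Println(viper.GetString("scrapers_path")) // the override
}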
pkg/scraper/freeones.go
@@ -2,17 +2,38 @@ package scraper
 
 import (
 	"fmt"
-	"github.com/PuerkitoBio/goquery"
-	"github.com/stashapp/stash/pkg/logger"
-	"github.com/stashapp/stash/pkg/models"
 	"net/http"
 	"net/url"
 	"regexp"
 	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/stashapp/stash/pkg/logger"
+	"github.com/stashapp/stash/pkg/models"
 )
 
-func GetPerformerNames(q string) ([]string, error) {
+const freeonesScraperID = "builtin_freeones"
+const freeonesName = "Freeones"
+
+var freeonesURLs = []string{
+	"freeones.com",
+}
+
+func GetFreeonesScraper() scraperConfig {
+	return scraperConfig{
+		ID:     freeonesScraperID,
+		Name:   "Freeones",
+		Type:   models.ScraperTypePerformer,
+		Method: ScraperMethodBuiltin,
+		URLs:   freeonesURLs,
+
+		scrapePerformerNamesFunc: GetPerformerNames,
+		scrapePerformerFunc:      GetPerformer,
+		scrapePerformerURLFunc:   GetPerformerURL,
+	}
+}
+
+func GetPerformerNames(c scraperConfig, q string) ([]*models.ScrapedPerformer, error) {
 	// Request the HTML page.
 	queryURL := "https://www.freeones.com/suggestions.php?q=" + url.PathEscape(q) + "&t=1"
 	res, err := http.Get(queryURL)
@@ -31,65 +52,42 @@ func GetPerformerNames(q string) ([]string, error) {
 	}
 
 	// Find the performers
-	var performerNames []string
+	var performers []*models.ScrapedPerformer
 	doc.Find(".suggestion").Each(func(i int, s *goquery.Selection) {
 		name := strings.Trim(s.Text(), " ")
-		performerNames = append(performerNames, name)
+		p := models.ScrapedPerformer{
+			Name: &name,
+		}
+		performers = append(performers, &p)
 	})
 
-	return performerNames, nil
+	return performers, nil
 }
 
-func GetPerformer(performerName string) (*models.ScrapedPerformer, error) {
-	queryURL := "https://www.freeones.com/search/?t=1&q=" + url.PathEscape(performerName) + "&view=thumbs"
-	res, err := http.Get(queryURL)
-	if err != nil {
-		return nil, err
-	}
-	defer res.Body.Close()
-	if res.StatusCode != 200 {
-		return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
+func GetPerformerURL(c scraperConfig, href string) (*models.ScrapedPerformer, error) {
+	// if we're already in the bio page, just scrape it
+	if regexp.MustCompile(`\/bio_.*\.php$`).MatchString(href) {
+		return getPerformerBio(c, href)
 	}
 
-	// Load the HTML document
-	doc, err := goquery.NewDocumentFromReader(res.Body)
-	if err != nil {
-		return nil, err
+	// otherwise try to get the bio page from the url
+	profileRE := regexp.MustCompile(`_links\/(.*?)\/$`)
+	if profileRE.MatchString(href) {
+		href = profileRE.ReplaceAllString(href, "_links/bio_$1.php")
+		return getPerformerBio(c, href)
 	}
 
-	performerLink := doc.Find("div.Block3 a").FilterFunction(func(i int, s *goquery.Selection) bool {
-		href, _ := s.Attr("href")
-		if href == "/html/j_links/Jenna_Leigh_c/" || href == "/html/a_links/Alexa_Grace_c/" {
-			return false
-		}
-		if strings.ToLower(s.Text()) == strings.ToLower(performerName) {
-			return true
-		}
-		alias := s.ParentsFiltered(".babeNameBlock").Find(".babeAlias").First();
-		if strings.Contains( strings.ToLower(alias.Text()), strings.ToLower(performerName) ) {
-			return true
-		}
-		return false
-	})
-
-	href, _ := performerLink.Attr("href")
-	href = strings.TrimSuffix(href, "/")
-	regex := regexp.MustCompile(`.+_links\/(.+)`)
-	matches := regex.FindStringSubmatch(href)
-	if len(matches) < 2 {
-		return nil, fmt.Errorf("No matches found in %s",href)
-	}
-
-	href = strings.Replace(href, matches[1], "bio_"+matches[1]+".php", -1)
-	href = "https://www.freeones.com" + href
+	return nil, nil
+}
 
+func getPerformerBio(c scraperConfig, href string) (*models.ScrapedPerformer, error) {
 	bioRes, err := http.Get(href)
 	if err != nil {
 		return nil, err
 	}
 	defer bioRes.Body.Close()
-	if res.StatusCode != 200 {
-		return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
+	if bioRes.StatusCode != 200 {
+		return nil, fmt.Errorf("status code error: %d %s", bioRes.StatusCode, bioRes.Status)
 	}
 
 	// Load the HTML document
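The two regexes in GetPerformerURL decide whether a link is already a bio page or a profile page whose URL must be rewritten to the bio form. A quick standalone sketch of that decision on sample inputs (the URLs are illustrative, not real pages):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	bioRE := regexp.MustCompile(`\/bio_.*\.php$`)
	profileRE := regexp.MustCompile(`_links\/(.*?)\/$`)

	urls := []string{
		"https://www.freeones.com/html/j_links/bio_Jane_Doe_c.php", // already a bio page
		"https://www.freeones.com/html/j_links/Jane_Doe_c/",        // profile page
	}

	for _, u := range urls {
		switch {
		case bioRE.MatchString(u):
			fmt.Println("scrape directly:", u)
		case profileRE.MatchString(u):
			// rewrites .../j_links/Jane_Doe_c/ to .../j_links/bio_Jane_Doe_c.php
			fmt.Println("rewritten:", profileRE.ReplaceAllString(u, "_links/bio_$1.php"))
		default:
			fmt.Println("unsupported:", u)
		}
	}
}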
@@ -175,6 +173,57 @@ func GetPerformer(performerName string) (*models.ScrapedPerformer, error) {
 	return &result, nil
 }
 
+func GetPerformer(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	if scrapedPerformer.Name == nil {
+		return nil, nil
+	}
+
+	performerName := *scrapedPerformer.Name
+	queryURL := "https://www.freeones.com/search/?t=1&q=" + url.PathEscape(performerName) + "&view=thumbs"
+	res, err := http.Get(queryURL)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+	if res.StatusCode != 200 {
+		return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status)
+	}
+
+	// Load the HTML document
+	doc, err := goquery.NewDocumentFromReader(res.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	performerLink := doc.Find("div.Block3 a").FilterFunction(func(i int, s *goquery.Selection) bool {
+		href, _ := s.Attr("href")
+		if href == "/html/j_links/Jenna_Leigh_c/" || href == "/html/a_links/Alexa_Grace_c/" {
+			return false
+		}
+		if strings.ToLower(s.Text()) == strings.ToLower(performerName) {
+			return true
+		}
+		alias := s.ParentsFiltered(".babeNameBlock").Find(".babeAlias").First()
+		if strings.Contains(strings.ToLower(alias.Text()), strings.ToLower(performerName)) {
+			return true
+		}
+		return false
+	})
+
+	href, _ := performerLink.Attr("href")
+	href = strings.TrimSuffix(href, "/")
+	regex := regexp.MustCompile(`.+_links\/(.+)`)
+	matches := regex.FindStringSubmatch(href)
+	if len(matches) < 2 {
+		return nil, fmt.Errorf("No matches found in %s", href)
+	}
+
+	href = strings.Replace(href, matches[1], "bio_"+matches[1]+".php", -1)
+	href = "https://www.freeones.com" + href
+
+	return getPerformerBio(c, href)
+}
+
 func getIndexes(doc *goquery.Document) map[string]int {
 	var indexes = make(map[string]int)
 	doc.Find(".paramname").Each(func(i int, s *goquery.Selection) {
@@ -236,7 +285,7 @@ func paramValue(params *goquery.Selection, paramIndex int) string {
 		return content
 	}
 	node = node.NextSibling
-	if (node == nil) {
+	if node == nil {
 		return ""
 	}
 	return trim(node.FirstChild.Data)
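GetFreeonesScraper above illustrates the builtin pattern: a scraperConfig whose unexported function pointers are bound directly to Go implementations, whereas script scrapers get theirs assigned in postDecode. As a hedged sketch, not part of this commit, a second builtin would follow the same shape (every "example" name below is invented):

package scraper // hypothetical extra file alongside scrapers.go

import (
	"github.com/stashapp/stash/pkg/models"
)

const exampleScraperID = "builtin_example" // hypothetical ID

var exampleURLs = []string{
	"example.com", // hypothetical domain matched by findPerformerScraperURL
}

// stub implementations; a real builtin would do its scraping here
func exampleGetPerformerNames(c scraperConfig, q string) ([]*models.ScrapedPerformer, error) {
	return nil, nil
}

func exampleGetPerformer(c scraperConfig, in models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
	return nil, nil
}

func exampleGetPerformerURL(c scraperConfig, url string) (*models.ScrapedPerformer, error) {
	return nil, nil
}

func GetExampleScraper() scraperConfig {
	return scraperConfig{
		ID:     exampleScraperID,
		Name:   "Example",
		Type:   models.ScraperTypePerformer,
		Method: ScraperMethodBuiltin,
		URLs:   exampleURLs,

		// bound straight to Go funcs, unlike script scrapers
		scrapePerformerNamesFunc: exampleGetPerformerNames,
		scrapePerformerFunc:      exampleGetPerformer,
		scrapePerformerURLFunc:   exampleGetPerformerURL,
	}
}

loadScrapers in scrapers.go below would also need a matching append alongside GetFreeonesScraper for such a builtin to be discoverable.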
pkg/scraper/scrapers.go (new file, 318 lines)
@@ -0,0 +1,318 @@
+package scraper
+
+import (
+	"encoding/json"
+	"errors"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"github.com/stashapp/stash/pkg/logger"
+	"github.com/stashapp/stash/pkg/manager/config"
+	"github.com/stashapp/stash/pkg/models"
+)
+
+type ScraperMethod string
+
+const (
+	ScraperMethodScript  ScraperMethod = "SCRIPT"
+	ScraperMethodBuiltin ScraperMethod = "BUILTIN"
+)
+
+var AllScraperMethod = []ScraperMethod{
+	ScraperMethodScript,
+}
+
+func (e ScraperMethod) IsValid() bool {
+	switch e {
+	case ScraperMethodScript:
+		return true
+	}
+	return false
+}
+
+type scraperConfig struct {
+	ID                string             `json:"id"`
+	Name              string             `json:"name"`
+	Type              models.ScraperType `json:"type"`
+	Method            ScraperMethod      `json:"method"`
+	URLs              []string           `json:"urls"`
+	GetPerformerNames []string           `json:"get_performer_names"`
+	GetPerformer      []string           `json:"get_performer"`
+	GetPerformerURL   []string           `json:"get_performer_url"`
+
+	scrapePerformerNamesFunc func(c scraperConfig, name string) ([]*models.ScrapedPerformer, error)
+	scrapePerformerFunc      func(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
+	scrapePerformerURLFunc   func(c scraperConfig, url string) (*models.ScrapedPerformer, error)
+}
+
+func (c scraperConfig) toScraper() *models.Scraper {
+	ret := models.Scraper{
+		ID:   c.ID,
+		Name: c.Name,
+		Type: c.Type,
+		Urls: c.URLs,
+	}
+
+	// determine supported actions
+	if len(c.URLs) > 0 {
+		ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeURL)
+	}
+
+	if c.scrapePerformerNamesFunc != nil && c.scrapePerformerFunc != nil {
+		ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeQuery)
+	}
+
+	return &ret
+}
+
+func (c *scraperConfig) postDecode() {
+	if c.Method == ScraperMethodScript {
+		// only set scrape performer names/performer if the applicable field is set
+		if len(c.GetPerformer) > 0 && len(c.GetPerformerNames) > 0 {
+			c.scrapePerformerNamesFunc = scrapePerformerNamesScript
+			c.scrapePerformerFunc = scrapePerformerScript
+		}
+		c.scrapePerformerURLFunc = scrapePerformerURLScript
+	}
+}
+
+func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
+	return c.scrapePerformerNamesFunc(c, name)
+}
+
+func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	return c.scrapePerformerFunc(c, scrapedPerformer)
+}
+
+func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
+	return c.scrapePerformerURLFunc(c, url)
+}
+
+func runScraperScript(command []string, inString string, out interface{}) error {
+	cmd := exec.Command(command[0], command[1:]...)
+	cmd.Dir = config.GetScrapersPath()
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return err
+	}
+
+	go func() {
+		defer stdin.Close()
+
+		io.WriteString(stdin, inString)
+	}()
+
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		logger.Error("Scraper stderr not available: " + err.Error())
+	}
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		logger.Error("Scraper stdout not available: " + err.Error())
+	}
+
+	if err = cmd.Start(); err != nil {
+		return errors.New("Error running scraper script")
+	}
+
+	// TODO - add a timeout here
+	decodeErr := json.NewDecoder(stdout).Decode(out)
+
+	stderrData, _ := ioutil.ReadAll(stderr)
+	stderrString := string(stderrData)
+
+	err = cmd.Wait()
+
+	if err != nil {
+		// error message should be in the stderr stream
+		logger.Errorf("scraper error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderrString)
+		return errors.New("Error running scraper script")
+	}
+
+	if decodeErr != nil {
+		logger.Errorf("error decoding performer from scraper data: %s", decodeErr.Error())
+		return errors.New("Error decoding performer from scraper script")
+	}
+
+	return nil
+}
+
+func scrapePerformerNamesScript(c scraperConfig, name string) ([]*models.ScrapedPerformer, error) {
+	inString := `{"name": "` + name + `"}`
+
+	var performers []models.ScrapedPerformer
+
+	err := runScraperScript(c.GetPerformerNames, inString, &performers)
+
+	// convert to pointers
+	var ret []*models.ScrapedPerformer
+	if err == nil {
+		for i := 0; i < len(performers); i++ {
+			ret = append(ret, &performers[i])
+		}
+	}
+
+	return ret, err
+}
+
+func scrapePerformerScript(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	inString, err := json.Marshal(scrapedPerformer)
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret models.ScrapedPerformer
+
+	err = runScraperScript(c.GetPerformer, string(inString), &ret)
+
+	return &ret, err
+}
+
+func scrapePerformerURLScript(c scraperConfig, url string) (*models.ScrapedPerformer, error) {
+	inString := `{"url": "` + url + `"}`
+
+	var ret models.ScrapedPerformer
+
+	err := runScraperScript(c.GetPerformerURL, inString, &ret)
+
+	return &ret, err
+}
+
+var scrapers []scraperConfig
+
+func loadScraper(path string) (*scraperConfig, error) {
+	var scraper scraperConfig
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+	jsonParser := json.NewDecoder(file)
+	err = jsonParser.Decode(&scraper)
+	if err != nil {
+		return nil, err
+	}
+
+	// set id to the filename
+	id := filepath.Base(path)
+	id = id[:strings.LastIndex(id, ".")]
+	scraper.ID = id
+	scraper.postDecode()
+
+	return &scraper, nil
+}
+
+func loadScrapers() ([]scraperConfig, error) {
+	if scrapers != nil {
+		return scrapers, nil
+	}
+
+	path := config.GetScrapersPath()
+	scrapers = make([]scraperConfig, 0)
+
+	logger.Debugf("Reading scraper configs from %s", path)
+	scraperFiles, err := filepath.Glob(filepath.Join(path, "*.json"))
+
+	if err != nil {
+		logger.Errorf("Error reading scraper configs: %s", err.Error())
+		return nil, err
+	}
+
+	// add built-in freeones scraper
+	scrapers = append(scrapers, GetFreeonesScraper())
+
+	for _, file := range scraperFiles {
+		scraper, err := loadScraper(file)
+		if err != nil {
+			logger.Errorf("Error loading scraper %s: %s", file, err.Error())
+		} else {
+			scrapers = append(scrapers, *scraper)
+		}
+	}
+
+	return scrapers, nil
+}
+
+func ListScrapers(scraperType models.ScraperType) ([]*models.Scraper, error) {
+	// read scraper config files from the directory and cache
+	scrapers, err := loadScrapers()
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret []*models.Scraper
+	for _, s := range scrapers {
+		// filter on type
+		if s.Type == scraperType {
+			ret = append(ret, s.toScraper())
+		}
+	}
+
+	return ret, nil
+}
+
+func findPerformerScraper(scraperID string) *scraperConfig {
+	// read scraper config files from the directory and cache
+	loadScrapers()
+
+	for _, s := range scrapers {
+		if s.ID == scraperID {
+			return &s
+		}
+	}
+
+	return nil
+}
+
+func findPerformerScraperURL(url string) *scraperConfig {
+	// read scraper config files from the directory and cache
+	loadScrapers()
+
+	for _, s := range scrapers {
+		for _, thisURL := range s.URLs {
+			if strings.Contains(url, thisURL) {
+				return &s
+			}
+		}
+	}
+
+	return nil
+}
+
+func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
+	// find scraper with the provided id
+	s := findPerformerScraper(scraperID)
+	if s != nil {
+		return s.ScrapePerformerNames(query)
+	}
+
+	return nil, errors.New("Scraper with ID " + scraperID + " not found")
+}
+
+func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	// find scraper with the provided id
+	s := findPerformerScraper(scraperID)
+	if s != nil {
+		return s.ScrapePerformer(scrapedPerformer)
+	}
+
+	return nil, errors.New("Scraper with ID " + scraperID + " not found")
+}
+
+func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
+	// find scraper that matches the url given
+	s := findPerformerScraperURL(url)
+	if s != nil {
+		return s.ScrapePerformerURL(url)
+	}
+
+	return nil, nil
+}
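The struct tags on scraperConfig define the on-disk JSON format for script scrapers, and loadScraper overrides the id with the config's filename rather than reading it from JSON. A hedged sketch decoding a made-up config file (the JSON keys come from the struct tags; the script command, file name, and the "PERFORMER" type string are assumptions):

package main

import (
	"encoding/json"
	"fmt"
)

// mirrors the JSON-visible fields of scraperConfig above
type scraperFile struct {
	Name              string   `json:"name"`
	Type              string   `json:"type"`
	Method            string   `json:"method"`
	URLs              []string `json:"urls"`
	GetPerformerNames []string `json:"get_performer_names"`
	GetPerformer      []string `json:"get_performer"`
	GetPerformerURL   []string `json:"get_performer_url"`
}

func main() {
	// hypothetical contents of <scrapers_path>/myscraper.json;
	// the ID becomes "myscraper" from the filename, not from the JSON
	data := []byte(`{
		"name": "My Scraper",
		"type": "PERFORMER",
		"method": "SCRIPT",
		"urls": ["example.com"],
		"get_performer_names": ["python", "scrape.py", "names"],
		"get_performer": ["python", "scrape.py", "performer"],
		"get_performer_url": ["python", "scrape.py", "url"]
	}`)

	var cfg scraperFile
	if err := json.Unmarshal(data, &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", cfg)
}

Each get_* array is passed to exec.Command verbatim, with the working directory set to the scrapers path, so the first element is the executable and the rest are its arguments.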
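On the other side of the pipe, a script scraper reads a single JSON object from stdin and writes a single JSON result to stdout; whatever it prints to stderr is logged by runScraperScript if the process exits non-zero. A minimal hedged sketch of such a script in Go (the output fields loosely follow ScrapedPerformer; only "name" is confirmed by this diff, and the URL value is invented):

// a hypothetical get_performer script: reads {"name": "..."} on stdin,
// writes a scraped-performer JSON object on stdout
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

type input struct {
	Name *string `json:"name"`
}

type output struct {
	Name *string `json:"name"`
	URL  *string `json:"url"` // illustrative extra field
}

func main() {
	var in input
	if err := json.NewDecoder(os.Stdin).Decode(&in); err != nil {
		// error text goes to stderr; runScraperScript logs it on failure
		fmt.Fprintln(os.Stderr, "bad input:", err)
		os.Exit(1)
	}

	// a real script would look the performer up somewhere; here we echo
	url := "https://example.com/performer" // invented
	out := output{Name: in.Name, URL: &url}

	if err := json.NewEncoder(os.Stdout).Encode(out); err != nil {
		fmt.Fprintln(os.Stderr, "encode:", err)
		os.Exit(1)
	}
}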