Mirror of https://github.com/stashapp/stash.git (synced 2025-12-17 04:14:39 +03:00)
Change scraper config to yaml (#256)
go.mod (+1 line)

@@ -21,6 +21,7 @@ require (
 	github.com/vektah/gqlparser v1.1.2
 	golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4
 	golang.org/x/image v0.0.0-20190118043309-183bebdce1b2 // indirect
+	gopkg.in/yaml.v2 v2.2.2
 )
 
 replace git.apache.org/thrift.git => github.com/apache/thrift v0.0.0-20180902110319-2566ecd5d999
@@ -1,13 +1,25 @@
-query ListScrapers($scraper_type: ScraperType!) {
-  listScrapers(scraper_type: $scraper_type) {
+query ListPerformerScrapers {
+  listPerformerScrapers {
     id
     name
-    type
-    urls
-    supported_scrapes
+    performer {
+      urls
+      supported_scrapes
+    }
   }
 }
+
+# query ListSceneScrapers {
+#   listSceneScrapers {
+#     id
+#     name
+#     scene {
+#       urls
+#       supported_scrapes
+#     }
+#   }
+# }
 
 query ScrapePerformerList($scraper_id: ID!, $query: String!) {
   scrapePerformerList(scraper_id: $scraper_id, query: $query) {
     name
@@ -46,7 +46,8 @@ type Query {
   # Scrapers
 
   """List available scrapers"""
-  listScrapers(scraper_type: ScraperType!): [Scraper!]!
+  listPerformerScrapers: [Scraper!]!
+  #listSceneScrapers: [Scraper!]!
   """Scrape a list of performers based on name"""
   scrapePerformerList(scraper_id: ID!, query: String!): [ScrapedPerformer!]!
   """Scrapes a complete performer record based on a scrapePerformerList result"""
@@ -1,16 +1,21 @@
-enum ScraperType {
-  PERFORMER
+enum ScrapeType {
+  NAME
+  FRAGMENT
+  URL
 }
 
-enum ScrapeType {
-  QUERY
-  URL
+type ScraperSpec {
+  """URLs matching these can be scraped with"""
+  urls: [String!]
+  supported_scrapes: [ScrapeType!]!
 }
 
 type Scraper {
   id: ID!
   name: String!
-  type: ScraperType!
-  urls: [String!]
-  supported_scrapes: [ScrapeType!]!
+  """Details for performer scraper"""
+  performer: ScraperSpec
+  # TODO
+  # """Details for scene scraper"""
+  # scene: ScraperSpec
 }
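Under the new schema, scrapers are no longer filtered by a ScraperType argument; each Scraper instead exposes a per-entity ScraperSpec. For reference, a listPerformerScrapers response might therefore look like the following (the shape follows the types above; all values are hypothetical):

{
  "listPerformerScrapers": [
    {
      "id": "example",
      "name": "Example",
      "performer": {
        "urls": ["example.com"],
        "supported_scrapes": ["NAME", "FRAGMENT", "URL"]
      }
    }
  ]
}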
@@ -32,8 +32,8 @@ func (r *queryResolver) ScrapeFreeonesPerformerList(ctx context.Context, query s
 	return ret, nil
 }
 
-func (r *queryResolver) ListScrapers(ctx context.Context, scraperType models.ScraperType) ([]*models.Scraper, error) {
-	return scraper.ListScrapers(scraperType)
+func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Scraper, error) {
+	return scraper.ListPerformerScrapers()
 }
 
 func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
pkg/scraper/config.go (new file, 204 lines)

@@ -0,0 +1,204 @@
+package scraper
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+
+	"gopkg.in/yaml.v2"
+
+	"github.com/stashapp/stash/pkg/models"
+)
+
+type scraperAction string
+
+const (
+	scraperActionScript scraperAction = "script"
+)
+
+var allScraperAction = []scraperAction{
+	scraperActionScript,
+}
+
+func (e scraperAction) IsValid() bool {
+	switch e {
+	case scraperActionScript:
+		return true
+	}
+	return false
+}
+
+type scraperTypeConfig struct {
+	Action scraperAction `yaml:"action"`
+	Script []string      `yaml:"script,flow"`
+}
+
+type scrapePerformerNamesFunc func(c scraperTypeConfig, name string) ([]*models.ScrapedPerformer, error)
+
+type performerByNameConfig struct {
+	scraperTypeConfig `yaml:",inline"`
+	performScrape     scrapePerformerNamesFunc
+}
+
+func (c *performerByNameConfig) resolveFn() {
+	if c.Action == scraperActionScript {
+		c.performScrape = scrapePerformerNamesScript
+	}
+}
+
+type scrapePerformerFragmentFunc func(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
+
+type performerByFragmentConfig struct {
+	scraperTypeConfig `yaml:",inline"`
+	performScrape     scrapePerformerFragmentFunc
+}
+
+func (c *performerByFragmentConfig) resolveFn() {
+	if c.Action == scraperActionScript {
+		c.performScrape = scrapePerformerFragmentScript
+	}
+}
+
+type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)
+
+type scraperByURLConfig struct {
+	scraperTypeConfig `yaml:",inline"`
+	URL               []string `yaml:"url,flow"`
+	performScrape     scrapePerformerByURLFunc
+}
+
+func (c *scraperByURLConfig) resolveFn() {
+	if c.Action == scraperActionScript {
+		c.performScrape = scrapePerformerURLScript
+	}
+}
+
+func (s scraperByURLConfig) matchesURL(url string) bool {
+	for _, thisURL := range s.URL {
+		if strings.Contains(url, thisURL) {
+			return true
+		}
+	}
+
+	return false
+}
+
+type scraperConfig struct {
+	ID                  string
+	Name                string                     `yaml:"name"`
+	PerformerByName     *performerByNameConfig     `yaml:"performerByName"`
+	PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
+	PerformerByURL      []*scraperByURLConfig      `yaml:"performerByURL"`
+}
+
+func loadScraperFromYAML(path string) (*scraperConfig, error) {
+	ret := &scraperConfig{}
+
+	file, err := os.Open(path)
+	defer file.Close()
+	if err != nil {
+		return nil, err
+	}
+	parser := yaml.NewDecoder(file)
+	parser.SetStrict(true)
+	err = parser.Decode(&ret)
+	if err != nil {
+		return nil, err
+	}
+
+	// set id to the filename
+	id := filepath.Base(path)
+	id = id[:strings.LastIndex(id, ".")]
+	ret.ID = id
+
+	// set the scraper interface
+	ret.initialiseConfigs()
+
+	return ret, nil
+}
+
+func (c *scraperConfig) initialiseConfigs() {
+	if c.PerformerByName != nil {
+		c.PerformerByName.resolveFn()
+	}
+	if c.PerformerByFragment != nil {
+		c.PerformerByFragment.resolveFn()
+	}
+	for _, s := range c.PerformerByURL {
+		s.resolveFn()
+	}
+}
+
+func (c scraperConfig) toScraper() *models.Scraper {
+	ret := models.Scraper{
+		ID:   c.ID,
+		Name: c.Name,
+	}
+
+	performer := models.ScraperSpec{}
+	if c.PerformerByName != nil {
+		performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName)
+	}
+	if c.PerformerByFragment != nil {
+		performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment)
+	}
+	if len(c.PerformerByURL) > 0 {
+		performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL)
+		for _, v := range c.PerformerByURL {
+			performer.Urls = append(performer.Urls, v.URL...)
+		}
+	}
+
+	if len(performer.SupportedScrapes) > 0 {
+		ret.Performer = &performer
+	}
+
+	return &ret
+}
+
+func (c scraperConfig) supportsPerformers() bool {
+	return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
+}
+
+func (c scraperConfig) matchesPerformerURL(url string) bool {
+	for _, scraper := range c.PerformerByURL {
+		if scraper.matchesURL(url) {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
+	if c.PerformerByName != nil && c.PerformerByName.performScrape != nil {
+		return c.PerformerByName.performScrape(c.PerformerByName.scraperTypeConfig, name)
+	}
+
+	return nil, nil
+}
+
+func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	if c.PerformerByFragment != nil && c.PerformerByFragment.performScrape != nil {
+		return c.PerformerByFragment.performScrape(c.PerformerByFragment.scraperTypeConfig, scrapedPerformer)
+	}
+
+	return nil, nil
+}
+
+func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
+	for _, scraper := range c.PerformerByURL {
+		if scraper.matchesURL(url) && scraper.performScrape != nil {
+			ret, err := scraper.performScrape(scraper.scraperTypeConfig, url)
+			if err != nil {
+				return nil, err
+			}
+
+			if ret != nil {
+				return ret, nil
+			}
+		}
+	}
+
+	return nil, nil
+}
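For illustration, a scraper config in the new YAML format might look like the following sketch. The field names come from the yaml tags in config.go above; the file name and script values are hypothetical placeholders. The scraper ID is derived from the file name (example.yml becomes "example"), and because the decoder runs in strict mode, unknown keys are rejected:

# example.yml (hypothetical)
name: Example
performerByName:
  action: script
  script: [python, example.py, query]
performerByFragment:
  action: script
  script: [python, example.py, fragment]
performerByURL:
  - action: script
    url: [example.com]
    script: [python, example.py, url]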
@@ -22,18 +22,24 @@ var freeonesURLs = []string{
 
 func GetFreeonesScraper() scraperConfig {
 	return scraperConfig{
-		ID:                       freeonesScraperID,
-		Name:                     "Freeones",
-		Type:                     models.ScraperTypePerformer,
-		Method:                   ScraperMethodBuiltin,
-		URLs:                     freeonesURLs,
-		scrapePerformerNamesFunc: GetPerformerNames,
-		scrapePerformerFunc:      GetPerformer,
-		scrapePerformerURLFunc:   GetPerformerURL,
+		ID:   freeonesScraperID,
+		Name: "Freeones",
+		PerformerByName: &performerByNameConfig{
+			performScrape: GetPerformerNames,
+		},
+		PerformerByFragment: &performerByFragmentConfig{
+			performScrape: GetPerformer,
+		},
+		PerformerByURL: []*scraperByURLConfig{
+			&scraperByURLConfig{
+				performScrape: GetPerformerURL,
+				URL:           freeonesURLs,
+			},
+		},
 	}
 }
 
-func GetPerformerNames(c scraperConfig, q string) ([]*models.ScrapedPerformer, error) {
+func GetPerformerNames(c scraperTypeConfig, q string) ([]*models.ScrapedPerformer, error) {
 	// Request the HTML page.
 	queryURL := "https://www.freeones.com/suggestions.php?q=" + url.PathEscape(q) + "&t=1"
 	res, err := http.Get(queryURL)
@@ -64,7 +70,7 @@ func GetPerformerNames(c scraperConfig, q string) ([]*models.ScrapedPerformer, e
 	return performers, nil
 }
 
-func GetPerformerURL(c scraperConfig, href string) (*models.ScrapedPerformer, error) {
+func GetPerformerURL(c scraperTypeConfig, href string) (*models.ScrapedPerformer, error) {
 	// if we're already in the bio page, just scrape it
 	if regexp.MustCompile(`\/bio_.*\.php$`).MatchString(href) {
 		return getPerformerBio(c, href)
@@ -80,7 +86,7 @@ func GetPerformerURL(c scraperConfig, href string) (*models.ScrapedPerformer, er
 	return nil, nil
 }
 
-func getPerformerBio(c scraperConfig, href string) (*models.ScrapedPerformer, error) {
+func getPerformerBio(c scraperTypeConfig, href string) (*models.ScrapedPerformer, error) {
 	bioRes, err := http.Get(href)
 	if err != nil {
 		return nil, err
@@ -173,7 +179,7 @@ func getPerformerBio(c scraperConfig, href string) (*models.ScrapedPerformer, er
 	return &result, nil
 }
 
-func GetPerformer(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+func GetPerformer(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
 	if scrapedPerformer.Name == nil {
 		return nil, nil
 	}
@@ -1,214 +1,16 @@
 package scraper
 
 import (
-	"encoding/json"
-	"errors"
-	"io"
-	"io/ioutil"
-	"os"
-	"os/exec"
 	"path/filepath"
-	"strings"
 
 	"github.com/stashapp/stash/pkg/logger"
 	"github.com/stashapp/stash/pkg/manager/config"
 	"github.com/stashapp/stash/pkg/models"
 )
 
-type ScraperMethod string
-
-const (
-	ScraperMethodScript  ScraperMethod = "SCRIPT"
-	ScraperMethodBuiltin ScraperMethod = "BUILTIN"
-)
-
-var AllScraperMethod = []ScraperMethod{
-	ScraperMethodScript,
-}
-
-func (e ScraperMethod) IsValid() bool {
-	switch e {
-	case ScraperMethodScript:
-		return true
-	}
-	return false
-}
-
-type scraperConfig struct {
-	ID                string             `json:"id"`
-	Name              string             `json:"name"`
-	Type              models.ScraperType `json:"type"`
-	Method            ScraperMethod      `json:"method"`
-	URLs              []string           `json:"urls"`
-	GetPerformerNames []string           `json:"get_performer_names"`
-	GetPerformer      []string           `json:"get_performer"`
-	GetPerformerURL   []string           `json:"get_performer_url"`
-
-	scrapePerformerNamesFunc func(c scraperConfig, name string) ([]*models.ScrapedPerformer, error)
-	scrapePerformerFunc      func(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
-	scrapePerformerURLFunc   func(c scraperConfig, url string) (*models.ScrapedPerformer, error)
-}
-
-func (c scraperConfig) toScraper() *models.Scraper {
-	ret := models.Scraper{
-		ID:   c.ID,
-		Name: c.Name,
-		Type: c.Type,
-		Urls: c.URLs,
-	}
-
-	// determine supported actions
-	if len(c.URLs) > 0 {
-		ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeURL)
-	}
-
-	if c.scrapePerformerNamesFunc != nil && c.scrapePerformerFunc != nil {
-		ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeQuery)
-	}
-
-	return &ret
-}
-
-func (c *scraperConfig) postDecode() {
-	if c.Method == ScraperMethodScript {
-		// only set scrape performer names/performer if the applicable field is set
-		if len(c.GetPerformer) > 0 && len(c.GetPerformerNames) > 0 {
-			c.scrapePerformerNamesFunc = scrapePerformerNamesScript
-			c.scrapePerformerFunc = scrapePerformerScript
-		}
-		c.scrapePerformerURLFunc = scrapePerformerURLScript
-	}
-}
-
-func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
-	return c.scrapePerformerNamesFunc(c, name)
-}
-
-func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
-	return c.scrapePerformerFunc(c, scrapedPerformer)
-}
-
-func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
-	return c.scrapePerformerURLFunc(c, url)
-}
-
-func runScraperScript(command []string, inString string, out interface{}) error {
-	cmd := exec.Command(command[0], command[1:]...)
-	cmd.Dir = config.GetScrapersPath()
-
-	stdin, err := cmd.StdinPipe()
-	if err != nil {
-		return err
-	}
-
-	go func() {
-		defer stdin.Close()
-
-		io.WriteString(stdin, inString)
-	}()
-
-	stderr, err := cmd.StderrPipe()
-	if err != nil {
-		logger.Error("Scraper stderr not available: " + err.Error())
-	}
-
-	stdout, err := cmd.StdoutPipe()
-	if nil != err {
-		logger.Error("Scraper stdout not available: " + err.Error())
-	}
-
-	if err = cmd.Start(); err != nil {
-		return errors.New("Error running scraper script")
-	}
-
-	// TODO - add a timeout here
-	decodeErr := json.NewDecoder(stdout).Decode(out)
-
-	stderrData, _ := ioutil.ReadAll(stderr)
-	stderrString := string(stderrData)
-
-	err = cmd.Wait()
-
-	if err != nil {
-		// error message should be in the stderr stream
-		logger.Errorf("scraper error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderrString)
-		return errors.New("Error running scraper script")
-	}
-
-	if decodeErr != nil {
-		logger.Errorf("error decoding performer from scraper data: %s", err.Error())
-		return errors.New("Error decoding performer from scraper script")
-	}
-
-	return nil
-}
-
-func scrapePerformerNamesScript(c scraperConfig, name string) ([]*models.ScrapedPerformer, error) {
-	inString := `{"name": "` + name + `"}`
-
-	var performers []models.ScrapedPerformer
-
-	err := runScraperScript(c.GetPerformerNames, inString, &performers)
-
-	// convert to pointers
-	var ret []*models.ScrapedPerformer
-	if err == nil {
-		for i := 0; i < len(performers); i++ {
-			ret = append(ret, &performers[i])
-		}
-	}
-
-	return ret, err
-}
-
-func scrapePerformerScript(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
-	inString, err := json.Marshal(scrapedPerformer)
-
-	if err != nil {
-		return nil, err
-	}
-
-	var ret models.ScrapedPerformer
-
-	err = runScraperScript(c.GetPerformer, string(inString), &ret)
-
-	return &ret, err
-}
-
-func scrapePerformerURLScript(c scraperConfig, url string) (*models.ScrapedPerformer, error) {
-	inString := `{"url": "` + url + `"}`
-
-	var ret models.ScrapedPerformer
-
-	err := runScraperScript(c.GetPerformerURL, string(inString), &ret)
-
-	return &ret, err
-}
-
 var scrapers []scraperConfig
 
-func loadScraper(path string) (*scraperConfig, error) {
-	var scraper scraperConfig
-	file, err := os.Open(path)
-	defer file.Close()
-	if err != nil {
-		return nil, err
-	}
-	jsonParser := json.NewDecoder(file)
-	err = jsonParser.Decode(&scraper)
-	if err != nil {
-		return nil, err
-	}
-
-	// set id to the filename
-	id := filepath.Base(path)
-	id = id[:strings.LastIndex(id, ".")]
-	scraper.ID = id
-	scraper.postDecode()
-
-	return &scraper, nil
-}
-
 func loadScrapers() ([]scraperConfig, error) {
 	if scrapers != nil {
 		return scrapers, nil
@@ -218,7 +20,7 @@ func loadScrapers() ([]scraperConfig, error) {
 	scrapers = make([]scraperConfig, 0)
 
 	logger.Debugf("Reading scraper configs from %s", path)
-	scraperFiles, err := filepath.Glob(filepath.Join(path, "*.json"))
+	scraperFiles, err := filepath.Glob(filepath.Join(path, "*.yml"))
 
 	if err != nil {
 		logger.Errorf("Error reading scraper configs: %s", err.Error())
@@ -229,7 +31,7 @@ func loadScrapers() ([]scraperConfig, error) {
 	scrapers = append(scrapers, GetFreeonesScraper())
 
 	for _, file := range scraperFiles {
-		scraper, err := loadScraper(file)
+		scraper, err := loadScraperFromYAML(file)
 		if err != nil {
 			logger.Errorf("Error loading scraper %s: %s", file, err.Error())
 		} else {
@@ -240,7 +42,7 @@ func loadScrapers() ([]scraperConfig, error) {
 	return scrapers, nil
 }
 
-func ListScrapers(scraperType models.ScraperType) ([]*models.Scraper, error) {
+func ListPerformerScrapers() ([]*models.Scraper, error) {
 	// read scraper config files from the directory and cache
 	scrapers, err := loadScrapers()
 
@@ -251,7 +53,7 @@ func ListScrapers(scraperType models.ScraperType) ([]*models.Scraper, error) {
 	var ret []*models.Scraper
 	for _, s := range scrapers {
-		// filter on type
-		if s.Type == scraperType {
+		if s.supportsPerformers() {
 			ret = append(ret, s.toScraper())
 		}
 	}
@@ -272,21 +74,6 @@ func findPerformerScraper(scraperID string) *scraperConfig {
 	return nil
 }
 
-func findPerformerScraperURL(url string) *scraperConfig {
-	// read scraper config files from the directory and cache
-	loadScrapers()
-
-	for _, s := range scrapers {
-		for _, thisURL := range s.URLs {
-			if strings.Contains(url, thisURL) {
-				return &s
-			}
-		}
-	}
-
-	return nil
-}
-
 func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
 	// find scraper with the provided id
 	s := findPerformerScraper(scraperID)
@@ -308,10 +95,10 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
 }
 
 func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
-	// find scraper that matches the url given
-	s := findPerformerScraperURL(url)
-	if s != nil {
-		return s.ScrapePerformerURL(url)
+	for _, s := range scrapers {
+		if s.matchesPerformerURL(url) {
+			return s.ScrapePerformerURL(url)
+		}
 	}
 
 	return nil, nil
pkg/scraper/script.go (new file, 108 lines)

@@ -0,0 +1,108 @@
+package scraper
+
+import (
+	"encoding/json"
+	"errors"
+	"io"
+	"io/ioutil"
+	"os/exec"
+	"strings"
+
+	"github.com/stashapp/stash/pkg/logger"
+	"github.com/stashapp/stash/pkg/manager/config"
+	"github.com/stashapp/stash/pkg/models"
+)
+
+func runScraperScript(command []string, inString string, out interface{}) error {
+	cmd := exec.Command(command[0], command[1:]...)
+	cmd.Dir = config.GetScrapersPath()
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return err
+	}
+
+	go func() {
+		defer stdin.Close()
+
+		io.WriteString(stdin, inString)
+	}()
+
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		logger.Error("Scraper stderr not available: " + err.Error())
+	}
+
+	stdout, err := cmd.StdoutPipe()
+	if nil != err {
+		logger.Error("Scraper stdout not available: " + err.Error())
+	}
+
+	if err = cmd.Start(); err != nil {
+		logger.Error("Error running scraper script: " + err.Error())
+		return errors.New("Error running scraper script")
+	}
+
+	// TODO - add a timeout here
+	decodeErr := json.NewDecoder(stdout).Decode(out)
+
+	stderrData, _ := ioutil.ReadAll(stderr)
+	stderrString := string(stderrData)
+
+	err = cmd.Wait()
+
+	if err != nil {
+		// error message should be in the stderr stream
+		logger.Errorf("scraper error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderrString)
+		return errors.New("Error running scraper script")
+	}
+
+	if decodeErr != nil {
+		// note: log the decode error itself; err is nil at this point
+		logger.Errorf("error decoding performer from scraper data: %s", decodeErr.Error())
+		return errors.New("Error decoding performer from scraper script")
+	}
+
+	return nil
+}
+
+func scrapePerformerNamesScript(c scraperTypeConfig, name string) ([]*models.ScrapedPerformer, error) {
+	inString := `{"name": "` + name + `"}`
+
+	var performers []models.ScrapedPerformer
+
+	err := runScraperScript(c.Script, inString, &performers)
+
+	// convert to pointers
+	var ret []*models.ScrapedPerformer
+	if err == nil {
+		for i := 0; i < len(performers); i++ {
+			ret = append(ret, &performers[i])
+		}
+	}
+
+	return ret, err
+}
+
+func scrapePerformerFragmentScript(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+	inString, err := json.Marshal(scrapedPerformer)
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret models.ScrapedPerformer
+
+	err = runScraperScript(c.Script, string(inString), &ret)
+
+	return &ret, err
+}
+
+func scrapePerformerURLScript(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
+	inString := `{"url": "` + url + `"}`
+
+	var ret models.ScrapedPerformer
+
+	err := runScraperScript(c.Script, string(inString), &ret)
+
+	return &ret, err
+}
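runScraperScript defines the contract a configured script must honour: it runs with the scrapers path as its working directory, receives a small JSON fragment (such as {"name": "..."} or {"url": "..."}) on stdin, and must write its result as JSON to stdout; stderr is captured for the error log. A minimal, hypothetical scraper script written in Go (any executable would do) could look like this:

// example_scraper.go - hypothetical stand-in for a user-provided script.
// Reads {"name": "..."} from stdin and writes a JSON array of performers
// to stdout, matching what scrapePerformerNamesScript decodes.
package main

import (
	"encoding/json"
	"os"
)

func main() {
	var in struct {
		Name string `json:"name"`
	}
	if err := json.NewDecoder(os.Stdin).Decode(&in); err != nil {
		os.Stderr.WriteString("bad input: " + err.Error()) // surfaced in stash's log
		os.Exit(1)
	}

	// A real script would search a site here; this stub just echoes the query.
	out := []map[string]string{{"name": in.Name}}
	json.NewEncoder(os.Stdout).Encode(out)
}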
@@ -26,8 +26,8 @@ interface IProps {
   onImageChange: (event: React.FormEvent<HTMLInputElement>) => void;
 
   // TODO: only for performers. make generic
-  scrapers?: GQL.ListScrapersListScrapers[];
-  onDisplayScraperDialog?: (scraper: GQL.ListScrapersListScrapers) => void;
+  scrapers?: GQL.ListPerformerScrapersListPerformerScrapers[];
+  onDisplayScraperDialog?: (scraper: GQL.ListPerformerScrapersListPerformerScrapers) => void;
 }
 
 export const DetailsEditNavbar: FunctionComponent<IProps> = (props: IProps) => {
@@ -59,7 +59,7 @@ export const DetailsEditNavbar: FunctionComponent<IProps> = (props: IProps) => {
   return <FileInput text="Choose image..." onInputChange={props.onImageChange} inputProps={{accept: ".jpg,.jpeg"}} />;
 }
 
-function renderScraperMenuItem(scraper : GQL.ListScrapersListScrapers) {
+function renderScraperMenuItem(scraper : GQL.ListPerformerScrapersListPerformerScrapers) {
   return (
     <MenuItem
       text={scraper.name}
@@ -26,7 +26,7 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
 
   // Editing state
   const [isEditing, setIsEditing] = useState<boolean>(isNew);
-  const [isDisplayingScraperDialog, setIsDisplayingScraperDialog] = useState<GQL.ListScrapersListScrapers | undefined>(undefined);
+  const [isDisplayingScraperDialog, setIsDisplayingScraperDialog] = useState<GQL.ListPerformerScrapersListPerformerScrapers | undefined>(undefined);
   const [scrapePerformerDetails, setScrapePerformerDetails] = useState<GQL.ScrapePerformerListScrapePerformerList | undefined>(undefined);
 
   // Editing performer state
@@ -55,8 +55,8 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
   // Network state
   const [isLoading, setIsLoading] = useState(false);
 
-  const Scrapers = StashService.useListScrapers(GQL.ScraperType.Performer);
-  const [queryableScrapers, setQueryableScrapers] = useState<GQL.ListScrapersListScrapers[]>([]);
+  const Scrapers = StashService.useListPerformerScrapers();
+  const [queryableScrapers, setQueryableScrapers] = useState<GQL.ListPerformerScrapersListPerformerScrapers[]>([]);
 
   const { data, error, loading } = StashService.useFindPerformer(props.match.params.id);
   const updatePerformer = StashService.usePerformerUpdate(getPerformerInput() as GQL.PerformerUpdateInput);
@@ -121,11 +121,11 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
   });
 
   useEffect(() => {
-    var newQueryableScrapers : GQL.ListScrapersListScrapers[] = [];
+    var newQueryableScrapers : GQL.ListPerformerScrapersListPerformerScrapers[] = [];
 
-    if (!!Scrapers.data && Scrapers.data.listScrapers) {
-      newQueryableScrapers = Scrapers.data.listScrapers.filter((s) => {
-        return s.supported_scrapes.includes(GQL.ScrapeType.Query);
+    if (!!Scrapers.data && Scrapers.data.listPerformerScrapers) {
+      newQueryableScrapers = Scrapers.data.listPerformerScrapers.filter((s) => {
+        return s.performer && s.performer.supported_scrapes.includes(GQL.ScrapeType.Name);
       });
     }
 
@@ -218,7 +218,7 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
     reader.readAsDataURL(file);
   }
 
-  function onDisplayFreeOnesDialog(scraper: GQL.ListScrapersListScrapers) {
+  function onDisplayFreeOnesDialog(scraper: GQL.ListPerformerScrapersListPerformerScrapers) {
     setIsDisplayingScraperDialog(scraper);
   }
 
@@ -295,8 +295,8 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
   }
 
   function urlScrapable(url: string) : boolean {
-    return !!url && !!Scrapers.data && Scrapers.data.listScrapers && Scrapers.data.listScrapers.some((s) => {
-      return !!s.urls && s.urls.some((u) => { return url.includes(u); });
+    return !!url && !!Scrapers.data && Scrapers.data.listPerformerScrapers && Scrapers.data.listPerformerScrapers.some((s) => {
+      return !!s.performer && !!s.performer.urls && s.performer.urls.some((u) => { return url.includes(u); });
     });
   }
 
@@ -211,12 +211,8 @@ export class StashService {
     return GQL.useSceneMarkerDestroy({ refetchQueries: ["FindScene"] });
   }
 
-  public static useListScrapers(scraperType: GQL.ScraperType) {
-    return GQL.useListScrapers({
-      variables: {
-        scraper_type: scraperType
-      }
-    });
+  public static useListPerformerScrapers() {
+    return GQL.useListPerformerScrapers();
   }
   public static useScrapePerformerList(scraperId: string, q : string) {
     return GQL.useScrapePerformerList({ variables: { scraper_id: scraperId, query: q }});