Mirror of https://github.com/stashapp/stash.git (synced 2025-12-17 20:34:37 +03:00)

Commit: Change scraper config to yaml (#256)
go.mod (1 line changed)

@@ -21,6 +21,7 @@ require (
     github.com/vektah/gqlparser v1.1.2
     golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4
     golang.org/x/image v0.0.0-20190118043309-183bebdce1b2 // indirect
+    gopkg.in/yaml.v2 v2.2.2
 )
 
 replace git.apache.org/thrift.git => github.com/apache/thrift v0.0.0-20180902110319-2566ecd5d999
@@ -1,13 +1,25 @@
-query ListScrapers($scraper_type: ScraperType!) {
-  listScrapers(scraper_type: $scraper_type) {
+query ListPerformerScrapers {
+  listPerformerScrapers {
     id
     name
-    type
-    urls
-    supported_scrapes
+    performer {
+      urls
+      supported_scrapes
+    }
   }
 }
 
+# query ListSceneScrapers {
+#   listSceneScrapers {
+#     id
+#     name
+#     scene {
+#       urls
+#       supported_scrapes
+#     }
+#   }
+# }
+
 query ScrapePerformerList($scraper_id: ID!, $query: String!) {
   scrapePerformerList(scraper_id: $scraper_id, query: $query) {
     name
@@ -46,7 +46,8 @@ type Query {
   # Scrapers
 
   """List available scrapers"""
-  listScrapers(scraper_type: ScraperType!): [Scraper!]!
+  listPerformerScrapers: [Scraper!]!
+  #listSceneScrapers: [Scraper!]!
   """Scrape a list of performers based on name"""
   scrapePerformerList(scraper_id: ID!, query: String!): [ScrapedPerformer!]!
   """Scrapes a complete performer record based on a scrapePerformerList result"""
@@ -1,16 +1,21 @@
-enum ScraperType {
-  PERFORMER
+enum ScrapeType {
+  NAME
+  FRAGMENT
+  URL
 }
 
-enum ScrapeType {
-  QUERY
-  URL
+type ScraperSpec {
+  """URLs matching these can be scraped with"""
+  urls: [String!]
+  supported_scrapes: [ScrapeType!]!
 }
 
 type Scraper {
   id: ID!
   name: String!
-  type: ScraperType!
-  urls: [String!]
-  supported_scrapes: [ScrapeType!]!
+  """Details for performer scraper"""
+  performer: ScraperSpec
+  # TODO
+  # """Details for scene scraper"""
+  # scene: ScraperSpec
 }
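
The Go side consumes these schema types through generated model structs. As a point of reference, here is a minimal hand-written sketch of what those models plausibly look like; it is an assumption, not the generated code: the field names are taken from how pkg/scraper/config.go (below) uses them, and the JSON tags are guessed from the GraphQL field names.

// Sketch only: the real definitions live in the generated pkg/models package.
package models

type ScrapeType string

const (
    ScrapeTypeName     ScrapeType = "NAME"
    ScrapeTypeFragment ScrapeType = "FRAGMENT"
    ScrapeTypeURL      ScrapeType = "URL"
)

// ScraperSpec describes one kind of scraping support (currently performers).
type ScraperSpec struct {
    Urls             []string     `json:"urls"`
    SupportedScrapes []ScrapeType `json:"supported_scrapes"`
}

// Scraper mirrors the GraphQL Scraper type; Performer stays nil when a
// scraper offers no performer support.
type Scraper struct {
    ID        string       `json:"id"`
    Name      string       `json:"name"`
    Performer *ScraperSpec `json:"performer"`
}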
@@ -32,8 +32,8 @@ func (r *queryResolver) ScrapeFreeonesPerformerList(ctx context.Context, query s
     return ret, nil
 }
 
-func (r *queryResolver) ListScrapers(ctx context.Context, scraperType models.ScraperType) ([]*models.Scraper, error) {
-    return scraper.ListScrapers(scraperType)
+func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Scraper, error) {
+    return scraper.ListPerformerScrapers()
 }
 
 func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
pkg/scraper/config.go — new file, 204 lines

package scraper

import (
    "os"
    "path/filepath"
    "strings"

    "gopkg.in/yaml.v2"

    "github.com/stashapp/stash/pkg/models"
)

type scraperAction string

const (
    scraperActionScript scraperAction = "script"
)

var allScraperAction = []scraperAction{
    scraperActionScript,
}

func (e scraperAction) IsValid() bool {
    switch e {
    case scraperActionScript:
        return true
    }
    return false
}

type scraperTypeConfig struct {
    Action scraperAction `yaml:"action"`
    Script []string      `yaml:"script,flow"`
}

type scrapePerformerNamesFunc func(c scraperTypeConfig, name string) ([]*models.ScrapedPerformer, error)

type performerByNameConfig struct {
    scraperTypeConfig `yaml:",inline"`
    performScrape     scrapePerformerNamesFunc
}

func (c *performerByNameConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapePerformerNamesScript
    }
}

type scrapePerformerFragmentFunc func(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)

type performerByFragmentConfig struct {
    scraperTypeConfig `yaml:",inline"`
    performScrape     scrapePerformerFragmentFunc
}

func (c *performerByFragmentConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapePerformerFragmentScript
    }
}

type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)

type scraperByURLConfig struct {
    scraperTypeConfig `yaml:",inline"`
    URL               []string `yaml:"url,flow"`
    performScrape     scrapePerformerByURLFunc
}

func (c *scraperByURLConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapePerformerURLScript
    }
}

func (s scraperByURLConfig) matchesURL(url string) bool {
    for _, thisURL := range s.URL {
        if strings.Contains(url, thisURL) {
            return true
        }
    }

    return false
}

type scraperConfig struct {
    ID                  string
    Name                string                     `yaml:"name"`
    PerformerByName     *performerByNameConfig     `yaml:"performerByName"`
    PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
    PerformerByURL      []*scraperByURLConfig      `yaml:"performerByURL"`
}

func loadScraperFromYAML(path string) (*scraperConfig, error) {
    ret := &scraperConfig{}

    file, err := os.Open(path)
    defer file.Close()
    if err != nil {
        return nil, err
    }
    parser := yaml.NewDecoder(file)
    parser.SetStrict(true)
    err = parser.Decode(&ret)
    if err != nil {
        return nil, err
    }

    // set id to the filename
    id := filepath.Base(path)
    id = id[:strings.LastIndex(id, ".")]
    ret.ID = id

    // set the scraper interface
    ret.initialiseConfigs()

    return ret, nil
}

func (c *scraperConfig) initialiseConfigs() {
    if c.PerformerByName != nil {
        c.PerformerByName.resolveFn()
    }
    if c.PerformerByFragment != nil {
        c.PerformerByFragment.resolveFn()
    }
    for _, s := range c.PerformerByURL {
        s.resolveFn()
    }
}

func (c scraperConfig) toScraper() *models.Scraper {
    ret := models.Scraper{
        ID:   c.ID,
        Name: c.Name,
    }

    performer := models.ScraperSpec{}
    if c.PerformerByName != nil {
        performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName)
    }
    if c.PerformerByFragment != nil {
        performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment)
    }
    if len(c.PerformerByURL) > 0 {
        performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL)
        for _, v := range c.PerformerByURL {
            performer.Urls = append(performer.Urls, v.URL...)
        }
    }

    if len(performer.SupportedScrapes) > 0 {
        ret.Performer = &performer
    }

    return &ret
}

func (c scraperConfig) supportsPerformers() bool {
    return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
}

func (c scraperConfig) matchesPerformerURL(url string) bool {
    for _, scraper := range c.PerformerByURL {
        if scraper.matchesURL(url) {
            return true
        }
    }

    return false
}

func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
    if c.PerformerByName != nil && c.PerformerByName.performScrape != nil {
        return c.PerformerByName.performScrape(c.PerformerByName.scraperTypeConfig, name)
    }

    return nil, nil
}

func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
    if c.PerformerByFragment != nil && c.PerformerByFragment.performScrape != nil {
        return c.PerformerByFragment.performScrape(c.PerformerByFragment.scraperTypeConfig, scrapedPerformer)
    }

    return nil, nil
}

func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
    for _, scraper := range c.PerformerByURL {
        if scraper.matchesURL(url) && scraper.performScrape != nil {
            ret, err := scraper.performScrape(scraper.scraperTypeConfig, url)
            if err != nil {
                return nil, err
            }

            if ret != nil {
                return ret, nil
            }
        }
    }

    return nil, nil
}
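
The diff itself contains no example .yml file, so the following self-contained sketch round-trips a hypothetical config through the same gopkg.in/yaml.v2 strict decoding that loadScraperFromYAML performs. The ExampleSite scraper, its script command lines, and its URL are invented for illustration; only the keys (name, performerByName, performerByFragment, performerByURL, action, script, url) come from the struct tags above.

package main

import (
    "fmt"
    "strings"

    "gopkg.in/yaml.v2"
)

// Trimmed copies of the structs above, just enough to parse the example.
type scraperTypeConfig struct {
    Action string   `yaml:"action"`
    Script []string `yaml:"script,flow"`
}

type scraperByURLConfig struct {
    scraperTypeConfig `yaml:",inline"`
    URL               []string `yaml:"url,flow"`
}

type scraperConfig struct {
    Name                string                `yaml:"name"`
    PerformerByName     *scraperTypeConfig    `yaml:"performerByName"`
    PerformerByFragment *scraperTypeConfig    `yaml:"performerByFragment"`
    PerformerByURL      []*scraperByURLConfig `yaml:"performerByURL"`
}

// Hypothetical scraper definition; script paths and URL are invented.
const exampleYAML = `name: ExampleSite
performerByName:
  action: script
  script: [python, example.py, query]
performerByFragment:
  action: script
  script: [python, example.py, fragment]
performerByURL:
  - action: script
    script: [python, example.py, url]
    url: [example.com]
`

func main() {
    dec := yaml.NewDecoder(strings.NewReader(exampleYAML))
    dec.SetStrict(true) // same strict mode loadScraperFromYAML enables

    var c scraperConfig
    if err := dec.Decode(&c); err != nil {
        panic(err)
    }
    fmt.Printf("%s: name=%v fragment=%v urls=%d\n",
        c.Name, c.PerformerByName != nil, c.PerformerByFragment != nil, len(c.PerformerByURL))
}

loadScraperFromYAML would then derive the scraper ID from the filename, so saving a file like this as example.yml under the scrapers path surfaces it as scraper id "example".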
@@ -22,18 +22,24 @@ var freeonesURLs = []string{
 
 func GetFreeonesScraper() scraperConfig {
     return scraperConfig{
         ID:   freeonesScraperID,
         Name: "Freeones",
-        Type:   models.ScraperTypePerformer,
-        Method: ScraperMethodBuiltin,
-        URLs:   freeonesURLs,
-        scrapePerformerNamesFunc: GetPerformerNames,
-        scrapePerformerFunc:      GetPerformer,
-        scrapePerformerURLFunc:   GetPerformerURL,
+        PerformerByName: &performerByNameConfig{
+            performScrape: GetPerformerNames,
+        },
+        PerformerByFragment: &performerByFragmentConfig{
+            performScrape: GetPerformer,
+        },
+        PerformerByURL: []*scraperByURLConfig{
+            &scraperByURLConfig{
+                performScrape: GetPerformerURL,
+                URL:           freeonesURLs,
+            },
+        },
     }
 }
 
-func GetPerformerNames(c scraperConfig, q string) ([]*models.ScrapedPerformer, error) {
+func GetPerformerNames(c scraperTypeConfig, q string) ([]*models.ScrapedPerformer, error) {
     // Request the HTML page.
     queryURL := "https://www.freeones.com/suggestions.php?q=" + url.PathEscape(q) + "&t=1"
     res, err := http.Get(queryURL)
@@ -64,7 +70,7 @@ func GetPerformerNames(c scraperConfig, q string) ([]*models.ScrapedPerformer, e
     return performers, nil
 }
 
-func GetPerformerURL(c scraperConfig, href string) (*models.ScrapedPerformer, error) {
+func GetPerformerURL(c scraperTypeConfig, href string) (*models.ScrapedPerformer, error) {
     // if we're already in the bio page, just scrape it
     if regexp.MustCompile(`\/bio_.*\.php$`).MatchString(href) {
         return getPerformerBio(c, href)
@@ -80,7 +86,7 @@ func GetPerformerURL(c scraperConfig, href string) (*models.ScrapedPerformer, er
     return nil, nil
 }
 
-func getPerformerBio(c scraperConfig, href string) (*models.ScrapedPerformer, error) {
+func getPerformerBio(c scraperTypeConfig, href string) (*models.ScrapedPerformer, error) {
     bioRes, err := http.Get(href)
     if err != nil {
         return nil, err
@@ -173,7 +179,7 @@ func getPerformerBio(c scraperConfig, href string) (*models.ScrapedPerformer, er
     return &result, nil
 }
 
-func GetPerformer(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
+func GetPerformer(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
     if scrapedPerformer.Name == nil {
         return nil, nil
     }
@@ -1,214 +1,16 @@
 package scraper
 
 import (
-    "encoding/json"
     "errors"
-    "io"
-    "io/ioutil"
-    "os"
-    "os/exec"
     "path/filepath"
-    "strings"
 
     "github.com/stashapp/stash/pkg/logger"
     "github.com/stashapp/stash/pkg/manager/config"
     "github.com/stashapp/stash/pkg/models"
 )
 
-type ScraperMethod string
-
-const (
-    ScraperMethodScript  ScraperMethod = "SCRIPT"
-    ScraperMethodBuiltin ScraperMethod = "BUILTIN"
-)
-
-var AllScraperMethod = []ScraperMethod{
-    ScraperMethodScript,
-}
-
-func (e ScraperMethod) IsValid() bool {
-    switch e {
-    case ScraperMethodScript:
-        return true
-    }
-    return false
-}
-
-type scraperConfig struct {
-    ID                string             `json:"id"`
-    Name              string             `json:"name"`
-    Type              models.ScraperType `json:"type"`
-    Method            ScraperMethod      `json:"method"`
-    URLs              []string           `json:"urls"`
-    GetPerformerNames []string           `json:"get_performer_names"`
-    GetPerformer      []string           `json:"get_performer"`
-    GetPerformerURL   []string           `json:"get_performer_url"`
-
-    scrapePerformerNamesFunc func(c scraperConfig, name string) ([]*models.ScrapedPerformer, error)
-    scrapePerformerFunc      func(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
-    scrapePerformerURLFunc   func(c scraperConfig, url string) (*models.ScrapedPerformer, error)
-}
-
-func (c scraperConfig) toScraper() *models.Scraper {
-    ret := models.Scraper{
-        ID:   c.ID,
-        Name: c.Name,
-        Type: c.Type,
-        Urls: c.URLs,
-    }
-
-    // determine supported actions
-    if len(c.URLs) > 0 {
-        ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeURL)
-    }
-
-    if c.scrapePerformerNamesFunc != nil && c.scrapePerformerFunc != nil {
-        ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeQuery)
-    }
-
-    return &ret
-}
-
-func (c *scraperConfig) postDecode() {
-    if c.Method == ScraperMethodScript {
-        // only set scrape performer names/performer if the applicable field is set
-        if len(c.GetPerformer) > 0 && len(c.GetPerformerNames) > 0 {
-            c.scrapePerformerNamesFunc = scrapePerformerNamesScript
-            c.scrapePerformerFunc = scrapePerformerScript
-        }
-        c.scrapePerformerURLFunc = scrapePerformerURLScript
-    }
-}
-
-func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
-    return c.scrapePerformerNamesFunc(c, name)
-}
-
-func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
-    return c.scrapePerformerFunc(c, scrapedPerformer)
-}
-
-func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
-    return c.scrapePerformerURLFunc(c, url)
-}
-
-func runScraperScript(command []string, inString string, out interface{}) error {
-    cmd := exec.Command(command[0], command[1:]...)
-    cmd.Dir = config.GetScrapersPath()
-
-    stdin, err := cmd.StdinPipe()
-    if err != nil {
-        return err
-    }
-
-    go func() {
-        defer stdin.Close()
-
-        io.WriteString(stdin, inString)
-    }()
-
-    stderr, err := cmd.StderrPipe()
-    if err != nil {
-        logger.Error("Scraper stderr not available: " + err.Error())
-    }
-
-    stdout, err := cmd.StdoutPipe()
-    if nil != err {
-        logger.Error("Scraper stdout not available: " + err.Error())
-    }
-
-    if err = cmd.Start(); err != nil {
-        return errors.New("Error running scraper script")
-    }
-
-    // TODO - add a timeout here
-    decodeErr := json.NewDecoder(stdout).Decode(out)
-
-    stderrData, _ := ioutil.ReadAll(stderr)
-    stderrString := string(stderrData)
-
-    err = cmd.Wait()
-
-    if err != nil {
-        // error message should be in the stderr stream
-        logger.Errorf("scraper error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderrString)
-        return errors.New("Error running scraper script")
-    }
-
-    if decodeErr != nil {
-        logger.Errorf("error decoding performer from scraper data: %s", err.Error())
-        return errors.New("Error decoding performer from scraper script")
-    }
-
-    return nil
-}
-
-func scrapePerformerNamesScript(c scraperConfig, name string) ([]*models.ScrapedPerformer, error) {
-    inString := `{"name": "` + name + `"}`
-
-    var performers []models.ScrapedPerformer
-
-    err := runScraperScript(c.GetPerformerNames, inString, &performers)
-
-    // convert to pointers
-    var ret []*models.ScrapedPerformer
-    if err == nil {
-        for i := 0; i < len(performers); i++ {
-            ret = append(ret, &performers[i])
-        }
-    }
-
-    return ret, err
-}
-
-func scrapePerformerScript(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
-    inString, err := json.Marshal(scrapedPerformer)
-
-    if err != nil {
-        return nil, err
-    }
-
-    var ret models.ScrapedPerformer
-
-    err = runScraperScript(c.GetPerformer, string(inString), &ret)
-
-    return &ret, err
-}
-
-func scrapePerformerURLScript(c scraperConfig, url string) (*models.ScrapedPerformer, error) {
-    inString := `{"url": "` + url + `"}`
-
-    var ret models.ScrapedPerformer
-
-    err := runScraperScript(c.GetPerformerURL, string(inString), &ret)
-
-    return &ret, err
-}
-
 var scrapers []scraperConfig
 
-func loadScraper(path string) (*scraperConfig, error) {
-    var scraper scraperConfig
-    file, err := os.Open(path)
-    defer file.Close()
-    if err != nil {
-        return nil, err
-    }
-    jsonParser := json.NewDecoder(file)
-    err = jsonParser.Decode(&scraper)
-    if err != nil {
-        return nil, err
-    }
-
-    // set id to the filename
-    id := filepath.Base(path)
-    id = id[:strings.LastIndex(id, ".")]
-    scraper.ID = id
-    scraper.postDecode()
-
-    return &scraper, nil
-}
-
 func loadScrapers() ([]scraperConfig, error) {
     if scrapers != nil {
         return scrapers, nil
@@ -218,7 +20,7 @@ func loadScrapers() ([]scraperConfig, error) {
     scrapers = make([]scraperConfig, 0)
 
     logger.Debugf("Reading scraper configs from %s", path)
-    scraperFiles, err := filepath.Glob(filepath.Join(path, "*.json"))
+    scraperFiles, err := filepath.Glob(filepath.Join(path, "*.yml"))
 
     if err != nil {
         logger.Errorf("Error reading scraper configs: %s", err.Error())
@@ -229,7 +31,7 @@ func loadScrapers() ([]scraperConfig, error) {
     scrapers = append(scrapers, GetFreeonesScraper())
 
     for _, file := range scraperFiles {
-        scraper, err := loadScraper(file)
+        scraper, err := loadScraperFromYAML(file)
         if err != nil {
             logger.Errorf("Error loading scraper %s: %s", file, err.Error())
         } else {
@@ -240,7 +42,7 @@ func loadScrapers() ([]scraperConfig, error) {
     return scrapers, nil
 }
 
-func ListScrapers(scraperType models.ScraperType) ([]*models.Scraper, error) {
+func ListPerformerScrapers() ([]*models.Scraper, error) {
     // read scraper config files from the directory and cache
     scrapers, err := loadScrapers()
@@ -251,7 +53,7 @@ func ListScrapers(scraperType models.ScraperType) ([]*models.Scraper, error) {
     var ret []*models.Scraper
     for _, s := range scrapers {
         // filter on type
-        if s.Type == scraperType {
+        if s.supportsPerformers() {
             ret = append(ret, s.toScraper())
         }
     }
@@ -272,21 +74,6 @@ func findPerformerScraper(scraperID string) *scraperConfig {
     return nil
 }
 
-func findPerformerScraperURL(url string) *scraperConfig {
-    // read scraper config files from the directory and cache
-    loadScrapers()
-
-    for _, s := range scrapers {
-        for _, thisURL := range s.URLs {
-            if strings.Contains(url, thisURL) {
-                return &s
-            }
-        }
-    }
-
-    return nil
-}
-
 func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
     // find scraper with the provided id
     s := findPerformerScraper(scraperID)
@@ -308,10 +95,10 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
 }
 
 func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
-    // find scraper that matches the url given
-    s := findPerformerScraperURL(url)
-    if s != nil {
-        return s.ScrapePerformerURL(url)
+    for _, s := range scrapers {
+        if s.matchesPerformerURL(url) {
+            return s.ScrapePerformerURL(url)
+        }
     }
 
     return nil, nil
pkg/scraper/script.go — new file, 108 lines

package scraper

import (
    "encoding/json"
    "errors"
    "io"
    "io/ioutil"
    "os/exec"
    "strings"

    "github.com/stashapp/stash/pkg/logger"
    "github.com/stashapp/stash/pkg/manager/config"
    "github.com/stashapp/stash/pkg/models"
)

func runScraperScript(command []string, inString string, out interface{}) error {
    cmd := exec.Command(command[0], command[1:]...)
    cmd.Dir = config.GetScrapersPath()

    stdin, err := cmd.StdinPipe()
    if err != nil {
        return err
    }

    go func() {
        defer stdin.Close()

        io.WriteString(stdin, inString)
    }()

    stderr, err := cmd.StderrPipe()
    if err != nil {
        logger.Error("Scraper stderr not available: " + err.Error())
    }

    stdout, err := cmd.StdoutPipe()
    if nil != err {
        logger.Error("Scraper stdout not available: " + err.Error())
    }

    if err = cmd.Start(); err != nil {
        logger.Error("Error running scraper script: " + err.Error())
        return errors.New("Error running scraper script")
    }

    // TODO - add a timeout here
    decodeErr := json.NewDecoder(stdout).Decode(out)

    stderrData, _ := ioutil.ReadAll(stderr)
    stderrString := string(stderrData)

    err = cmd.Wait()

    if err != nil {
        // error message should be in the stderr stream
        logger.Errorf("scraper error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderrString)
        return errors.New("Error running scraper script")
    }

    if decodeErr != nil {
        logger.Errorf("error decoding performer from scraper data: %s", err.Error())
        return errors.New("Error decoding performer from scraper script")
    }

    return nil
}

func scrapePerformerNamesScript(c scraperTypeConfig, name string) ([]*models.ScrapedPerformer, error) {
    inString := `{"name": "` + name + `"}`

    var performers []models.ScrapedPerformer

    err := runScraperScript(c.Script, inString, &performers)

    // convert to pointers
    var ret []*models.ScrapedPerformer
    if err == nil {
        for i := 0; i < len(performers); i++ {
            ret = append(ret, &performers[i])
        }
    }

    return ret, err
}

func scrapePerformerFragmentScript(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
    inString, err := json.Marshal(scrapedPerformer)

    if err != nil {
        return nil, err
    }

    var ret models.ScrapedPerformer

    err = runScraperScript(c.Script, string(inString), &ret)

    return &ret, err
}

func scrapePerformerURLScript(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
    inString := `{"url": "` + url + `"}`

    var ret models.ScrapedPerformer

    err := runScraperScript(c.Script, string(inString), &ret)

    return &ret, err
}
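
Taken together, runScraperScript and its three callers define the script contract: the script runs inside the scrapers directory, receives a small JSON fragment on stdin ({"name": ...} for a name query, a serialized performer fragment, or {"url": ...}), and must print a single JSON document to stdout (an array of performers for the name query, one object otherwise), with stderr captured and logged on failure. Below is a hypothetical name-query scraper written in Go, the kind of binary a YAML script line could point at; the output field names are illustrative, since the diff does not show the JSON tags of models.ScrapedPerformer.

// Hypothetical performer-by-name scraper script.
// stash writes {"name": "..."} to stdin and decodes a JSON array from stdout,
// matching scrapePerformerNamesScript above.
package main

import (
    "encoding/json"
    "os"
)

type input struct {
    Name string `json:"name"`
}

// Illustrative result shape; a real script must emit whatever
// models.ScrapedPerformer actually unmarshals.
type performer struct {
    Name string `json:"name"`
    URL  string `json:"url"`
}

func main() {
    var in input
    if err := json.NewDecoder(os.Stdin).Decode(&in); err != nil {
        // errors belong on stderr, which stash logs when the script fails
        os.Stderr.WriteString(err.Error())
        os.Exit(1)
    }

    // A real script would query a site here; this just echoes the input.
    results := []performer{{Name: in.Name, URL: "https://example.com/" + in.Name}}
    _ = json.NewEncoder(os.Stdout).Encode(results)
}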
@@ -26,8 +26,8 @@ interface IProps {
   onImageChange: (event: React.FormEvent<HTMLInputElement>) => void;
 
   // TODO: only for performers. make generic
-  scrapers?: GQL.ListScrapersListScrapers[];
-  onDisplayScraperDialog?: (scraper: GQL.ListScrapersListScrapers) => void;
+  scrapers?: GQL.ListPerformerScrapersListPerformerScrapers[];
+  onDisplayScraperDialog?: (scraper: GQL.ListPerformerScrapersListPerformerScrapers) => void;
 }
 
 export const DetailsEditNavbar: FunctionComponent<IProps> = (props: IProps) => {
@@ -59,7 +59,7 @@ export const DetailsEditNavbar: FunctionComponent<IProps> = (props: IProps) => {
     return <FileInput text="Choose image..." onInputChange={props.onImageChange} inputProps={{accept: ".jpg,.jpeg"}} />;
   }
 
-  function renderScraperMenuItem(scraper : GQL.ListScrapersListScrapers) {
+  function renderScraperMenuItem(scraper : GQL.ListPerformerScrapersListPerformerScrapers) {
     return (
       <MenuItem
         text={scraper.name}
@@ -26,7 +26,7 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
 
   // Editing state
   const [isEditing, setIsEditing] = useState<boolean>(isNew);
-  const [isDisplayingScraperDialog, setIsDisplayingScraperDialog] = useState<GQL.ListScrapersListScrapers | undefined>(undefined);
+  const [isDisplayingScraperDialog, setIsDisplayingScraperDialog] = useState<GQL.ListPerformerScrapersListPerformerScrapers | undefined>(undefined);
   const [scrapePerformerDetails, setScrapePerformerDetails] = useState<GQL.ScrapePerformerListScrapePerformerList | undefined>(undefined);
 
   // Editing performer state
@@ -55,8 +55,8 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
   // Network state
   const [isLoading, setIsLoading] = useState(false);
 
-  const Scrapers = StashService.useListScrapers(GQL.ScraperType.Performer);
-  const [queryableScrapers, setQueryableScrapers] = useState<GQL.ListScrapersListScrapers[]>([]);
+  const Scrapers = StashService.useListPerformerScrapers();
+  const [queryableScrapers, setQueryableScrapers] = useState<GQL.ListPerformerScrapersListPerformerScrapers[]>([]);
 
   const { data, error, loading } = StashService.useFindPerformer(props.match.params.id);
   const updatePerformer = StashService.usePerformerUpdate(getPerformerInput() as GQL.PerformerUpdateInput);
@@ -121,11 +121,11 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
   });
 
   useEffect(() => {
-    var newQueryableScrapers : GQL.ListScrapersListScrapers[] = [];
+    var newQueryableScrapers : GQL.ListPerformerScrapersListPerformerScrapers[] = [];
 
-    if (!!Scrapers.data && Scrapers.data.listScrapers) {
-      newQueryableScrapers = Scrapers.data.listScrapers.filter((s) => {
-        return s.supported_scrapes.includes(GQL.ScrapeType.Query);
+    if (!!Scrapers.data && Scrapers.data.listPerformerScrapers) {
+      newQueryableScrapers = Scrapers.data.listPerformerScrapers.filter((s) => {
+        return s.performer && s.performer.supported_scrapes.includes(GQL.ScrapeType.Name);
       });
     }
 
@@ -218,7 +218,7 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
     reader.readAsDataURL(file);
   }
 
-  function onDisplayFreeOnesDialog(scraper: GQL.ListScrapersListScrapers) {
+  function onDisplayFreeOnesDialog(scraper: GQL.ListPerformerScrapersListPerformerScrapers) {
     setIsDisplayingScraperDialog(scraper);
   }
 
@@ -295,8 +295,8 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
   }
 
   function urlScrapable(url: string) : boolean {
-    return !!url && !!Scrapers.data && Scrapers.data.listScrapers && Scrapers.data.listScrapers.some((s) => {
-      return !!s.urls && s.urls.some((u) => { return url.includes(u); });
+    return !!url && !!Scrapers.data && Scrapers.data.listPerformerScrapers && Scrapers.data.listPerformerScrapers.some((s) => {
+      return !!s.performer && !!s.performer.urls && s.performer.urls.some((u) => { return url.includes(u); });
     });
   }
 
@@ -211,12 +211,8 @@ export class StashService {
     return GQL.useSceneMarkerDestroy({ refetchQueries: ["FindScene"] });
   }
 
-  public static useListScrapers(scraperType: GQL.ScraperType) {
-    return GQL.useListScrapers({
-      variables: {
-        scraper_type: scraperType
-      }
-    });
+  public static useListPerformerScrapers() {
+    return GQL.useListPerformerScrapers();
   }
   public static useScrapePerformerList(scraperId: string, q : string) {
     return GQL.useScrapePerformerList({ variables: { scraper_id: scraperId, query: q }});