Add scene metadata scraping functionality (#236)

* Add scene scraping functionality

* Adapt to changed scraper config
This commit is contained in:
WithoutPants
2019-12-16 12:35:34 +11:00
committed by Leopere
parent f8a760d729
commit 92837fe1f7
11 changed files with 614 additions and 83 deletions

View File

@@ -36,6 +36,10 @@ func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Sc
return scraper.ListPerformerScrapers()
}
// ListSceneScrapers resolves the query for all configured scrapers that are
// capable of scraping scene metadata.
func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scraper, error) {
	scrapers, err := scraper.ListSceneScrapers()
	if err != nil {
		return nil, err
	}
	return scrapers, nil
}
func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
if query == "" {
return nil, nil
@@ -51,3 +55,11 @@ func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, s
// ScrapePerformerURL resolves a performer scrape for the given URL by
// delegating to whichever configured scraper matches it.
func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
	performer, err := scraper.ScrapePerformerURL(url)
	if err != nil {
		return nil, err
	}
	return performer, nil
}
// ScrapeScene resolves a scene scrape using the scraper identified by
// scraperID, passing through the scene fragment supplied by the client.
func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	scraped, err := scraper.ScrapeScene(scraperID, scene)
	if err != nil {
		return nil, err
	}
	return scraped, nil
}
// ScrapeSceneURL resolves a scene scrape for the given URL by delegating to
// whichever configured scraper matches it.
func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
	scraped, err := scraper.ScrapeSceneURL(url)
	if err != nil {
		return nil, err
	}
	return scraped, nil
}

View File

@@ -59,22 +59,13 @@ func (c *performerByFragmentConfig) resolveFn() {
}
}
type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)
type scraperByURLConfig struct {
type scrapeByURLConfig struct {
scraperTypeConfig `yaml:",inline"`
URL []string `yaml:"url,flow"`
performScrape scrapePerformerByURLFunc
}
func (c *scraperByURLConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapePerformerURLScript
}
}
func (s scraperByURLConfig) matchesURL(url string) bool {
for _, thisURL := range s.URL {
func (c scrapeByURLConfig) matchesURL(url string) bool {
for _, thisURL := range c.URL {
if strings.Contains(url, thisURL) {
return true
}
@@ -83,12 +74,53 @@ func (s scraperByURLConfig) matchesURL(url string) bool {
return false
}
// scrapePerformerByURLFunc is the signature of functions that scrape a
// performer from a URL using the supplied scraper type configuration.
type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)
// scrapePerformerByURLConfig configures performer-by-URL scraping. It embeds
// the shared URL-matching configuration and holds the scrape function
// resolved from the configured action (see resolveFn).
type scrapePerformerByURLConfig struct {
scrapeByURLConfig `yaml:",inline"`
performScrape scrapePerformerByURLFunc
}
// resolveFn binds the concrete scrape implementation for the configured
// action. Only the script action is currently supported; for any other
// action performScrape remains nil.
func (c *scrapePerformerByURLConfig) resolveFn() {
	if c.Action != scraperActionScript {
		return
	}
	c.performScrape = scrapePerformerURLScript
}
// scrapeSceneFragmentFunc is the signature of functions that scrape scene
// metadata from an existing scene fragment using the supplied scraper type
// configuration.
type scrapeSceneFragmentFunc func(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error)
// sceneByFragmentConfig configures scene-by-fragment scraping and holds the
// scrape function resolved from the configured action (see resolveFn).
type sceneByFragmentConfig struct {
scraperTypeConfig `yaml:",inline"`
performScrape scrapeSceneFragmentFunc
}
// resolveFn binds the concrete scrape implementation for the configured
// action. Only the script action is currently supported; for any other
// action performScrape remains nil.
func (c *sceneByFragmentConfig) resolveFn() {
	if c.Action != scraperActionScript {
		return
	}
	c.performScrape = scrapeSceneFragmentScript
}
// scrapeSceneByURLFunc is the signature of functions that scrape scene
// metadata from a URL using the supplied scraper type configuration.
type scrapeSceneByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedScene, error)
// scrapeSceneByURLConfig configures scene-by-URL scraping. It embeds the
// shared URL-matching configuration and holds the scrape function resolved
// from the configured action (see resolveFn).
type scrapeSceneByURLConfig struct {
scrapeByURLConfig `yaml:",inline"`
performScrape scrapeSceneByURLFunc
}
// resolveFn binds the concrete scrape implementation for the configured
// action. Only the script action is currently supported; for any other
// action performScrape remains nil.
func (c *scrapeSceneByURLConfig) resolveFn() {
	if c.Action != scraperActionScript {
		return
	}
	c.performScrape = scrapeSceneURLScript
}
type scraperConfig struct {
ID string
Name string `yaml:"name"`
PerformerByName *performerByNameConfig `yaml:"performerByName"`
PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
PerformerByURL []*scraperByURLConfig `yaml:"performerByURL"`
Name string `yaml:"name"`
PerformerByName *performerByNameConfig `yaml:"performerByName"`
PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
PerformerByURL []*scrapePerformerByURLConfig `yaml:"performerByURL"`
SceneByFragment *sceneByFragmentConfig `yaml:"sceneByFragment"`
SceneByURL []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
}
func loadScraperFromYAML(path string) (*scraperConfig, error) {
@@ -127,6 +159,13 @@ func (c *scraperConfig) initialiseConfigs() {
for _, s := range c.PerformerByURL {
s.resolveFn()
}
if c.SceneByFragment != nil {
c.SceneByFragment.resolveFn()
}
for _, s := range c.SceneByURL {
s.resolveFn()
}
}
func (c scraperConfig) toScraper() *models.Scraper {
@@ -153,6 +192,21 @@ func (c scraperConfig) toScraper() *models.Scraper {
ret.Performer = &performer
}
scene := models.ScraperSpec{}
if c.SceneByFragment != nil {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.SceneByURL) > 0 {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.SceneByURL {
scene.Urls = append(scene.Urls, v.URL...)
}
}
if len(scene.SupportedScrapes) > 0 {
ret.Scene = &scene
}
return &ret
}
@@ -202,3 +256,42 @@ func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer,
return nil, nil
}
// supportsScenes reports whether this scraper configuration defines any way
// of scraping scenes (by fragment or by URL).
func (c scraperConfig) supportsScenes() bool {
	if c.SceneByFragment != nil {
		return true
	}
	return len(c.SceneByURL) > 0
}
// matchesSceneURL reports whether any of the configured scene-by-URL
// scrapers can handle the given URL.
func (c scraperConfig) matchesSceneURL(url string) bool {
	for _, cfg := range c.SceneByURL {
		if cfg.matchesURL(url) {
			return true
		}
	}
	return false
}
// ScrapeScene scrapes scene metadata from the provided fragment using the
// sceneByFragment configuration. Returns nil, nil when no fragment scraper
// is configured or its scrape function was not resolved.
func (c scraperConfig) ScrapeScene(scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	frag := c.SceneByFragment
	if frag == nil || frag.performScrape == nil {
		return nil, nil
	}
	return frag.performScrape(frag.scraperTypeConfig, scene)
}
// ScrapeSceneURL tries each scene-by-URL scraper whose URL patterns match
// and whose scrape function is resolved, returning the first non-nil
// result. Returns nil, nil when nothing matches or every matching scraper
// yields no result.
func (c scraperConfig) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
	for _, cfg := range c.SceneByURL {
		if !cfg.matchesURL(url) || cfg.performScrape == nil {
			continue
		}
		scraped, err := cfg.performScrape(cfg.scraperTypeConfig, url)
		if err != nil {
			return nil, err
		}
		if scraped != nil {
			return scraped, nil
		}
	}
	return nil, nil
}

View File

@@ -30,10 +30,12 @@ func GetFreeonesScraper() scraperConfig {
PerformerByFragment: &performerByFragmentConfig{
performScrape: GetPerformer,
},
PerformerByURL: []*scraperByURLConfig{
&scraperByURLConfig{
PerformerByURL: []*scrapePerformerByURLConfig{
&scrapePerformerByURLConfig{
scrapeByURLConfig: scrapeByURLConfig{
URL: freeonesURLs,
},
performScrape: GetPerformerURL,
URL: freeonesURLs,
},
},
}

View File

@@ -3,6 +3,7 @@ package scraper
import (
"errors"
"path/filepath"
"strconv"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/manager/config"
@@ -61,7 +62,26 @@ func ListPerformerScrapers() ([]*models.Scraper, error) {
return ret, nil
}
func findPerformerScraper(scraperID string) *scraperConfig {
// ListSceneScrapers loads (or reuses the cached) scraper configurations and
// returns scraper models for those that support scene scraping.
func ListSceneScrapers() ([]*models.Scraper, error) {
	// read scraper config files from the directory and cache
	loaded, err := loadScrapers()
	if err != nil {
		return nil, err
	}
	var ret []*models.Scraper
	for _, s := range loaded {
		// only include scrapers with scene support
		if !s.supportsScenes() {
			continue
		}
		ret = append(ret, s.toScraper())
	}
	return ret, nil
}
func findScraper(scraperID string) *scraperConfig {
// read scraper config files from the directory and cache
loadScrapers()
@@ -76,7 +96,7 @@ func findPerformerScraper(scraperID string) *scraperConfig {
func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := findPerformerScraper(scraperID)
s := findScraper(scraperID)
if s != nil {
return s.ScrapePerformerNames(query)
}
@@ -86,7 +106,7 @@ func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerfo
func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := findPerformerScraper(scraperID)
s := findScraper(scraperID)
if s != nil {
return s.ScrapePerformer(scrapedPerformer)
}
@@ -103,3 +123,127 @@ func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
return nil, nil
}
// matchPerformer tries to resolve the scraped performer to a stored
// performer by exact name, setting its ID on success. An ambiguous or
// missing match is not an error; the ID is simply left unset.
func matchPerformer(p *models.ScrapedScenePerformer) error {
	qb := models.NewPerformerQueryBuilder()
	matches, err := qb.FindByNames([]string{p.Name}, nil)
	if err != nil {
		return err
	}
	// require exactly one match - anything else cannot be resolved safely
	if len(matches) == 1 {
		id := strconv.Itoa(matches[0].ID)
		p.ID = &id
	}
	return nil
}
// matchStudio tries to resolve the scraped studio to a stored studio by
// name, setting its ID on success. A missing match is not an error; the ID
// is simply left unset.
func matchStudio(s *models.ScrapedSceneStudio) error {
	qb := models.NewStudioQueryBuilder()
	found, err := qb.FindByName(s.Name, nil)
	if err != nil {
		return err
	}
	if found != nil {
		id := strconv.Itoa(found.ID)
		s.ID = &id
	}
	return nil
}
// matchTag tries to resolve the scraped tag to a stored tag by name,
// setting its ID on success. A missing match is not an error; the ID is
// simply left unset.
func matchTag(s *models.ScrapedSceneTag) error {
	qb := models.NewTagQueryBuilder()
	found, err := qb.FindByName(s.Name, nil)
	if err != nil {
		return err
	}
	if found != nil {
		id := strconv.Itoa(found.ID)
		s.ID = &id
	}
	return nil
}
// postScrapeScene matches the scraped scene's performers, tags and studio
// against stored objects, filling in their IDs where an unambiguous match
// exists. A nil scene (scraper returned no result) is a no-op.
func postScrapeScene(ret *models.ScrapedScene) error {
	// guard against a nil result - scraperConfig.ScrapeScene and
	// ScrapeSceneURL can legitimately return nil, nil, and dereferencing
	// ret below would otherwise panic
	if ret == nil {
		return nil
	}
	for _, p := range ret.Performers {
		if err := matchPerformer(p); err != nil {
			return err
		}
	}
	for _, t := range ret.Tags {
		if err := matchTag(t); err != nil {
			return err
		}
	}
	if ret.Studio != nil {
		if err := matchStudio(ret.Studio); err != nil {
			return err
		}
	}
	return nil
}
// ScrapeScene scrapes scene metadata using the scraper with the given ID,
// then matches the result's performers/tags/studio against stored objects.
// Returns an error if no scraper with the given ID exists.
func ScrapeScene(scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	// find scraper with the provided id
	s := findScraper(scraperID)
	if s == nil {
		return nil, errors.New("Scraper with ID " + scraperID + " not found")
	}
	ret, err := s.ScrapeScene(scene)
	if err != nil {
		return nil, err
	}
	// the scraper may legitimately return no result (eg no sceneByFragment
	// configured); previously a nil ret was passed to postScrapeScene and
	// caused a nil pointer dereference
	if ret == nil {
		return nil, nil
	}
	if err := postScrapeScene(ret); err != nil {
		return nil, err
	}
	return ret, nil
}
// ScrapeSceneURL scrapes scene metadata from the first scraper whose
// sceneByURL configuration matches the URL, then matches the result's
// performers/tags/studio against stored objects. Returns nil, nil when no
// scraper matches or the matching scraper yields no result.
func ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
	for _, s := range scrapers {
		if !s.matchesSceneURL(url) {
			continue
		}
		ret, err := s.ScrapeSceneURL(url)
		if err != nil {
			return nil, err
		}
		// a matching scraper may still yield no result; previously a nil
		// ret was passed to postScrapeScene and caused a panic
		if ret == nil {
			return nil, nil
		}
		if err := postScrapeScene(ret); err != nil {
			return nil, err
		}
		return ret, nil
	}
	return nil, nil
}

View File

@@ -106,3 +106,27 @@ func scrapePerformerURLScript(c scraperTypeConfig, url string) (*models.ScrapedP
return &ret, err
}
// scrapeSceneFragmentScript serialises the scene input to JSON, feeds it to
// the configured scraper script on stdin, and decodes the script's output
// into a ScrapedScene.
func scrapeSceneFragmentScript(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	input, err := json.Marshal(scene)
	if err != nil {
		return nil, err
	}
	var ret models.ScrapedScene
	err = runScraperScript(c.Script, string(input), &ret)
	return &ret, err
}
// scrapeSceneURLScript passes the URL to the configured scraper script as a
// JSON object on stdin, and decodes the script's output into a ScrapedScene.
func scrapeSceneURLScript(c scraperTypeConfig, url string) (*models.ScrapedScene, error) {
	// marshal rather than concatenating the string by hand so that URLs
	// containing quotes or backslashes still produce valid JSON
	inString, err := json.Marshal(map[string]string{"url": url})
	if err != nil {
		return nil, err
	}
	var ret models.ScrapedScene
	err = runScraperScript(c.Script, string(inString), &ret)
	return &ret, err
}