Mirror of https://github.com/stashapp/stash.git
Add scene metadata scraping functionality (#236)
* Add scene scraping functionality
* Adapt to changed scraper config
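At a high level, the change adds scene counterparts to the existing performer-scraping plumbing: new GraphQL resolvers and package-level entry points (ListSceneScrapers, ScrapeScene, ScrapeSceneURL), scene sections in the scraper YAML config (sceneByFragment, sceneByURL), and post-processing that matches scraped performers, tags and studios against the database. A minimal sketch of how a caller might drive the new entry points; the exact fields on models.Scraper and models.ScrapedScene used below (ID, Name, Title) are assumed from the GraphQL schema rather than shown in this diff:

package main

import (
    "fmt"
    "log"

    "github.com/stashapp/stash/pkg/scraper"
)

func main() {
    // List every configured scraper that advertises scene support
    // (i.e. has a sceneByFragment or sceneByURL section).
    sceneScrapers, err := scraper.ListSceneScrapers()
    if err != nil {
        log.Fatal(err)
    }
    for _, s := range sceneScrapers {
        fmt.Println(s.ID, s.Name)
    }

    // Scrape a scene straight from a URL; the first scraper whose sceneByURL
    // entries match the URL is used, and the result is post-processed so that
    // known performers/tags/studios carry their database IDs.
    scene, err := scraper.ScrapeSceneURL("https://example.com/scenes/123")
    if err != nil {
        log.Fatal(err)
    }
    if scene != nil && scene.Title != nil {
        fmt.Println("scraped:", *scene.Title)
    }
}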
@@ -36,6 +36,10 @@ func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Sc
    return scraper.ListPerformerScrapers()
}

func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scraper, error) {
    return scraper.ListSceneScrapers()
}

func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
    if query == "" {
        return nil, nil
@@ -51,3 +55,11 @@ func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, s
func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
    return scraper.ScrapePerformerURL(url)
}

func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
    return scraper.ScrapeScene(scraperID, scene)
}

func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
    return scraper.ScrapeSceneURL(url)
}
@@ -59,22 +59,13 @@ func (c *performerByFragmentConfig) resolveFn() {
    }
}

type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)

type scraperByURLConfig struct {
type scrapeByURLConfig struct {
    scraperTypeConfig `yaml:",inline"`
    URL []string `yaml:"url,flow"`
    performScrape scrapePerformerByURLFunc
}

func (c *scraperByURLConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapePerformerURLScript
    }
}

func (s scraperByURLConfig) matchesURL(url string) bool {
    for _, thisURL := range s.URL {
func (c scrapeByURLConfig) matchesURL(url string) bool {
    for _, thisURL := range c.URL {
        if strings.Contains(url, thisURL) {
            return true
        }
@@ -83,12 +74,53 @@ func (s scraperByURLConfig) matchesURL(url string) bool {
    return false
}

type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)

type scrapePerformerByURLConfig struct {
    scrapeByURLConfig `yaml:",inline"`
    performScrape scrapePerformerByURLFunc
}

func (c *scrapePerformerByURLConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapePerformerURLScript
    }
}

type scrapeSceneFragmentFunc func(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error)

type sceneByFragmentConfig struct {
    scraperTypeConfig `yaml:",inline"`
    performScrape scrapeSceneFragmentFunc
}

func (c *sceneByFragmentConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapeSceneFragmentScript
    }
}

type scrapeSceneByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedScene, error)

type scrapeSceneByURLConfig struct {
    scrapeByURLConfig `yaml:",inline"`
    performScrape scrapeSceneByURLFunc
}

func (c *scrapeSceneByURLConfig) resolveFn() {
    if c.Action == scraperActionScript {
        c.performScrape = scrapeSceneURLScript
    }
}

type scraperConfig struct {
    ID string
    Name string `yaml:"name"`
    PerformerByName *performerByNameConfig `yaml:"performerByName"`
    PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
    PerformerByURL []*scraperByURLConfig `yaml:"performerByURL"`
    Name string `yaml:"name"`
    PerformerByName *performerByNameConfig `yaml:"performerByName"`
    PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
    PerformerByURL []*scrapePerformerByURLConfig `yaml:"performerByURL"`
    SceneByFragment *sceneByFragmentConfig `yaml:"sceneByFragment"`
    SceneByURL []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
}

func loadScraperFromYAML(path string) (*scraperConfig, error) {
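The refactor pulls the shared URL fields into scrapeByURLConfig and embeds it (and scraperTypeConfig) with yaml:",inline", so performer and scene URL scrapers parse from the same flat YAML shape. A standalone sketch of how that inline embedding behaves with gopkg.in/yaml.v2; the struct definitions below are simplified stand-ins for the ones in the diff, not the real stash types:

package main

import (
    "fmt"

    "gopkg.in/yaml.v2"
)

// Simplified stand-in for scraperTypeConfig: the common action/script fields.
type scraperTypeConfig struct {
    Action string   `yaml:"action"`
    Script []string `yaml:"script,flow"`
}

// Simplified stand-in for scrapeByURLConfig: shared URL-matching fields.
type scrapeByURLConfig struct {
    scraperTypeConfig `yaml:",inline"`
    URL               []string `yaml:"url,flow"`
}

// Simplified stand-in for scrapeSceneByURLConfig.
type scrapeSceneByURLConfig struct {
    scrapeByURLConfig `yaml:",inline"`
}

func main() {
    doc := `
action: script
script: [python, scrape.py]
url:
  - example.com/scene
`
    var c scrapeSceneByURLConfig
    if err := yaml.Unmarshal([]byte(doc), &c); err != nil {
        panic(err)
    }
    // The inline tags flatten the embedded structs, so action, script and url
    // are all read from the same YAML level.
    fmt.Println(c.Action, c.Script, c.URL)
}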
@@ -127,6 +159,13 @@ func (c *scraperConfig) initialiseConfigs() {
    for _, s := range c.PerformerByURL {
        s.resolveFn()
    }

    if c.SceneByFragment != nil {
        c.SceneByFragment.resolveFn()
    }
    for _, s := range c.SceneByURL {
        s.resolveFn()
    }
}

func (c scraperConfig) toScraper() *models.Scraper {
@@ -153,6 +192,21 @@ func (c scraperConfig) toScraper() *models.Scraper {
        ret.Performer = &performer
    }

    scene := models.ScraperSpec{}
    if c.SceneByFragment != nil {
        scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
    }
    if len(c.SceneByURL) > 0 {
        scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
        for _, v := range c.SceneByURL {
            scene.Urls = append(scene.Urls, v.URL...)
        }
    }

    if len(scene.SupportedScrapes) > 0 {
        ret.Scene = &scene
    }

    return &ret
}
@@ -202,3 +256,42 @@ func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer,

    return nil, nil
}

func (c scraperConfig) supportsScenes() bool {
    return c.SceneByFragment != nil || len(c.SceneByURL) > 0
}

func (c scraperConfig) matchesSceneURL(url string) bool {
    for _, scraper := range c.SceneByURL {
        if scraper.matchesURL(url) {
            return true
        }
    }

    return false
}

func (c scraperConfig) ScrapeScene(scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
    if c.SceneByFragment != nil && c.SceneByFragment.performScrape != nil {
        return c.SceneByFragment.performScrape(c.SceneByFragment.scraperTypeConfig, scene)
    }

    return nil, nil
}

func (c scraperConfig) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
    for _, scraper := range c.SceneByURL {
        if scraper.matchesURL(url) && scraper.performScrape != nil {
            ret, err := scraper.performScrape(scraper.scraperTypeConfig, url)
            if err != nil {
                return nil, err
            }

            if ret != nil {
                return ret, nil
            }
        }
    }

    return nil, nil
}
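matchesURL is a simple substring test: a scraper handles a URL when the URL contains any of its configured url entries, and matchesSceneURL/ScrapeSceneURL take the first sceneByURL scraper that matches. A tiny standalone illustration of that selection rule, with the config types reduced to the one field that matters here:

package main

import (
    "fmt"
    "strings"
)

// Simplified from scrapeByURLConfig in the diff: a scraper matches when the
// input URL contains any of its configured url entries.
type byURL struct {
    URL []string
}

func (c byURL) matchesURL(url string) bool {
    for _, thisURL := range c.URL {
        if strings.Contains(url, thisURL) {
            return true
        }
    }
    return false
}

func main() {
    sceneScrapers := []byURL{
        {URL: []string{"studio-a.example.com/scenes"}},
        {URL: []string{"studio-b.example.com"}},
    }

    url := "https://studio-b.example.com/video/42"
    for i, s := range sceneScrapers {
        if s.matchesURL(url) {
            // The first matching scraper wins.
            fmt.Println("scraper", i, "handles", url)
            break
        }
    }
}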
@@ -30,10 +30,12 @@ func GetFreeonesScraper() scraperConfig {
        PerformerByFragment: &performerByFragmentConfig{
            performScrape: GetPerformer,
        },
        PerformerByURL: []*scraperByURLConfig{
            &scraperByURLConfig{
        PerformerByURL: []*scrapePerformerByURLConfig{
            &scrapePerformerByURLConfig{
                scrapeByURLConfig: scrapeByURLConfig{
                    URL: freeonesURLs,
                },
                performScrape: GetPerformerURL,
                URL: freeonesURLs,
            },
        },
    }
@@ -3,6 +3,7 @@ package scraper
import (
    "errors"
    "path/filepath"
    "strconv"

    "github.com/stashapp/stash/pkg/logger"
    "github.com/stashapp/stash/pkg/manager/config"
@@ -61,7 +62,26 @@ func ListPerformerScrapers() ([]*models.Scraper, error) {
    return ret, nil
}

func findPerformerScraper(scraperID string) *scraperConfig {
func ListSceneScrapers() ([]*models.Scraper, error) {
    // read scraper config files from the directory and cache
    scrapers, err := loadScrapers()

    if err != nil {
        return nil, err
    }

    var ret []*models.Scraper
    for _, s := range scrapers {
        // filter on type
        if s.supportsScenes() {
            ret = append(ret, s.toScraper())
        }
    }

    return ret, nil
}

func findScraper(scraperID string) *scraperConfig {
    // read scraper config files from the directory and cache
    loadScrapers()
@@ -76,7 +96,7 @@ func findPerformerScraper(scraperID string) *scraperConfig {

func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
    // find scraper with the provided id
    s := findPerformerScraper(scraperID)
    s := findScraper(scraperID)
    if s != nil {
        return s.ScrapePerformerNames(query)
    }
@@ -86,7 +106,7 @@ func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerfo

func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
    // find scraper with the provided id
    s := findPerformerScraper(scraperID)
    s := findScraper(scraperID)
    if s != nil {
        return s.ScrapePerformer(scrapedPerformer)
    }
@@ -103,3 +123,127 @@ func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {

    return nil, nil
}

func matchPerformer(p *models.ScrapedScenePerformer) error {
    qb := models.NewPerformerQueryBuilder()

    performers, err := qb.FindByNames([]string{p.Name}, nil)

    if err != nil {
        return err
    }

    if len(performers) != 1 {
        // ignore - cannot match
        return nil
    }

    id := strconv.Itoa(performers[0].ID)
    p.ID = &id
    return nil
}

func matchStudio(s *models.ScrapedSceneStudio) error {
    qb := models.NewStudioQueryBuilder()

    studio, err := qb.FindByName(s.Name, nil)

    if err != nil {
        return err
    }

    if studio == nil {
        // ignore - cannot match
        return nil
    }

    id := strconv.Itoa(studio.ID)
    s.ID = &id
    return nil
}

func matchTag(s *models.ScrapedSceneTag) error {
    qb := models.NewTagQueryBuilder()

    tag, err := qb.FindByName(s.Name, nil)

    if err != nil {
        return err
    }

    if tag == nil {
        // ignore - cannot match
        return nil
    }

    id := strconv.Itoa(tag.ID)
    s.ID = &id
    return nil
}

func postScrapeScene(ret *models.ScrapedScene) error {
    for _, p := range ret.Performers {
        err := matchPerformer(p)
        if err != nil {
            return err
        }
    }

    for _, t := range ret.Tags {
        err := matchTag(t)
        if err != nil {
            return err
        }
    }

    if ret.Studio != nil {
        err := matchStudio(ret.Studio)
        if err != nil {
            return err
        }
    }

    return nil
}
func ScrapeScene(scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
|
||||
// find scraper with the provided id
|
||||
s := findScraper(scraperID)
|
||||
if s != nil {
|
||||
ret, err := s.ScrapeScene(scene)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = postScrapeScene(ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraper with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
func ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
|
||||
for _, s := range scrapers {
|
||||
if s.matchesSceneURL(url) {
|
||||
ret, err := s.ScrapeSceneURL(url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = postScrapeScene(ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
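postScrapeScene resolves scraped names to stored IDs conservatively: a performer is only linked when exactly one stored performer has that name, and a studio or tag only when a lookup by name finds a record; otherwise the ID is left unset and the result is passed through unmatched. A small self-contained sketch of that policy, using an in-memory map in place of the query builders (illustrative only, not the stash data layer):

package main

import (
    "fmt"
    "strconv"
)

// Stand-in for the name lookup done by models.NewPerformerQueryBuilder().FindByNames.
var performersByName = map[string][]int{
    "Alice Example": {12},
    "Jane Doe":      {3, 7}, // ambiguous: two stored performers share the name
}

// matchPerformerName mirrors the rule in the diff: set the ID only when the
// name resolves to exactly one stored performer, otherwise leave it nil.
func matchPerformerName(name string) *string {
    ids := performersByName[name]
    if len(ids) != 1 {
        return nil // ignore - cannot match
    }
    id := strconv.Itoa(ids[0])
    return &id
}

func main() {
    for _, name := range []string{"Alice Example", "Jane Doe", "Unknown"} {
        if id := matchPerformerName(name); id != nil {
            fmt.Println(name, "->", *id)
        } else {
            fmt.Println(name, "-> unmatched")
        }
    }
}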
@@ -106,3 +106,27 @@ func scrapePerformerURLScript(c scraperTypeConfig, url string) (*models.ScrapedP

    return &ret, err
}

func scrapeSceneFragmentScript(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
    inString, err := json.Marshal(scene)

    if err != nil {
        return nil, err
    }

    var ret models.ScrapedScene

    err = runScraperScript(c.Script, string(inString), &ret)

    return &ret, err
}

func scrapeSceneURLScript(c scraperTypeConfig, url string) (*models.ScrapedScene, error) {
    inString := `{"url": "` + url + `"}`

    var ret models.ScrapedScene

    err := runScraperScript(c.Script, string(inString), &ret)

    return &ret, err
}
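For script-based scrapers, scrapeSceneURLScript hands the configured script a small JSON fragment (just {"url": ...}) and expects a models.ScrapedScene-shaped JSON document back; runScraperScript, which is not part of this diff, performs the exchange, and in stash that exchange happens over the script's stdin and stdout. Under that assumption, a toy scene-scraper "script" written in Go could look like this (the output field names are illustrative, not the authoritative ScrapedScene contract):

// Toy scene-scraper script: reads {"url": "..."} on stdin and prints a
// ScrapedScene-shaped JSON object on stdout. The stdin/stdout exchange and the
// exact output fields are assumptions; consult runScraperScript and
// models.ScrapedScene for the authoritative contract.
package main

import (
    "encoding/json"
    "os"
)

type input struct {
    URL string `json:"url"`
}

func main() {
    var in input
    if err := json.NewDecoder(os.Stdin).Decode(&in); err != nil {
        os.Exit(1)
    }

    out := map[string]interface{}{
        "title": "Scene scraped from " + in.URL,
        "url":   in.URL,
    }
    _ = json.NewEncoder(os.Stdout).Encode(out)
}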