Autotag scraper (#1817)

* Refactor scraper structures
* Move matching code into new package
* Add autotag scraper
* Always check first letter of auto-tag names
* Account for nulls

Co-authored-by: Kermie <kermie@isinthe.house>
This commit is contained in:
WithoutPants
2021-10-11 23:06:06 +11:00
committed by GitHub
parent b5381ff071
commit e9d48683f8
22 changed files with 1023 additions and 660 deletions

View File

@@ -19,7 +19,7 @@ func (e scraperAction) IsValid() bool {
return false
}
type scraper interface {
type scraperActionImpl interface {
scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error)
scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
scrapePerformerByURL(url string) (*models.ScrapedPerformer, error)
@@ -36,16 +36,16 @@ type scraper interface {
scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
}
func getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) scraper {
func (c config) getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl {
switch scraper.Action {
case scraperActionScript:
return newScriptScraper(scraper, config, globalConfig)
return newScriptScraper(scraper, c, globalConfig)
case scraperActionStash:
return newStashScraper(scraper, txnManager, config, globalConfig)
return newStashScraper(scraper, txnManager, c, globalConfig)
case scraperActionXPath:
return newXpathScraper(scraper, txnManager, config, globalConfig)
return newXpathScraper(scraper, txnManager, c, globalConfig)
case scraperActionJson:
return newJsonScraper(scraper, txnManager, config, globalConfig)
return newJsonScraper(scraper, txnManager, c, globalConfig)
}
panic("unknown scraper action: " + scraper.Action)

218
pkg/scraper/autotag.go Normal file
View File

@@ -0,0 +1,218 @@
package scraper
import (
"context"
"errors"
"fmt"
"strconv"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
)
// autoTagScraperID is the scraper ID for the built-in AutoTag scraper.
const autoTagScraperID = "builtin_autotag"

// autoTagScraperName is the name placed in the built-in AutoTag scraper's spec.
const autoTagScraperName = "Auto Tag"
// errNotSupported is returned by the auto-tag scrape methods in this file that
// have no implementation (scraping by name, by fragment input or by URL).
var errNotSupported = errors.New("not supported")
// autotagScraper holds the dependencies shared by the auto-tag scene and
// gallery scrapers and provides the path-matching helpers they call.
type autotagScraper struct {
// txnManager opens the read transaction in which the matching is performed.
txnManager models.TransactionManager
// globalConfig is stored at construction time; none of the methods visible
// in this file read it.
globalConfig GlobalConfig
}
// matchPerformers converts the performers that match.PathToPerformers finds
// for path into scraped performers. Each result carries the performer's name
// and its ID as StoredID; the gender is copied only when it is set (non-null).
// The returned slice is nil when nothing matched.
func (s *autotagScraper) matchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) {
	matched, err := match.PathToPerformers(path, performerReader)
	if err != nil {
		return nil, fmt.Errorf("error matching performers: %w", err)
	}

	var scraped []*models.ScrapedPerformer
	for _, performer := range matched {
		storedID := strconv.Itoa(performer.ID)

		entry := models.ScrapedPerformer{
			Name:     &performer.Name.String,
			StoredID: &storedID,
		}
		// leave Gender nil rather than pointing at an empty string
		if performer.Gender.Valid {
			entry.Gender = &performer.Gender.String
		}

		scraped = append(scraped, &entry)
	}

	return scraped, nil
}
// matchStudio returns the first studio that match.PathToStudios finds for
// path, converted to a scraped studio with its ID as StoredID. It returns
// nil, nil when no studio matched.
func (s *autotagScraper) matchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) {
	studios, err := match.PathToStudios(path, studioReader)
	if err != nil {
		return nil, fmt.Errorf("error matching studios: %w", err)
	}

	if len(studios) == 0 {
		return nil, nil
	}

	// only the first match is used; any further matches are ignored
	first := studios[0]
	storedID := strconv.Itoa(first.ID)
	scraped := &models.ScrapedStudio{
		Name:     first.Name.String,
		StoredID: &storedID,
	}
	return scraped, nil
}
// matchTags converts the tags that match.PathToTags finds for path into
// scraped tags carrying the tag name and its ID as StoredID. The returned
// slice is nil when nothing matched.
func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) {
	matched, err := match.PathToTags(path, tagReader)
	if err != nil {
		return nil, fmt.Errorf("error matching tags: %w", err)
	}

	var scraped []*models.ScrapedTag
	for _, tag := range matched {
		storedID := strconv.Itoa(tag.ID)
		scraped = append(scraped, &models.ScrapedTag{
			Name:     tag.Name,
			StoredID: &storedID,
		})
	}

	return scraped, nil
}
// autotagSceneScraper is the scene-facing wrapper around the shared
// autotagScraper, which it embeds.
type autotagSceneScraper struct {
*autotagScraper
}
// scrapeByName is not implemented for auto-tagging; it always returns
// errNotSupported.
func (c *autotagSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
return nil, errNotSupported
}
// scrapeByScene matches performers, a studio and tags against the scene's
// path inside a single read transaction. It returns a scraped scene holding
// the matches, or nil when nothing at all matched. Any matching or
// transaction error is returned as-is.
func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
	var result *models.ScrapedScene

	// runs inside the read transaction and fills in result on success
	matchFromPath := func(r models.ReaderRepository) error {
		scenePath := scene.Path

		performers, err := c.matchPerformers(scenePath, r.Performer())
		if err != nil {
			return err
		}

		studio, err := c.matchStudio(scenePath, r.Studio())
		if err != nil {
			return err
		}

		tags, err := c.matchTags(scenePath, r.Tag())
		if err != nil {
			return err
		}

		// leave result nil when there is nothing to report
		if len(performers) == 0 && studio == nil && len(tags) == 0 {
			return nil
		}

		result = &models.ScrapedScene{
			Performers: performers,
			Studio:     studio,
			Tags:       tags,
		}
		return nil
	}

	if err := c.txnManager.WithReadTxn(context.Background(), matchFromPath); err != nil {
		return nil, err
	}

	return result, nil
}
// scrapeByFragment is not implemented for auto-tagging; it always returns
// errNotSupported.
func (c *autotagSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
return nil, errNotSupported
}
// scrapeByURL is not implemented for auto-tagging; it always returns
// errNotSupported.
func (c *autotagSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
return nil, errNotSupported
}
// autotagGalleryScraper is the gallery-facing wrapper around the shared
// autotagScraper, which it embeds.
type autotagGalleryScraper struct {
*autotagScraper
}
// scrapeByGallery matches performers, a studio and tags against the gallery's
// path inside a single read transaction. Galleries without a path yield
// nil, nil, as do galleries for which nothing matched. Any matching or
// transaction error is returned as-is.
func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
	// not valid for non-path-based galleries
	if !gallery.Path.Valid {
		return nil, nil
	}

	var result *models.ScrapedGallery

	// runs inside the read transaction and fills in result on success
	matchFromPath := func(r models.ReaderRepository) error {
		galleryPath := gallery.Path.String

		performers, err := c.matchPerformers(galleryPath, r.Performer())
		if err != nil {
			return err
		}

		studio, err := c.matchStudio(galleryPath, r.Studio())
		if err != nil {
			return err
		}

		tags, err := c.matchTags(galleryPath, r.Tag())
		if err != nil {
			return err
		}

		// leave result nil when there is nothing to report
		if len(performers) == 0 && studio == nil && len(tags) == 0 {
			return nil
		}

		result = &models.ScrapedGallery{
			Performers: performers,
			Studio:     studio,
			Tags:       tags,
		}
		return nil
	}

	if err := c.txnManager.WithReadTxn(context.Background(), matchFromPath); err != nil {
		return nil, err
	}

	return result, nil
}
// scrapeByFragment is not implemented for auto-tagging; it always returns
// errNotSupported.
func (c *autotagGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
return nil, errNotSupported
}
// scrapeByURL is not implemented for auto-tagging; it always returns
// errNotSupported.
func (c *autotagGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
return nil, errNotSupported
}
// getAutoTagScraper builds the built-in auto-tag scraper. Its spec advertises
// fragment scraping for scenes and galleries, and both the scene and the
// gallery implementation share one underlying autotagScraper.
func getAutoTagScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
	shared := &autotagScraper{
		txnManager:   txnManager,
		globalConfig: globalConfig,
	}

	// the same slice is handed to both the scene and the gallery spec
	fragmentOnly := []models.ScrapeType{
		models.ScrapeTypeFragment,
	}

	spec := &models.Scraper{
		ID:   autoTagScraperID,
		Name: autoTagScraperName,
		Scene: &models.ScraperSpec{
			SupportedScrapes: fragmentOnly,
		},
		Gallery: &models.ScraperSpec{
			SupportedScrapes: fragmentOnly,
		},
	}

	return scraper{
		ID:      autoTagScraperID,
		Spec:    spec,
		Scene:   &autotagSceneScraper{shared},
		Gallery: &autotagGalleryScraper{shared},
	}
}

View File

@@ -9,8 +9,6 @@ import (
"strings"
"gopkg.in/yaml.v2"
"github.com/stashapp/stash/pkg/models"
)
type config struct {
@@ -194,7 +192,7 @@ type scraperDriverOptions struct {
Headers []*header `yaml:"headers"`
}
func loadScraperFromYAML(id string, reader io.Reader) (*config, error) {
func loadConfigFromYAML(id string, reader io.Reader) (*config, error) {
ret := &config{}
parser := yaml.NewDecoder(reader)
@@ -213,7 +211,7 @@ func loadScraperFromYAML(id string, reader io.Reader) (*config, error) {
return ret, nil
}
func loadScraperFromYAMLFile(path string) (*config, error) {
func loadConfigFromYAMLFile(path string) (*config, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
@@ -224,7 +222,7 @@ func loadScraperFromYAMLFile(path string) (*config, error) {
id := filepath.Base(path)
id = id[:strings.LastIndex(id, ".")]
ret, err := loadScraperFromYAML(id, file)
ret, err := loadConfigFromYAML(id, file)
if err != nil {
return nil, err
}
@@ -234,78 +232,6 @@ func loadScraperFromYAMLFile(path string) (*config, error) {
return ret, nil
}
// toScraper builds the models.Scraper description for this config. For each
// of performer, scene, gallery and movie it collects the supported scrape
// types (and, for URL scraping, the configured URLs); a section is attached
// to the result only when at least one scrape type is supported.
func (c config) toScraper() *models.Scraper {
	// support records one more scrape type on the given spec
	support := func(spec *models.ScraperSpec, t models.ScrapeType) {
		spec.SupportedScrapes = append(spec.SupportedScrapes, t)
	}

	result := &models.Scraper{
		ID:   c.ID,
		Name: c.Name,
	}

	var performer models.ScraperSpec
	if c.PerformerByName != nil {
		support(&performer, models.ScrapeTypeName)
	}
	if c.PerformerByFragment != nil {
		support(&performer, models.ScrapeTypeFragment)
	}
	if len(c.PerformerByURL) > 0 {
		support(&performer, models.ScrapeTypeURL)
		for _, byURL := range c.PerformerByURL {
			performer.Urls = append(performer.Urls, byURL.URL...)
		}
	}
	if len(performer.SupportedScrapes) > 0 {
		result.Performer = &performer
	}

	var scene models.ScraperSpec
	if c.SceneByFragment != nil {
		support(&scene, models.ScrapeTypeFragment)
	}
	// name scraping needs both the name and the query-fragment scraper
	if c.SceneByName != nil && c.SceneByQueryFragment != nil {
		support(&scene, models.ScrapeTypeName)
	}
	if len(c.SceneByURL) > 0 {
		support(&scene, models.ScrapeTypeURL)
		for _, byURL := range c.SceneByURL {
			scene.Urls = append(scene.Urls, byURL.URL...)
		}
	}
	if len(scene.SupportedScrapes) > 0 {
		result.Scene = &scene
	}

	var gallery models.ScraperSpec
	if c.GalleryByFragment != nil {
		support(&gallery, models.ScrapeTypeFragment)
	}
	if len(c.GalleryByURL) > 0 {
		support(&gallery, models.ScrapeTypeURL)
		for _, byURL := range c.GalleryByURL {
			gallery.Urls = append(gallery.Urls, byURL.URL...)
		}
	}
	if len(gallery.SupportedScrapes) > 0 {
		result.Gallery = &gallery
	}

	var movie models.ScraperSpec
	if len(c.MovieByURL) > 0 {
		support(&movie, models.ScrapeTypeURL)
		for _, byURL := range c.MovieByURL {
			movie.Urls = append(movie.Urls, byURL.URL...)
		}
	}
	if len(movie.SupportedScrapes) > 0 {
		result.Movie = &movie
	}

	return result
}
// supportsPerformers reports whether the config defines any performer
// scraper: by name, by fragment, or at least one by URL.
func (c config) supportsPerformers() bool {
	switch {
	case c.PerformerByName != nil:
		return true
	case c.PerformerByFragment != nil:
		return true
	default:
		return len(c.PerformerByURL) > 0
	}
}
@@ -320,47 +246,6 @@ func (c config) matchesPerformerURL(url string) bool {
return false
}
// ScrapePerformerNames queries for performers by name using the scraper
// configured under PerformerByName. It returns nil, nil when no such scraper
// is configured.
func (c config) ScrapePerformerNames(name string, txnManager models.TransactionManager, globalConfig GlobalConfig) ([]*models.ScrapedPerformer, error) {
	if c.PerformerByName == nil {
		return nil, nil
	}

	impl := getScraper(*c.PerformerByName, txnManager, c, globalConfig)
	return impl.scrapePerformersByName(name)
}
// ScrapePerformer scrapes a performer from the provided input. The scraper
// configured under PerformerByFragment is used when present; otherwise, if
// the input carries a non-empty URL, scraping falls back to
// ScrapePerformerURL. It returns nil, nil when neither applies.
func (c config) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) {
	if byFragment := c.PerformerByFragment; byFragment != nil {
		impl := getScraper(*byFragment, txnManager, c, globalConfig)
		return impl.scrapePerformerByFragment(scrapedPerformer)
	}

	// try to match against URL if present
	performerURL := scrapedPerformer.URL
	if performerURL == nil || *performerURL == "" {
		return nil, nil
	}

	return c.ScrapePerformerURL(*performerURL, txnManager, globalConfig)
}
// ScrapePerformerURL tries each PerformerByURL scraper whose URL patterns
// match url, in configuration order. It returns the first non-nil result or
// the first error, and nil, nil when no scraper produced a result.
func (c config) ScrapePerformerURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) {
	for _, candidate := range c.PerformerByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		scraped, err := impl.scrapePerformerByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// supportsScenes reports whether the config defines any usable scene
// scraper: a name scraper together with a query-fragment scraper, a fragment
// scraper, or at least one URL scraper.
func (c config) supportsScenes() bool {
	switch {
	case c.SceneByName != nil && c.SceneByQueryFragment != nil:
		return true
	case c.SceneByFragment != nil:
		return true
	default:
		return len(c.SceneByURL) > 0
	}
}
@@ -401,103 +286,3 @@ func (c config) matchesMovieURL(url string) bool {
return false
}
// ScrapeSceneQuery queries for scenes by name using the scraper configured
// under SceneByName. It returns nil, nil when no such scraper is configured.
func (c config) ScrapeSceneQuery(name string, txnManager models.TransactionManager, globalConfig GlobalConfig) ([]*models.ScrapedScene, error) {
	if c.SceneByName == nil {
		return nil, nil
	}

	impl := getScraper(*c.SceneByName, txnManager, c, globalConfig)
	return impl.scrapeScenesByName(name)
}
// ScrapeSceneByScene scrapes using an existing scene via the scraper
// configured under SceneByFragment. It returns nil, nil when no such scraper
// is configured.
func (c config) ScrapeSceneByScene(scene *models.Scene, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
	if c.SceneByFragment == nil {
		return nil, nil
	}

	impl := getScraper(*c.SceneByFragment, txnManager, c, globalConfig)
	return impl.scrapeSceneByScene(scene)
}
// ScrapeSceneByFragment scrapes using the provided scene input via the
// scraper configured under SceneByQueryFragment. It returns nil, nil when no
// such scraper is configured.
func (c config) ScrapeSceneByFragment(scene models.ScrapedSceneInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
	if c.SceneByQueryFragment == nil {
		return nil, nil
	}

	impl := getScraper(*c.SceneByQueryFragment, txnManager, c, globalConfig)
	return impl.scrapeSceneByFragment(scene)
}
// ScrapeSceneURL tries each SceneByURL scraper whose URL patterns match url,
// in configuration order. It returns the first non-nil result or the first
// error, and nil, nil when no scraper produced a result.
func (c config) ScrapeSceneURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
	for _, candidate := range c.SceneByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		scraped, err := impl.scrapeSceneByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// ScrapeGalleryByGallery scrapes using an existing gallery via the scraper
// configured under GalleryByFragment. It returns nil, nil when no such
// scraper is configured.
func (c config) ScrapeGalleryByGallery(gallery *models.Gallery, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
	if c.GalleryByFragment == nil {
		return nil, nil
	}

	impl := getScraper(*c.GalleryByFragment, txnManager, c, globalConfig)
	return impl.scrapeGalleryByGallery(gallery)
}
// ScrapeGalleryByFragment scrapes using the provided gallery input via the
// scraper configured under GalleryByFragment. It returns nil, nil when no
// such scraper is configured.
func (c config) ScrapeGalleryByFragment(gallery models.ScrapedGalleryInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
	if c.GalleryByFragment == nil {
		return nil, nil
	}

	// TODO - this should be galleryByQueryFragment
	impl := getScraper(*c.GalleryByFragment, txnManager, c, globalConfig)
	return impl.scrapeGalleryByFragment(gallery)
}
// ScrapeGalleryURL tries each GalleryByURL scraper whose URL patterns match
// url, in configuration order. It returns the first non-nil result or the
// first error, and nil, nil when no scraper produced a result.
func (c config) ScrapeGalleryURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
	for _, candidate := range c.GalleryByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		scraped, err := impl.scrapeGalleryByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// ScrapeMovieURL tries each MovieByURL scraper whose URL patterns match url,
// in configuration order. It returns the first non-nil result or the first
// error, and nil, nil when no scraper produced a result.
func (c config) ScrapeMovieURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedMovie, error) {
	for _, candidate := range c.MovieByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		scraped, err := impl.scrapeMovieByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}

View File

@@ -0,0 +1,283 @@
package scraper
import "github.com/stashapp/stash/pkg/models"
// configSceneScraper is the scene-facing wrapper around the shared
// configScraper, which it embeds.
type configSceneScraper struct {
*configScraper
}
// matchesURL reports whether the wrapped config matches the given scene URL,
// as decided by config.matchesSceneURL.
func (c *configSceneScraper) matchesURL(url string) bool {
return c.config.matchesSceneURL(url)
}
// scrapeByName queries for scenes by name using the scraper configured under
// SceneByName. It returns nil, nil when no such scraper is configured.
func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
	typeConfig := c.config.SceneByName
	if typeConfig == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
	return impl.scrapeScenesByName(name)
}
// scrapeByScene scrapes using an existing scene via the scraper configured
// under SceneByFragment. It returns nil, nil when no such scraper is
// configured.
func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
	typeConfig := c.config.SceneByFragment
	if typeConfig == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
	return impl.scrapeSceneByScene(scene)
}
// scrapeByFragment scrapes using the provided scene input via the scraper
// configured under SceneByQueryFragment. It returns nil, nil when no such
// scraper is configured.
func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
	typeConfig := c.config.SceneByQueryFragment
	if typeConfig == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
	return impl.scrapeSceneByFragment(scene)
}
// scrapeByURL tries each SceneByURL scraper whose URL patterns match url, in
// configuration order. It returns the first non-nil result or the first
// error, and nil, nil when no scraper produced a result.
func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
	for _, candidate := range c.config.SceneByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		scraped, err := impl.scrapeSceneByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// configPerformerScraper is the performer-facing wrapper around the shared
// configScraper, which it embeds.
type configPerformerScraper struct {
*configScraper
}
// matchesURL reports whether the wrapped config matches the given performer
// URL, as decided by config.matchesPerformerURL.
func (c *configPerformerScraper) matchesURL(url string) bool {
return c.config.matchesPerformerURL(url)
}
// scrapeByName queries for performers by name using the scraper configured
// under PerformerByName. It returns nil, nil when no such scraper is
// configured.
func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) {
	typeConfig := c.config.PerformerByName
	if typeConfig == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
	return impl.scrapePerformersByName(name)
}
// scrapeByFragment scrapes a performer from the provided input. The scraper
// configured under PerformerByFragment is used when present; otherwise, if
// the input carries a non-empty URL, scraping falls back to scrapeByURL. It
// returns nil, nil when neither applies.
func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
	if typeConfig := c.config.PerformerByFragment; typeConfig != nil {
		impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
		return impl.scrapePerformerByFragment(scrapedPerformer)
	}

	// try to match against URL if present
	performerURL := scrapedPerformer.URL
	if performerURL == nil || *performerURL == "" {
		return nil, nil
	}

	return c.scrapeByURL(*performerURL)
}
// scrapeByURL tries each PerformerByURL scraper whose URL patterns match
// url, in configuration order. It returns the first non-nil result or the
// first error, and nil, nil when no scraper produced a result.
func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) {
	for _, candidate := range c.config.PerformerByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		scraped, err := impl.scrapePerformerByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// configGalleryScraper is the gallery-facing wrapper around the shared
// configScraper, which it embeds.
type configGalleryScraper struct {
*configScraper
}
// matchesURL reports whether the wrapped config matches the given gallery
// URL, as decided by config.matchesGalleryURL.
func (c *configGalleryScraper) matchesURL(url string) bool {
return c.config.matchesGalleryURL(url)
}
// scrapeByGallery scrapes using an existing gallery via the scraper
// configured under GalleryByFragment. It returns nil, nil when no such
// scraper is configured.
func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
	typeConfig := c.config.GalleryByFragment
	if typeConfig == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
	return impl.scrapeGalleryByGallery(gallery)
}
// scrapeByFragment scrapes using the provided gallery input via the scraper
// configured under GalleryByFragment. It returns nil, nil when no such
// scraper is configured.
func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
	typeConfig := c.config.GalleryByFragment
	if typeConfig == nil {
		return nil, nil
	}

	// TODO - this should be galleryByQueryFragment
	impl := c.config.getScraper(*typeConfig, c.txnManager, c.globalConfig)
	return impl.scrapeGalleryByFragment(gallery)
}
// scrapeByURL tries each GalleryByURL scraper whose URL patterns match url,
// in configuration order. It returns the first non-nil result or the first
// error, and nil, nil when no scraper produced a result.
func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
	for _, candidate := range c.config.GalleryByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		scraped, err := impl.scrapeGalleryByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// configMovieScraper is the movie-facing wrapper around the shared
// configScraper, which it embeds.
type configMovieScraper struct {
*configScraper
}
// matchesURL reports whether the wrapped config matches the given movie URL,
// as decided by config.matchesMovieURL.
func (c *configMovieScraper) matchesURL(url string) bool {
return c.config.matchesMovieURL(url)
}
// scrapeByURL tries each MovieByURL scraper whose URL patterns match url, in
// configuration order. It returns the first non-nil result or the first
// error, and nil, nil when no scraper produced a result.
func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) {
	for _, candidate := range c.config.MovieByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		scraped, err := impl.scrapeMovieByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scraped != nil:
			return scraped, nil
		}
	}

	return nil, nil
}
// configScraper bundles a scraper config with the dependencies that are
// passed to config.getScraper whenever a scrape is performed. It is embedded
// by the per-content-type config scrapers in this file.
type configScraper struct {
// config is the scraper definition the scrape methods consult.
config config
// txnManager and globalConfig are forwarded unchanged to config.getScraper.
txnManager models.TransactionManager
globalConfig GlobalConfig
}
// createScraperFromConfig wraps a config in a scraper value. The ID and spec
// are always populated; the Performer, Gallery, Movie and Scene
// implementations are set only when the config supports that content type,
// and they all share a single underlying configScraper.
func createScraperFromConfig(c config, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
	shared := &configScraper{
		config:       c,
		txnManager:   txnManager,
		globalConfig: globalConfig,
	}

	result := scraper{
		ID:   c.ID,
		Spec: configScraperSpec(c),
	}

	// unsupported content types keep a nil implementation
	if c.supportsPerformers() {
		result.Performer = &configPerformerScraper{shared}
	}

	if c.supportsGalleries() {
		result.Gallery = &configGalleryScraper{shared}
	}

	if c.supportsMovies() {
		result.Movie = &configMovieScraper{shared}
	}

	if c.supportsScenes() {
		result.Scene = &configSceneScraper{shared}
	}

	return result
}
// configScraperSpec builds the models.Scraper description for a config. For
// each of performer, scene, gallery and movie it collects the supported
// scrape types (and, for URL scraping, the configured URLs); a section is
// attached to the result only when at least one scrape type is supported.
func configScraperSpec(c config) *models.Scraper {
	// support records one more scrape type on the given spec
	support := func(spec *models.ScraperSpec, t models.ScrapeType) {
		spec.SupportedScrapes = append(spec.SupportedScrapes, t)
	}

	result := &models.Scraper{
		ID:   c.ID,
		Name: c.Name,
	}

	var performer models.ScraperSpec
	if c.PerformerByName != nil {
		support(&performer, models.ScrapeTypeName)
	}
	if c.PerformerByFragment != nil {
		support(&performer, models.ScrapeTypeFragment)
	}
	if len(c.PerformerByURL) > 0 {
		support(&performer, models.ScrapeTypeURL)
		for _, byURL := range c.PerformerByURL {
			performer.Urls = append(performer.Urls, byURL.URL...)
		}
	}
	if len(performer.SupportedScrapes) > 0 {
		result.Performer = &performer
	}

	var scene models.ScraperSpec
	if c.SceneByFragment != nil {
		support(&scene, models.ScrapeTypeFragment)
	}
	// name scraping needs both the name and the query-fragment scraper
	if c.SceneByName != nil && c.SceneByQueryFragment != nil {
		support(&scene, models.ScrapeTypeName)
	}
	if len(c.SceneByURL) > 0 {
		support(&scene, models.ScrapeTypeURL)
		for _, byURL := range c.SceneByURL {
			scene.Urls = append(scene.Urls, byURL.URL...)
		}
	}
	if len(scene.SupportedScrapes) > 0 {
		result.Scene = &scene
	}

	var gallery models.ScraperSpec
	if c.GalleryByFragment != nil {
		support(&gallery, models.ScrapeTypeFragment)
	}
	if len(c.GalleryByURL) > 0 {
		support(&gallery, models.ScrapeTypeURL)
		for _, byURL := range c.GalleryByURL {
			gallery.Urls = append(gallery.Urls, byURL.URL...)
		}
	}
	if len(gallery.SupportedScrapes) > 0 {
		result.Gallery = &gallery
	}

	var movie models.ScraperSpec
	if len(c.MovieByURL) > 0 {
		support(&movie, models.ScrapeTypeURL)
		for _, byURL := range c.MovieByURL {
			movie.Urls = append(movie.Urls, byURL.URL...)
		}
	}
	if len(movie.SupportedScrapes) > 0 {
		result.Movie = &movie
	}

	return result
}

View File

@@ -4,6 +4,7 @@ import (
"strings"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
)
// FreeonesScraperID is the scraper ID for the built-in Freeones scraper
@@ -122,13 +123,13 @@ xPathScrapers:
# Last updated April 13, 2021
`
func getFreeonesScraper() config {
func getFreeonesScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
yml := freeonesScraperConfig
scraper, err := loadScraperFromYAML(FreeonesScraperID, strings.NewReader(yml))
c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml))
if err != nil {
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
}
return *scraper
return createScraperFromConfig(*c, txnManager, globalConfig)
}

View File

@@ -1,109 +0,0 @@
package scraper
import (
"strconv"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/studio"
"github.com/stashapp/stash/pkg/tag"
)
// MatchScrapedPerformer looks up the scraped performer's name among the
// stored performers and sets StoredID when exactly one performer is found.
// A performer without a name, or a lookup with zero or several hits, leaves
// p untouched.
func MatchScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer) error {
	if p.Name == nil {
		return nil
	}

	names := []string{*p.Name}
	found, err := qb.FindByNames(names, true)
	if err != nil {
		return err
	}

	// only an unambiguous single hit counts as a match
	if len(found) == 1 {
		storedID := strconv.Itoa(found[0].ID)
		p.StoredID = &storedID
	}

	return nil
}
// MatchScrapedStudio looks up the scraped studio by name and, failing that,
// by alias, and sets StoredID when a studio is found. When neither lookup
// finds a studio, s is left untouched.
func MatchScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error {
	found, err := studio.ByName(qb, s.Name)
	if err != nil {
		return err
	}

	if found == nil {
		// no studio with that name: try matching by alias
		if found, err = studio.ByAlias(qb, s.Name); err != nil {
			return err
		}
	}

	if found != nil {
		storedID := strconv.Itoa(found.ID)
		s.StoredID = &storedID
	}

	return nil
}
// MatchScrapedMovie looks up the scraped movie's name among the stored
// movies and sets StoredID when exactly one movie is found. A movie without
// a name, or a lookup with zero or several hits, leaves m untouched.
func MatchScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error {
	if m.Name == nil {
		return nil
	}

	names := []string{*m.Name}
	found, err := qb.FindByNames(names, true)
	if err != nil {
		return err
	}

	// only an unambiguous single hit counts as a match
	if len(found) == 1 {
		storedID := strconv.Itoa(found[0].ID)
		m.StoredID = &storedID
	}

	return nil
}
// MatchScrapedTag looks up the scraped tag by name and, failing that, by
// alias, and sets StoredID when a tag is found. When neither lookup finds a
// tag, s is left untouched.
func MatchScrapedTag(qb models.TagReader, s *models.ScrapedTag) error {
	found, err := tag.ByName(qb, s.Name)
	if err != nil {
		return err
	}

	if found == nil {
		// no tag with that name: try matching by alias
		if found, err = tag.ByAlias(qb, s.Name); err != nil {
			return err
		}
	}

	if found != nil {
		storedID := strconv.Itoa(found.ID)
		s.StoredID = &storedID
	}

	return nil
}

51
pkg/scraper/scraper.go Normal file
View File

@@ -0,0 +1,51 @@
package scraper
import "github.com/stashapp/stash/pkg/models"
// urlMatcher is an optional capability of a scraper implementation: it
// reports whether the implementation handles a given URL. The matchesURL
// helper in this file detects it with a type assertion.
type urlMatcher interface {
matchesURL(url string) bool
}
// performerScraper is the set of performer scrape operations a scraper may
// provide: a query by name, a scrape from a partially filled-in performer
// input, and a scrape from a URL.
type performerScraper interface {
scrapeByName(name string) ([]*models.ScrapedPerformer, error)
scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
scrapeByURL(url string) (*models.ScrapedPerformer, error)
}
// sceneScraper is the set of scene scrape operations a scraper may provide:
// a query by name, a scrape based on an existing scene, a scrape from a
// scene input fragment, and a scrape from a URL.
type sceneScraper interface {
scrapeByName(name string) ([]*models.ScrapedScene, error)
scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error)
scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error)
scrapeByURL(url string) (*models.ScrapedScene, error)
}
// galleryScraper is the set of gallery scrape operations a scraper may
// provide: a scrape based on an existing gallery, a scrape from a gallery
// input fragment, and a scrape from a URL.
type galleryScraper interface {
scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error)
scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error)
scrapeByURL(url string) (*models.ScrapedGallery, error)
}
// movieScraper is the movie scrape operation a scraper may provide: a scrape
// from a URL.
type movieScraper interface {
scrapeByURL(url string) (*models.ScrapedMovie, error)
}
// scraper describes one scraper together with its per-content-type
// implementations.
type scraper struct {
// ID identifies the scraper.
ID string
// Spec is the models.Scraper description of this scraper.
Spec *models.Scraper
// Performer, Scene, Gallery and Movie hold the implementation for each
// content type. createScraperFromConfig leaves a field nil when the config
// does not support that content type.
Performer performerScraper
Scene sceneScraper
Gallery galleryScraper
Movie movieScraper
}
// matchesURL reports whether maybeURLMatcher implements urlMatcher and
// matches url. A nil value, or one that does not implement urlMatcher, never
// matches.
func matchesURL(maybeURLMatcher interface{}, url string) bool {
	// the comma-ok assertion yields ok == false for a nil interface value, so
	// no separate nil check is required
	matcher, ok := maybeURLMatcher.(urlMatcher)
	return ok && matcher.matchesURL(url)
}

View File

@@ -10,6 +10,7 @@ import (
"github.com/stashapp/stash/pkg/logger"
stash_config "github.com/stashapp/stash/pkg/manager/config"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/utils"
)
@@ -32,7 +33,7 @@ func isCDPPathWS(c GlobalConfig) bool {
// Cache stores scraper details.
type Cache struct {
scrapers []config
scrapers []scraper
globalConfig GlobalConfig
txnManager models.TransactionManager
}
@@ -44,7 +45,7 @@ type Cache struct {
// Scraper configurations are loaded from yml files in the provided scrapers
// directory and any subdirectories.
func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) {
scrapers, err := loadScrapers(globalConfig.GetScrapersPath())
scrapers, err := loadScrapers(globalConfig, txnManager)
if err != nil {
return nil, err
}
@@ -56,8 +57,9 @@ func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (
}, nil
}
func loadScrapers(path string) ([]config, error) {
scrapers := make([]config, 0)
func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManager) ([]scraper, error) {
path := globalConfig.GetScrapersPath()
scrapers := make([]scraper, 0)
logger.Debugf("Reading scraper configs from %s", path)
scraperFiles := []string{}
@@ -74,14 +76,15 @@ func loadScrapers(path string) ([]config, error) {
}
// add built-in freeones scraper
scrapers = append(scrapers, getFreeonesScraper())
scrapers = append(scrapers, getFreeonesScraper(txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig))
for _, file := range scraperFiles {
scraper, err := loadScraperFromYAMLFile(file)
c, err := loadConfigFromYAMLFile(file)
if err != nil {
logger.Errorf("Error loading scraper %s: %s", file, err.Error())
} else {
scrapers = append(scrapers, *scraper)
scraper := createScraperFromConfig(*c, txnManager, globalConfig)
scrapers = append(scrapers, scraper)
}
}
@@ -92,7 +95,7 @@ func loadScrapers(path string) ([]config, error) {
// In the event of an error during loading, the cache will be left empty.
func (c *Cache) ReloadScrapers() error {
c.scrapers = nil
scrapers, err := loadScrapers(c.globalConfig.GetScrapersPath())
scrapers, err := loadScrapers(c.globalConfig, c.txnManager)
if err != nil {
return err
}
@@ -114,8 +117,8 @@ func (c Cache) ListPerformerScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsPerformers() {
ret = append(ret, s.toScraper())
if s.Performer != nil {
ret = append(ret, s.Spec)
}
}
@@ -128,8 +131,8 @@ func (c Cache) ListSceneScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsScenes() {
ret = append(ret, s.toScraper())
if s.Scene != nil {
ret = append(ret, s.Spec)
}
}
@@ -142,8 +145,8 @@ func (c Cache) ListGalleryScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsGalleries() {
ret = append(ret, s.toScraper())
if s.Gallery != nil {
ret = append(ret, s.Spec)
}
}
@@ -156,15 +159,15 @@ func (c Cache) ListMovieScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsMovies() {
ret = append(ret, s.toScraper())
if s.Movie != nil {
ret = append(ret, s.Spec)
}
}
return ret
}
func (c Cache) findScraper(scraperID string) *config {
func (c Cache) findScraper(scraperID string) *scraper {
for _, s := range c.scrapers {
if s.ID == scraperID {
return &s
@@ -180,8 +183,8 @@ func (c Cache) findScraper(scraperID string) *config {
func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
return s.ScrapePerformerNames(query, c.txnManager, c.globalConfig)
if s != nil && s.Performer != nil {
return s.Performer.scrapeByName(query)
}
return nil, errors.New("Scraper with ID " + scraperID + " not found")
@@ -192,8 +195,8 @@ func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.Sc
func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
ret, err := s.ScrapePerformer(scrapedPerformer, c.txnManager, c.globalConfig)
if s != nil && s.Performer != nil {
ret, err := s.Performer.scrapeByFragment(scrapedPerformer)
if err != nil {
return nil, err
}
@@ -216,8 +219,8 @@ func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.Scraped
// the URL, then nil is returned.
func (c Cache) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
for _, s := range c.scrapers {
if s.matchesPerformerURL(url) {
ret, err := s.ScrapePerformerURL(url, c.txnManager, c.globalConfig)
if matchesURL(s.Performer, url) {
ret, err := s.Performer.scrapeByURL(url)
if err != nil {
return nil, err
}
@@ -289,13 +292,13 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error {
return err
}
if err := MatchScrapedPerformer(pqb, p); err != nil {
if err := match.ScrapedPerformer(pqb, p); err != nil {
return err
}
}
for _, p := range ret.Movies {
err := MatchScrapedMovie(mqb, p)
err := match.ScrapedMovie(mqb, p)
if err != nil {
return err
}
@@ -308,7 +311,7 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error {
ret.Tags = tags
if ret.Studio != nil {
err := MatchScrapedStudio(sqb, ret.Studio)
err := match.ScrapedStudio(sqb, ret.Studio)
if err != nil {
return err
}
@@ -334,7 +337,7 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
sqb := r.Studio()
for _, p := range ret.Performers {
err := MatchScrapedPerformer(pqb, p)
err := match.ScrapedPerformer(pqb, p)
if err != nil {
return err
}
@@ -347,7 +350,7 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
ret.Tags = tags
if ret.Studio != nil {
err := MatchScrapedStudio(sqb, ret.Studio)
err := match.ScrapedStudio(sqb, ret.Studio)
if err != nil {
return err
}
@@ -365,14 +368,14 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
if s != nil && s.Scene != nil {
// get scene from id
scene, err := getScene(sceneID, c.txnManager)
if err != nil {
return nil, err
}
ret, err := s.ScrapeSceneByScene(scene, c.txnManager, c.globalConfig)
ret, err := s.Scene.scrapeByScene(scene)
if err != nil {
return nil, err
@@ -397,8 +400,8 @@ func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene,
func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.ScrapedScene, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
return s.ScrapeSceneQuery(query, c.txnManager, c.globalConfig)
if s != nil && s.Scene != nil {
return s.Scene.scrapeByName(query)
}
return nil, errors.New("Scraper with ID " + scraperID + " not found")
@@ -408,8 +411,8 @@ func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.Scrap
func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
ret, err := s.ScrapeSceneByFragment(scene, c.txnManager, c.globalConfig)
if s != nil && s.Scene != nil {
ret, err := s.Scene.scrapeByFragment(scene)
if err != nil {
return nil, err
@@ -433,8 +436,8 @@ func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneIn
// the URL, then nil is returned.
func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
for _, s := range c.scrapers {
if s.matchesSceneURL(url) {
ret, err := s.ScrapeSceneURL(url, c.txnManager, c.globalConfig)
if matchesURL(s.Scene, url) {
ret, err := s.Scene.scrapeByURL(url)
if err != nil {
return nil, err
@@ -455,14 +458,14 @@ func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
// ScrapeGallery uses the scraper with the provided ID to scrape a gallery using existing data.
func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGallery, error) {
s := c.findScraper(scraperID)
if s != nil {
if s != nil && s.Gallery != nil {
// get gallery from id
gallery, err := getGallery(galleryID, c.txnManager)
if err != nil {
return nil, err
}
ret, err := s.ScrapeGalleryByGallery(gallery, c.txnManager, c.globalConfig)
ret, err := s.Gallery.scrapeByGallery(gallery)
if err != nil {
return nil, err
@@ -484,8 +487,8 @@ func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGa
// ScrapeGalleryFragment uses the scraper with the provided ID to scrape a gallery.
func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
s := c.findScraper(scraperID)
if s != nil {
ret, err := s.ScrapeGalleryByFragment(gallery, c.txnManager, c.globalConfig)
if s != nil && s.Gallery != nil {
ret, err := s.Gallery.scrapeByFragment(gallery)
if err != nil {
return nil, err
@@ -509,8 +512,8 @@ func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGal
// the URL, then nil is returned.
func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) {
for _, s := range c.scrapers {
if s.matchesGalleryURL(url) {
ret, err := s.ScrapeGalleryURL(url, c.txnManager, c.globalConfig)
if matchesURL(s.Gallery, url) {
ret, err := s.Gallery.scrapeByURL(url)
if err != nil {
return nil, err
@@ -533,15 +536,15 @@ func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) {
// the URL, then nil is returned.
func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
for _, s := range c.scrapers {
if s.matchesMovieURL(url) {
ret, err := s.ScrapeMovieURL(url, c.txnManager, c.globalConfig)
if s.Movie != nil && matchesURL(s.Movie, url) {
ret, err := s.Movie.scrapeByURL(url)
if err != nil {
return nil, err
}
if ret.Studio != nil {
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
return MatchScrapedStudio(r.Studio(), ret.Studio)
return match.ScrapedStudio(r.Studio(), ret.Studio)
}); err != nil {
return nil, err
}
@@ -587,7 +590,7 @@ ScrapeTag:
}
}
err := MatchScrapedTag(tqb, t)
err := match.ScrapedTag(tqb, t)
if err != nil {
return nil, err
}

View File

@@ -12,8 +12,8 @@ import (
"github.com/Yamashou/gqlgenc/client"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/scraper"
"github.com/stashapp/stash/pkg/scraper/stashbox/graphql"
"github.com/stashapp/stash/pkg/utils"
)
@@ -644,7 +644,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq
RemoteSiteID: &studioID,
}
err := scraper.MatchScrapedStudio(r.Studio(), ss.Studio)
err := match.ScrapedStudio(r.Studio(), ss.Studio)
if err != nil {
return err
}
@@ -653,7 +653,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq
for _, p := range s.Performers {
sp := performerFragmentToScrapedScenePerformer(p.Performer)
err := scraper.MatchScrapedPerformer(pqb, sp)
err := match.ScrapedPerformer(pqb, sp)
if err != nil {
return err
}
@@ -666,7 +666,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq
Name: t.Name,
}
err := scraper.MatchScrapedTag(tqb, st)
err := match.ScrapedTag(tqb, st)
if err != nil {
return err
}

View File

@@ -874,7 +874,8 @@ xPathScrapers:
globalConfig := mockGlobalConfig{}
performer, err := c.ScrapePerformerURL(ts.URL, nil, globalConfig)
s := createScraperFromConfig(*c, nil, globalConfig)
performer, err := s.Performer.scrapeByURL(ts.URL)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())