mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 04:14:39 +03:00
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers Replace ScrapePerformerByURL, ScrapeMovie..., ... with ScrapeByURL in the scraperActionImpl interface. This allows us to delete a lot of repeated code in the scrapers and replace the central part with a switch on the scraper type. * Fold name scraping into one call Follow up on scraper refactoring. Name scrapers use the same code path. This allows us to restructure some code and kill some functions, adding variance to the name scraping code. It allows us to remove some code repetition as well. * Do not export loop refs. * Simplify fragment scraping Generalize fragment scrapers into ScrapeByFragment. This simplifies fragment code flows into a simpler pathing which should be easier to handle in the future. * Eliminate more context.TODO() In a number of cases, we have a context now. Use the context rather than TODO() for those cases in order to make those operations cancellable. * Pass the context for the stashbox scraper This removes all context.TODO() in the path of the stashbox scraper, and replaces it with the context that's present on each of the paths. * Pass the context into subscrapers Mostly a mechanical update, where we pass in the context for subscraping. This removes the final context.TODO() in the scraper code. * Warn on unknown fields from scripts A common mistake for new script writers is that they return fields not known to stash. For instance the name "description" is used rather than "details". Decode disallowing unknown fields. If this fails, use a tee-reader to fall back to the old behavior, but print a warning for the user in this case. Thus, we retain the old behavior, but print warnings for scripts which fail the more strict unknown-fields detection. * Nil-check before running the postprocessing chain Fixes panics when scraping returns nil values. * Lift nil-ness in post-postprocessing If the struct we are trying to post-process is nil, we shouldn't enter the postprocessing flow at all. 
Pass the struct as a value rather than a pointer, eliminating nil-checks as we go. Use the top-level postProcess call to make the nil-check and then abort there if the object we are looking at is nil. * Allow conversion routines to handle values If we have a non-pointer type in the interface, we should also convert those into ScrapedContent. Otherwise we get errors on deprecated functions.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
@@ -18,7 +19,7 @@ import (
|
||||
|
||||
type mappedQuery interface {
|
||||
runQuery(selector string) ([]string, error)
|
||||
subScrape(value string) mappedQuery
|
||||
subScrape(ctx context.Context, value string) mappedQuery
|
||||
}
|
||||
|
||||
type commonMappedConfig map[string]string
|
||||
@@ -38,7 +39,7 @@ func (s mappedConfig) applyCommon(c commonMappedConfig, src string) string {
|
||||
return ret
|
||||
}
|
||||
|
||||
func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedResults {
|
||||
func (s mappedConfig) process(ctx context.Context, q mappedQuery, common commonMappedConfig) mappedResults {
|
||||
var ret mappedResults
|
||||
|
||||
for k, attrConfig := range s {
|
||||
@@ -57,7 +58,7 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe
|
||||
}
|
||||
|
||||
if len(found) > 0 {
|
||||
result := s.postProcess(q, attrConfig, found)
|
||||
result := s.postProcess(ctx, q, attrConfig, found)
|
||||
for i, text := range result {
|
||||
ret = ret.setKey(i, k, text)
|
||||
}
|
||||
@@ -68,12 +69,12 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe
|
||||
return ret
|
||||
}
|
||||
|
||||
func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string {
|
||||
func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string {
|
||||
// check if we're concatenating the results into a single result
|
||||
var ret []string
|
||||
if attrConfig.hasConcat() {
|
||||
result := attrConfig.concatenateResults(found)
|
||||
result = attrConfig.postProcess(result, q)
|
||||
result = attrConfig.postProcess(ctx, result, q)
|
||||
if attrConfig.hasSplit() {
|
||||
results := attrConfig.splitString(result)
|
||||
results = attrConfig.cleanResults(results)
|
||||
@@ -83,7 +84,7 @@ func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrCon
|
||||
ret = []string{result}
|
||||
} else {
|
||||
for _, text := range found {
|
||||
text = attrConfig.postProcess(text, q)
|
||||
text = attrConfig.postProcess(ctx, text, q)
|
||||
if attrConfig.hasSplit() {
|
||||
return attrConfig.splitString(text)
|
||||
}
|
||||
@@ -359,12 +360,12 @@ func (c mappedRegexConfigs) apply(value string) string {
|
||||
}
|
||||
|
||||
type postProcessAction interface {
|
||||
Apply(value string, q mappedQuery) string
|
||||
Apply(ctx context.Context, value string, q mappedQuery) string
|
||||
}
|
||||
|
||||
type postProcessParseDate string
|
||||
|
||||
func (p *postProcessParseDate) Apply(value string, q mappedQuery) string {
|
||||
func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
parseDate := string(*p)
|
||||
|
||||
const internalDateFormat = "2006-01-02"
|
||||
@@ -396,7 +397,7 @@ func (p *postProcessParseDate) Apply(value string, q mappedQuery) string {
|
||||
|
||||
type postProcessSubtractDays bool
|
||||
|
||||
func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string {
|
||||
func (p *postProcessSubtractDays) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
const internalDateFormat = "2006-01-02"
|
||||
|
||||
i, err := strconv.Atoi(value)
|
||||
@@ -412,18 +413,18 @@ func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string {
|
||||
|
||||
type postProcessReplace mappedRegexConfigs
|
||||
|
||||
func (c *postProcessReplace) Apply(value string, q mappedQuery) string {
|
||||
func (c *postProcessReplace) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
replace := mappedRegexConfigs(*c)
|
||||
return replace.apply(value)
|
||||
}
|
||||
|
||||
type postProcessSubScraper mappedScraperAttrConfig
|
||||
|
||||
func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
|
||||
func (p *postProcessSubScraper) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
subScrapeConfig := mappedScraperAttrConfig(*p)
|
||||
|
||||
logger.Debugf("Sub-scraping for: %s", value)
|
||||
ss := q.subScrape(value)
|
||||
ss := q.subScrape(ctx, value)
|
||||
|
||||
if ss != nil {
|
||||
found, err := ss.runQuery(subScrapeConfig.Selector)
|
||||
@@ -440,7 +441,7 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
|
||||
result = found[0]
|
||||
}
|
||||
|
||||
result = subScrapeConfig.postProcess(result, ss)
|
||||
result = subScrapeConfig.postProcess(ctx, result, ss)
|
||||
return result
|
||||
}
|
||||
}
|
||||
@@ -450,7 +451,7 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
|
||||
|
||||
type postProcessMap map[string]string
|
||||
|
||||
func (p *postProcessMap) Apply(value string, q mappedQuery) string {
|
||||
func (p *postProcessMap) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
// return the mapped value if present
|
||||
m := *p
|
||||
mapped, ok := m[value]
|
||||
@@ -464,7 +465,7 @@ func (p *postProcessMap) Apply(value string, q mappedQuery) string {
|
||||
|
||||
type postProcessFeetToCm bool
|
||||
|
||||
func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string {
|
||||
func (p *postProcessFeetToCm) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
const foot_in_cm = 30.48
|
||||
const inch_in_cm = 2.54
|
||||
|
||||
@@ -488,7 +489,7 @@ func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string {
|
||||
|
||||
type postProcessLbToKg bool
|
||||
|
||||
func (p *postProcessLbToKg) Apply(value string, q mappedQuery) string {
|
||||
func (p *postProcessLbToKg) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
const lb_in_kg = 0.45359237
|
||||
w, err := strconv.ParseFloat(value, 64)
|
||||
if err == nil {
|
||||
@@ -690,9 +691,9 @@ func (c mappedScraperAttrConfig) splitString(value string) []string {
|
||||
return res
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) postProcess(value string, q mappedQuery) string {
|
||||
func (c mappedScraperAttrConfig) postProcess(ctx context.Context, value string, q mappedQuery) string {
|
||||
for _, action := range c.postProcessActions {
|
||||
value = action.Apply(value, q)
|
||||
value = action.Apply(ctx, value, q)
|
||||
}
|
||||
|
||||
return value
|
||||
@@ -748,7 +749,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults
|
||||
return r
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer, error) {
|
||||
func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) {
|
||||
var ret models.ScrapedPerformer
|
||||
|
||||
performerMap := s.Performer
|
||||
@@ -758,14 +759,14 @@ func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer,
|
||||
|
||||
performerTagsMap := performerMap.Tags
|
||||
|
||||
results := performerMap.process(q, s.Common)
|
||||
results := performerMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
|
||||
// now apply the tags
|
||||
if performerTagsMap != nil {
|
||||
logger.Debug(`Processing performer tags:`)
|
||||
tagResults := performerTagsMap.process(q, s.Common)
|
||||
tagResults := performerTagsMap.process(ctx, q, s.Common)
|
||||
|
||||
for _, p := range tagResults {
|
||||
tag := &models.ScrapedTag{}
|
||||
@@ -778,7 +779,7 @@ func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer,
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerformer, error) {
|
||||
func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) {
|
||||
var ret []*models.ScrapedPerformer
|
||||
|
||||
performerMap := s.Performer
|
||||
@@ -786,7 +787,7 @@ func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerform
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
results := performerMap.process(q, s.Common)
|
||||
results := performerMap.process(ctx, q, s.Common)
|
||||
for _, r := range results {
|
||||
var p models.ScrapedPerformer
|
||||
r.apply(&p)
|
||||
@@ -796,7 +797,7 @@ func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerform
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.ScrapedScene {
|
||||
func (s mappedScraper) processScene(ctx context.Context, q mappedQuery, r mappedResult) *models.ScrapedScene {
|
||||
var ret models.ScrapedScene
|
||||
|
||||
sceneScraperConfig := s.Scene
|
||||
@@ -813,13 +814,13 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
|
||||
// process performer tags once
|
||||
var performerTagResults mappedResults
|
||||
if scenePerformerTagsMap != nil {
|
||||
performerTagResults = scenePerformerTagsMap.process(q, s.Common)
|
||||
performerTagResults = scenePerformerTagsMap.process(ctx, q, s.Common)
|
||||
}
|
||||
|
||||
// now apply the performers and tags
|
||||
if scenePerformersMap.mappedConfig != nil {
|
||||
logger.Debug(`Processing scene performers:`)
|
||||
performerResults := scenePerformersMap.process(q, s.Common)
|
||||
performerResults := scenePerformersMap.process(ctx, q, s.Common)
|
||||
|
||||
for _, p := range performerResults {
|
||||
performer := &models.ScrapedPerformer{}
|
||||
@@ -837,7 +838,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
|
||||
|
||||
if sceneTagsMap != nil {
|
||||
logger.Debug(`Processing scene tags:`)
|
||||
tagResults := sceneTagsMap.process(q, s.Common)
|
||||
tagResults := sceneTagsMap.process(ctx, q, s.Common)
|
||||
|
||||
for _, p := range tagResults {
|
||||
tag := &models.ScrapedTag{}
|
||||
@@ -848,7 +849,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
|
||||
|
||||
if sceneStudioMap != nil {
|
||||
logger.Debug(`Processing scene studio:`)
|
||||
studioResults := sceneStudioMap.process(q, s.Common)
|
||||
studioResults := sceneStudioMap.process(ctx, q, s.Common)
|
||||
|
||||
if len(studioResults) > 0 {
|
||||
studio := &models.ScrapedStudio{}
|
||||
@@ -859,7 +860,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
|
||||
|
||||
if sceneMoviesMap != nil {
|
||||
logger.Debug(`Processing scene movies:`)
|
||||
movieResults := sceneMoviesMap.process(q, s.Common)
|
||||
movieResults := sceneMoviesMap.process(ctx, q, s.Common)
|
||||
|
||||
for _, p := range movieResults {
|
||||
movie := &models.ScrapedMovie{}
|
||||
@@ -871,7 +872,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
|
||||
return &ret
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, error) {
|
||||
func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*models.ScrapedScene, error) {
|
||||
var ret []*models.ScrapedScene
|
||||
|
||||
sceneScraperConfig := s.Scene
|
||||
@@ -881,16 +882,16 @@ func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, erro
|
||||
}
|
||||
|
||||
logger.Debug(`Processing scenes:`)
|
||||
results := sceneMap.process(q, s.Common)
|
||||
results := sceneMap.process(ctx, q, s.Common)
|
||||
for _, r := range results {
|
||||
logger.Debug(`Processing scene:`)
|
||||
ret = append(ret, s.processScene(q, r))
|
||||
ret = append(ret, s.processScene(ctx, q, r))
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error) {
|
||||
func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) {
|
||||
var ret models.ScrapedScene
|
||||
|
||||
sceneScraperConfig := s.Scene
|
||||
@@ -900,16 +901,16 @@ func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error)
|
||||
}
|
||||
|
||||
logger.Debug(`Processing scene:`)
|
||||
results := sceneMap.process(q, s.Common)
|
||||
results := sceneMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
ss := s.processScene(q, results[0])
|
||||
ss := s.processScene(ctx, q, results[0])
|
||||
ret = *ss
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, error) {
|
||||
func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) {
|
||||
var ret models.ScrapedGallery
|
||||
|
||||
galleryScraperConfig := s.Gallery
|
||||
@@ -923,14 +924,14 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
|
||||
galleryStudioMap := galleryScraperConfig.Studio
|
||||
|
||||
logger.Debug(`Processing gallery:`)
|
||||
results := galleryMap.process(q, s.Common)
|
||||
results := galleryMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
|
||||
// now apply the performers and tags
|
||||
if galleryPerformersMap != nil {
|
||||
logger.Debug(`Processing gallery performers:`)
|
||||
performerResults := galleryPerformersMap.process(q, s.Common)
|
||||
performerResults := galleryPerformersMap.process(ctx, q, s.Common)
|
||||
|
||||
for _, p := range performerResults {
|
||||
performer := &models.ScrapedPerformer{}
|
||||
@@ -941,7 +942,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
|
||||
|
||||
if galleryTagsMap != nil {
|
||||
logger.Debug(`Processing gallery tags:`)
|
||||
tagResults := galleryTagsMap.process(q, s.Common)
|
||||
tagResults := galleryTagsMap.process(ctx, q, s.Common)
|
||||
|
||||
for _, p := range tagResults {
|
||||
tag := &models.ScrapedTag{}
|
||||
@@ -952,7 +953,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
|
||||
|
||||
if galleryStudioMap != nil {
|
||||
logger.Debug(`Processing gallery studio:`)
|
||||
studioResults := galleryStudioMap.process(q, s.Common)
|
||||
studioResults := galleryStudioMap.process(ctx, q, s.Common)
|
||||
|
||||
if len(studioResults) > 0 {
|
||||
studio := &models.ScrapedStudio{}
|
||||
@@ -965,7 +966,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) {
|
||||
func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) {
|
||||
var ret models.ScrapedMovie
|
||||
|
||||
movieScraperConfig := s.Movie
|
||||
@@ -976,13 +977,13 @@ func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error)
|
||||
|
||||
movieStudioMap := movieScraperConfig.Studio
|
||||
|
||||
results := movieMap.process(q, s.Common)
|
||||
results := movieMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
|
||||
if movieStudioMap != nil {
|
||||
logger.Debug(`Processing movie studio:`)
|
||||
studioResults := movieStudioMap.process(q, s.Common)
|
||||
studioResults := movieStudioMap.process(ctx, q, s.Common)
|
||||
|
||||
if len(studioResults) > 0 {
|
||||
studio := &models.ScrapedStudio{}
|
||||
|
||||
Reference in New Issue
Block a user