From dd0fa4834506e0556fe9e84f7db95ab21ae6ec4b Mon Sep 17 00:00:00 2001
From: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
Date: Sun, 20 Mar 2022 19:46:12 +1100
Subject: [PATCH] Move tag exclusion to scrape query resolver (#2391)

---
 internal/api/resolver_query_scraper.go     | 111 ++++++++++++++++--
 pkg/scraper/cache.go                       |   1 -
 pkg/scraper/postprocessing.go              |  37 +-----
 .../components/Changelog/versions/v0140.md |   1 +
 4 files changed, 104 insertions(+), 46 deletions(-)

diff --git a/internal/api/resolver_query_scraper.go b/internal/api/resolver_query_scraper.go
index cafe05f85..2208628d5 100644
--- a/internal/api/resolver_query_scraper.go
+++ b/internal/api/resolver_query_scraper.go
@@ -4,12 +4,17 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"regexp"
 	"strconv"
+	"strings"
 
+	"github.com/stashapp/stash/internal/manager"
 	"github.com/stashapp/stash/internal/manager/config"
+	"github.com/stashapp/stash/pkg/logger"
 	"github.com/stashapp/stash/pkg/models"
 	"github.com/stashapp/stash/pkg/scraper"
 	"github.com/stashapp/stash/pkg/scraper/stashbox"
+	"github.com/stashapp/stash/pkg/sliceutil/stringslice"
 )
 
 func (r *queryResolver) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
@@ -99,7 +104,13 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string,
 		return nil, err
 	}
 
-	return marshalScrapedScenes(content)
+	ret, err := marshalScrapedScenes(content)
+	if err != nil {
+		return nil, err
+	}
+
+	filterSceneTags(ret)
+	return ret, nil
 }
 
 func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
@@ -113,7 +124,59 @@
 		return nil, err
 	}
 
-	return marshalScrapedScene(content)
+	ret, err := marshalScrapedScene(content)
+	if err != nil {
+		return nil, err
+	}
+
+	filterSceneTags([]*models.ScrapedScene{ret})
+
+	return ret, nil
+}
+
+// filterSceneTags removes tags matching excluded tag patterns from the provided scraped scenes
+func filterSceneTags(scenes []*models.ScrapedScene) {
+	excludePatterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns()
+	var excludeRegexps []*regexp.Regexp
+
+	for _, excludePattern := range excludePatterns {
+		reg, err := regexp.Compile(strings.ToLower(excludePattern))
+		if err != nil {
+			logger.Errorf("Invalid tag exclusion pattern: %v", err)
+		} else {
+			excludeRegexps = append(excludeRegexps, reg)
+		}
+	}
+
+	if len(excludeRegexps) == 0 {
+		return
+	}
+
+	var ignoredTags []string
+
+	for _, s := range scenes {
+		var newTags []*models.ScrapedTag
+		for _, t := range s.Tags {
+			ignore := false
+			for _, reg := range excludeRegexps {
+				if reg.MatchString(strings.ToLower(t.Name)) {
+					ignore = true
+					ignoredTags = stringslice.StrAppendUnique(ignoredTags, t.Name)
+					break
+				}
+			}
+
+			if !ignore {
+				newTags = append(newTags, t)
+			}
+		}
+
+		s.Tags = newTags
+	}
+
+	if len(ignoredTags) > 0 {
+		logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
+	}
 }
 
 func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
@@ -122,7 +185,14 @@
 		return nil, err
 	}
 
-	return marshalScrapedScene(content)
+	ret, err := marshalScrapedScene(content)
+	if err != nil {
+		return nil, err
+	}
+
+	filterSceneTags([]*models.ScrapedScene{ret})
+
+	return ret, nil
 }
 
 func (r *queryResolver) ScrapeGallery(ctx context.Context, scraperID string, gallery models.GalleryUpdateInput) (*models.ScrapedGallery, error) {
@@ -208,10 +278,13 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) {
 }
 
 func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleSceneInput) ([]*models.ScrapedScene, error) {
-	if source.ScraperID != nil {
+	var ret []*models.ScrapedScene
+
+	switch {
+	case source.ScraperID != nil:
+		var err error
 		var c models.ScrapedContent
 		var content []models.ScrapedContent
-		var err error
 
 		switch {
 		case input.SceneID != nil:
@@ -239,23 +312,35 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
 			return nil, err
 		}
 
-		return marshalScrapedScenes(content)
-	} else if source.StashBoxIndex != nil {
+		ret, err = marshalScrapedScenes(content)
+		if err != nil {
+			return nil, err
+		}
+	case source.StashBoxIndex != nil:
 		client, err := r.getStashBoxClient(*source.StashBoxIndex)
 		if err != nil {
 			return nil, err
 		}
 
-		if input.SceneID != nil {
-			return client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
-		} else if input.Query != nil {
-			return client.QueryStashBoxScene(ctx, *input.Query)
+		switch {
+		case input.SceneID != nil:
+			ret, err = client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
+		case input.Query != nil:
+			ret, err = client.QueryStashBoxScene(ctx, *input.Query)
+		default:
+			return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
 		}
 
-		return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
+		if err != nil {
+			return nil, err
+		}
+	default:
+		return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
 	}
 
-	return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
+	filterSceneTags(ret)
+
+	return ret, nil
 }
 
 func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeMultiScenesInput) ([][]*models.ScrapedScene, error) {
diff --git a/pkg/scraper/cache.go b/pkg/scraper/cache.go
index e48cc51c2..4b1d67d35 100644
--- a/pkg/scraper/cache.go
+++ b/pkg/scraper/cache.go
@@ -36,7 +36,6 @@ type GlobalConfig interface {
 	GetScrapersPath() string
 	GetScraperCDPPath() string
 	GetScraperCertCheck() bool
-	GetScraperExcludeTagPatterns() []string
 }
 
 func isCDPPathHTTP(c GlobalConfig) bool {
diff --git a/pkg/scraper/postprocessing.go b/pkg/scraper/postprocessing.go
index 4be2ea0ce..731769310 100644
--- a/pkg/scraper/postprocessing.go
+++ b/pkg/scraper/postprocessing.go
@@ -2,8 +2,6 @@ package scraper
 
 import (
 	"context"
-	"regexp"
-	"strings"
 
 	"github.com/stashapp/stash/pkg/logger"
 	"github.com/stashapp/stash/pkg/match"
@@ -50,7 +48,7 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme
 	if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
 		tqb := r.Tag()
 
-		tags, err := postProcessTags(c.globalConfig, tqb, p.Tags)
+		tags, err := postProcessTags(tqb, p.Tags)
 		if err != nil {
 			return err
 		}
@@ -93,7 +91,7 @@ func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPer
 	if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
 		tqb := r.Tag()
 
-		tags, err := postProcessTags(c.globalConfig, tqb, p.Tags)
+		tags, err := postProcessTags(tqb, p.Tags)
 		if err != nil {
 			return err
 		}
@@ -135,7 +133,7 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (
 		}
 	}
 
-	tags, err := postProcessTags(c.globalConfig, tqb, scene.Tags)
+	tags, err := postProcessTags(tqb, scene.Tags)
 	if err != nil {
 		return err
 	}
@@ -174,7 +172,7 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (
 		}
 	}
 
-	tags, err := postProcessTags(c.globalConfig, tqb, g.Tags)
+	tags, err := postProcessTags(tqb, g.Tags)
 	if err != nil {
 		return err
 	}
@@ -195,31 +193,10 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (
 	return g, nil
 }
 
-func postProcessTags(globalConfig GlobalConfig, tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
+func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
 	var ret []*models.ScrapedTag
 
-	excludePatterns := globalConfig.GetScraperExcludeTagPatterns()
-	var excludeRegexps []*regexp.Regexp
-
-	for _, excludePattern := range excludePatterns {
-		reg, err := regexp.Compile(strings.ToLower(excludePattern))
-		if err != nil {
-			logger.Errorf("Invalid tag exclusion pattern :%v", err)
-		} else {
-			excludeRegexps = append(excludeRegexps, reg)
-		}
-	}
-
-	var ignoredTags []string
-ScrapeTag:
 	for _, t := range scrapedTags {
-		for _, reg := range excludeRegexps {
-			if reg.MatchString(strings.ToLower(t.Name)) {
-				ignoredTags = append(ignoredTags, t.Name)
-				continue ScrapeTag
-			}
-		}
-
 		err := match.ScrapedTag(tqb, t)
 		if err != nil {
 			return nil, err
@@ -227,9 +204,5 @@ ScrapeTag:
 		ret = append(ret, t)
 	}
 
-	if len(ignoredTags) > 0 {
-		logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
-	}
-
 	return ret, nil
 }
diff --git a/ui/v2.5/src/components/Changelog/versions/v0140.md b/ui/v2.5/src/components/Changelog/versions/v0140.md
index 2ff70066b..7ac520263 100644
--- a/ui/v2.5/src/components/Changelog/versions/v0140.md
+++ b/ui/v2.5/src/components/Changelog/versions/v0140.md
@@ -3,6 +3,7 @@
 * Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))
 
 ### 🐛 Bug fixes
+* Perform tag pattern exclusion on stash-box sources. ([#2391](https://github.com/stashapp/stash/pull/2391))
 * Don't generate jpg thumbnails for animated webp files. ([#2388](https://github.com/stashapp/stash/pull/2388))
 * Removed warnings and incorrect error message in json scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
 * Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
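
The new filterSceneTags helper added by this patch compiles each configured exclusion pattern case-insensitively (by lower-casing both the pattern and the tag name) and drops matching tags from every scraped scene before the result is returned. Below is a minimal, self-contained sketch of that matching behaviour; the scrapedTag type, the filterTags helper, and the example patterns are simplified stand-ins for illustration only, not code from this patch or from stash itself.

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// scrapedTag is a simplified stand-in for the project's scraped tag model.
type scrapedTag struct {
	Name string
}

// filterTags mirrors the matching logic used by filterSceneTags in the patch:
// each pattern is lower-cased and compiled, invalid patterns are skipped, and
// a tag is dropped when any compiled pattern matches its lower-cased name.
func filterTags(tags []scrapedTag, excludePatterns []string) (kept []scrapedTag, ignored []string) {
	var excludeRegexps []*regexp.Regexp
	for _, p := range excludePatterns {
		reg, err := regexp.Compile(strings.ToLower(p))
		if err != nil {
			// the resolver logs invalid patterns and continues; here we just skip them
			continue
		}
		excludeRegexps = append(excludeRegexps, reg)
	}

	for _, t := range tags {
		matched := false
		for _, reg := range excludeRegexps {
			if reg.MatchString(strings.ToLower(t.Name)) {
				matched = true
				ignored = append(ignored, t.Name)
				break
			}
		}
		if !matched {
			kept = append(kept, t)
		}
	}
	return kept, ignored
}

func main() {
	tags := []scrapedTag{{Name: "Keep Me"}, {Name: "Watermarked"}, {Name: "HD"}}
	// hypothetical exclusion patterns; in stash these come from the configured
	// scraper tag-exclusion patterns (GetScraperExcludeTagPatterns).
	patterns := []string{"watermark.*", "^hd$"}

	kept, ignored := filterTags(tags, patterns)
	fmt.Println("kept:", kept)       // kept: [{Keep Me}]
	fmt.Println("ignored:", ignored) // ignored: [Watermarked HD]
}
```

Unlike this sketch, the helper in the patch also de-duplicates the ignored tag names via stringslice.StrAppendUnique, logs them at debug level, and returns early when no valid patterns are configured; moving that work into the query resolver is what lets the exclusion apply to stash-box results as well as scraper results.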