Use tag exclusions when identifying scenes (#5686)

* Move tag exclusion code back into scraper package

Reverts #2391

* Rearrange stash box client code
* Filter excluded tags in stashbox queries

Re-application of fix for #2379
Author: WithoutPants
Date: 2025-02-27 09:07:02 +11:00 (committed by GitHub)
Commit: ccf79d077f (parent f23450c380)
14 changed files with 1617 additions and 1575 deletions
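
The tag-exclusion code being relocated (visible in the removed hunks below) compiles each configured pattern in lower case and drops any scraped tag whose lower-cased name matches; unanchored patterns therefore also match substrings of a tag name. A minimal standalone sketch of that matching behaviour, using hypothetical patterns and tag names (not stash's actual configuration or types):

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Hypothetical exclusion patterns from configuration.
	patterns := []string{"^4k$", "quality"}

	// Compile patterns in lower case, skipping invalid ones,
	// as the resolver code in the diff below does.
	var excl []*regexp.Regexp
	for _, p := range patterns {
		if re, err := regexp.Compile(strings.ToLower(p)); err == nil {
			excl = append(excl, re)
		}
	}

	// Keep only tags whose lower-cased name matches no exclusion pattern.
	tags := []string{"4K", "Outdoor", "High Quality"}
	var kept []string
	for _, t := range tags {
		excluded := false
		for _, re := range excl {
			if re.MatchString(strings.ToLower(t)) {
				excluded = true
				break
			}
		}
		if !excluded {
			kept = append(kept, t)
		}
	}
	fmt.Println(kept) // [Outdoor] — "High Quality" is dropped by the unanchored "quality" pattern
}
```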

@@ -4,16 +4,11 @@ import (
	"context"
	"errors"
	"fmt"
	"regexp"
	"strconv"
	"strings"

	"github.com/stashapp/stash/internal/manager"
	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/scraper"
	"github.com/stashapp/stash/pkg/scraper/stashbox"
	"github.com/stashapp/stash/pkg/sliceutil"
	"github.com/stashapp/stash/pkg/sliceutil/stringslice"
)

@@ -49,118 +44,9 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string,
		return nil, err
	}
	filterSceneTags(ret)
	return ret, nil
}

func compileRegexps(patterns []string) []*regexp.Regexp {
	excludePatterns := patterns
	var excludeRegexps []*regexp.Regexp
	for _, excludePattern := range excludePatterns {
		reg, err := regexp.Compile(strings.ToLower(excludePattern))
		if err != nil {
			logger.Errorf("Invalid tag exclusion pattern: %v", err)
		} else {
			excludeRegexps = append(excludeRegexps, reg)
		}
	}
	return excludeRegexps
}

// filterTags returns the tags that do not match any of the excluded tag patterns,
// along with the names of the tags that were ignored.
func filterTags(excludeRegexps []*regexp.Regexp, tags []*models.ScrapedTag) (newTags []*models.ScrapedTag, ignoredTags []string) {
	if len(excludeRegexps) == 0 {
		return tags, nil
	}
	for _, t := range tags {
		ignore := false
		for _, reg := range excludeRegexps {
			if reg.MatchString(strings.ToLower(t.Name)) {
				ignore = true
				ignoredTags = sliceutil.AppendUnique(ignoredTags, t.Name)
				break
			}
		}
		if !ignore {
			newTags = append(newTags, t)
		}
	}
	return
}

// filterSceneTags removes tags matching excluded tag patterns from the provided scraped scenes
func filterSceneTags(scenes []*scraper.ScrapedScene) {
	excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns())
	var ignoredTags []string
	for _, s := range scenes {
		var ignored []string
		s.Tags, ignored = filterTags(excludeRegexps, s.Tags)
		ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored)
	}
	if len(ignoredTags) > 0 {
		logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
	}
}

// filterGalleryTags removes tags matching excluded tag patterns from the provided scraped galleries
func filterGalleryTags(g []*scraper.ScrapedGallery) {
	excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns())
	var ignoredTags []string
	for _, s := range g {
		var ignored []string
		s.Tags, ignored = filterTags(excludeRegexps, s.Tags)
		ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored)
	}
	if len(ignoredTags) > 0 {
		logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
	}
}

// filterPerformerTags removes tags matching excluded tag patterns from the provided scraped performers
func filterPerformerTags(p []*models.ScrapedPerformer) {
	excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns())
	var ignoredTags []string
	for _, s := range p {
		var ignored []string
		s.Tags, ignored = filterTags(excludeRegexps, s.Tags)
		ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored)
	}
	if len(ignoredTags) > 0 {
		logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
	}
}

// filterGroupTags removes tags matching excluded tag patterns from the provided scraped movies
func filterGroupTags(p []*models.ScrapedMovie) {
	excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns())
	var ignoredTags []string
	for _, s := range p {
		var ignored []string
		s.Tags, ignored = filterTags(excludeRegexps, s.Tags)
		ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored)
	}
	if len(ignoredTags) > 0 {
		logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
	}
}

func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*scraper.ScrapedScene, error) {
	content, err := r.scraperCache().ScrapeURL(ctx, url, scraper.ScrapeContentTypeScene)
	if err != nil {

@@ -172,10 +58,6 @@ func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*scrape
		return nil, err
	}
	if ret != nil {
		filterSceneTags([]*scraper.ScrapedScene{ret})
	}
	return ret, nil
}

@@ -190,10 +72,6 @@ func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*scra
		return nil, err
	}
	if ret != nil {
		filterGalleryTags([]*scraper.ScrapedGallery{ret})
	}
	return ret, nil
}

@@ -217,8 +95,6 @@ func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models
		return nil, err
	}
	filterGroupTags([]*models.ScrapedMovie{ret})
	return ret, nil
}

@@ -233,8 +109,6 @@ func (r *queryResolver) ScrapeGroupURL(ctx context.Context, url string) (*models
		return nil, err
	}
	filterGroupTags([]*models.ScrapedMovie{ret})
	// convert to scraped group
	group := &models.ScrapedGroup{
		StoredID: ret.StoredID,

@@ -322,8 +196,6 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source scraper.So
		return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
	}
	filterSceneTags(ret)
	return ret, nil
}

@@ -434,8 +306,6 @@ func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source scrape
		return nil, errors.New("scraper_id or stash_box_index must be set")
	}
	filterPerformerTags(ret)
	return ret, nil
}

@@ -496,7 +366,6 @@ func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.
		return nil, ErrNotImplemented
	}
	filterGalleryTags(ret)
	return ret, nil
}
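
The other half of the change applies the same filtering inside the stash-box client, so results from scene identification and queries are filtered before they reach the resolvers; that code is not in the hunks shown above. A hedged sketch of the general shape only — every identifier here (stashboxClient, scrapedScene, scrapedTag, filterSceneTags) is an illustrative stand-in, not the actual pkg/scraper/stashbox API:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Illustrative stand-ins for the scraped models.
type scrapedTag struct{ Name string }

type scrapedScene struct {
	Title string
	Tags  []*scrapedTag
}

// stashboxClient holds pre-compiled exclusion patterns so every query result
// can be filtered in one place, before it is returned to callers.
type stashboxClient struct {
	excludeTagRegexps []*regexp.Regexp
}

func newStashboxClient(excludePatterns []string) *stashboxClient {
	c := &stashboxClient{}
	for _, p := range excludePatterns {
		// Skip patterns that fail to compile, matching the lenient
		// behaviour of compileRegexps in the diff above.
		if re, err := regexp.Compile(strings.ToLower(p)); err == nil {
			c.excludeTagRegexps = append(c.excludeTagRegexps, re)
		}
	}
	return c
}

// filterSceneTags drops excluded tags from a scene returned by a stash-box query.
func (c *stashboxClient) filterSceneTags(s *scrapedScene) {
	var kept []*scrapedTag
	for _, t := range s.Tags {
		excluded := false
		for _, re := range c.excludeTagRegexps {
			if re.MatchString(strings.ToLower(t.Name)) {
				excluded = true
				break
			}
		}
		if !excluded {
			kept = append(kept, t)
		}
	}
	s.Tags = kept
}

func main() {
	c := newStashboxClient([]string{"^4k$"})
	scene := &scrapedScene{
		Title: "example",
		Tags:  []*scrapedTag{{Name: "4K"}, {Name: "Outdoor"}},
	}
	c.filterSceneTags(scene)
	for _, t := range scene.Tags {
		fmt.Println(t.Name) // prints only "Outdoor"
	}
}
```

Filtering at the client rather than at each resolver is what lets the identify path benefit from the same exclusions without duplicating the filter calls.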