mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 12:24:38 +03:00
Scrape tag exclusions (#1617)
* Add config option for scraper tag exclusion patterns. Add a config option for excluding tags / tag patterns from the scraper results. * Handle tag exclusion patterns during scraping
This commit is contained in:
@@ -69,6 +69,7 @@ fragment ConfigScrapingData on ConfigScrapingResult {
|
|||||||
scraperUserAgent
|
scraperUserAgent
|
||||||
scraperCertCheck
|
scraperCertCheck
|
||||||
scraperCDPPath
|
scraperCDPPath
|
||||||
|
excludeTagPatterns
|
||||||
}
|
}
|
||||||
|
|
||||||
fragment ConfigData on ConfigResult {
|
fragment ConfigData on ConfigResult {
|
||||||
|
|||||||
@@ -255,6 +255,8 @@ input ConfigScrapingInput {
|
|||||||
scraperCDPPath: String
|
scraperCDPPath: String
|
||||||
"""Whether the scraper should check for invalid certificates"""
|
"""Whether the scraper should check for invalid certificates"""
|
||||||
scraperCertCheck: Boolean!
|
scraperCertCheck: Boolean!
|
||||||
|
"""Tags blacklist during scraping"""
|
||||||
|
excludeTagPatterns: [String!]
|
||||||
}
|
}
|
||||||
|
|
||||||
type ConfigScrapingResult {
|
type ConfigScrapingResult {
|
||||||
@@ -264,6 +266,8 @@ type ConfigScrapingResult {
|
|||||||
scraperCDPPath: String
|
scraperCDPPath: String
|
||||||
"""Whether the scraper should check for invalid certificates"""
|
"""Whether the scraper should check for invalid certificates"""
|
||||||
scraperCertCheck: Boolean!
|
scraperCertCheck: Boolean!
|
||||||
|
"""Tags blacklist during scraping"""
|
||||||
|
excludeTagPatterns: [String!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
"""All configuration settings"""
|
"""All configuration settings"""
|
||||||
|
|||||||
@@ -312,6 +312,10 @@ func (r *mutationResolver) ConfigureScraping(ctx context.Context, input models.C
|
|||||||
refreshScraperCache = true
|
refreshScraperCache = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ExcludeTagPatterns != nil {
|
||||||
|
c.Set(config.ScraperExcludeTagPatterns, input.ExcludeTagPatterns)
|
||||||
|
}
|
||||||
|
|
||||||
c.Set(config.ScraperCertCheck, input.ScraperCertCheck)
|
c.Set(config.ScraperCertCheck, input.ScraperCertCheck)
|
||||||
if refreshScraperCache {
|
if refreshScraperCache {
|
||||||
manager.GetInstance().RefreshScraperCache()
|
manager.GetInstance().RefreshScraperCache()
|
||||||
|
|||||||
@@ -144,8 +144,9 @@ func makeConfigScrapingResult() *models.ConfigScrapingResult {
|
|||||||
scraperCDPPath := config.GetScraperCDPPath()
|
scraperCDPPath := config.GetScraperCDPPath()
|
||||||
|
|
||||||
return &models.ConfigScrapingResult{
|
return &models.ConfigScrapingResult{
|
||||||
ScraperUserAgent: &scraperUserAgent,
|
ScraperUserAgent: &scraperUserAgent,
|
||||||
ScraperCertCheck: config.GetScraperCertCheck(),
|
ScraperCertCheck: config.GetScraperCertCheck(),
|
||||||
ScraperCDPPath: &scraperCDPPath,
|
ScraperCDPPath: &scraperCDPPath,
|
||||||
|
ExcludeTagPatterns: config.GetScraperExcludeTagPatterns(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ const ScrapersPath = "scrapers_path"
|
|||||||
const ScraperUserAgent = "scraper_user_agent"
|
const ScraperUserAgent = "scraper_user_agent"
|
||||||
const ScraperCertCheck = "scraper_cert_check"
|
const ScraperCertCheck = "scraper_cert_check"
|
||||||
const ScraperCDPPath = "scraper_cdp_path"
|
const ScraperCDPPath = "scraper_cdp_path"
|
||||||
|
const ScraperExcludeTagPatterns = "scraper_exclude_tag_patterns"
|
||||||
|
|
||||||
// stash-box options
|
// stash-box options
|
||||||
const StashBoxes = "stash_boxes"
|
const StashBoxes = "stash_boxes"
|
||||||
@@ -368,6 +369,15 @@ func (i *Instance) GetScraperCertCheck() bool {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *Instance) GetScraperExcludeTagPatterns() []string {
|
||||||
|
var ret []string
|
||||||
|
if viper.IsSet(ScraperExcludeTagPatterns) {
|
||||||
|
ret = viper.GetStringSlice(ScraperExcludeTagPatterns)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
func (i *Instance) GetStashBoxes() []*models.StashBox {
|
func (i *Instance) GetStashBoxes() []*models.StashBox {
|
||||||
var boxes []*models.StashBox
|
var boxes []*models.StashBox
|
||||||
viper.UnmarshalKey(StashBoxes, &boxes)
|
viper.UnmarshalKey(StashBoxes, &boxes)
|
||||||
|
|||||||
@@ -5,10 +5,12 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
|
stash_config "github.com/stashapp/stash/pkg/manager/config"
|
||||||
"github.com/stashapp/stash/pkg/models"
|
"github.com/stashapp/stash/pkg/models"
|
||||||
"github.com/stashapp/stash/pkg/utils"
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
)
|
)
|
||||||
@@ -239,12 +241,11 @@ func (c Cache) postScrapePerformer(ret *models.ScrapedPerformer) error {
|
|||||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||||
tqb := r.Tag()
|
tqb := r.Tag()
|
||||||
|
|
||||||
for _, t := range ret.Tags {
|
tags, err := postProcessTags(tqb, ret.Tags)
|
||||||
err := MatchScrapedSceneTag(tqb, t)
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
ret.Tags = tags
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@@ -263,12 +264,11 @@ func (c Cache) postScrapeScenePerformer(ret *models.ScrapedScenePerformer) error
|
|||||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||||
tqb := r.Tag()
|
tqb := r.Tag()
|
||||||
|
|
||||||
for _, t := range ret.Tags {
|
tags, err := postProcessTags(tqb, ret.Tags)
|
||||||
err := MatchScrapedSceneTag(tqb, t)
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
ret.Tags = tags
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@@ -302,12 +302,11 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, t := range ret.Tags {
|
tags, err := postProcessTags(tqb, ret.Tags)
|
||||||
err := MatchScrapedSceneTag(tqb, t)
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
ret.Tags = tags
|
||||||
|
|
||||||
if ret.Studio != nil {
|
if ret.Studio != nil {
|
||||||
err := MatchScrapedSceneStudio(sqb, ret.Studio)
|
err := MatchScrapedSceneStudio(sqb, ret.Studio)
|
||||||
@@ -342,12 +341,11 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, t := range ret.Tags {
|
tags, err := postProcessTags(tqb, ret.Tags)
|
||||||
err := MatchScrapedSceneTag(tqb, t)
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
ret.Tags = tags
|
||||||
|
|
||||||
if ret.Studio != nil {
|
if ret.Studio != nil {
|
||||||
err := MatchScrapedSceneStudio(sqb, ret.Studio)
|
err := MatchScrapedSceneStudio(sqb, ret.Studio)
|
||||||
@@ -509,3 +507,42 @@ func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
|
|||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedSceneTag) ([]*models.ScrapedSceneTag, error) {
|
||||||
|
var ret []*models.ScrapedSceneTag
|
||||||
|
|
||||||
|
excludePatterns := stash_config.GetInstance().GetScraperExcludeTagPatterns()
|
||||||
|
var excludeRegexps []*regexp.Regexp
|
||||||
|
|
||||||
|
for _, excludePattern := range excludePatterns {
|
||||||
|
reg, err := regexp.Compile(strings.ToLower(excludePattern))
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("Invalid tag exclusion pattern :%v", err)
|
||||||
|
} else {
|
||||||
|
excludeRegexps = append(excludeRegexps, reg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var ignoredTags []string
|
||||||
|
ScrapeTag:
|
||||||
|
for _, t := range scrapedTags {
|
||||||
|
for _, reg := range excludeRegexps {
|
||||||
|
if reg.MatchString(strings.ToLower(t.Name)) {
|
||||||
|
ignoredTags = append(ignoredTags, t.Name)
|
||||||
|
continue ScrapeTag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := MatchScrapedSceneTag(tqb, t)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
ret = append(ret, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(ignoredTags) > 0 {
|
||||||
|
logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
### ✨ New Features
|
### ✨ New Features
|
||||||
|
* Support excluding tag patterns when scraping. ([#1617](https://github.com/stashapp/stash/pull/1617))
|
||||||
* Support setting a custom directory for default performer images. ([#1489](https://github.com/stashapp/stash/pull/1489))
|
* Support setting a custom directory for default performer images. ([#1489](https://github.com/stashapp/stash/pull/1489))
|
||||||
* Added filtering and sorting on scene marker count for tags. ([#1603](https://github.com/stashapp/stash/pull/1603))
|
* Added filtering and sorting on scene marker count for tags. ([#1603](https://github.com/stashapp/stash/pull/1603))
|
||||||
* Support excluding fields and editing tags when saving from scene tagger view. ([#1605](https://github.com/stashapp/stash/pull/1605))
|
* Support excluding fields and editing tags when saving from scene tagger view. ([#1605](https://github.com/stashapp/stash/pull/1605))
|
||||||
|
|||||||
@@ -17,9 +17,10 @@ import StashConfiguration from "./StashConfiguration";
|
|||||||
interface IExclusionPatternsProps {
|
interface IExclusionPatternsProps {
|
||||||
excludes: string[];
|
excludes: string[];
|
||||||
setExcludes: (value: string[]) => void;
|
setExcludes: (value: string[]) => void;
|
||||||
|
demo: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ExclusionPatterns: React.FC<IExclusionPatternsProps> = (props) => {
|
export const ExclusionPatterns: React.FC<IExclusionPatternsProps> = (props) => {
|
||||||
function excludeRegexChanged(idx: number, value: string) {
|
function excludeRegexChanged(idx: number, value: string) {
|
||||||
const newExcludes = props.excludes.map((regex, i) => {
|
const newExcludes = props.excludes.map((regex, i) => {
|
||||||
const ret = idx !== i ? regex : value;
|
const ret = idx !== i ? regex : value;
|
||||||
@@ -35,8 +36,7 @@ const ExclusionPatterns: React.FC<IExclusionPatternsProps> = (props) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function excludeAddRegex() {
|
function excludeAddRegex() {
|
||||||
const demo = "sample\\.mp4$";
|
const newExcludes = props.excludes.concat(props.demo);
|
||||||
const newExcludes = props.excludes.concat(demo);
|
|
||||||
|
|
||||||
props.setExcludes(newExcludes);
|
props.setExcludes(newExcludes);
|
||||||
}
|
}
|
||||||
@@ -490,7 +490,11 @@ export const SettingsConfigurationPanel: React.FC = () => {
|
|||||||
id: "config.general.excluded_video_patterns_head",
|
id: "config.general.excluded_video_patterns_head",
|
||||||
})}
|
})}
|
||||||
</h6>
|
</h6>
|
||||||
<ExclusionPatterns excludes={excludes} setExcludes={setExcludes} />
|
<ExclusionPatterns
|
||||||
|
excludes={excludes}
|
||||||
|
setExcludes={setExcludes}
|
||||||
|
demo="sample\.mp4$"
|
||||||
|
/>
|
||||||
<Form.Text className="text-muted">
|
<Form.Text className="text-muted">
|
||||||
{intl.formatMessage({
|
{intl.formatMessage({
|
||||||
id: "config.general.excluded_video_patterns_desc",
|
id: "config.general.excluded_video_patterns_desc",
|
||||||
@@ -514,6 +518,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
|
|||||||
<ExclusionPatterns
|
<ExclusionPatterns
|
||||||
excludes={imageExcludes}
|
excludes={imageExcludes}
|
||||||
setExcludes={setImageExcludes}
|
setExcludes={setImageExcludes}
|
||||||
|
demo="sample\.jpg$"
|
||||||
/>
|
/>
|
||||||
<Form.Text className="text-muted">
|
<Form.Text className="text-muted">
|
||||||
{intl.formatMessage({
|
{intl.formatMessage({
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import { useToast } from "src/hooks";
|
|||||||
import { TextUtils } from "src/utils";
|
import { TextUtils } from "src/utils";
|
||||||
import { CollapseButton, Icon, LoadingIndicator } from "src/components/Shared";
|
import { CollapseButton, Icon, LoadingIndicator } from "src/components/Shared";
|
||||||
import { ScrapeType } from "src/core/generated-graphql";
|
import { ScrapeType } from "src/core/generated-graphql";
|
||||||
|
import { ExclusionPatterns } from "./SettingsConfigurationPanel";
|
||||||
|
|
||||||
interface IURLList {
|
interface IURLList {
|
||||||
urls: string[];
|
urls: string[];
|
||||||
@@ -96,6 +97,7 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||||||
undefined
|
undefined
|
||||||
);
|
);
|
||||||
const [scraperCertCheck, setScraperCertCheck] = useState<boolean>(true);
|
const [scraperCertCheck, setScraperCertCheck] = useState<boolean>(true);
|
||||||
|
const [excludeTagPatterns, setExcludeTagPatterns] = useState<string[]>([]);
|
||||||
|
|
||||||
const { data, error } = useConfiguration();
|
const { data, error } = useConfiguration();
|
||||||
|
|
||||||
@@ -103,6 +105,7 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||||||
scraperUserAgent,
|
scraperUserAgent,
|
||||||
scraperCDPPath,
|
scraperCDPPath,
|
||||||
scraperCertCheck,
|
scraperCertCheck,
|
||||||
|
excludeTagPatterns,
|
||||||
});
|
});
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -113,6 +116,7 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||||||
setScraperUserAgent(conf.scraping.scraperUserAgent ?? undefined);
|
setScraperUserAgent(conf.scraping.scraperUserAgent ?? undefined);
|
||||||
setScraperCDPPath(conf.scraping.scraperCDPPath ?? undefined);
|
setScraperCDPPath(conf.scraping.scraperCDPPath ?? undefined);
|
||||||
setScraperCertCheck(conf.scraping.scraperCertCheck);
|
setScraperCertCheck(conf.scraping.scraperCertCheck);
|
||||||
|
setExcludeTagPatterns(conf.scraping.excludeTagPatterns);
|
||||||
}
|
}
|
||||||
}, [data, error]);
|
}, [data, error]);
|
||||||
|
|
||||||
@@ -398,6 +402,24 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||||||
</Form.Group>
|
</Form.Group>
|
||||||
</Form.Group>
|
</Form.Group>
|
||||||
|
|
||||||
|
<Form.Group>
|
||||||
|
<h6>
|
||||||
|
{intl.formatMessage({
|
||||||
|
id: "config.scraping.excluded_tag_patterns_head",
|
||||||
|
})}
|
||||||
|
</h6>
|
||||||
|
<ExclusionPatterns
|
||||||
|
excludes={excludeTagPatterns}
|
||||||
|
setExcludes={setExcludeTagPatterns}
|
||||||
|
demo="4K"
|
||||||
|
/>
|
||||||
|
<Form.Text className="text-muted">
|
||||||
|
{intl.formatMessage({
|
||||||
|
id: "config.scraping.excluded_tag_patterns_desc",
|
||||||
|
})}
|
||||||
|
</Form.Text>
|
||||||
|
</Form.Group>
|
||||||
|
|
||||||
<hr />
|
<hr />
|
||||||
|
|
||||||
<h4>{intl.formatMessage({ id: "config.scraping.scrapers" })}</h4>
|
<h4>{intl.formatMessage({ id: "config.scraping.scrapers" })}</h4>
|
||||||
|
|||||||
@@ -243,6 +243,8 @@
|
|||||||
"scraping": {
|
"scraping": {
|
||||||
"entity_metadata": "{entityType} Metadata",
|
"entity_metadata": "{entityType} Metadata",
|
||||||
"entity_scrapers": "{entityType} scrapers",
|
"entity_scrapers": "{entityType} scrapers",
|
||||||
|
"excluded_tag_patterns_desc": "Regexps of tag names to exclude from scraping results",
|
||||||
|
"excluded_tag_patterns_head": "Excluded Tag Patterns",
|
||||||
"scrapers": "Scrapers",
|
"scrapers": "Scrapers",
|
||||||
"search_by_name": "Search by name",
|
"search_by_name": "Search by name",
|
||||||
"supported_types": "Supported types",
|
"supported_types": "Supported types",
|
||||||
|
|||||||
Reference in New Issue
Block a user