Configurable scraper user agent string (#409)

* Add debug scrape option.

Co-authored-by: HiddenPants255 <>
@@ -11,6 +11,7 @@ fragment ConfigGeneralData on ConfigGeneralResult {
   logLevel
   logAccess
   excludes
+  scraperUserAgent
 }
 
 fragment ConfigInterfaceData on ConfigInterfaceResult {
@@ -32,6 +32,8 @@ input ConfigGeneralInput {
   logAccess: Boolean!
   """Array of file regexp to exclude from Scan"""
   excludes: [String!]
+  """Scraper user agent string"""
+  scraperUserAgent: String
 }
 
 type ConfigGeneralResult {
@@ -59,6 +61,8 @@ type ConfigGeneralResult {
   logAccess: Boolean!
   """Array of file regexp to exclude from Scan"""
   excludes: [String!]!
+  """Scraper user agent string"""
+  scraperUserAgent: String
 }
 
 input ConfigInterfaceInput {
@@ -76,6 +76,10 @@ func (r *mutationResolver) ConfigureGeneral(ctx context.Context, input models.Co
 		config.Set(config.Exclude, input.Excludes)
 	}
 
+	if input.ScraperUserAgent != nil {
+		config.Set(config.ScraperUserAgent, input.ScraperUserAgent)
+	}
+
 	if err := config.Write(); err != nil {
 		return makeConfigGeneralResult(), err
 	}
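(Example, not part of the diff.) In the resolver above, ScraperUserAgent is an optional GraphQL field, so it arrives as a Go pointer and nil means the client omitted it; the nil check ensures the stored setting is only touched when the field was actually sent. A minimal, self-contained sketch of that pattern; the input struct and settings map are stand-ins, not stash types:

package main

import "fmt"

// Stand-in for models.ConfigGeneralInput: optional GraphQL inputs
// are generated as pointer fields, where nil means "field omitted".
type configGeneralInput struct {
	ScraperUserAgent *string
}

var settings = map[string]interface{}{}

func configureGeneral(input configGeneralInput) {
	// Only persist the key when the client sent it, mirroring the
	// resolver change above.
	if input.ScraperUserAgent != nil {
		settings["scraper_user_agent"] = *input.ScraperUserAgent
	}
}

func main() {
	ua := "Mozilla/5.0 (compatible; stash)"
	configureGeneral(configGeneralInput{ScraperUserAgent: &ua})
	fmt.Println(settings["scraper_user_agent"])
}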
@@ -33,6 +33,8 @@ func makeConfigGeneralResult() *models.ConfigGeneralResult {
 	maxTranscodeSize := config.GetMaxTranscodeSize()
 	maxStreamingTranscodeSize := config.GetMaxStreamingTranscodeSize()
 
+	scraperUserAgent := config.GetScraperUserAgent()
+
 	return &models.ConfigGeneralResult{
 		Stashes:      config.GetStashPaths(),
 		DatabasePath: config.GetDatabasePath(),
@@ -46,6 +48,7 @@ func makeConfigGeneralResult() *models.ConfigGeneralResult {
 		LogLevel:         config.GetLogLevel(),
 		LogAccess:        config.GetLogAccess(),
 		Excludes:         config.GetExcludes(),
+		ScraperUserAgent: &scraperUserAgent,
 	}
 }
 
@@ -59,7 +62,6 @@ func makeConfigInterfaceResult() *models.ConfigInterfaceResult {
 	cssEnabled := config.GetCSSEnabled()
 	language := config.GetLanguage()
 
-
 	return &models.ConfigInterfaceResult{
 		SoundOnPreview: &soundOnPreview,
 		WallShowTitle:  &wallShowTitle,
@@ -22,7 +22,6 @@ const Password = "password"
 
 const Database = "database"
 
-const ScrapersPath = "scrapers_path"
 const Exclude = "exclude"
 
 const MaxTranscodeSize = "max_transcode_size"
@@ -32,6 +31,10 @@ const Host = "host"
 const Port = "port"
 const ExternalHost = "external_host"
 
+// scraping options
+const ScrapersPath = "scrapers_path"
+const ScraperUserAgent = "scraper_user_agent"
+
 // i18n
 const Language = "language"
 
@@ -115,6 +118,10 @@ func GetScrapersPath() string {
 	return viper.GetString(ScrapersPath)
 }
 
+func GetScraperUserAgent() string {
+	return viper.GetString(ScraperUserAgent)
+}
+
 func GetHost() string {
 	return viper.GetString(Host)
 }
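(Example, not part of the diff.) Because the getter goes through viper, the new setting is read from the scraper_user_agent key of stash's config file, and viper.GetString returns the empty string for a missing key, which is why the callers below only set the header when the value is non-empty. A small runnable sketch; the sample agent string is made up:

package main

import (
	"fmt"

	"github.com/spf13/viper"
)

const ScraperUserAgent = "scraper_user_agent" // same key as in the diff

func main() {
	// Simulates a config file entry:
	//   scraper_user_agent: Mozilla/5.0 (compatible; stash)
	viper.Set(ScraperUserAgent, "Mozilla/5.0 (compatible; stash)")
	fmt.Println(viper.GetString(ScraperUserAgent))

	// An unset key yields "" rather than an error.
	fmt.Printf("%q\n", viper.GetString("no_such_key"))
}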
@@ -139,6 +139,10 @@ func (c *scrapeSceneByURLConfig) resolveFn() {
 	}
 }
 
+type scraperDebugOptions struct {
+	PrintHTML bool `yaml:"printHTML"`
+}
+
 type scraperConfig struct {
 	ID   string
 	Name string `yaml:"name"`
@@ -148,8 +152,9 @@ type scraperConfig struct {
 	SceneByFragment *sceneByFragmentConfig    `yaml:"sceneByFragment"`
 	SceneByURL      []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
 
-	StashServer   *stashServer  `yaml:"stashServer"`
-	XPathScrapers xpathScrapers `yaml:"xPathScrapers"`
+	DebugOptions  *scraperDebugOptions `yaml:"debug"`
+	StashServer   *stashServer         `yaml:"stashServer"`
+	XPathScrapers xpathScrapers        `yaml:"xPathScrapers"`
 }
 
 func loadScraperFromYAML(path string) (*scraperConfig, error) {
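(Example, not part of the diff.) Going by the yaml tags above, a scraper definition opts into the new debug output with a debug block whose printHTML flag is true. A self-contained sketch that unmarshals such a fragment into the new structs; the fields are trimmed to the relevant ones and the scraper name is invented:

package main

import (
	"fmt"

	yaml "gopkg.in/yaml.v2"
)

// Trimmed copies of the structs from the diff.
type scraperDebugOptions struct {
	PrintHTML bool `yaml:"printHTML"`
}

type scraperConfig struct {
	Name         string               `yaml:"name"`
	DebugOptions *scraperDebugOptions `yaml:"debug"`
}

func main() {
	src := `
name: Example Scraper
debug:
  printHTML: true
`
	var c scraperConfig
	if err := yaml.Unmarshal([]byte(src), &c); err != nil {
		panic(err)
	}
	// Prints: Example Scraper true
	fmt.Println(c.Name, c.DebugOptions.PrintHTML)
}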
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/stashapp/stash/pkg/manager/config"
 	"github.com/stashapp/stash/pkg/models"
 	"github.com/stashapp/stash/pkg/utils"
 )
@@ -52,8 +53,18 @@ func getImage(url string) (*string, error) {
 		Timeout: imageGetTimeout,
 	}
 
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	userAgent := config.GetScraperUserAgent()
+	if userAgent != "" {
+		req.Header.Set("User-Agent", userAgent)
+	}
+
 	// assume is a URL for now
-	resp, err := client.Get(url)
+	resp, err := client.Do(req)
 	if err != nil {
 		return nil, err
 	}
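(Example, not part of the diff.) The switch from client.Get(url) to client.Do(req) is what makes the header configurable: http.Client.Get offers no way to attach request headers, while building the request with http.NewRequest first allows User-Agent to be set before sending. A runnable illustration; the test server and agent string are made up:

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"time"
)

func main() {
	// Test server that echoes back the User-Agent it received.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, r.Header.Get("User-Agent"))
	}))
	defer srv.Close()

	client := &http.Client{Timeout: 10 * time.Second}

	// Build the request explicitly so headers can be customized.
	req, err := http.NewRequest("GET", srv.URL, nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("User-Agent", "my-custom-agent/1.0")

	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(string(body)) // my-custom-agent/1.0
}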
@@ -1,7 +1,9 @@
 package scraper
 
 import (
+	"bytes"
 	"errors"
+	"net/http"
 	"net/url"
 	"reflect"
 	"regexp"
@@ -10,11 +12,17 @@ import (
 
 	"github.com/antchfx/htmlquery"
 	"golang.org/x/net/html"
+	"golang.org/x/net/html/charset"
 
 	"github.com/stashapp/stash/pkg/logger"
+	"github.com/stashapp/stash/pkg/manager/config"
 	"github.com/stashapp/stash/pkg/models"
 )
 
+// Timeout for the scrape http request. Includes transfer time. May want to make this
+// configurable at some point.
+const scrapeGetTimeout = time.Second * 30
+
 type commonXPathConfig map[string]string
 
 func (c commonXPathConfig) applyCommon(src string) string {
@@ -197,7 +205,7 @@ func (c xpathScraperAttrConfig) applySubScraper(value string) string {
 		return value
 	}
 
-	doc, err := htmlquery.LoadURL(value)
+	doc, err := loadURL(value, nil)
 
 	if err != nil {
 		logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error())
@@ -504,6 +512,42 @@ func (r xPathResults) setKey(index int, key string, value string) xPathResults {
 	return r
 }
 
+func loadURL(url string, c *scraperConfig) (*html.Node, error) {
+	client := &http.Client{
+		Timeout: scrapeGetTimeout,
+	}
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	userAgent := config.GetScraperUserAgent()
+	if userAgent != "" {
+		req.Header.Set("User-Agent", userAgent)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
+	if err != nil {
+		return nil, err
+	}
+
+	ret, err := html.Parse(r)
+
+	if err == nil && c != nil && c.DebugOptions != nil && c.DebugOptions.PrintHTML {
+		var b bytes.Buffer
+		html.Render(&b, ret)
+		logger.Infof("loadURL (%s) response: \n%s", url, b.String())
+	}
+
+	return ret, err
+}
+
 func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
 	scraper := c.scraperConfig.XPathScrapers[c.Scraper]
 
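(Example, not part of the diff.) Besides setting the User-Agent, loadURL replaces htmlquery.LoadURL so the response body passes through charset.NewReader, which transcodes non-UTF-8 pages to UTF-8 based on the Content-Type header before html.Parse runs; the debug branch then optionally renders the parsed tree back to HTML for logging. A standalone sketch of the decoding step; the Latin-1 test page is invented:

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"

	"golang.org/x/net/html"
	"golang.org/x/net/html/charset"
)

func main() {
	// Serve Latin-1 HTML: "café" with a raw 0xE9 byte.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/html; charset=iso-8859-1")
		w.Write([]byte("<html><body>caf\xe9</body></html>"))
	}))
	defer srv.Close()

	resp, err := http.Get(srv.URL)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Transcode to UTF-8 using the Content-Type header, as loadURL does.
	r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
	if err != nil {
		panic(err)
	}
	doc, err := html.Parse(r)
	if err != nil {
		panic(err)
	}

	// Document -> <html> -> <head> -> <body>; the text node is valid UTF-8.
	body := doc.FirstChild.FirstChild.NextSibling
	fmt.Println(body.FirstChild.Data) // café
}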
@@ -511,7 +555,7 @@ func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPe
 		return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
 	}
 
-	doc, err := htmlquery.LoadURL(url)
+	doc, err := loadURL(url, c.scraperConfig)
 
 	if err != nil {
 		return nil, err
@@ -527,7 +571,7 @@ func scrapeSceneURLXPath(c scraperTypeConfig, url string) (*models.ScrapedScene,
 		return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
 	}
 
-	doc, err := htmlquery.LoadURL(url)
+	doc, err := loadURL(url, c.scraperConfig)
 
 	if err != nil {
 		return nil, err
@@ -551,7 +595,7 @@ func scrapePerformerNamesXPath(c scraperTypeConfig, name string) ([]*models.Scra
 	u := c.QueryURL
 	u = strings.Replace(u, placeholder, escapedName, -1)
 
-	doc, err := htmlquery.LoadURL(u)
+	doc, err := loadURL(u, c.scraperConfig)
 
 	if err != nil {
 		return nil, err
@@ -29,6 +29,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
   const [logLevel, setLogLevel] = useState<string>("Info");
   const [logAccess, setLogAccess] = useState<boolean>(true);
   const [excludes, setExcludes] = useState<string[]>([]);
+  const [scraperUserAgent, setScraperUserAgent] = useState<string | undefined>(undefined);
 
   const { data, error, loading } = StashService.useConfiguration();
 
@@ -44,7 +45,8 @@ export const SettingsConfigurationPanel: React.FC = () => {
     logOut,
     logLevel,
     logAccess,
-    excludes
+    excludes,
+    scraperUserAgent
   });
 
   useEffect(() => {
@@ -66,6 +68,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
       setLogLevel(conf.general.logLevel);
       setLogAccess(conf.general.logAccess);
       setExcludes(conf.general.excludes);
+      setScraperUserAgent(conf.general.scraperUserAgent ?? undefined);
     }
   }, [data, error]);
 
@@ -289,6 +292,22 @@ export const SettingsConfigurationPanel: React.FC = () => {
 
       <hr />
 
+      <Form.Group id="generated-path">
+        <h6>Scraping</h6>
+        <Form.Control
+          className="col col-sm-6 text-input"
+          defaultValue={scraperUserAgent}
+          onChange={(e: React.FormEvent<HTMLInputElement>) =>
+            setScraperUserAgent(e.currentTarget.value)
+          }
+        />
+        <Form.Text className="text-muted">
+          User-Agent string used during scrape http requests
+        </Form.Text>
+      </Form.Group>
+
+      <hr />
+
       <Form.Group>
         <h4>Authentication</h4>
         <Form.Group id="username">
@@ -32,6 +32,7 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
   const [logLevel, setLogLevel] = useState<string>("Info");
   const [logAccess, setLogAccess] = useState<boolean>(true);
   const [excludes, setExcludes] = useState<(string)[]>([]);
+  const [scraperUserAgent, setScraperUserAgent] = useState<string | undefined>(undefined);
 
   const { data, error, loading } = StashService.useConfiguration();
 
@@ -48,7 +49,7 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
     logLevel,
     logAccess,
     excludes,
+    scraperUserAgent,
   });
 
   useEffect(() => {
@@ -67,6 +68,7 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
       setLogLevel(conf.general.logLevel);
       setLogAccess(conf.general.logAccess);
       setExcludes(conf.general.excludes);
+      setScraperUserAgent(conf.general.scraperUserAgent);
     }
   }, [data, error]);
 
@@ -229,6 +231,18 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
       </FormGroup>
       <Divider />
 
+      <FormGroup>
+        <H4>Scraping</H4>
+        <FormGroup
+          label="Scraper User-Agent string"
+          helperText="User-Agent string used during scrape http requests"
+        >
+          <InputGroup value={scraperUserAgent} onChange={(e: any) => setScraperUserAgent(e.target.value)} />
+        </FormGroup>
+      </FormGroup>
+
+      <Divider />
+
       <FormGroup>
         <H4>Authentication</H4>
         <FormGroup