mirror of
https://github.com/stashapp/stash.git
synced 2025-12-18 12:54:38 +03:00
Cache and reuse the scraper HTTP client (#1855)
* Add Cookies directly to the request. Rather than maintaining a cookie jar on a one-shot HTTP client, maintain the jar ourselves: make a new jar, then use it to select the right cookies. The cookies are set on the request rather than on the client. This will retain the current behavior as we are always throwing the client away after each use. This patch enables the lifting of the http client as well over time. * Introduce a cached scraper HTTP client. The scraper cache is augmented with an *http.Client. These are safe for concurrent use, so the pointer can safely be passed around. Push this into scraper configurations where applicable, next to the txnManagers. When we issue a loadUrl request, do so on the cached *http.Client, which will reuse existing idle connections in the client if any are present. * Set MaxIdleConnsPerHost. Closes #1850. We allow for up to 8 idle connections to a single host. This should make concurrent operation toward the same host reuse connections, even for sizeable concurrency. The number isn't bumped excessively high. We should probably limit concurrency toward a single site anyway, since we'll be able to overrun a site with queries quite easily if we have many concurrent goroutines issuing requests at the same time. * Reinstate driverOptions / useCDP check. Use De Morgan's laws to invert the logic and exit early. Fixes tests breaking. * Documentation fixup. * Use the scraper http.Client when fetching images. Fold image fetchers onto the cached scraper http.Client as well. This makes the scraper have a single http.Client cache for all its operations. Thread the client upwards to the relevant attachment points: either the cache, or a stash_box instance, which is extended to include a pointer to the client. Style roughly follows that of txnManagers. * Use the same http Client as the GraphQL client uses. Rather than using http.DefaultClient, use the same client as the GraphQL client uses in the stash_box subsystem. 
This localizes the client used in the subsystem into the constructing New... call. * Hoist HTTP client construction. Create a function for initializing the HTTP Client we use. While here, hoist magic numbers into constants. Introduce a proper static redirect error and use it in the client code as well. * Reinstate printCookies. This is a debugging function, and it might still come in handy in the future at some point. * Nitpick comment. * Minor tidy. Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/models"
|
"github.com/stashapp/stash/pkg/models"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -38,16 +40,16 @@ type scraperActionImpl interface {
|
|||||||
scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
|
scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c config) getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl {
|
func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl {
|
||||||
switch scraper.Action {
|
switch scraper.Action {
|
||||||
case scraperActionScript:
|
case scraperActionScript:
|
||||||
return newScriptScraper(scraper, c, globalConfig)
|
return newScriptScraper(scraper, c, globalConfig)
|
||||||
case scraperActionStash:
|
case scraperActionStash:
|
||||||
return newStashScraper(scraper, txnManager, c, globalConfig)
|
return newStashScraper(scraper, client, txnManager, c, globalConfig)
|
||||||
case scraperActionXPath:
|
case scraperActionXPath:
|
||||||
return newXpathScraper(scraper, txnManager, c, globalConfig)
|
return newXpathScraper(scraper, client, txnManager, c, globalConfig)
|
||||||
case scraperActionJson:
|
case scraperActionJson:
|
||||||
return newJsonScraper(scraper, txnManager, c, globalConfig)
|
return newJsonScraper(scraper, client, txnManager, c, globalConfig)
|
||||||
}
|
}
|
||||||
|
|
||||||
panic("unknown scraper action: " + scraper.Action)
|
panic("unknown scraper action: " + scraper.Action)
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import "github.com/stashapp/stash/pkg/models"
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/stashapp/stash/pkg/models"
|
||||||
|
)
|
||||||
|
|
||||||
type configSceneScraper struct {
|
type configSceneScraper struct {
|
||||||
*configScraper
|
*configScraper
|
||||||
@@ -12,7 +16,7 @@ func (c *configSceneScraper) matchesURL(url string) bool {
|
|||||||
|
|
||||||
func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
|
func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
|
||||||
if c.config.SceneByName != nil {
|
if c.config.SceneByName != nil {
|
||||||
s := c.config.getScraper(*c.config.SceneByName, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.SceneByName, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapeScenesByName(name)
|
return s.scrapeScenesByName(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -21,7 +25,7 @@ func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene,
|
|||||||
|
|
||||||
func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||||
if c.config.SceneByFragment != nil {
|
if c.config.SceneByFragment != nil {
|
||||||
s := c.config.getScraper(*c.config.SceneByFragment, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.SceneByFragment, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapeSceneByScene(scene)
|
return s.scrapeSceneByScene(scene)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -30,7 +34,7 @@ func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.Scraped
|
|||||||
|
|
||||||
func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||||
if c.config.SceneByQueryFragment != nil {
|
if c.config.SceneByQueryFragment != nil {
|
||||||
s := c.config.getScraper(*c.config.SceneByQueryFragment, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.SceneByQueryFragment, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapeSceneByFragment(scene)
|
return s.scrapeSceneByFragment(scene)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,7 +44,7 @@ func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*
|
|||||||
func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
|
func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
|
||||||
for _, scraper := range c.config.SceneByURL {
|
for _, scraper := range c.config.SceneByURL {
|
||||||
if scraper.matchesURL(url) {
|
if scraper.matchesURL(url) {
|
||||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||||
ret, err := s.scrapeSceneByURL(url)
|
ret, err := s.scrapeSceneByURL(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -65,7 +69,7 @@ func (c *configPerformerScraper) matchesURL(url string) bool {
|
|||||||
|
|
||||||
func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) {
|
func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) {
|
||||||
if c.config.PerformerByName != nil {
|
if c.config.PerformerByName != nil {
|
||||||
s := c.config.getScraper(*c.config.PerformerByName, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.PerformerByName, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapePerformersByName(name)
|
return s.scrapePerformersByName(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,7 +78,7 @@ func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPer
|
|||||||
|
|
||||||
func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
|
func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
|
||||||
if c.config.PerformerByFragment != nil {
|
if c.config.PerformerByFragment != nil {
|
||||||
s := c.config.getScraper(*c.config.PerformerByFragment, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.PerformerByFragment, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapePerformerByFragment(scrapedPerformer)
|
return s.scrapePerformerByFragment(scrapedPerformer)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,7 +93,7 @@ func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.Scrape
|
|||||||
func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) {
|
func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) {
|
||||||
for _, scraper := range c.config.PerformerByURL {
|
for _, scraper := range c.config.PerformerByURL {
|
||||||
if scraper.matchesURL(url) {
|
if scraper.matchesURL(url) {
|
||||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||||
ret, err := s.scrapePerformerByURL(url)
|
ret, err := s.scrapePerformerByURL(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -114,7 +118,7 @@ func (c *configGalleryScraper) matchesURL(url string) bool {
|
|||||||
|
|
||||||
func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||||
if c.config.GalleryByFragment != nil {
|
if c.config.GalleryByFragment != nil {
|
||||||
s := c.config.getScraper(*c.config.GalleryByFragment, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.GalleryByFragment, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapeGalleryByGallery(gallery)
|
return s.scrapeGalleryByGallery(gallery)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,7 +128,7 @@ func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models
|
|||||||
func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
|
func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
|
||||||
if c.config.GalleryByFragment != nil {
|
if c.config.GalleryByFragment != nil {
|
||||||
// TODO - this should be galleryByQueryFragment
|
// TODO - this should be galleryByQueryFragment
|
||||||
s := c.config.getScraper(*c.config.GalleryByFragment, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(*c.config.GalleryByFragment, c.client, c.txnManager, c.globalConfig)
|
||||||
return s.scrapeGalleryByFragment(gallery)
|
return s.scrapeGalleryByFragment(gallery)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -134,7 +138,7 @@ func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInp
|
|||||||
func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
|
func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
|
||||||
for _, scraper := range c.config.GalleryByURL {
|
for _, scraper := range c.config.GalleryByURL {
|
||||||
if scraper.matchesURL(url) {
|
if scraper.matchesURL(url) {
|
||||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||||
ret, err := s.scrapeGalleryByURL(url)
|
ret, err := s.scrapeGalleryByURL(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -160,7 +164,7 @@ func (c *configMovieScraper) matchesURL(url string) bool {
|
|||||||
func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) {
|
func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) {
|
||||||
for _, scraper := range c.config.MovieByURL {
|
for _, scraper := range c.config.MovieByURL {
|
||||||
if scraper.matchesURL(url) {
|
if scraper.matchesURL(url) {
|
||||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig)
|
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||||
ret, err := s.scrapeMovieByURL(url)
|
ret, err := s.scrapeMovieByURL(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -177,12 +181,14 @@ func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, erro
|
|||||||
|
|
||||||
type configScraper struct {
|
type configScraper struct {
|
||||||
config config
|
config config
|
||||||
|
client *http.Client
|
||||||
txnManager models.TransactionManager
|
txnManager models.TransactionManager
|
||||||
globalConfig GlobalConfig
|
globalConfig GlobalConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
func createScraperFromConfig(c config, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
func createScraperFromConfig(c config, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||||
base := configScraper{
|
base := configScraper{
|
||||||
|
client: client,
|
||||||
config: c,
|
config: c,
|
||||||
txnManager: txnManager,
|
txnManager: txnManager,
|
||||||
globalConfig: globalConfig,
|
globalConfig: globalConfig,
|
||||||
|
|||||||
@@ -11,42 +11,51 @@ import (
|
|||||||
"github.com/chromedp/cdproto/cdp"
|
"github.com/chromedp/cdproto/cdp"
|
||||||
"github.com/chromedp/cdproto/network"
|
"github.com/chromedp/cdproto/network"
|
||||||
"github.com/chromedp/chromedp"
|
"github.com/chromedp/chromedp"
|
||||||
|
"golang.org/x/net/publicsuffix"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
"github.com/stashapp/stash/pkg/utils"
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// set cookies for the native http client
|
// jar constructs a cookie jar from a configuration
|
||||||
func setCookies(jar *cookiejar.Jar, scraperConfig config) {
|
func (c config) jar() (*cookiejar.Jar, error) {
|
||||||
driverOptions := scraperConfig.DriverOptions
|
opts := c.DriverOptions
|
||||||
if driverOptions != nil && !driverOptions.UseCDP {
|
jar, err := cookiejar.New(&cookiejar.Options{
|
||||||
|
PublicSuffixList: publicsuffix.List,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
for _, ckURL := range driverOptions.Cookies { // go through all cookies
|
if opts == nil || opts.UseCDP {
|
||||||
url, err := url.Parse(ckURL.CookieURL) // CookieURL must be valid, include schema
|
return jar, nil
|
||||||
if err != nil {
|
}
|
||||||
logger.Warnf("Skipping jar cookies for cookieURL %s. Error %s", ckURL.CookieURL, err)
|
|
||||||
} else {
|
|
||||||
var httpCookies []*http.Cookie
|
|
||||||
var httpCookie *http.Cookie
|
|
||||||
|
|
||||||
for _, cookie := range ckURL.Cookies {
|
for i, ckURL := range opts.Cookies {
|
||||||
httpCookie = &http.Cookie{
|
url, err := url.Parse(ckURL.CookieURL) // CookieURL must be valid, include schema
|
||||||
Name: cookie.Name,
|
if err != nil {
|
||||||
Value: getCookieValue(cookie),
|
logger.Warnf("skipping cookie [%d] for cookieURL %s: %v", i, ckURL.CookieURL, err)
|
||||||
Path: cookie.Path,
|
continue
|
||||||
Domain: cookie.Domain,
|
}
|
||||||
}
|
|
||||||
|
|
||||||
httpCookies = append(httpCookies, httpCookie)
|
var httpCookies []*http.Cookie
|
||||||
}
|
for _, cookie := range ckURL.Cookies {
|
||||||
jar.SetCookies(url, httpCookies) // jar.SetCookies only sets cookies with the domain matching the URL
|
c := &http.Cookie{
|
||||||
|
Name: cookie.Name,
|
||||||
if jar.Cookies(url) == nil {
|
Value: getCookieValue(cookie),
|
||||||
logger.Warnf("Setting jar cookies for %s failed", url.String())
|
Path: cookie.Path,
|
||||||
}
|
Domain: cookie.Domain,
|
||||||
}
|
}
|
||||||
|
httpCookies = append(httpCookies, c)
|
||||||
|
}
|
||||||
|
|
||||||
|
jar.SetCookies(url, httpCookies)
|
||||||
|
if jar.Cookies(url) == nil {
|
||||||
|
logger.Warnf("setting jar cookies for %s failed", url.String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return jar, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getCookieValue(cookie *scraperCookies) string {
|
func getCookieValue(cookie *scraperCookies) string {
|
||||||
@@ -56,7 +65,7 @@ func getCookieValue(cookie *scraperCookies) string {
|
|||||||
return cookie.Value
|
return cookie.Value
|
||||||
}
|
}
|
||||||
|
|
||||||
// print all cookies from the jar of the native http client
|
// printCookies prints all cookies from the given cookie jar
|
||||||
func printCookies(jar *cookiejar.Jar, scraperConfig config, msg string) {
|
func printCookies(jar *cookiejar.Jar, scraperConfig config, msg string) {
|
||||||
driverOptions := scraperConfig.DriverOptions
|
driverOptions := scraperConfig.DriverOptions
|
||||||
if driverOptions != nil && !driverOptions.UseCDP {
|
if driverOptions != nil && !driverOptions.UseCDP {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
@@ -123,7 +124,7 @@ xPathScrapers:
|
|||||||
# Last updated April 13, 2021
|
# Last updated April 13, 2021
|
||||||
`
|
`
|
||||||
|
|
||||||
func getFreeonesScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
func getFreeonesScraper(client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||||
yml := freeonesScraperConfig
|
yml := freeonesScraperConfig
|
||||||
|
|
||||||
c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml))
|
c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml))
|
||||||
@@ -131,5 +132,5 @@ func getFreeonesScraper(txnManager models.TransactionManager, globalConfig Globa
|
|||||||
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
|
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
return createScraperFromConfig(*c, txnManager, globalConfig)
|
return createScraperFromConfig(*c, client, txnManager, globalConfig)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,28 +2,22 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/models"
|
"github.com/stashapp/stash/pkg/models"
|
||||||
"github.com/stashapp/stash/pkg/utils"
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Timeout to get the image. Includes transfer time. May want to make this
|
func setPerformerImage(ctx context.Context, client *http.Client, p *models.ScrapedPerformer, globalConfig GlobalConfig) error {
|
||||||
// configurable at some point.
|
|
||||||
const imageGetTimeout = time.Second * 30
|
|
||||||
|
|
||||||
func setPerformerImage(ctx context.Context, p *models.ScrapedPerformer, globalConfig GlobalConfig) error {
|
|
||||||
if p == nil || p.Image == nil || !strings.HasPrefix(*p.Image, "http") {
|
if p == nil || p.Image == nil || !strings.HasPrefix(*p.Image, "http") {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *p.Image, globalConfig)
|
img, err := getImage(ctx, *p.Image, client, globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -35,14 +29,14 @@ func setPerformerImage(ctx context.Context, p *models.ScrapedPerformer, globalCo
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func setSceneImage(ctx context.Context, s *models.ScrapedScene, globalConfig GlobalConfig) error {
|
func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error {
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
// don't try to get the image if it doesn't appear to be a URL
|
||||||
if s == nil || s.Image == nil || !strings.HasPrefix(*s.Image, "http") {
|
if s == nil || s.Image == nil || !strings.HasPrefix(*s.Image, "http") {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *s.Image, globalConfig)
|
img, err := getImage(ctx, *s.Image, client, globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -52,14 +46,14 @@ func setSceneImage(ctx context.Context, s *models.ScrapedScene, globalConfig Glo
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func setMovieFrontImage(ctx context.Context, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
// don't try to get the image if it doesn't appear to be a URL
|
||||||
if m == nil || m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
|
if m == nil || m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *m.FrontImage, globalConfig)
|
img, err := getImage(ctx, *m.FrontImage, client, globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -69,14 +63,14 @@ func setMovieFrontImage(ctx context.Context, m *models.ScrapedMovie, globalConfi
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func setMovieBackImage(ctx context.Context, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
// don't try to get the image if it doesn't appear to be a URL
|
||||||
if m == nil || m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
|
if m == nil || m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *m.BackImage, globalConfig)
|
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -86,13 +80,7 @@ func setMovieBackImage(ctx context.Context, m *models.ScrapedMovie, globalConfig
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getImage(ctx context.Context, url string, globalConfig GlobalConfig) (*string, error) {
|
func getImage(ctx context.Context, url string, client *http.Client, globalConfig GlobalConfig) (*string, error) {
|
||||||
client := &http.Client{
|
|
||||||
Transport: &http.Transport{ // ignore insecure certificates
|
|
||||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: !globalConfig.GetScraperCertCheck()}},
|
|
||||||
Timeout: imageGetTimeout,
|
|
||||||
}
|
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -137,10 +125,10 @@ func getImage(ctx context.Context, url string, globalConfig GlobalConfig) (*stri
|
|||||||
return &img, nil
|
return &img, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getStashPerformerImage(ctx context.Context, stashURL string, performerID string, globalConfig GlobalConfig) (*string, error) {
|
func getStashPerformerImage(ctx context.Context, stashURL string, performerID string, client *http.Client, globalConfig GlobalConfig) (*string, error) {
|
||||||
return getImage(ctx, stashURL+"/performer/"+performerID+"/image", globalConfig)
|
return getImage(ctx, stashURL+"/performer/"+performerID+"/image", client, globalConfig)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getStashSceneImage(ctx context.Context, stashURL string, sceneID string, globalConfig GlobalConfig) (*string, error) {
|
func getStashSceneImage(ctx context.Context, stashURL string, sceneID string, client *http.Client, globalConfig GlobalConfig) (*string, error) {
|
||||||
return getImage(ctx, stashURL+"/scene/"+sceneID+"/screenshot", globalConfig)
|
return getImage(ctx, stashURL+"/scene/"+sceneID+"/screenshot", client, globalConfig)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -16,13 +17,15 @@ type jsonScraper struct {
|
|||||||
scraper scraperTypeConfig
|
scraper scraperTypeConfig
|
||||||
config config
|
config config
|
||||||
globalConfig GlobalConfig
|
globalConfig GlobalConfig
|
||||||
|
client *http.Client
|
||||||
txnManager models.TransactionManager
|
txnManager models.TransactionManager
|
||||||
}
|
}
|
||||||
|
|
||||||
func newJsonScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *jsonScraper {
|
func newJsonScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *jsonScraper {
|
||||||
return &jsonScraper{
|
return &jsonScraper{
|
||||||
scraper: scraper,
|
scraper: scraper,
|
||||||
config: config,
|
config: config,
|
||||||
|
client: client,
|
||||||
globalConfig: globalConfig,
|
globalConfig: globalConfig,
|
||||||
txnManager: txnManager,
|
txnManager: txnManager,
|
||||||
}
|
}
|
||||||
@@ -49,7 +52,7 @@ func (s *jsonScraper) scrapeURL(ctx context.Context, url string) (string, *mappe
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) {
|
func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) {
|
||||||
r, err := loadURL(ctx, url, s.config, s.globalConfig)
|
r, err := loadURL(ctx, url, s.client, s.config, s.globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,11 +2,15 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
stash_config "github.com/stashapp/stash/pkg/manager/config"
|
stash_config "github.com/stashapp/stash/pkg/manager/config"
|
||||||
@@ -15,6 +19,22 @@ import (
|
|||||||
"github.com/stashapp/stash/pkg/utils"
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached")
|
||||||
|
|
||||||
|
const (
|
||||||
|
// scrapeGetTimeout is the timeout for scraper HTTP requests. Includes transfer time.
|
||||||
|
// We may want to bump this at some point and use local context-timeouts if more granularity
|
||||||
|
// is needed.
|
||||||
|
scrapeGetTimeout = time.Second * 60
|
||||||
|
|
||||||
|
// maxIdleConnsPerHost is the maximum number of idle connections the HTTP client will
|
||||||
|
// keep on a per-host basis.
|
||||||
|
maxIdleConnsPerHost = 8
|
||||||
|
|
||||||
|
// maxRedirects defines the maximum number of redirects the HTTP client will follow
|
||||||
|
maxRedirects = 20
|
||||||
|
)
|
||||||
|
|
||||||
// GlobalConfig contains the global scraper options.
|
// GlobalConfig contains the global scraper options.
|
||||||
type GlobalConfig interface {
|
type GlobalConfig interface {
|
||||||
GetScraperUserAgent() string
|
GetScraperUserAgent() string
|
||||||
@@ -33,11 +53,32 @@ func isCDPPathWS(c GlobalConfig) bool {
|
|||||||
|
|
||||||
// Cache stores scraper details.
|
// Cache stores scraper details.
|
||||||
type Cache struct {
|
type Cache struct {
|
||||||
|
client *http.Client
|
||||||
scrapers []scraper
|
scrapers []scraper
|
||||||
globalConfig GlobalConfig
|
globalConfig GlobalConfig
|
||||||
txnManager models.TransactionManager
|
txnManager models.TransactionManager
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// newClient creates a scraper-local http client we use throughout the scraper subsystem.
|
||||||
|
func newClient(gc GlobalConfig) *http.Client {
|
||||||
|
client := &http.Client{
|
||||||
|
Transport: &http.Transport{ // ignore insecure certificates
|
||||||
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: !gc.GetScraperCertCheck()},
|
||||||
|
MaxIdleConnsPerHost: maxIdleConnsPerHost,
|
||||||
|
},
|
||||||
|
Timeout: scrapeGetTimeout,
|
||||||
|
// defaultCheckRedirect code with max changed from 10 to maxRedirects
|
||||||
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||||
|
if len(via) >= maxRedirects {
|
||||||
|
return fmt.Errorf("after %d redirects: %w", maxRedirects, ErrMaxRedirects)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return client
|
||||||
|
}
|
||||||
|
|
||||||
// NewCache returns a new Cache loading scraper configurations from the
|
// NewCache returns a new Cache loading scraper configurations from the
|
||||||
// scraper path provided in the global config object. It returns a new
|
// scraper path provided in the global config object. It returns a new
|
||||||
// instance and an error if the scraper directory could not be loaded.
|
// instance and an error if the scraper directory could not be loaded.
|
||||||
@@ -45,19 +86,23 @@ type Cache struct {
|
|||||||
// Scraper configurations are loaded from yml files in the provided scrapers
|
// Scraper configurations are loaded from yml files in the provided scrapers
|
||||||
// directory and any subdirectories.
|
// directory and any subdirectories.
|
||||||
func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) {
|
func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) {
|
||||||
scrapers, err := loadScrapers(globalConfig, txnManager)
|
// HTTP Client setup
|
||||||
|
client := newClient(globalConfig)
|
||||||
|
|
||||||
|
scrapers, err := loadScrapers(globalConfig, client, txnManager)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Cache{
|
return &Cache{
|
||||||
|
client: client,
|
||||||
globalConfig: globalConfig,
|
globalConfig: globalConfig,
|
||||||
scrapers: scrapers,
|
scrapers: scrapers,
|
||||||
txnManager: txnManager,
|
txnManager: txnManager,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManager) ([]scraper, error) {
|
func loadScrapers(globalConfig GlobalConfig, client *http.Client, txnManager models.TransactionManager) ([]scraper, error) {
|
||||||
path := globalConfig.GetScrapersPath()
|
path := globalConfig.GetScrapersPath()
|
||||||
scrapers := make([]scraper, 0)
|
scrapers := make([]scraper, 0)
|
||||||
|
|
||||||
@@ -76,14 +121,14 @@ func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManage
|
|||||||
}
|
}
|
||||||
|
|
||||||
// add built-in freeones scraper
|
// add built-in freeones scraper
|
||||||
scrapers = append(scrapers, getFreeonesScraper(txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig))
|
scrapers = append(scrapers, getFreeonesScraper(client, txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig))
|
||||||
|
|
||||||
for _, file := range scraperFiles {
|
for _, file := range scraperFiles {
|
||||||
c, err := loadConfigFromYAMLFile(file)
|
c, err := loadConfigFromYAMLFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("Error loading scraper %s: %s", file, err.Error())
|
logger.Errorf("Error loading scraper %s: %s", file, err.Error())
|
||||||
} else {
|
} else {
|
||||||
scraper := createScraperFromConfig(*c, txnManager, globalConfig)
|
scraper := createScraperFromConfig(*c, client, txnManager, globalConfig)
|
||||||
scrapers = append(scrapers, scraper)
|
scrapers = append(scrapers, scraper)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -95,7 +140,7 @@ func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManage
|
|||||||
// In the event of an error during loading, the cache will be left empty.
|
// In the event of an error during loading, the cache will be left empty.
|
||||||
func (c *Cache) ReloadScrapers() error {
|
func (c *Cache) ReloadScrapers() error {
|
||||||
c.scrapers = nil
|
c.scrapers = nil
|
||||||
scrapers, err := loadScrapers(c.globalConfig, c.txnManager)
|
scrapers, err := loadScrapers(c.globalConfig, c.client, c.txnManager)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -255,7 +300,7 @@ func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerfo
|
|||||||
}
|
}
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
// post-process - set the image if applicable
|
||||||
if err := setPerformerImage(ctx, ret, c.globalConfig); err != nil {
|
if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||||
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
|
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -323,7 +368,7 @@ func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) er
|
|||||||
}
|
}
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
// post-process - set the image if applicable
|
||||||
if err := setSceneImage(ctx, ret, c.globalConfig); err != nil {
|
if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||||
logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err)
|
logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -551,10 +596,10 @@ func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
// post-process - set the image if applicable
|
||||||
if err := setMovieFrontImage(context.TODO(), ret, c.globalConfig); err != nil {
|
if err := setMovieFrontImage(context.TODO(), c.client, ret, c.globalConfig); err != nil {
|
||||||
logger.Warnf("Could not set front image using URL %s: %s", *ret.FrontImage, err.Error())
|
logger.Warnf("Could not set front image using URL %s: %s", *ret.FrontImage, err.Error())
|
||||||
}
|
}
|
||||||
if err := setMovieBackImage(context.TODO(), ret, c.globalConfig); err != nil {
|
if err := setMovieBackImage(context.TODO(), c.client, ret, c.globalConfig); err != nil {
|
||||||
logger.Warnf("Could not set back image using URL %s: %s", *ret.BackImage, err.Error())
|
logger.Warnf("Could not set back image using URL %s: %s", *ret.BackImage, err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"errors"
|
"errors"
|
||||||
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
"github.com/jinzhu/copier"
|
"github.com/jinzhu/copier"
|
||||||
@@ -16,13 +17,15 @@ type stashScraper struct {
|
|||||||
scraper scraperTypeConfig
|
scraper scraperTypeConfig
|
||||||
config config
|
config config
|
||||||
globalConfig GlobalConfig
|
globalConfig GlobalConfig
|
||||||
|
client *http.Client
|
||||||
txnManager models.TransactionManager
|
txnManager models.TransactionManager
|
||||||
}
|
}
|
||||||
|
|
||||||
func newStashScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *stashScraper {
|
func newStashScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *stashScraper {
|
||||||
return &stashScraper{
|
return &stashScraper{
|
||||||
scraper: scraper,
|
scraper: scraper,
|
||||||
config: config,
|
config: config,
|
||||||
|
client: client,
|
||||||
globalConfig: globalConfig,
|
globalConfig: globalConfig,
|
||||||
txnManager: txnManager,
|
txnManager: txnManager,
|
||||||
}
|
}
|
||||||
@@ -138,7 +141,7 @@ func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get the performer image directly
|
// get the performer image directly
|
||||||
ret.Image, err = getStashPerformerImage(context.TODO(), s.config.StashServer.URL, performerID, s.globalConfig)
|
ret.Image, err = getStashPerformerImage(context.TODO(), s.config.StashServer.URL, performerID, s.client, s.globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -164,7 +167,7 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get the performer image directly
|
// get the performer image directly
|
||||||
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, scene.ID, s.globalConfig)
|
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, scene.ID, s.client, s.globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -251,7 +254,7 @@ func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get the performer image directly
|
// get the performer image directly
|
||||||
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, q.FindScene.ID, s.globalConfig)
|
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, q.FindScene.ID, s.client, s.globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/Yamashou/gqlgenc/client"
|
"github.com/Yamashou/gqlgenc/client"
|
||||||
|
|
||||||
@@ -18,10 +17,6 @@ import (
|
|||||||
"github.com/stashapp/stash/pkg/utils"
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Timeout to get the image. Includes transfer time. May want to make this
|
|
||||||
// configurable at some point.
|
|
||||||
const imageGetTimeout = time.Second * 30
|
|
||||||
|
|
||||||
// Client represents the client interface to a stash-box server instance.
|
// Client represents the client interface to a stash-box server instance.
|
||||||
type Client struct {
|
type Client struct {
|
||||||
client *graphql.Client
|
client *graphql.Client
|
||||||
@@ -44,6 +39,10 @@ func NewClient(box models.StashBox, txnManager models.TransactionManager) *Clien
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c Client) getHTTPClient() *http.Client {
|
||||||
|
return c.client.Client.Client
|
||||||
|
}
|
||||||
|
|
||||||
// QueryStashBoxScene queries stash-box for scenes using a query string.
|
// QueryStashBoxScene queries stash-box for scenes using a query string.
|
||||||
func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*models.ScrapedScene, error) {
|
func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*models.ScrapedScene, error) {
|
||||||
scenes, err := c.client.SearchScene(ctx, queryStr)
|
scenes, err := c.client.SearchScene(ctx, queryStr)
|
||||||
@@ -55,7 +54,7 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod
|
|||||||
|
|
||||||
var ret []*models.ScrapedScene
|
var ret []*models.ScrapedScene
|
||||||
for _, s := range sceneFragments {
|
for _, s := range sceneFragments {
|
||||||
ss, err := sceneFragmentToScrapedScene(context.TODO(), c.txnManager, s)
|
ss, err := sceneFragmentToScrapedScene(context.TODO(), c.getHTTPClient(), c.txnManager, s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -201,7 +200,7 @@ func (c Client) findStashBoxScenesByFingerprints(ctx context.Context, fingerprin
|
|||||||
sceneFragments := scenes.FindScenesByFingerprints
|
sceneFragments := scenes.FindScenesByFingerprints
|
||||||
|
|
||||||
for _, s := range sceneFragments {
|
for _, s := range sceneFragments {
|
||||||
ss, err := sceneFragmentToScrapedScene(ctx, c.txnManager, s)
|
ss, err := sceneFragmentToScrapedScene(ctx, c.getHTTPClient(), c.txnManager, s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -509,11 +508,7 @@ func formatBodyModifications(m []*graphql.BodyModificationFragment) *string {
|
|||||||
return &ret
|
return &ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchImage(ctx context.Context, url string) (*string, error) {
|
func fetchImage(ctx context.Context, client *http.Client, url string) (*string, error) {
|
||||||
client := &http.Client{
|
|
||||||
Timeout: imageGetTimeout,
|
|
||||||
}
|
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -595,8 +590,8 @@ func performerFragmentToScrapedScenePerformer(p graphql.PerformerFragment) *mode
|
|||||||
return sp
|
return sp
|
||||||
}
|
}
|
||||||
|
|
||||||
func getFirstImage(ctx context.Context, images []*graphql.ImageFragment) *string {
|
func getFirstImage(ctx context.Context, client *http.Client, images []*graphql.ImageFragment) *string {
|
||||||
ret, err := fetchImage(ctx, images[0].URL)
|
ret, err := fetchImage(ctx, client, images[0].URL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warnf("Error fetching image %s: %s", images[0].URL, err.Error())
|
logger.Warnf("Error fetching image %s: %s", images[0].URL, err.Error())
|
||||||
}
|
}
|
||||||
@@ -617,7 +612,7 @@ func getFingerprints(scene *graphql.SceneFragment) []*models.StashBoxFingerprint
|
|||||||
return fingerprints
|
return fingerprints
|
||||||
}
|
}
|
||||||
|
|
||||||
func sceneFragmentToScrapedScene(ctx context.Context, txnManager models.TransactionManager, s *graphql.SceneFragment) (*models.ScrapedScene, error) {
|
func sceneFragmentToScrapedScene(ctx context.Context, client *http.Client, txnManager models.TransactionManager, s *graphql.SceneFragment) (*models.ScrapedScene, error) {
|
||||||
stashID := s.ID
|
stashID := s.ID
|
||||||
ss := &models.ScrapedScene{
|
ss := &models.ScrapedScene{
|
||||||
Title: s.Title,
|
Title: s.Title,
|
||||||
@@ -634,7 +629,7 @@ func sceneFragmentToScrapedScene(ctx context.Context, txnManager models.Transact
|
|||||||
if len(s.Images) > 0 {
|
if len(s.Images) > 0 {
|
||||||
// TODO - #454 code sorts images by aspect ratio according to a wanted
|
// TODO - #454 code sorts images by aspect ratio according to a wanted
|
||||||
// orientation. I'm just grabbing the first for now
|
// orientation. I'm just grabbing the first for now
|
||||||
ss.Image = getFirstImage(ctx, s.Images)
|
ss.Image = getFirstImage(ctx, client, s.Images)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
if err := txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||||
|
|||||||
@@ -3,12 +3,10 @@ package scraper
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/cookiejar"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -18,55 +16,40 @@ import (
|
|||||||
"github.com/chromedp/chromedp"
|
"github.com/chromedp/chromedp"
|
||||||
jsoniter "github.com/json-iterator/go"
|
jsoniter "github.com/json-iterator/go"
|
||||||
"golang.org/x/net/html/charset"
|
"golang.org/x/net/html/charset"
|
||||||
"golang.org/x/net/publicsuffix"
|
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Timeout for the scrape http request. Includes transfer time. May want to make this
|
|
||||||
// configurable at some point.
|
|
||||||
const scrapeGetTimeout = time.Second * 60
|
|
||||||
const scrapeDefaultSleep = time.Second * 2
|
const scrapeDefaultSleep = time.Second * 2
|
||||||
|
|
||||||
func loadURL(ctx context.Context, url string, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) {
|
func loadURL(ctx context.Context, loadURL string, client *http.Client, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) {
|
||||||
driverOptions := scraperConfig.DriverOptions
|
driverOptions := scraperConfig.DriverOptions
|
||||||
if driverOptions != nil && driverOptions.UseCDP {
|
if driverOptions != nil && driverOptions.UseCDP {
|
||||||
// get the page using chrome dp
|
// get the page using chrome dp
|
||||||
return urlFromCDP(ctx, url, *driverOptions, globalConfig)
|
return urlFromCDP(ctx, loadURL, *driverOptions, globalConfig)
|
||||||
}
|
}
|
||||||
|
|
||||||
// get the page using http.Client
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, loadURL, nil)
|
||||||
options := cookiejar.Options{
|
|
||||||
PublicSuffixList: publicsuffix.List,
|
|
||||||
}
|
|
||||||
jar, er := cookiejar.New(&options)
|
|
||||||
if er != nil {
|
|
||||||
return nil, er
|
|
||||||
}
|
|
||||||
|
|
||||||
setCookies(jar, scraperConfig)
|
|
||||||
printCookies(jar, scraperConfig, "Jar cookies set from scraper")
|
|
||||||
|
|
||||||
client := &http.Client{
|
|
||||||
Transport: &http.Transport{ // ignore insecure certificates
|
|
||||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: !globalConfig.GetScraperCertCheck()},
|
|
||||||
},
|
|
||||||
Timeout: scrapeGetTimeout,
|
|
||||||
// defaultCheckRedirect code with max changed from 10 to 20
|
|
||||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
||||||
if len(via) >= 20 {
|
|
||||||
return errors.New("stopped after 20 redirects")
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
Jar: jar,
|
|
||||||
}
|
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
jar, err := scraperConfig.jar()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error creating cookie jar: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
u, err := url.Parse(loadURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error parsing url %s: %w", loadURL, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch relevant cookies from the jar for url u and add them to the request
|
||||||
|
cookies := jar.Cookies(u)
|
||||||
|
for _, cookie := range cookies {
|
||||||
|
req.AddCookie(cookie)
|
||||||
|
}
|
||||||
|
|
||||||
userAgent := globalConfig.GetScraperUserAgent()
|
userAgent := globalConfig.GetScraperUserAgent()
|
||||||
if userAgent != "" {
|
if userAgent != "" {
|
||||||
req.Header.Set("User-Agent", userAgent)
|
req.Header.Set("User-Agent", userAgent)
|
||||||
@@ -98,7 +81,6 @@ func loadURL(ctx context.Context, url string, scraperConfig config, globalConfig
|
|||||||
|
|
||||||
bodyReader := bytes.NewReader(body)
|
bodyReader := bytes.NewReader(body)
|
||||||
printCookies(jar, scraperConfig, "Jar cookies found for scraper urls")
|
printCookies(jar, scraperConfig, "Jar cookies found for scraper urls")
|
||||||
|
|
||||||
return charset.NewReader(bodyReader, resp.Header.Get("Content-Type"))
|
return charset.NewReader(bodyReader, resp.Header.Get("Content-Type"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -20,14 +21,16 @@ type xpathScraper struct {
|
|||||||
scraper scraperTypeConfig
|
scraper scraperTypeConfig
|
||||||
config config
|
config config
|
||||||
globalConfig GlobalConfig
|
globalConfig GlobalConfig
|
||||||
|
client *http.Client
|
||||||
txnManager models.TransactionManager
|
txnManager models.TransactionManager
|
||||||
}
|
}
|
||||||
|
|
||||||
func newXpathScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *xpathScraper {
|
func newXpathScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *xpathScraper {
|
||||||
return &xpathScraper{
|
return &xpathScraper{
|
||||||
scraper: scraper,
|
scraper: scraper,
|
||||||
config: config,
|
config: config,
|
||||||
globalConfig: globalConfig,
|
globalConfig: globalConfig,
|
||||||
|
client: client,
|
||||||
txnManager: txnManager,
|
txnManager: txnManager,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -227,7 +230,7 @@ func (s *xpathScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInpu
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *xpathScraper) loadURL(ctx context.Context, url string) (*html.Node, error) {
|
func (s *xpathScraper) loadURL(ctx context.Context, url string) (*html.Node, error) {
|
||||||
r, err := loadURL(ctx, url, s.config, s.globalConfig)
|
r, err := loadURL(ctx, url, s.client, s.config, s.globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -874,7 +874,8 @@ xPathScrapers:
|
|||||||
|
|
||||||
globalConfig := mockGlobalConfig{}
|
globalConfig := mockGlobalConfig{}
|
||||||
|
|
||||||
s := createScraperFromConfig(*c, nil, globalConfig)
|
client := &http.Client{}
|
||||||
|
s := createScraperFromConfig(*c, client, nil, globalConfig)
|
||||||
performer, err := s.Performer.scrapeByURL(ts.URL)
|
performer, err := s.Performer.scrapeByURL(ts.URL)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user