mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 20:34:37 +03:00
Add cdp support for xpath scrapers (#625)
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
@@ -3,17 +3,14 @@ package scraper
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/antchfx/htmlquery"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
"golang.org/x/net/publicsuffix"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
@@ -111,42 +108,7 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mo
|
||||
}
|
||||
|
||||
func (s *xpathScraper) loadURL(url string) (*html.Node, error) {
|
||||
options := cookiejar.Options{
|
||||
PublicSuffixList: publicsuffix.List,
|
||||
}
|
||||
jar, er := cookiejar.New(&options)
|
||||
if er != nil {
|
||||
return nil, er
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: scrapeGetTimeout,
|
||||
// defaultCheckRedirect code with max changed from 10 to 20
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) >= 20 {
|
||||
return errors.New("stopped after 20 redirects")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
Jar: jar,
|
||||
}
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
userAgent := s.globalConfig.UserAgent
|
||||
if userAgent != "" {
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
|
||||
r, err := loadURL(url, s.config, s.globalConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user