Add cdp support for xpath scrapers (#625)

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
bnkai
2020-08-04 03:42:40 +03:00
committed by GitHub
parent 1b4a9eed36
commit 4373f9bf01
284 changed files with 133250 additions and 54 deletions

View File

@@ -3,17 +3,14 @@ package scraper
import (
"bytes"
"errors"
"net/http"
"net/http/cookiejar"
"net/url"
"regexp"
"strings"
"time"
"github.com/antchfx/htmlquery"
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
"golang.org/x/net/publicsuffix"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
@@ -111,42 +108,7 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mo
}
func (s *xpathScraper) loadURL(url string) (*html.Node, error) {
options := cookiejar.Options{
PublicSuffixList: publicsuffix.List,
}
jar, er := cookiejar.New(&options)
if er != nil {
return nil, er
}
client := &http.Client{
Timeout: scrapeGetTimeout,
// defaultCheckRedirect code with max changed from 10 to 20
CheckRedirect: func(req *http.Request, via []*http.Request) error {
if len(via) >= 20 {
return errors.New("stopped after 20 redirects")
}
return nil
},
Jar: jar,
}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
userAgent := s.globalConfig.UserAgent
if userAgent != "" {
req.Header.Set("User-Agent", userAgent)
}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
r, err := loadURL(url, s.config, s.globalConfig)
if err != nil {
return nil, err
}