diff --git a/pkg/scraper/image.go b/pkg/scraper/image.go
index 6b41b502d..08cb6725d 100644
--- a/pkg/scraper/image.go
+++ b/pkg/scraper/image.go
@@ -2,6 +2,7 @@ package scraper
 
 import (
 	"crypto/tls"
+	"fmt"
 	"io/ioutil"
 	"net/http"
 	"strings"
@@ -113,6 +114,11 @@ func getImage(url string, globalConfig GlobalConfig) (*string, error) {
 		return nil, err
 	}
 
 	defer resp.Body.Close()
 
+	// Status is checked after the deferred Close so error responses do not leak the body.
+	if resp.StatusCode >= 400 {
+		return nil, fmt.Errorf("http error %d", resp.StatusCode)
+	}
+
 	body, err := ioutil.ReadAll(resp.Body)
diff --git a/pkg/scraper/url.go b/pkg/scraper/url.go
index 60573f76a..85e1590ee 100644
--- a/pkg/scraper/url.go
+++ b/pkg/scraper/url.go
@@ -79,6 +79,10 @@ func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Re
 	if err != nil {
 		return nil, err
 	}
+	if resp.StatusCode >= 400 {
+		return nil, fmt.Errorf("http error %d", resp.StatusCode)
+	}
+
 	defer resp.Body.Close()
 
 	body, err := ioutil.ReadAll(resp.Body)
diff --git a/pkg/utils/image.go b/pkg/utils/image.go
index 488e8049a..ad4f56941 100644
--- a/pkg/utils/image.go
+++ b/pkg/utils/image.go
@@ -2,6 +2,7 @@ package utils
 
 import (
 	"crypto/md5"
+	"crypto/tls"
 	"encoding/base64"
 	"fmt"
 	"io/ioutil"
@@ -33,6 +34,10 @@ func ProcessImageInput(imageInput string) ([]byte, error) {
 // ReadImageFromURL returns image data from a URL
 func ReadImageFromURL(url string) ([]byte, error) {
 	client := &http.Client{
+		Transport: &http.Transport{ // ignore insecure certificates
+			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+		},
+
 		Timeout: imageGetTimeout,
 	}
 
@@ -47,6 +52,7 @@ func ReadImageFromURL(url string) ([]byte, error) {
 	if req.URL.Scheme != "" {
 		req.Header.Set("Referer", req.URL.Scheme+"://"+req.Host+"/")
 	}
+	req.Header.Set("User-Agent", GetUserAgent())
 
 	resp, err := client.Do(req)
 
@@ -54,6 +60,11 @@ func ReadImageFromURL(url string) ([]byte, error) {
 		return nil, err
 	}
 
 	defer resp.Body.Close()
 
+	// Status is checked after the deferred Close so error responses do not leak the body.
+	if resp.StatusCode >= 400 {
+		return nil, fmt.Errorf("http error %d", resp.StatusCode)
+	}
+
 	body, err := 
ioutil.ReadAll(resp.Body)
diff --git a/pkg/utils/user_agent.go b/pkg/utils/user_agent.go
new file mode 100644
index 000000000..533b6b6ab
--- /dev/null
+++ b/pkg/utils/user_agent.go
@@ -0,0 +1,46 @@
+package utils
+
+import "runtime"
+
+// User-Agent strings handed out by GetUserAgent, one per supported platform.
+// Valid UAs taken from https://user-agents.net
+const (
+	// Safari is returned when GOOS is darwin.
+	// NOTE(review): the trailing "/iY0wnXbs-59" does not look like part of a
+	// stock Safari User-Agent; confirm it is intentional.
+	Safari = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15/iY0wnXbs-59"
+
+	// FirefoxWindows is returned when GOOS is windows.
+	FirefoxWindows = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"
+
+	// FirefoxLinux is returned for linux/amd64 and for every os/arch that has
+	// no dedicated entry.
+	FirefoxLinux = "Mozilla/5.0 (X11; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0"
+
+	// FirefoxLinuxArm is returned for linux/arm.
+	FirefoxLinuxArm = "Mozilla/5.0 (X11; Linux armv7l; rv:86.0) Gecko/20100101 Firefox/86.0"
+
+	// FirefoxLinuxArm64 is returned for linux/arm64.
+	FirefoxLinuxArm64 = "Mozilla/5.0 (X11; Linux aarch64; rv:86.0) Gecko/20100101 Firefox/86.0"
+)
+
+// GetUserAgent returns a valid User Agent string that matches the running os/arch.
+// Combinations without a dedicated string fall back to FirefoxLinux.
+func GetUserAgent() string {
+	switch runtime.GOOS {
+	case "darwin":
+		return Safari
+	case "windows":
+		return FirefoxWindows
+	case "linux":
+		switch runtime.GOARCH {
+		case "arm":
+			return FirefoxLinuxArm
+		case "arm64":
+			return FirefoxLinuxArm64
+		}
+	}
+
+	// linux/amd64 and anything unrecognised share the generic Linux string.
+	return FirefoxLinux
+}