Query url parameters (#878)

This commit is contained in:
WithoutPants
2020-10-22 11:56:04 +11:00
committed by GitHub
parent 228a5c5537
commit 109e55a25a
7 changed files with 84 additions and 29 deletions

View File

@@ -115,6 +115,7 @@ type scraperTypeConfig struct {
// for xpath name scraper only // for xpath name scraper only
QueryURL string `yaml:"queryURL"` QueryURL string `yaml:"queryURL"`
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
} }
func (c scraperTypeConfig) validate() error { func (c scraperTypeConfig) validate() error {

View File

@@ -148,7 +148,11 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mod
} }
// construct the URL // construct the URL
url := constructSceneURL(s.scraper.QueryURL, storedScene) queryURL := queryURLParametersFromScene(storedScene)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getJsonScraper() scraper := s.getJsonScraper()
@@ -176,7 +180,12 @@ func (s *jsonScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput)
return nil, errors.New("no scene found") return nil, errors.New("no scene found")
} }
url := constructGalleryURL(s.scraper.QueryURL, storedGallery) // construct the URL
queryURL := queryURLParametersFromGallery(storedGallery)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getJsonScraper() scraper := s.getJsonScraper()

51
pkg/scraper/query_url.go Normal file
View File

@@ -0,0 +1,51 @@
package scraper
import (
"path/filepath"
"strings"
"github.com/stashapp/stash/pkg/models"
)
// queryURLReplacements maps a query URL placeholder field name (eg
// "filename") to the regex replacement configuration applied to that
// field's value before it is substituted into the query URL
// (configured via the `queryURLReplace` yaml key).
type queryURLReplacements map[string]mappedRegexConfigs

// queryURLParameters maps placeholder field names to the values that
// replace the corresponding "{name}" tokens in a query URL template.
type queryURLParameters map[string]string
// queryURLParametersFromScene builds the query URL placeholder map for a
// scene, populating the checksum, oshash, filename (base name of the
// scene's path) and title fields.
func queryURLParametersFromScene(scene *models.Scene) queryURLParameters {
	return queryURLParameters{
		"checksum": scene.Checksum.String,
		"oshash":   scene.OSHash.String,
		"filename": filepath.Base(scene.Path),
		"title":    scene.Title.String,
	}
}
// queryURLParametersFromGallery builds the query URL placeholder map for
// a gallery, populating checksum and title, plus filename (base name of
// the gallery's path) when the gallery has a valid path.
func queryURLParametersFromGallery(gallery *models.Gallery) queryURLParameters {
	ret := queryURLParameters{
		"checksum": gallery.Checksum,
		"title":    gallery.Title.String,
	}

	if gallery.Path.Valid {
		ret["filename"] = filepath.Base(gallery.Path.String)
	}

	return ret
}
// applyReplacements runs the configured regex replacements over the
// parameter values in place. Only fields present in both the parameter
// map and the replacement map are modified; all others are untouched.
func (p queryURLParameters) applyReplacements(r queryURLReplacements) {
	for field, rpl := range r {
		if value, ok := p[field]; ok {
			p[field] = rpl.apply(value)
		}
	}
}
// constructURL substitutes each parameter into the url template,
// replacing every occurrence of the "{name}" placeholder with the
// parameter's value. Placeholders with no matching parameter are left
// intact in the returned string.
func (p queryURLParameters) constructURL(url string) string {
	ret := url
	for k, v := range p {
		// ReplaceAll is the idiomatic form of Replace(…, -1).
		ret = strings.ReplaceAll(ret, "{"+k+"}", v)
	}
	return ret
}

View File

@@ -10,7 +10,6 @@ import (
"net/http" "net/http"
"net/http/cookiejar" "net/http/cookiejar"
"os" "os"
"path/filepath"
"strings" "strings"
"time" "time"
@@ -19,7 +18,6 @@ import (
"github.com/chromedp/chromedp" "github.com/chromedp/chromedp"
jsoniter "github.com/json-iterator/go" jsoniter "github.com/json-iterator/go"
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"golang.org/x/net/html/charset" "golang.org/x/net/html/charset"
"golang.org/x/net/publicsuffix" "golang.org/x/net/publicsuffix"
) )
@@ -28,27 +26,6 @@ import (
// configurable at some point. // configurable at some point.
const scrapeGetTimeout = time.Second * 30 const scrapeGetTimeout = time.Second * 30
func constructSceneURL(url string, scene *models.Scene) string {
// support checksum, title and filename
ret := strings.Replace(url, "{checksum}", scene.Checksum.String, -1)
ret = strings.Replace(url, "{oshash}", scene.OSHash.String, -1)
ret = strings.Replace(ret, "{filename}", filepath.Base(scene.Path), -1)
ret = strings.Replace(ret, "{title}", scene.Title.String, -1)
return ret
}
func constructGalleryURL(url string, gallery *models.Gallery) string {
// support checksum, title and filename
ret := strings.Replace(url, "{checksum}", gallery.Checksum, -1)
if gallery.Path.Valid {
ret = strings.Replace(url, "{filename}", filepath.Base(gallery.Path.String), -1)
}
ret = strings.Replace(url, "{title}", gallery.Title.String, -1)
return ret
}
func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) { func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) {
driverOptions := scraperConfig.DriverOptions driverOptions := scraperConfig.DriverOptions
if driverOptions != nil && driverOptions.UseCDP { if driverOptions != nil && driverOptions.UseCDP {

View File

@@ -129,7 +129,11 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mo
} }
// construct the URL // construct the URL
url := constructSceneURL(s.scraper.QueryURL, storedScene) queryURL := queryURLParametersFromScene(storedScene)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getXpathScraper() scraper := s.getXpathScraper()
@@ -158,7 +162,11 @@ func (s *xpathScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput
} }
// construct the URL // construct the URL
url := constructGalleryURL(s.scraper.QueryURL, storedGallery) queryURL := queryURLParametersFromGallery(storedGallery)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getXpathScraper() scraper := s.getXpathScraper()

View File

@@ -6,6 +6,7 @@
* Add selective scene export. * Add selective scene export.
### 🎨 Improvements ### 🎨 Improvements
* Add support for query URL parameter regex replacement when scraping by query URL.
* Include empty fields in isMissing filter * Include empty fields in isMissing filter
* Show static image on scene wall if preview video is missing. * Show static image on scene wall if preview video is missing.
* Add path filter to scene and gallery query. * Add path filter to scene and gallery query.

View File

@@ -216,15 +216,23 @@ For `sceneByFragment`, the `queryURL` field must also be present. This field is
* `{filename}` - the base filename of the scene * `{filename}` - the base filename of the scene
* `{title}` - the title of the scene * `{title}` - the title of the scene
These placeholder field values may be manipulated with regex replacements by adding a `queryURLReplace` section, containing a map of placeholder field to regex configuration which uses the same format as the `replace` post-process action covered below.
For example: For example:
``` ```
sceneByFragment: sceneByFragment:
action: scrapeJson action: scrapeJson
queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1
scraper: sceneQueryScraper scraper: sceneQueryScraper
queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1
queryURLReplace:
filename:
- regex: <some regex>
with: <replacement>
``` ```
The above configuration would scrape from the value of `queryURL`, replacing `{filename}` with the base filename of the scene, after it has been manipulated by the regex replacements.
### Xpath and JSON scrapers configuration ### Xpath and JSON scrapers configuration
The top-level `xPathScrapers` field contains xpath scraping configurations, freely named. These are referenced in the `scraper` field for `scrapeXPath` scrapers. The top-level `xPathScrapers` field contains xpath scraping configurations, freely named. These are referenced in the `scraper` field for `scrapeXPath` scrapers.