Query url parameters (#878)

This commit is contained in:
WithoutPants
2020-10-22 11:56:04 +11:00
committed by GitHub
parent 228a5c5537
commit 109e55a25a
7 changed files with 84 additions and 29 deletions

View File

@@ -115,6 +115,7 @@ type scraperTypeConfig struct {
// for xpath name scraper only
QueryURL string `yaml:"queryURL"`
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
}
func (c scraperTypeConfig) validate() error {

View File

@@ -148,7 +148,11 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mod
}
// construct the URL
url := constructSceneURL(s.scraper.QueryURL, storedScene)
queryURL := queryURLParametersFromScene(storedScene)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getJsonScraper()
@@ -176,7 +180,12 @@ func (s *jsonScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput)
return nil, errors.New("no scene found")
}
url := constructGalleryURL(s.scraper.QueryURL, storedGallery)
// construct the URL
queryURL := queryURLParametersFromGallery(storedGallery)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getJsonScraper()

51
pkg/scraper/query_url.go Normal file
View File

@@ -0,0 +1,51 @@
package scraper
import (
"path/filepath"
"strings"
"github.com/stashapp/stash/pkg/models"
)
type (
	// queryURLReplacements holds the regex replacement configurations to run
	// against query URL parameter values, keyed by parameter name.
	queryURLReplacements map[string]mappedRegexConfigs

	// queryURLParameters holds the values available for substitution into a
	// query URL, keyed by placeholder name (without the surrounding braces).
	queryURLParameters map[string]string
)
// queryURLParametersFromScene builds the set of query URL parameters for a
// scene: its checksum, oshash, title and the base name of its file path.
func queryURLParametersFromScene(scene *models.Scene) queryURLParameters {
	return queryURLParameters{
		"checksum": scene.Checksum.String,
		"oshash":   scene.OSHash.String,
		"filename": filepath.Base(scene.Path),
		"title":    scene.Title.String,
	}
}
// queryURLParametersFromGallery builds the set of query URL parameters for a
// gallery: its checksum and title, plus the base name of its path when the
// gallery has a valid path. Without a valid path no "filename" entry is set.
func queryURLParametersFromGallery(gallery *models.Gallery) queryURLParameters {
	params := queryURLParameters{
		"checksum": gallery.Checksum,
		"title":    gallery.Title.String,
	}
	if path := gallery.Path; path.Valid {
		params["filename"] = filepath.Base(path.String)
	}
	return params
}
// applyReplacements rewrites, in place, every parameter value that has a
// matching entry in r by running that entry's replacements over the value.
// Parameters without an entry in r are left untouched.
func (p queryURLParameters) applyReplacements(r queryURLReplacements) {
	for name, value := range p {
		if configs, ok := r[name]; ok {
			p[name] = configs.apply(value)
		}
	}
}
// constructURL returns url with every "{name}" placeholder replaced by the
// value stored under name in p. Placeholders with no matching parameter are
// left as they are.
//
// All substitutions happen in a single pass over url, so substituted values
// are never rescanned: a value that itself contains placeholder text (for
// example a filename containing "{title}") is inserted verbatim, and the
// result does not depend on map iteration order.
func (p queryURLParameters) constructURL(url string) string {
	pairs := make([]string, 0, 2*len(p))
	for k, v := range p {
		pairs = append(pairs, "{"+k+"}", v)
	}
	return strings.NewReplacer(pairs...).Replace(url)
}

View File

@@ -10,7 +10,6 @@ import (
"net/http"
"net/http/cookiejar"
"os"
"path/filepath"
"strings"
"time"
@@ -19,7 +18,6 @@ import (
"github.com/chromedp/chromedp"
jsoniter "github.com/json-iterator/go"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"golang.org/x/net/html/charset"
"golang.org/x/net/publicsuffix"
)
@@ -28,27 +26,6 @@ import (
// configurable at some point.
const scrapeGetTimeout = time.Second * 30
// constructSceneURL returns url with the {checksum}, {oshash}, {filename} and
// {title} placeholders replaced by the corresponding details of scene. The
// filename is the base name of the scene's path.
func constructSceneURL(url string, scene *models.Scene) string {
	ret := strings.Replace(url, "{checksum}", scene.Checksum.String, -1)
	// Continue from ret rather than url so the checksum substitution is kept.
	ret = strings.Replace(ret, "{oshash}", scene.OSHash.String, -1)
	ret = strings.Replace(ret, "{filename}", filepath.Base(scene.Path), -1)
	ret = strings.Replace(ret, "{title}", scene.Title.String, -1)

	return ret
}
// constructGalleryURL returns url with the {checksum}, {filename} and {title}
// placeholders replaced by the corresponding details of gallery. {filename}
// is only replaced when the gallery has a valid path; otherwise the
// placeholder is left in the URL.
func constructGalleryURL(url string, gallery *models.Gallery) string {
	ret := strings.Replace(url, "{checksum}", gallery.Checksum, -1)
	// Each step continues from ret rather than url so that earlier
	// substitutions are not discarded.
	if gallery.Path.Valid {
		ret = strings.Replace(ret, "{filename}", filepath.Base(gallery.Path.String), -1)
	}
	ret = strings.Replace(ret, "{title}", gallery.Title.String, -1)

	return ret
}
func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) {
driverOptions := scraperConfig.DriverOptions
if driverOptions != nil && driverOptions.UseCDP {

View File

@@ -129,7 +129,11 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.SceneUpdateInput) (*mo
}
// construct the URL
url := constructSceneURL(s.scraper.QueryURL, storedScene)
queryURL := queryURLParametersFromScene(storedScene)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getXpathScraper()
@@ -158,7 +162,11 @@ func (s *xpathScraper) scrapeGalleryByFragment(gallery models.GalleryUpdateInput
}
// construct the URL
url := constructGalleryURL(s.scraper.QueryURL, storedGallery)
queryURL := queryURLParametersFromGallery(storedGallery)
if s.scraper.QueryURLReplacements != nil {
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
}
url := queryURL.constructURL(s.scraper.QueryURL)
scraper := s.getXpathScraper()

View File

@@ -6,6 +6,7 @@
* Add selective scene export.
### 🎨 Improvements
* Add support for query URL parameter regex replacement when scraping by query URL.
* Include empty fields in isMissing filter.
* Show static image on scene wall if preview video is missing.
* Add path filter to scene and gallery query.

View File

@@ -216,15 +216,23 @@ For `sceneByFragment`, the `queryURL` field must also be present. This field is
* `{filename}` - the base filename of the scene
* `{title}` - the title of the scene
These placeholder field values may be manipulated with regex replacements by adding a `queryURLReplace` section. This section maps each placeholder field name (without the braces) to a list of regex configurations, using the same format as the `replace` post-process action covered below.
For example:
```
sceneByFragment:
action: scrapeJson
queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1
scraper: sceneQueryScraper
queryURL: https://metadataapi.net/api/scenes?parse={filename}&limit=1
queryURLReplace:
filename:
- regex: <some regex>
with: <replacement>
```
The above configuration would scrape from the value of `queryURL`, replacing `{filename}` with the base filename of the scene, after it has been manipulated by the regex replacements.
### Xpath and JSON scrapers configuration
The top-level `xPathScrapers` field contains xpath scraping configurations, freely named. These are referenced in the `scraper` field for `scrapeXPath` scrapers.