mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 20:34:37 +03:00
Expose url for URLReplace in JSON scrapeByURL and scrapeByFragment (#1150)
* Expose url for URLReplace in JSON scrapeByURL and scrapeByFragment * Apply queryURLReplace to xpath scrapers Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
@@ -52,7 +52,7 @@ func (s *jsonScraper) loadURL(url string) (string, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
logger.Infof("loadURL (%s)\n", url)
|
||||||
doc, err := ioutil.ReadAll(r)
|
doc, err := ioutil.ReadAll(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@@ -71,7 +71,8 @@ func (s *jsonScraper) loadURL(url string) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -81,7 +82,8 @@ func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -91,7 +93,8 @@ func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -101,7 +104,8 @@ func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, er
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,13 @@ func queryURLParametersFromScene(scene *models.Scene) queryURLParameters {
|
|||||||
ret["oshash"] = scene.OSHash.String
|
ret["oshash"] = scene.OSHash.String
|
||||||
ret["filename"] = filepath.Base(scene.Path)
|
ret["filename"] = filepath.Base(scene.Path)
|
||||||
ret["title"] = scene.Title.String
|
ret["title"] = scene.Title.String
|
||||||
|
ret["url"] = scene.URL.String
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
// queryURLParameterFromURL returns a new queryURLParameters map whose only entry is "url", set to the given url.
func queryURLParameterFromURL(url string) queryURLParameters {
|
||||||
|
ret := make(queryURLParameters)
|
||||||
|
ret["url"] = url
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -28,6 +35,7 @@ func queryURLParametersFromGallery(gallery *models.Gallery) queryURLParameters {
|
|||||||
ret["filename"] = filepath.Base(gallery.Path.String)
|
ret["filename"] = filepath.Base(gallery.Path.String)
|
||||||
}
|
}
|
||||||
ret["title"] = gallery.Title.String
|
ret["title"] = gallery.Title.String
|
||||||
|
ret["url"] = gallery.URL.String
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
@@ -49,3 +57,14 @@ func (p queryURLParameters) constructURL(url string) string {
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// replaceURL does a partial URL replace: only the "url" parameter is populated.
// When scraperConfig.QueryURLReplacements is set, the replacements are applied to that
// parameter and the URL is rebuilt from scraperConfig.QueryURL; otherwise url is returned unchanged.
|
||||||
|
func replaceURL(url string, scraperConfig scraperTypeConfig) string {
|
||||||
|
u := url
|
||||||
|
queryURL := queryURLParameterFromURL(u)
|
||||||
|
if scraperConfig.QueryURLReplacements != nil {
|
||||||
|
queryURL.applyReplacements(scraperConfig.QueryURLReplacements)
|
||||||
|
u = queryURL.constructURL(scraperConfig.QueryURL) // NOTE(review): result is built from QueryURL alone; presumably empty when queryURL is not configured — confirm
|
||||||
|
}
|
||||||
|
return u
|
||||||
|
}
|
||||||
|
|||||||
@@ -52,7 +52,8 @@ func (s *xpathScraper) scrapeURL(url string) (*html.Node, *mappedScraper, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -62,7 +63,8 @@ func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerforme
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -72,7 +74,8 @@ func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -82,7 +85,8 @@ func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, e
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
||||||
doc, scraper, err := s.scrapeURL(url)
|
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
|
||||||
|
doc, scraper, err := s.scrapeURL(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
### 🎨 Improvements
|
### 🎨 Improvements
|
||||||
|
* Add `url` field to `URLReplace`, and make `queryURLReplace` available when scraping by URL.
|
||||||
* Make logging format consistent across platforms and include full timestamp.
|
* Make logging format consistent across platforms and include full timestamp.
|
||||||
* Remember gallery images view mode.
|
* Remember gallery images view mode.
|
||||||
* Add option to skip checking of insecure SSL certificates when scraping.
|
* Add option to skip checking of insecure SSL certificates when scraping.
|
||||||
|
|||||||
@@ -223,6 +223,7 @@ For `sceneByFragment`, the `queryURL` field must also be present. This field is
|
|||||||
* `{oshash}` - the oshash of the scene
|
* `{oshash}` - the oshash of the scene
|
||||||
* `{filename}` - the base filename of the scene
|
* `{filename}` - the base filename of the scene
|
||||||
* `{title}` - the title of the scene
|
* `{title}` - the title of the scene
|
||||||
|
* `{url}` - the url of the scene
|
||||||
|
|
||||||
These placeholder field values may be manipulated with regex replacements by adding a `queryURLReplace` section, containing a map of placeholder field to regex configuration which uses the same format as the `replace` post-process action covered below.
|
These placeholder field values may be manipulated with regex replacements by adding a `queryURLReplace` section, containing a map of placeholder field to regex configuration which uses the same format as the `replace` post-process action covered below.
|
||||||
|
|
||||||
@@ -241,6 +242,24 @@ sceneByFragment:
|
|||||||
|
|
||||||
The above configuration would scrape from the value of `queryURL`, replacing `{filename}` with the base filename of the scene, after it has been manipulated by the regex replacements.
|
The above configuration would scrape from the value of `queryURL`, replacing `{filename}` with the base filename of the scene, after it has been manipulated by the regex replacements.
|
||||||
|
|
||||||
|
### scrapeXPath and scrapeJson use with `<scene|performer|gallery|movie>ByURL`
|
||||||
|
|
||||||
|
For `sceneByURL`, `performerByURL`, `galleryByURL` and `movieByURL`, the `queryURL` field can also be present if we want to use `queryURLReplace`. The functionality is the same as for `sceneByFragment`, but the only placeholder field available is `url`:
|
||||||
|
* `{url}` - the url of the scene/performer/gallery/movie
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
sceneByURL:
|
||||||
|
- action: scrapeJson
|
||||||
|
url:
|
||||||
|
- metartnetwork.com
|
||||||
|
scraper: sceneScraper
|
||||||
|
queryURL: "{url}"
|
||||||
|
queryURLReplace:
|
||||||
|
url:
|
||||||
|
- regex: '^(?:.+\.)?([^.]+)\.com/.+movie/(\d+)/(\w+)/?$'
|
||||||
|
with: https://www.$1.com/api/movie?name=$3&date=$2
|
||||||
|
```
|
||||||
|
|
||||||
### Stash
|
### Stash
|
||||||
|
|
||||||
A different stash server can be configured as a scraping source. This action applies only to `performerByName`, `performerByFragment`, and `sceneByFragment` types. This action requires that the top-level `stashServer` field is configured.
|
A different stash server can be configured as a scraping source. This action applies only to `performerByName`, `performerByFragment`, and `sceneByFragment` types. This action requires that the top-level `stashServer` field is configured.
|
||||||
|
|||||||
Reference in New Issue
Block a user