Add Image Scraping (#5562)
Co-authored-by: keenbed <155155956+keenbed@users.noreply.github.com>
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
parent b6ace42973
commit e97f647a43
@@ -63,6 +63,28 @@ type ImageFilterType struct {
	UpdatedAt *TimestampCriterionInput `json:"updated_at"`
}

type ImageUpdateInput struct {
	ClientMutationID *string  `json:"clientMutationId"`
	ID               string   `json:"id"`
	Title            *string  `json:"title"`
	Code             *string  `json:"code"`
	Urls             []string `json:"urls"`
	Date             *string  `json:"date"`
	Details          *string  `json:"details"`
	Photographer     *string  `json:"photographer"`
	Rating100        *int     `json:"rating100"`
	Organized        *bool    `json:"organized"`
	SceneIds         []string `json:"scene_ids"`
	StudioID         *string  `json:"studio_id"`
	TagIds           []string `json:"tag_ids"`
	PerformerIds     []string `json:"performer_ids"`
	GalleryIds       []string `json:"gallery_ids"`
	PrimaryFileID    *string  `json:"primary_file_id"`

	// deprecated
	URL *string `json:"url"`
}

type ImageDestroyInput struct {
	ID         string `json:"id"`
	DeleteFile *bool  `json:"delete_file"`
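As a usage illustration (not part of this diff): populating the new multi-URL field on ImageUpdateInput while leaving the deprecated URL unset. The field names follow the struct definition above; the values and the calling context are invented.

package main

import "github.com/stashapp/stash/pkg/models"

func main() {
	title := "Example title"

	// Use the new Urls slice; the deprecated URL field is left nil so that
	// new writes go through the multi-URL path.
	input := models.ImageUpdateInput{
		ID:    "42",
		Title: &title,
		Urls:  []string{"https://example.org/image/42"},
	}
	_ = input
}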
@@ -31,6 +31,7 @@ type scraperActionImpl interface {

	scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*ScrapedScene, error)
	scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*ScrapedGallery, error)
	scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error)
}

func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, globalConfig GlobalConfig) scraperActionImpl {

@@ -77,11 +77,18 @@ type GalleryFinder interface {
	models.URLLoader
}

type ImageFinder interface {
	models.ImageGetter
	models.FileLoader
	models.URLLoader
}

type Repository struct {
	TxnManager models.TxnManager

	SceneFinder     SceneFinder
	GalleryFinder   GalleryFinder
	ImageFinder     ImageFinder
	TagFinder       TagFinder
	PerformerFinder PerformerFinder
	GroupFinder     match.GroupNamesFinder

@@ -93,6 +100,7 @@ func NewRepository(repo models.Repository) Repository {
		TxnManager:      repo.TxnManager,
		SceneFinder:     repo.Scene,
		GalleryFinder:   repo.Gallery,
		ImageFinder:     repo.Image,
		TagFinder:       repo.Tag,
		PerformerFinder: repo.Performer,
		GroupFinder:     repo.Group,

@@ -357,6 +365,28 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty Scrape
			return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
		}

		if scraped != nil {
			ret = scraped
		}

	case ScrapeContentTypeImage:
		is, ok := s.(imageScraper)
		if !ok {
			return nil, fmt.Errorf("%w: cannot use scraper %s as a image scraper", ErrNotSupported, scraperID)
		}

		scene, err := c.getImage(ctx, id)
		if err != nil {
			return nil, fmt.Errorf("scraper %s: unable to load image id %v: %w", scraperID, id, err)
		}

		// don't assign nil concrete pointer to ret interface, otherwise nil
		// detection is harder
		scraped, err := is.viaImage(ctx, c.client, scene)
		if err != nil {
			return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
		}

		if scraped != nil {
			ret = scraped
		}

@@ -426,3 +456,31 @@ func (c Cache) getGallery(ctx context.Context, galleryID int) (*models.Gallery,
	}
	return ret, nil
}

func (c Cache) getImage(ctx context.Context, imageID int) (*models.Image, error) {
	var ret *models.Image
	r := c.repository
	if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
		qb := r.ImageFinder

		var err error
		ret, err = qb.Find(ctx, imageID)
		if err != nil {
			return err
		}

		if ret == nil {
			return fmt.Errorf("image with id %d not found", imageID)
		}

		err = ret.LoadFiles(ctx, qb)
		if err != nil {
			return err
		}

		return ret.LoadURLs(ctx, qb)
	}); err != nil {
		return nil, err
	}
	return ret, nil
}
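Illustrative sketch (not part of this diff) of driving the new image path end to end through Cache.ScrapeID: the call loads the image via getImage, dispatches to the configured image scraper, and hands back a *ScrapedImage as ScrapedContent. The import path and the wrapper function are assumptions.

package example // hypothetical caller, for illustration only

import (
	"context"

	"github.com/stashapp/stash/pkg/scraper" // assumed import path, following pkg/models and pkg/utils above
)

// scrapeOneImage sketches the call path added in this commit.
func scrapeOneImage(ctx context.Context, c scraper.Cache, scraperID string, imageID int) (*scraper.ScrapedImage, error) {
	content, err := c.ScrapeID(ctx, scraperID, imageID, scraper.ScrapeContentTypeImage)
	if err != nil {
		return nil, err
	}

	img, ok := content.(*scraper.ScrapedImage)
	if !ok {
		return nil, nil // the scraper returned no image content
	}
	return img, nil
}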
@@ -45,6 +45,13 @@ type config struct {
	// Configuration for querying a gallery by a URL
	GalleryByURL []*scrapeByURLConfig `yaml:"galleryByURL"`

	// Configuration for querying an image by a URL
	ImageByURL []*scrapeByURLConfig `yaml:"imageByURL"`

	// Configuration for querying image by an Image fragment
	ImageByFragment *scraperTypeConfig `yaml:"imageByFragment"`

	// Configuration for querying a movie by a URL
	// Configuration for querying a movie by a URL - deprecated, use GroupByURL
	MovieByURL []*scrapeByURLConfig `yaml:"movieByURL"`
	GroupByURL []*scrapeByURLConfig `yaml:"groupByURL"`

@@ -295,6 +302,21 @@ func (c config) spec() Scraper {
		ret.Gallery = &gallery
	}

	image := ScraperSpec{}
	if c.ImageByFragment != nil {
		image.SupportedScrapes = append(image.SupportedScrapes, ScrapeTypeFragment)
	}
	if len(c.ImageByURL) > 0 {
		image.SupportedScrapes = append(image.SupportedScrapes, ScrapeTypeURL)
		for _, v := range c.ImageByURL {
			image.Urls = append(image.Urls, v.URL...)
		}
	}

	if len(image.SupportedScrapes) > 0 {
		ret.Image = &image
	}

	group := ScraperSpec{}
	if len(c.MovieByURL) > 0 || len(c.GroupByURL) > 0 {
		group.SupportedScrapes = append(group.SupportedScrapes, ScrapeTypeURL)

@@ -319,6 +341,8 @@ func (c config) supports(ty ScrapeContentType) bool {
		return (c.SceneByName != nil && c.SceneByQueryFragment != nil) || c.SceneByFragment != nil || len(c.SceneByURL) > 0
	case ScrapeContentTypeGallery:
		return c.GalleryByFragment != nil || len(c.GalleryByURL) > 0
	case ScrapeContentTypeImage:
		return c.ImageByFragment != nil || len(c.ImageByURL) > 0
	case ScrapeContentTypeMovie, ScrapeContentTypeGroup:
		return len(c.MovieByURL) > 0 || len(c.GroupByURL) > 0
	}

@@ -346,6 +370,12 @@ func (c config) matchesURL(url string, ty ScrapeContentType) bool {
				return true
			}
		}
	case ScrapeContentTypeImage:
		for _, scraper := range c.ImageByURL {
			if scraper.matchesURL(url) {
				return true
			}
		}
	case ScrapeContentTypeMovie, ScrapeContentTypeGroup:
		for _, scraper := range c.MovieByURL {
			if scraper.matchesURL(url) {

@@ -33,6 +33,9 @@ func (g group) fragmentScraper(input Input) *scraperTypeConfig {
	case input.Gallery != nil:
		// TODO - this should be galleryByQueryFragment
		return g.config.GalleryByFragment
	case input.Image != nil:
		// TODO - this should be imageByImageFragment
		return g.config.ImageByFragment
	case input.Scene != nil:
		return g.config.SceneByQueryFragment
	}

@@ -75,6 +78,15 @@ func (g group) viaGallery(ctx context.Context, client *http.Client, gallery *mod
	return s.scrapeGalleryByGallery(ctx, gallery)
}

func (g group) viaImage(ctx context.Context, client *http.Client, gallery *models.Image) (*ScrapedImage, error) {
	if g.config.ImageByFragment == nil {
		return nil, ErrNotSupported
	}

	s := g.config.getScraper(*g.config.ImageByFragment, client, g.globalConf)
	return s.scrapeImageByImage(ctx, gallery)
}

func loadUrlCandidates(c config, ty ScrapeContentType) []*scrapeByURLConfig {
	switch ty {
	case ScrapeContentTypePerformer:

@@ -85,6 +97,8 @@ func loadUrlCandidates(c config, ty ScrapeContentType) []*scrapeByURLConfig {
		return append(c.MovieByURL, c.GroupByURL...)
	case ScrapeContentTypeGallery:
		return c.GalleryByURL
	case ScrapeContentTypeImage:
		return c.ImageByURL
	}

	panic("loadUrlCandidates: unreachable")

@@ -11,6 +11,28 @@ import (
	"github.com/stashapp/stash/pkg/utils"
)

type ScrapedImage struct {
	Title        *string                    `json:"title"`
	Code         *string                    `json:"code"`
	Details      *string                    `json:"details"`
	Photographer *string                    `json:"photographer"`
	URLs         []string                   `json:"urls"`
	Date         *string                    `json:"date"`
	Studio       *models.ScrapedStudio      `json:"studio"`
	Tags         []*models.ScrapedTag       `json:"tags"`
	Performers   []*models.ScrapedPerformer `json:"performers"`
}

func (ScrapedImage) IsScrapedContent() {}

type ScrapedImageInput struct {
	Title   *string  `json:"title"`
	Code    *string  `json:"code"`
	Details *string  `json:"details"`
	URLs    []string `json:"urls"`
	Date    *string  `json:"date"`
}

func setPerformerImage(ctx context.Context, client *http.Client, p *models.ScrapedPerformer, globalConfig GlobalConfig) error {
	// backwards compatibility: we fetch the image if it's a URL and set it to the first image
	// Image is deprecated, so only do this if Images is unset
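Because ScrapedImage carries json tags, the wire shape a scraper is expected to produce can be read straight off the struct above. A sketch with an invented payload (the import path is assumed, the values are made up):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/stashapp/stash/pkg/scraper" // assumed import path, matching pkg/models and pkg/utils above
)

func main() {
	// Invented payload, shaped after the json tags on ScrapedImage.
	payload := []byte(`{
		"title": "Example image",
		"photographer": "Example Photographer",
		"urls": ["https://example.org/image/1"],
		"date": "2024-01-01",
		"studio": {"name": "Example Studio"},
		"tags": [{"name": "example tag"}],
		"performers": [{"name": "Example Performer"}]
	}`)

	var img scraper.ScrapedImage
	if err := json.Unmarshal(payload, &img); err != nil {
		panic(err)
	}
	fmt.Println(*img.Title, len(img.Tags), len(img.Performers))
}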
@@ -102,6 +102,12 @@ func (s *jsonScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeCont
			return nil, err
		}
		return ret, nil
	case ScrapeContentTypeImage:
		ret, err := scraper.scrapeImage(ctx, q)
		if err != nil || ret == nil {
			return nil, err
		}
		return ret, nil
	case ScrapeContentTypeMovie, ScrapeContentTypeGroup:
		ret, err := scraper.scrapeGroup(ctx, q)
		if err != nil || ret == nil {

@@ -225,6 +231,30 @@ func (s *jsonScraper) scrapeByFragment(ctx context.Context, input Input) (Scrape
	return scraper.scrapeScene(ctx, q)
}

func (s *jsonScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error) {
	// construct the URL
	queryURL := queryURLParametersFromImage(image)
	if s.scraper.QueryURLReplacements != nil {
		queryURL.applyReplacements(s.scraper.QueryURLReplacements)
	}
	url := queryURL.constructURL(s.scraper.QueryURL)

	scraper := s.getJsonScraper()

	if scraper == nil {
		return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
	}

	doc, err := s.loadURL(ctx, url)

	if err != nil {
		return nil, err
	}

	q := s.getJsonQuery(doc)
	return scraper.scrapeImage(ctx, q)
}

func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*ScrapedGallery, error) {
	// construct the URL
	queryURL := queryURLParametersFromGallery(gallery)

@@ -181,6 +181,7 @@ type mappedGalleryScraperConfig struct {
	Performers mappedConfig `yaml:"Performers"`
	Studio     mappedConfig `yaml:"Studio"`
}

type _mappedGalleryScraperConfig mappedGalleryScraperConfig

func (s *mappedGalleryScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {

@@ -228,6 +229,60 @@ func (s *mappedGalleryScraperConfig) UnmarshalYAML(unmarshal func(interface{}) e
	return nil
}

type mappedImageScraperConfig struct {
	mappedConfig

	Tags       mappedConfig `yaml:"Tags"`
	Performers mappedConfig `yaml:"Performers"`
	Studio     mappedConfig `yaml:"Studio"`
}
type _mappedImageScraperConfig mappedImageScraperConfig

func (s *mappedImageScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// HACK - unmarshal to map first, then remove known scene sub-fields, then
	// remarshal to yaml and pass that down to the base map
	parentMap := make(map[string]interface{})
	if err := unmarshal(parentMap); err != nil {
		return err
	}

	// move the known sub-fields to a separate map
	thisMap := make(map[string]interface{})

	thisMap[mappedScraperConfigSceneTags] = parentMap[mappedScraperConfigSceneTags]
	thisMap[mappedScraperConfigScenePerformers] = parentMap[mappedScraperConfigScenePerformers]
	thisMap[mappedScraperConfigSceneStudio] = parentMap[mappedScraperConfigSceneStudio]

	delete(parentMap, mappedScraperConfigSceneTags)
	delete(parentMap, mappedScraperConfigScenePerformers)
	delete(parentMap, mappedScraperConfigSceneStudio)

	// re-unmarshal the sub-fields
	yml, err := yaml.Marshal(thisMap)
	if err != nil {
		return err
	}

	// needs to be a different type to prevent infinite recursion
	c := _mappedImageScraperConfig{}
	if err := yaml.Unmarshal(yml, &c); err != nil {
		return err
	}

	*s = mappedImageScraperConfig(c)

	yml, err = yaml.Marshal(parentMap)
	if err != nil {
		return err
	}

	if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
		return err
	}

	return nil
}

type mappedPerformerScraperConfig struct {
	mappedConfig
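Rough sketch (assumptions noted) of the YAML shape this unmarshaller accepts for an image block: Tags, Performers and Studio are peeled off into their own mappedConfig fields and the remaining keys fall through to the embedded mappedConfig. The selectors are placeholders, the snippet would need to live inside the scraper package since the types are unexported (for example in a test), and it assumes gopkg.in/yaml.v2 to match the unmarshal signature above.

// assumes: import ("testing"; yaml "gopkg.in/yaml.v2")
func TestMappedImageScraperConfigSplit(t *testing.T) {
	// Placeholder selectors; only the key layout matters here.
	doc := []byte(`
Title: //h1/text()
Photographer: //span[@class="credit"]/text()
Tags:
  Name: //div[@class="tags"]/a/text()
Performers:
  Name: //div[@class="performers"]/a/text()
Studio:
  Name: //div[@class="studio"]/a/text()
`)

	var cfg mappedImageScraperConfig
	if err := yaml.Unmarshal(doc, &cfg); err != nil {
		t.Fatal(err)
	}

	// After UnmarshalYAML runs, cfg.Tags, cfg.Performers and cfg.Studio hold the
	// relationship mappings, while Title and Photographer live in cfg.mappedConfig.
	_ = cfg
}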
@@ -785,6 +840,7 @@ type mappedScraper struct {
	Common    commonMappedConfig            `yaml:"common"`
	Scene     *mappedSceneScraperConfig     `yaml:"scene"`
	Gallery   *mappedGalleryScraperConfig   `yaml:"gallery"`
	Image     *mappedImageScraperConfig     `yaml:"image"`
	Performer *mappedPerformerScraperConfig `yaml:"performer"`
	Movie     *mappedMovieScraperConfig     `yaml:"movie"`
}

@@ -1016,6 +1072,57 @@ func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*Scraped
	return nil, nil
}

func (s mappedScraper) scrapeImage(ctx context.Context, q mappedQuery) (*ScrapedImage, error) {
	var ret ScrapedImage

	imageScraperConfig := s.Image
	if imageScraperConfig == nil {
		return nil, nil
	}

	imageMap := imageScraperConfig.mappedConfig

	imagePerformersMap := imageScraperConfig.Performers
	imageTagsMap := imageScraperConfig.Tags
	imageStudioMap := imageScraperConfig.Studio

	logger.Debug(`Processing image:`)
	results := imageMap.process(ctx, q, s.Common)

	// now apply the performers and tags
	if imagePerformersMap != nil {
		logger.Debug(`Processing image performers:`)
		ret.Performers = processRelationships[models.ScrapedPerformer](ctx, s, imagePerformersMap, q)
	}

	if imageTagsMap != nil {
		logger.Debug(`Processing image tags:`)
		ret.Tags = processRelationships[models.ScrapedTag](ctx, s, imageTagsMap, q)
	}

	if imageStudioMap != nil {
		logger.Debug(`Processing image studio:`)
		studioResults := imageStudioMap.process(ctx, q, s.Common)

		if len(studioResults) > 0 {
			studio := &models.ScrapedStudio{}
			studioResults[0].apply(studio)
			ret.Studio = studio
		}
	}

	// if no basic fields are populated, and no relationships, then return nil
	if len(results) == 0 && len(ret.Performers) == 0 && len(ret.Tags) == 0 && ret.Studio == nil {
		return nil, nil
	}

	if len(results) > 0 {
		results[0].apply(&ret)
	}

	return &ret, nil
}

func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*ScrapedGallery, error) {
	var ret ScrapedGallery

@@ -33,6 +33,12 @@ func (c Cache) postScrape(ctx context.Context, content ScrapedContent) (ScrapedC
		}
	case ScrapedGallery:
		return c.postScrapeGallery(ctx, v)
	case *ScrapedImage:
		if v != nil {
			return c.postScrapeImage(ctx, *v)
		}
	case ScrapedImage:
		return c.postScrapeImage(ctx, v)
	case *models.ScrapedMovie:
		if v != nil {
			return c.postScrapeMovie(ctx, *v)

@@ -315,6 +321,40 @@ func (c Cache) postScrapeGallery(ctx context.Context, g ScrapedGallery) (Scraped
	return g, nil
}

func (c Cache) postScrapeImage(ctx context.Context, image ScrapedImage) (ScrapedContent, error) {
	r := c.repository
	if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
		pqb := r.PerformerFinder
		tqb := r.TagFinder
		sqb := r.StudioFinder

		for _, p := range image.Performers {
			if err := match.ScrapedPerformer(ctx, pqb, p, nil); err != nil {
				return err
			}
		}

		tags, err := postProcessTags(ctx, tqb, image.Tags)
		if err != nil {
			return err
		}
		image.Tags = tags

		if image.Studio != nil {
			err := match.ScrapedStudio(ctx, sqb, image.Studio, nil)
			if err != nil {
				return err
			}
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return image, nil
}

func postProcessTags(ctx context.Context, tqb models.TagQueryer, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
	var ret []*models.ScrapedTag

@@ -73,6 +73,24 @@ func queryURLParametersFromGallery(gallery *models.Gallery) queryURLParameters {
	return ret
}

func queryURLParametersFromImage(image *models.Image) queryURLParameters {
	ret := make(queryURLParameters)
	ret["checksum"] = image.Checksum

	if image.Path != "" {
		ret["filename"] = filepath.Base(image.Path)
	}
	if image.Title != "" {
		ret["title"] = image.Title
	}

	if len(image.URLs.List()) > 0 {
		ret["url"] = image.URLs.List()[0]
	}

	return ret
}

func (p queryURLParameters) applyReplacements(r queryURLReplacements) {
	for k, v := range p {
		rpl, found := r[k]
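For context, these parameters feed constructURL, which, as with the existing scene and gallery fragment paths, is presumed to substitute them into {placeholder} tokens in the scraper's queryURL. A standalone illustration of that substitution, with an invented template and values:

package main

import (
	"fmt"
	"strings"
)

// Standalone illustration of the {placeholder} substitution that constructURL
// is presumed to perform; the template and values are invented.
func main() {
	params := map[string]string{
		"checksum": "deadbeef",
		"filename": "example.jpg",
		"title":    "Example title",
		"url":      "https://example.org/image/1",
	}

	queryURL := "https://metadata.example.org/lookup?hash={checksum}&name={filename}"
	for k, v := range params {
		queryURL = strings.ReplaceAll(queryURL, "{"+k+"}", v)
	}
	fmt.Println(queryURL)
}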
@@ -36,6 +36,7 @@ const (
	ScrapeContentTypeGroup     ScrapeContentType = "GROUP"
	ScrapeContentTypePerformer ScrapeContentType = "PERFORMER"
	ScrapeContentTypeScene     ScrapeContentType = "SCENE"
	ScrapeContentTypeImage     ScrapeContentType = "IMAGE"
)

var AllScrapeContentType = []ScrapeContentType{

@@ -44,11 +45,12 @@ var AllScrapeContentType = []ScrapeContentType{
	ScrapeContentTypeGroup,
	ScrapeContentTypePerformer,
	ScrapeContentTypeScene,
	ScrapeContentTypeImage,
}

func (e ScrapeContentType) IsValid() bool {
	switch e {
	case ScrapeContentTypeGallery, ScrapeContentTypeMovie, ScrapeContentTypeGroup, ScrapeContentTypePerformer, ScrapeContentTypeScene:
	case ScrapeContentTypeGallery, ScrapeContentTypeMovie, ScrapeContentTypeGroup, ScrapeContentTypePerformer, ScrapeContentTypeScene, ScrapeContentTypeImage:
		return true
	}
	return false

@@ -84,6 +86,8 @@ type Scraper struct {
	Scene *ScraperSpec `json:"scene"`
	// Details for gallery scraper
	Gallery *ScraperSpec `json:"gallery"`
	// Details for image scraper
	Image *ScraperSpec `json:"image"`
	// Details for movie scraper
	Group *ScraperSpec `json:"group"`
	// Details for movie scraper

@@ -161,6 +165,7 @@ type Input struct {
	Performer *ScrapedPerformerInput
	Scene     *ScrapedSceneInput
	Gallery   *ScrapedGalleryInput
	Image     *ScrapedImageInput
}

// populateURL populates the URL field of the input based on the

@@ -225,6 +230,14 @@ type sceneScraper interface {
	viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*ScrapedScene, error)
}

// imageScraper is a scraper which supports image scrapes with
// image data as the input.
type imageScraper interface {
	scraper

	viaImage(ctx context.Context, client *http.Client, image *models.Image) (*ScrapedImage, error)
}

// galleryScraper is a scraper which supports gallery scrapes with
// gallery data as the input.
type galleryScraper interface {

@@ -388,6 +388,10 @@ func (s *scriptScraper) scrape(ctx context.Context, input string, ty ScrapeConte
		var movie *models.ScrapedMovie
		err := s.runScraperScript(ctx, input, &movie)
		return movie, err
	case ScrapeContentTypeImage:
		var image *ScrapedImage
		err := s.runScraperScript(ctx, input, &image)
		return image, err
	}

	return nil, ErrNotSupported

@@ -421,6 +425,20 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
	return ret, err
}

func (s *scriptScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error) {
	inString, err := json.Marshal(imageToUpdateInput(image))

	if err != nil {
		return nil, err
	}

	var ret *ScrapedImage

	err = s.runScraperScript(ctx, string(inString), &ret)

	return ret, err
}

func handleScraperStderr(name string, scraperOutputReader io.ReadCloser) {
	const scraperPrefix = "[Scrape / %s] "
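Putting the two script-scraper hunks together: the script receives a JSON-serialized ImageUpdateInput (built by imageToUpdateInput) and must print JSON that decodes into ScrapedImage. A rough sketch of such a script written in Go; the stdin/stdout contract is inferred from how runScraperScript is used here, the field names come from the json tags in this diff, and everything else is illustrative.

package main

import (
	"encoding/json"
	"os"
)

// input mirrors (a subset of) the json tags on models.ImageUpdateInput that the scraper sends.
type input struct {
	ID    string   `json:"id"`
	Title *string  `json:"title"`
	Urls  []string `json:"urls"`
	Date  *string  `json:"date"`
}

// output mirrors (a subset of) the json tags on ScrapedImage.
type output struct {
	Title *string  `json:"title"`
	URLs  []string `json:"urls"`
	Date  *string  `json:"date"`
}

func main() {
	var in input
	if err := json.NewDecoder(os.Stdin).Decode(&in); err != nil {
		os.Exit(1)
	}

	// A real script would look the image up somewhere; here we just echo fields back.
	out := output{Title: in.Title, URLs: in.Urls, Date: in.Date}
	_ = json.NewEncoder(os.Stdout).Encode(out)
}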
@@ -388,6 +388,33 @@ func (s *stashScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode
	return &ret, nil
}

func (s *stashScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error) {
	return nil, ErrNotSupported
}

func (s *stashScraper) scrapeByURL(_ context.Context, _ string, _ ScrapeContentType) (ScrapedContent, error) {
	return nil, ErrNotSupported
}

func imageToUpdateInput(gallery *models.Image) models.ImageUpdateInput {
	dateToStringPtr := func(s *models.Date) *string {
		if s != nil {
			v := s.String()
			return &v
		}

		return nil
	}

	// fallback to file basename if title is empty
	title := gallery.GetTitle()
	urls := gallery.URLs.List()

	return models.ImageUpdateInput{
		ID:      strconv.Itoa(gallery.ID),
		Title:   &title,
		Details: &gallery.Details,
		Urls:    urls,
		Date:    dateToStringPtr(gallery.Date),
	}
}

@@ -83,6 +83,12 @@ func (s *xpathScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeCon
			return nil, err
		}
		return ret, nil
	case ScrapeContentTypeImage:
		ret, err := scraper.scrapeImage(ctx, q)
		if err != nil || ret == nil {
			return nil, err
		}
		return ret, nil
	case ScrapeContentTypeMovie, ScrapeContentTypeGroup:
		ret, err := scraper.scrapeGroup(ctx, q)
		if err != nil || ret == nil {

@@ -228,6 +234,30 @@ func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode
	return scraper.scrapeGallery(ctx, q)
}

func (s *xpathScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error) {
	// construct the URL
	queryURL := queryURLParametersFromImage(image)
	if s.scraper.QueryURLReplacements != nil {
		queryURL.applyReplacements(s.scraper.QueryURLReplacements)
	}
	url := queryURL.constructURL(s.scraper.QueryURL)

	scraper := s.getXpathScraper()

	if scraper == nil {
		return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
	}

	doc, err := s.loadURL(ctx, url)

	if err != nil {
		return nil, err
	}

	q := s.getXPathQuery(doc)
	return scraper.scrapeImage(ctx, q)
}

func (s *xpathScraper) loadURL(ctx context.Context, url string) (*html.Node, error) {
	r, err := loadURL(ctx, url, s.client, s.config, s.globalConfig)
	if err != nil {