From 4045ddf3e9fdbb53f78ad2c2b43ed273a84e37c3 Mon Sep 17 00:00:00 2001 From: woodgen <69223967+woodgen@users.noreply.github.com> Date: Mon, 10 Aug 2020 07:34:15 +0200 Subject: [PATCH] Implement scraping movies by URL (#709) * api/urlbuilders/movie: Auto format. * graphql+pkg+ui: Implement scraping movies by URL. This patch implements the missing required boilerplate for scraping movies by URL, using performers and scenes as a reference. Although this patch contains a big chunk of ground work for enabling scraping movies by fragment, the feature would require additional changes to be completely implemented and was not tested. * graphql+pkg+ui: Scrape movie studio. Extends and corrects the movie model for the ability to store and dereference studio IDs with received studio string from the scraper. This was done with Scenes as a reference. For simplicity the duplication of having `ScrapedMovieStudio` and `ScrapedSceneStudio` was kept, which should probably be refactored to be the same type in the model in the future. * ui/movies: Add movie scrape dialog. Adds the possibility to update existing movie entries with the URL scraper. For this the MovieScrapeDialog.tsx was implemented with Performers and Scenes as a reference. In addition DurationUtils needs to be called one time for converting seconds from the model to the string that is displayed in the component. This seemed the least intrusive to me as it kept a ScrapeResult type compatible with ScrapedInputGroupRow. 
--- gqlgen.yml | 4 + graphql/documents/data/scrapers.graphql | 14 +- .../queries/scrapers/scrapers.graphql | 19 +- graphql/schema/schema.graphql | 3 + graphql/schema/types/scraped-movie.graphql | 14 +- graphql/schema/types/scraper.graphql | 2 + pkg/api/resolver_query_scraper.go | 8 + pkg/api/urlbuilders/movie.go | 5 +- pkg/models/model_scraped_item.go | 21 ++ pkg/scraper/action.go | 2 + pkg/scraper/config.go | 53 +++++ pkg/scraper/image.go | 34 +++ pkg/scraper/json.go | 10 + pkg/scraper/mapped.go | 83 +++++++ pkg/scraper/scrapers.go | 66 ++++++ pkg/scraper/script.go | 10 + pkg/scraper/stash.go | 4 + pkg/scraper/xpath.go | 10 + .../components/Changelog/versions/v030.tsx | 1 + .../components/Movies/MovieDetails/Movie.tsx | 147 ++++++++++++- .../Movies/MovieDetails/MovieScrapeDialog.tsx | 204 ++++++++++++++++++ ui/v2.5/src/core/StashService.ts | 10 + ui/v2.5/src/docs/en/Scraping.md | 13 +- 23 files changed, 728 insertions(+), 9 deletions(-) create mode 100644 ui/v2.5/src/components/Movies/MovieDetails/MovieScrapeDialog.tsx diff --git a/gqlgen.yml b/gqlgen.yml index 29e794f31..0d1a780e5 100644 --- a/gqlgen.yml +++ b/gqlgen.yml @@ -44,3 +44,7 @@ models: model: github.com/stashapp/stash/pkg/models.ScrapedSceneTag SceneFileType: model: github.com/stashapp/stash/pkg/models.SceneFileType + ScrapedMovie: + model: github.com/stashapp/stash/pkg/models.ScrapedMovie + ScrapedMovieStudio: + model: github.com/stashapp/stash/pkg/models.ScrapedMovieStudio diff --git a/graphql/documents/data/scrapers.graphql b/graphql/documents/data/scrapers.graphql index e9c8f324b..9270b8c8e 100644 --- a/graphql/documents/data/scrapers.graphql +++ b/graphql/documents/data/scrapers.graphql @@ -38,6 +38,12 @@ fragment ScrapedScenePerformerData on ScrapedScenePerformer { aliases } +fragment ScrapedMovieStudioData on ScrapedMovieStudio { + id + name + url +} + fragment ScrapedMovieData on ScrapedMovie { name aliases @@ -47,6 +53,12 @@ fragment ScrapedMovieData on ScrapedMovie { director url synopsis + 
front_image + back_image + + studio { + ...ScrapedMovieStudioData + } } fragment ScrapedSceneMovieData on ScrapedSceneMovie { @@ -105,4 +117,4 @@ fragment ScrapedSceneData on ScrapedScene { movies { ...ScrapedSceneMovieData } -} \ No newline at end of file +} diff --git a/graphql/documents/queries/scrapers/scrapers.graphql b/graphql/documents/queries/scrapers/scrapers.graphql index cc4fa15e9..9904f31f5 100644 --- a/graphql/documents/queries/scrapers/scrapers.graphql +++ b/graphql/documents/queries/scrapers/scrapers.graphql @@ -20,6 +20,17 @@ query ListSceneScrapers { } } +query ListMovieScrapers { + listMovieScrapers { + id + name + movie { + urls + supported_scrapes + } + } +} + query ScrapePerformerList($scraper_id: ID!, $query: String!) { scrapePerformerList(scraper_id: $scraper_id, query: $query) { ...ScrapedPerformerData @@ -48,4 +59,10 @@ query ScrapeSceneURL($url: String!) { scrapeSceneURL(url: $url) { ...ScrapedSceneData } -} \ No newline at end of file +} + +query ScrapeMovieURL($url: String!) { + scrapeMovieURL(url: $url) { + ...ScrapedMovieData + } +} diff --git a/graphql/schema/schema.graphql b/graphql/schema/schema.graphql index 6ecdccb91..e2ebf0280 100644 --- a/graphql/schema/schema.graphql +++ b/graphql/schema/schema.graphql @@ -59,6 +59,7 @@ type Query { """List available scrapers""" listPerformerScrapers: [Scraper!]! listSceneScrapers: [Scraper!]! + listMovieScrapers: [Scraper!]! """Scrape a list of performers based on name""" scrapePerformerList(scraper_id: ID!, query: String!): [ScrapedPerformer!]! 
@@ -70,6 +71,8 @@ type Query { scrapeScene(scraper_id: ID!, scene: SceneUpdateInput!): ScrapedScene """Scrapes a complete performer record based on a URL""" scrapeSceneURL(url: String!): ScrapedScene + """Scrapes a complete movie record based on a URL""" + scrapeMovieURL(url: String!): ScrapedMovie """Scrape a performer using Freeones""" scrapeFreeones(performer_name: String!): ScrapedPerformer diff --git a/graphql/schema/types/scraped-movie.graphql b/graphql/schema/types/scraped-movie.graphql index 7589de364..ac221fb88 100644 --- a/graphql/schema/types/scraped-movie.graphql +++ b/graphql/schema/types/scraped-movie.graphql @@ -1,3 +1,10 @@ +type ScrapedMovieStudio { + """Set if studio matched""" + id: ID + name: String! + url: String +} + """A movie from a scraping operation...""" type ScrapedMovie { name: String @@ -8,6 +15,11 @@ type ScrapedMovie { director: String url: String synopsis: String + studio: ScrapedMovieStudio + + """This should be base64 encoded""" + front_image: String + back_image: String } input ScrapedMovieInput { @@ -19,4 +31,4 @@ input ScrapedMovieInput { director: String url: String synopsis: String -} \ No newline at end of file +} diff --git a/graphql/schema/types/scraper.graphql b/graphql/schema/types/scraper.graphql index 69c050a63..8cb0383ba 100644 --- a/graphql/schema/types/scraper.graphql +++ b/graphql/schema/types/scraper.graphql @@ -20,6 +20,8 @@ type Scraper { performer: ScraperSpec """Details for scene scraper""" scene: ScraperSpec + """Details for movie scraper""" + movie: ScraperSpec } diff --git a/pkg/api/resolver_query_scraper.go b/pkg/api/resolver_query_scraper.go index 51b758b41..c66f2ebc6 100644 --- a/pkg/api/resolver_query_scraper.go +++ b/pkg/api/resolver_query_scraper.go @@ -41,6 +41,10 @@ func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scrape return manager.GetInstance().ScraperCache.ListSceneScrapers(), nil } +func (r *queryResolver) ListMovieScrapers(ctx context.Context) ([]*models.Scraper, 
error) { + return manager.GetInstance().ScraperCache.ListMovieScrapers(), nil +} + func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) { if query == "" { return nil, nil @@ -64,3 +68,7 @@ func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) { return manager.GetInstance().ScraperCache.ScrapeSceneURL(url) } + +func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { + return manager.GetInstance().ScraperCache.ScrapeMovieURL(url) +} diff --git a/pkg/api/urlbuilders/movie.go b/pkg/api/urlbuilders/movie.go index 7e454c070..6f7694b83 100644 --- a/pkg/api/urlbuilders/movie.go +++ b/pkg/api/urlbuilders/movie.go @@ -3,13 +3,13 @@ package urlbuilders import "strconv" type MovieURLBuilder struct { - BaseURL string + BaseURL string MovieID string } func NewMovieURLBuilder(baseURL string, movieID int) MovieURLBuilder { return MovieURLBuilder{ - BaseURL: baseURL, + BaseURL: baseURL, MovieID: strconv.Itoa(movieID), } } @@ -21,4 +21,3 @@ func (b MovieURLBuilder) GetMovieFrontImageURL() string { func (b MovieURLBuilder) GetMovieBackImageURL() string { return b.BaseURL + "/movie/" + b.MovieID + "/backimage" } - diff --git a/pkg/models/model_scraped_item.go b/pkg/models/model_scraped_item.go index 719274b5f..ab0799f9d 100644 --- a/pkg/models/model_scraped_item.go +++ b/pkg/models/model_scraped_item.go @@ -135,3 +135,24 @@ type ScrapedSceneTag struct { ID *string `graphql:"id" json:"id"` Name string `graphql:"name" json:"name"` } + +type ScrapedMovie struct { + Name *string `graphql:"name" json:"name"` + Aliases *string `graphql:"aliases" json:"aliases"` + Duration *string `graphql:"duration" json:"duration"` + Date *string `graphql:"date" json:"date"` + Rating *string `graphql:"rating" json:"rating"` + Director *string 
`graphql:"director" json:"director"` + Studio *ScrapedMovieStudio `graphql:"studio" json:"studio"` + Synopsis *string `graphql:"synopsis" json:"synopsis"` + URL *string `graphql:"url" json:"url"` + FrontImage *string `graphql:"front_image" json:"front_image"` + BackImage *string `graphql:"back_image" json:"back_image"` +} + +type ScrapedMovieStudio struct { + // Set if studio matched + ID *string `graphql:"id" json:"id"` + Name string `graphql:"name" json:"name"` + URL *string `graphql:"url" json:"url"` +} diff --git a/pkg/scraper/action.go b/pkg/scraper/action.go index b7edb7140..8156fb6ce 100644 --- a/pkg/scraper/action.go +++ b/pkg/scraper/action.go @@ -39,6 +39,8 @@ type scraper interface { scrapeSceneByFragment(scene models.SceneUpdateInput) (*models.ScrapedScene, error) scrapeSceneByURL(url string) (*models.ScrapedScene, error) + + scrapeMovieByURL(url string) (*models.ScrapedMovie, error) } func getScraper(scraper scraperTypeConfig, config config, globalConfig GlobalConfig) scraper { diff --git a/pkg/scraper/config.go b/pkg/scraper/config.go index d270c63fb..fad5e04e4 100644 --- a/pkg/scraper/config.go +++ b/pkg/scraper/config.go @@ -35,6 +35,9 @@ type config struct { // Configuration for querying a scene by a URL SceneByURL []*scrapeByURLConfig `yaml:"sceneByURL"` + // Configuration for querying a movie by a URL + MovieByURL []*scrapeByURLConfig `yaml:"movieByURL"` + // Scraper debugging options DebugOptions *scraperDebugOptions `yaml:"debug"` @@ -86,6 +89,12 @@ func (c config) validate() error { } } + for _, s := range c.MovieByURL { + if err := s.validate(); err != nil { + return err + } + } + return nil } @@ -225,6 +234,18 @@ func (c config) toScraper() *models.Scraper { ret.Scene = &scene } + movie := models.ScraperSpec{} + if len(c.MovieByURL) > 0 { + movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.MovieByURL { + movie.Urls = append(movie.Urls, v.URL...) 
+ } + } + + if len(movie.SupportedScrapes) > 0 { + ret.Movie = &movie + } + return &ret } @@ -297,6 +318,20 @@ func (c config) matchesSceneURL(url string) bool { return false } +func (c config) supportsMovies() bool { + return len(c.MovieByURL) > 0 +} + +func (c config) matchesMovieURL(url string) bool { + for _, scraper := range c.MovieByURL { + if scraper.matchesURL(url) { + return true + } + } + + return false +} + func (c config) ScrapeScene(scene models.SceneUpdateInput, globalConfig GlobalConfig) (*models.ScrapedScene, error) { if c.SceneByFragment != nil { s := getScraper(*c.SceneByFragment, c, globalConfig) @@ -323,3 +358,21 @@ func (c config) ScrapeSceneURL(url string, globalConfig GlobalConfig) (*models.S return nil, nil } + +func (c config) ScrapeMovieURL(url string, globalConfig GlobalConfig) (*models.ScrapedMovie, error) { + for _, scraper := range c.MovieByURL { + if scraper.matchesURL(url) { + s := getScraper(scraper.scraperTypeConfig, c, globalConfig) + ret, err := s.scrapeMovieByURL(url) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} diff --git a/pkg/scraper/image.go b/pkg/scraper/image.go index 03e63d34b..5ab845444 100644 --- a/pkg/scraper/image.go +++ b/pkg/scraper/image.go @@ -47,6 +47,40 @@ func setSceneImage(s *models.ScrapedScene, globalConfig GlobalConfig) error { return nil } +func setMovieFrontImage(m *models.ScrapedMovie, globalConfig GlobalConfig) error { + // don't try to get the image if it doesn't appear to be a URL + if m == nil || m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") { + // nothing to do + return nil + } + + img, err := getImage(*m.FrontImage, globalConfig) + if err != nil { + return err + } + + m.FrontImage = img + + return nil +} + +func setMovieBackImage(m *models.ScrapedMovie, globalConfig GlobalConfig) error { + // don't try to get the image if it doesn't appear to be a URL + if m == nil || m.BackImage == nil || 
!strings.HasPrefix(*m.BackImage, "http") { + // nothing to do + return nil + } + + img, err := getImage(*m.BackImage, globalConfig) + if err != nil { + return err + } + + m.BackImage = img + + return nil +} + func getImage(url string, globalConfig GlobalConfig) (*string, error) { client := &http.Client{ Timeout: imageGetTimeout, diff --git a/pkg/scraper/json.go b/pkg/scraper/json.go index 2b25c55e0..7fb7522a3 100644 --- a/pkg/scraper/json.go +++ b/pkg/scraper/json.go @@ -88,6 +88,16 @@ func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) return scraper.scrapeScene(q) } +func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { + doc, scraper, err := s.scrapeURL(url) + if err != nil { + return nil, err + } + + q := s.getJsonQuery(doc) + return scraper.scrapeMovie(q) +} + func (s *jsonScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { scraper := s.getJsonScraper() diff --git a/pkg/scraper/mapped.go b/pkg/scraper/mapped.go index 55dcca0b3..b504066fe 100644 --- a/pkg/scraper/mapped.go +++ b/pkg/scraper/mapped.go @@ -161,6 +161,58 @@ func (s *mappedPerformerScraperConfig) UnmarshalYAML(unmarshal func(interface{}) return unmarshal(&s.mappedConfig) } +type mappedMovieScraperConfig struct { + mappedConfig + + Studio mappedConfig `yaml:"Studio"` +} +type _mappedMovieScraperConfig mappedMovieScraperConfig + +const ( + mappedScraperConfigMovieStudio = "Studio" +) + +func (s *mappedMovieScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + // HACK - unmarshal to map first, then remove known movie sub-fields, then + // remarshal to yaml and pass that down to the base map + parentMap := make(map[string]interface{}) + if err := unmarshal(parentMap); err != nil { + return err + } + + // move the known sub-fields to a separate map + thisMap := make(map[string]interface{}) + + thisMap[mappedScraperConfigMovieStudio] = parentMap[mappedScraperConfigMovieStudio] + + 
delete(parentMap, mappedScraperConfigMovieStudio) + + // re-unmarshal the sub-fields + yml, err := yaml.Marshal(thisMap) + if err != nil { + return err + } + + // needs to be a different type to prevent infinite recursion + c := _mappedMovieScraperConfig{} + if err := yaml.Unmarshal(yml, &c); err != nil { + return err + } + + *s = mappedMovieScraperConfig(c) + + yml, err = yaml.Marshal(parentMap) + if err != nil { + return err + } + + if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil { + return err + } + + return nil +} + type mappedRegexConfig struct { Regex string `yaml:"regex"` With string `yaml:"with"` @@ -454,6 +506,7 @@ type mappedScraper struct { Common commonMappedConfig `yaml:"common"` Scene *mappedSceneScraperConfig `yaml:"scene"` Performer *mappedPerformerScraperConfig `yaml:"performer"` + Movie *mappedMovieScraperConfig `yaml:"movie"` } type mappedResult map[string]string @@ -598,3 +651,33 @@ func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error) return &ret, nil } + +func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) { + var ret models.ScrapedMovie + + movieScraperConfig := s.Movie + if movieScraperConfig == nil || movieScraperConfig.mappedConfig == nil { + return nil, nil + } + movieMap := movieScraperConfig.mappedConfig + + movieStudioMap := movieScraperConfig.Studio + + results := movieMap.process(q, s.Common) + if len(results) > 0 { + results[0].apply(&ret) + + if movieStudioMap != nil { + logger.Debug(`Processing movie studio:`) + studioResults := movieStudioMap.process(q, s.Common) + + if len(studioResults) > 0 { + studio := &models.ScrapedMovieStudio{} + studioResults[0].apply(studio) + ret.Studio = studio + } + } + } + + return &ret, nil +} diff --git a/pkg/scraper/scrapers.go b/pkg/scraper/scrapers.go index 864c09a66..64788bccc 100644 --- a/pkg/scraper/scrapers.go +++ b/pkg/scraper/scrapers.go @@ -132,6 +132,20 @@ func (c Cache) ListSceneScrapers() []*models.Scraper { return ret } +// ListMovieScrapers returns a list of scrapers 
that are capable of +// scraping movies. +func (c Cache) ListMovieScrapers() []*models.Scraper { + var ret []*models.Scraper + for _, s := range c.scrapers { + // filter on type + if s.supportsMovies() { + ret = append(ret, s.toScraper()) + } + } + + return ret +} + func (c Cache) findScraper(scraperID string) *config { for _, s := range c.scrapers { if s.ID == scraperID { @@ -360,3 +374,55 @@ func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) { return nil, nil } + +func matchMovieStudio(s *models.ScrapedMovieStudio) error { + qb := models.NewStudioQueryBuilder() + + studio, err := qb.FindByName(s.Name, nil, true) + + if err != nil { + return err + } + + if studio == nil { + // ignore - cannot match + return nil + } + + id := strconv.Itoa(studio.ID) + s.ID = &id + return nil +} + +// ScrapeMovieURL uses the first scraper it finds that matches the URL +// provided to scrape a movie. If no scrapers are found that match +// the URL, or the scraper yields no result, nil is returned. +func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) { + for _, s := range c.scrapers { + if s.matchesMovieURL(url) { + ret, err := s.ScrapeMovieURL(url, c.globalConfig) + if err != nil { + return nil, err + } + + if ret != nil && ret.Studio != nil { + err := matchMovieStudio(ret.Studio) + if err != nil { + return nil, err + } + } + + // post-process - set the image if applicable + if err := setMovieFrontImage(ret, c.globalConfig); err != nil { + logger.Warnf("Could not set front image using URL %s: %s", *ret.FrontImage, err.Error()) + } + if err := setMovieBackImage(ret, c.globalConfig); err != nil { + logger.Warnf("Could not set back image using URL %s: %s", *ret.BackImage, err.Error()) + } + + return ret, nil + } + } + + return nil, nil +} diff --git a/pkg/scraper/script.go b/pkg/scraper/script.go index c07adaf73..91a754f7c 100644 --- a/pkg/scraper/script.go +++ b/pkg/scraper/script.go @@ -146,3 +146,13 @@ func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, erro 
return &ret, err } + +func (s *scriptScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { + inString := `{"url": "` + url + `"}` + + var ret models.ScrapedMovie + + err := s.runScraperScript(string(inString), &ret) + + return &ret, err +} diff --git a/pkg/scraper/stash.go b/pkg/scraper/stash.go index fb9122678..d14122760 100644 --- a/pkg/scraper/stash.go +++ b/pkg/scraper/stash.go @@ -192,6 +192,10 @@ func (s *stashScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error return nil, errors.New("scrapeSceneByURL not supported for stash scraper") } +func (s *stashScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { + return nil, errors.New("scrapeMovieByURL not supported for stash scraper") +} + func sceneFromUpdateFragment(scene models.SceneUpdateInput) (*models.Scene, error) { qb := models.NewSceneQueryBuilder() id, err := strconv.Atoi(scene.ID) diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go index f9afbfbca..95291e1de 100644 --- a/pkg/scraper/xpath.go +++ b/pkg/scraper/xpath.go @@ -69,6 +69,16 @@ func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error return scraper.scrapeScene(q) } +func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { + doc, scraper, err := s.scrapeURL(url) + if err != nil { + return nil, err + } + + q := s.getXPathQuery(doc) + return scraper.scrapeMovie(q) +} + func (s *xpathScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { scraper := s.getXpathScraper() diff --git a/ui/v2.5/src/components/Changelog/versions/v030.tsx b/ui/v2.5/src/components/Changelog/versions/v030.tsx index 85e711cac..190e1c415 100644 --- a/ui/v2.5/src/components/Changelog/versions/v030.tsx +++ b/ui/v2.5/src/components/Changelog/versions/v030.tsx @@ -5,6 +5,7 @@ const markup = ` #### 💥 **Note: After upgrading, the next scan will populate all scenes with oshash hashes. 
MD5 calculation can be disabled after populating the oshash for all scenes. See \`Hashing Algorithms\` in the \`Configuration\` section of the manual for details. ** ### ✨ New Features +* Add support for scraping movie details. * Add support for JSON scrapers. * Add support for plugin tasks. * Add oshash algorithm for hashing scene video files. Enabled by default on new systems. diff --git a/ui/v2.5/src/components/Movies/MovieDetails/Movie.tsx b/ui/v2.5/src/components/Movies/MovieDetails/Movie.tsx index 5252cfd7b..58fdc0594 100644 --- a/ui/v2.5/src/components/Movies/MovieDetails/Movie.tsx +++ b/ui/v2.5/src/components/Movies/MovieDetails/Movie.tsx @@ -7,6 +7,8 @@ import { useMovieUpdate, useMovieCreate, useMovieDestroy, + queryScrapeMovieURL, + useListMovieScrapers, } from "src/core/StashService"; import { useParams, useHistory } from "react-router-dom"; import { @@ -14,6 +16,7 @@ import { LoadingIndicator, Modal, StudioSelect, + Icon, } from "src/components/Shared"; import { useToast } from "src/hooks"; import { Table, Form, Modal as BSModal, Button } from "react-bootstrap"; @@ -22,8 +25,10 @@ import { ImageUtils, EditableTextUtils, TextUtils, + DurationUtils, } from "src/utils"; import { MovieScenesPanel } from "./MovieScenesPanel"; +import { MovieScrapeDialog } from "./MovieScrapeDialog"; export const Movie: React.FC = () => { const history = useHistory(); @@ -64,12 +69,18 @@ export const Movie: React.FC = () => { // Network state const { data, error, loading } = useFindMovie(id); + const [isLoading, setIsLoading] = useState(false); const [updateMovie] = useMovieUpdate(getMovieInput() as GQL.MovieUpdateInput); const [createMovie] = useMovieCreate(getMovieInput() as GQL.MovieCreateInput); const [deleteMovie] = useMovieDestroy( getMovieInput() as GQL.MovieDestroyInput ); + const Scrapers = useListMovieScrapers(); + const [scrapedMovie, setScrapedMovie] = useState< + GQL.ScrapedMovie | undefined + >(); + const intl = useIntl(); // set up hotkeys @@ -290,6 +301,139 
@@ export const Movie: React.FC = () => { ); } + function updateMovieEditStateFromScraper( + state: Partial + ) { + if (state.name) { + setName(state.name); + } + + if (state.aliases) { + setAliases(state.aliases ?? undefined); + } + + if (state.duration) { + setDuration(DurationUtils.stringToSeconds(state.duration) ?? undefined); + } + + if (state.date) { + setDate(state.date ?? undefined); + } + + if (state.studio && state.studio.id) { + setStudioId(state.studio.id ?? undefined); + } + + if (state.director) { + setDirector(state.director ?? undefined); + } + if (state.synopsis) { + setSynopsis(state.synopsis ?? undefined); + } + if (state.url) { + setUrl(state.url ?? undefined); + } + + // image is a base64 string + // #404: don't overwrite image if it has been modified by the user + // overwrite if not new since it came from a dialog + // otherwise follow existing behaviour + if ( + (!isNew || frontImage === undefined) && + (state as GQL.ScrapedMovieDataFragment).front_image !== undefined + ) { + const imageStr = (state as GQL.ScrapedMovieDataFragment).front_image; + setFrontImage(imageStr ?? undefined); + setImagePreview(imageStr ?? undefined); + } + + if ( + (!isNew || backImage === undefined) && + (state as GQL.ScrapedMovieDataFragment).back_image !== undefined + ) { + const imageStr = (state as GQL.ScrapedMovieDataFragment).back_image; + setBackImage(imageStr ?? undefined); + setBackImagePreview(imageStr ?? 
undefined); + } + } + + async function onScrapeMovieURL() { + if (!url) return; + setIsLoading(true); + + try { + const result = await queryScrapeMovieURL(url); + if (!result.data || !result.data.scrapeMovieURL) { + return; + } + + // if this is a new movie, just dump the data + if (isNew) { + updateMovieEditStateFromScraper(result.data.scrapeMovieURL); + } else { + setScrapedMovie(result.data.scrapeMovieURL); + } + } catch (e) { + Toast.error(e); + } finally { + setIsLoading(false); + } + } + + function urlScrapable(scrapedUrl: string) { + return ( + !!scrapedUrl && + (Scrapers?.data?.listMovieScrapers ?? []).some((s) => + (s?.movie?.urls ?? []).some((u) => scrapedUrl.includes(u)) + ) + ); + } + + function maybeRenderScrapeButton() { + if (!url || !isEditing || !urlScrapable(url)) { + return undefined; + } + return ( + + ); + } + + function maybeRenderScrapeDialog() { + if (!scrapedMovie) { + return; + } + + const currentMovie = getMovieInput(); + + // Get image paths for scrape gui + currentMovie.front_image = movie.front_image_path; + currentMovie.back_image = movie.back_image_path; + + return ( + { + onScrapeDialogClosed(m); + }} + /> + ); + } + + function onScrapeDialogClosed(p?: GQL.ScrapedMovieDataFragment) { + if (p) { + updateMovieEditStateFromScraper(p); + } + setScrapedMovie(undefined); + } + + if (isLoading) return ; + // TODO: CSS class return (
@@ -363,7 +507,7 @@ export const Movie: React.FC = () => { - URL + URL {maybeRenderScrapeButton()}
{EditableTextUtils.renderInputGroup({ isEditing, @@ -405,6 +549,7 @@ export const Movie: React.FC = () => { )} {renderDeleteAlert()} {renderImageAlert()} + {maybeRenderScrapeDialog()}
); }; diff --git a/ui/v2.5/src/components/Movies/MovieDetails/MovieScrapeDialog.tsx b/ui/v2.5/src/components/Movies/MovieDetails/MovieScrapeDialog.tsx new file mode 100644 index 000000000..d62f1681c --- /dev/null +++ b/ui/v2.5/src/components/Movies/MovieDetails/MovieScrapeDialog.tsx @@ -0,0 +1,204 @@ +import React, { useState } from "react"; +import * as GQL from "src/core/generated-graphql"; +import { + ScrapeDialog, + ScrapeResult, + ScrapedInputGroupRow, + ScrapedImageRow, + ScrapeDialogRow, +} from "src/components/Shared/ScrapeDialog"; +import { StudioSelect } from "src/components/Shared"; +import { DurationUtils } from "src/utils"; + +function renderScrapedStudio( + result: ScrapeResult, + isNew?: boolean, + onChange?: (value: string) => void +) { + const resultValue = isNew ? result.newValue : result.originalValue; + const value = resultValue ? [resultValue] : []; + + return ( + { + if (onChange) { + onChange(items[0]?.id); + } + }} + ids={value} + /> + ); +} + +function renderScrapedStudioRow( + result: ScrapeResult, + onChange: (value: ScrapeResult) => void +) { + return ( + renderScrapedStudio(result)} + renderNewField={() => + renderScrapedStudio(result, true, (value) => + onChange(result.cloneWithValue(value)) + ) + } + onChange={onChange} + /> + ); +} + +interface IMovieScrapeDialogProps { + movie: Partial; + scraped: GQL.ScrapedMovie; + + onClose: (scrapedMovie?: GQL.ScrapedMovie) => void; +} + +export const MovieScrapeDialog: React.FC = ( + props: IMovieScrapeDialogProps +) => { + const [name, setName] = useState>( + new ScrapeResult(props.movie.name, props.scraped.name) + ); + const [aliases, setAliases] = useState>( + new ScrapeResult(props.movie.aliases, props.scraped.aliases) + ); + const [duration, setDuration] = useState>( + new ScrapeResult( + DurationUtils.secondsToString(props.movie.duration || 0), + props.scraped.duration + ) + ); + const [date, setDate] = useState>( + new ScrapeResult(props.movie.date, props.scraped.date) + ); + const 
[director, setDirector] = useState>( + new ScrapeResult(props.movie.director, props.scraped.director) + ); + const [synopsis, setSynopsis] = useState>( + new ScrapeResult(props.movie.synopsis, props.scraped.synopsis) + ); + const [studio, setStudio] = useState>( + new ScrapeResult(props.movie.studio_id, props.scraped.studio?.id) + ); + const [url, setURL] = useState>( + new ScrapeResult(props.movie.url, props.scraped.url) + ); + const [frontImage, setFrontImage] = useState>( + new ScrapeResult(props.movie.front_image, props.scraped.front_image) + ); + const [backImage, setBackImage] = useState>( + new ScrapeResult(props.movie.back_image, props.scraped.back_image) + ); + + const allFields = [ + name, + aliases, + duration, + date, + director, + synopsis, + studio, + url, + frontImage, + backImage, + ]; + // don't show the dialog if nothing was scraped + if (allFields.every((r) => !r.scraped)) { + props.onClose(); + return <>; + } + + // todo: reenable + function makeNewScrapedItem(): GQL.ScrapedMovie { + const newStudio = studio.getNewValue(); + const durationString = duration.getNewValue(); + + return { + name: name.getNewValue(), + aliases: aliases.getNewValue(), + duration: durationString, + date: date.getNewValue(), + director: director.getNewValue(), + synopsis: synopsis.getNewValue(), + studio: newStudio + ? { + id: newStudio, + name: "", + } + : undefined, + url: url.getNewValue(), + front_image: frontImage.getNewValue(), + back_image: backImage.getNewValue(), + }; + } + + function renderScrapeRows() { + return ( + <> + setName(value)} + /> + setAliases(value)} + /> + setDuration(value)} + /> + setDate(value)} + /> + setDirector(value)} + /> + setSynopsis(value)} + /> + {renderScrapedStudioRow(studio, (value) => setStudio(value))} + setURL(value)} + /> + setFrontImage(value)} + /> + setBackImage(value)} + /> + + ); + } + + return ( + { + props.onClose(apply ? 
makeNewScrapedItem() : undefined); + }} + /> + ); +}; diff --git a/ui/v2.5/src/core/StashService.ts b/ui/v2.5/src/core/StashService.ts index 9097476e4..ce3f00c8d 100644 --- a/ui/v2.5/src/core/StashService.ts +++ b/ui/v2.5/src/core/StashService.ts @@ -168,6 +168,8 @@ export const useScrapePerformer = ( export const useListSceneScrapers = () => GQL.useListSceneScrapersQuery(); +export const useListMovieScrapers = () => GQL.useListMovieScrapersQuery(); + export const useScrapeFreeonesPerformers = (q: string) => GQL.useScrapeFreeonesPerformersQuery({ variables: { q } }); @@ -433,6 +435,14 @@ export const queryScrapeSceneURL = (url: string) => }, }); +export const queryScrapeMovieURL = (url: string) => + client.query({ + query: GQL.ScrapeMovieUrlDocument, + variables: { + url, + }, + }); + export const queryScrapeScene = ( scraperId: string, scene: GQL.SceneUpdateInput diff --git a/ui/v2.5/src/docs/en/Scraping.md b/ui/v2.5/src/docs/en/Scraping.md index 02b4065cd..448ef4418 100644 --- a/ui/v2.5/src/docs/en/Scraping.md +++ b/ui/v2.5/src/docs/en/Scraping.md @@ -1,6 +1,6 @@ # Metadata Scraping -Stash supports scraping of performer and scene details. +Stash supports scraping of performer, scene and movie details. Stash includes a freeones.xxx performer scraper built in. @@ -23,6 +23,8 @@ Performer details can be scraped from the new/edit Performer page in two differe Scene details can be scraped using URL as above, or via the `Scrape With...` button, which scrapes using the current scene metadata. +Movie details can currently only be scraped using URL as above. + # Community Scrapers The stash community maintains a number of custom scraper configuration files that can be found [here](https://github.com/stashapp/CommunityScrapers). 
@@ -42,6 +44,8 @@ sceneByFragment: sceneByURL: +movieByURL: + ``` @@ -57,6 +61,7 @@ The scraping types and their required fields are outlined in the following table | Scrape performer from URL | Valid `performerByURL` configuration with matching URL. | | Scraper in `Scrape...` dropdown button in Scene Edit page | Valid `sceneByFragment` configuration. | | Scrape scene from URL | Valid `sceneByURL` configuration with matching URL. | +| Scrape movie from URL | Valid `movieByURL` configuration with matching URL. | URL-based scraping accepts multiple scrape configurations, and each configuration requires a `url` field. stash iterates through these configurations, attempting to match the entered URL against the `url` fields in the configuration. It executes the first scraping configuration where the entered URL contains the value of the `url` field. @@ -87,6 +92,7 @@ The script is sent input and expects output based on the scraping type, as detai | `performerByURL` | `{"url": ""}` | JSON-encoded performer fragment | | `sceneByFragment` | JSON-encoded scene fragment | JSON-encoded scene fragment | | `sceneByURL` | `{"url": ""}` | JSON-encoded scene fragment | +| `movieByURL` | `{"url": ""}` | JSON-encoded movie fragment | For `performerByName`, only `name` is required in the returned performer fragments. One entire object is sent back to `performerByFragment` to scrape a specific performer, so the other fields may be included to assist in scraping a performer. For example, the `url` field may be filled in for the specific performer page, then `performerByFragment` can extract by using its value. 
@@ -547,8 +553,11 @@ Duration Date Rating Director +Studio Synopsis URL +FrontImage +BackImage ``` ### Stash @@ -572,7 +581,7 @@ stashServer: ``` ### Debugging support -To print the received html from a scraper request to the log file, add the following to your scraper yml file: +To print the received html/json from a scraper request to the log file, add the following to your scraper yml file: ``` debug: printHTML: true