Implement scraping movies by URL (#709)

* api/urlbuilders/movie: Auto format.

* graphql+pkg+ui: Implement scraping movies by URL.

This patch implements the missing required boilerplate for scraping
movies by URL, using performers and scenes as a reference.

Although this patch contains a big chunk of ground work for enabling
scraping movies by fragment, the feature would require additional
changes to be completely implemented and was not tested.

* graphql+pkg+ui: Scrape movie studio.

Extends and corrects the movie model for the ability to store and
dereference studio IDs with received studio string from the scraper.
This was done with Scenes as a reference. For simplicity the duplication
of having `ScrapedMovieStudio` and `ScrapedSceneStudio` was kept, which
should probably be refactored to be the same type in the model in the
future.

* ui/movies: Add movie scrape dialog.

Adds possibility to update existing movie entries with the URL scraper.

For this the MovieScrapeDialog.tsx was implemented with Performers and
Scenes as a reference. In addition DurationUtils needs to be called one
time for converting seconds from the model to the string that is
displayed in the component. This seemed the least intrusive to me as it
kept a ScrapeResult<string> type compatible with ScrapedInputGroupRow.
This commit is contained in:
woodgen
2020-08-10 07:34:15 +02:00
committed by GitHub
parent 7158e83b75
commit 4045ddf3e9
23 changed files with 728 additions and 9 deletions

View File

@@ -44,3 +44,7 @@ models:
model: github.com/stashapp/stash/pkg/models.ScrapedSceneTag
SceneFileType:
model: github.com/stashapp/stash/pkg/models.SceneFileType
ScrapedMovie:
model: github.com/stashapp/stash/pkg/models.ScrapedMovie
ScrapedMovieStudio:
model: github.com/stashapp/stash/pkg/models.ScrapedMovieStudio

View File

@@ -38,6 +38,12 @@ fragment ScrapedScenePerformerData on ScrapedScenePerformer {
aliases
}
fragment ScrapedMovieStudioData on ScrapedMovieStudio {
id
name
url
}
fragment ScrapedMovieData on ScrapedMovie {
name
aliases
@@ -47,6 +53,12 @@ fragment ScrapedMovieData on ScrapedMovie {
director
url
synopsis
front_image
back_image
studio {
...ScrapedMovieStudioData
}
}
fragment ScrapedSceneMovieData on ScrapedSceneMovie {

View File

@@ -20,6 +20,17 @@ query ListSceneScrapers {
}
}
query ListMovieScrapers {
listMovieScrapers {
id
name
movie {
urls
supported_scrapes
}
}
}
query ScrapePerformerList($scraper_id: ID!, $query: String!) {
scrapePerformerList(scraper_id: $scraper_id, query: $query) {
...ScrapedPerformerData
@@ -49,3 +60,9 @@ query ScrapeSceneURL($url: String!) {
...ScrapedSceneData
}
}
query ScrapeMovieURL($url: String!) {
scrapeMovieURL(url: $url) {
...ScrapedMovieData
}
}

View File

@@ -59,6 +59,7 @@ type Query {
"""List available scrapers"""
listPerformerScrapers: [Scraper!]!
listSceneScrapers: [Scraper!]!
listMovieScrapers: [Scraper!]!
"""Scrape a list of performers based on name"""
scrapePerformerList(scraper_id: ID!, query: String!): [ScrapedPerformer!]!
@@ -70,6 +71,8 @@ type Query {
scrapeScene(scraper_id: ID!, scene: SceneUpdateInput!): ScrapedScene
"""Scrapes a complete performer record based on a URL"""
scrapeSceneURL(url: String!): ScrapedScene
"""Scrapes a complete movie record based on a URL"""
scrapeMovieURL(url: String!): ScrapedMovie
"""Scrape a performer using Freeones"""
scrapeFreeones(performer_name: String!): ScrapedPerformer

View File

@@ -1,3 +1,10 @@
# Studio reference attached to a scraped movie; mirrors the
# models.ScrapedMovieStudio Go type bound in gqlgen.yml.
type ScrapedMovieStudio {
  """Set if studio matched"""
  id: ID
  name: String!
  url: String
}
"""A movie from a scraping operation..."""
type ScrapedMovie {
name: String
@@ -8,6 +15,11 @@ type ScrapedMovie {
director: String
url: String
synopsis: String
studio: ScrapedMovieStudio
"""This should be base64 encoded"""
front_image: String
back_image: String
}
input ScrapedMovieInput {

View File

@@ -20,6 +20,8 @@ type Scraper {
performer: ScraperSpec
"""Details for scene scraper"""
scene: ScraperSpec
"""Details for movie scraper"""
movie: ScraperSpec
}

View File

@@ -41,6 +41,10 @@ func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scrape
return manager.GetInstance().ScraperCache.ListSceneScrapers(), nil
}
// ListMovieScrapers resolves the listMovieScrapers GraphQL query by
// delegating to the scraper cache.
func (r *queryResolver) ListMovieScrapers(ctx context.Context) ([]*models.Scraper, error) {
	return manager.GetInstance().ScraperCache.ListMovieScrapers(), nil
}
func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
if query == "" {
return nil, nil
@@ -64,3 +68,7 @@ func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene
func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
return manager.GetInstance().ScraperCache.ScrapeSceneURL(url)
}
// ScrapeMovieURL resolves the scrapeMovieURL GraphQL query by delegating to
// the scraper cache, which picks the first scraper matching the URL.
func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
	return manager.GetInstance().ScraperCache.ScrapeMovieURL(url)
}

View File

@@ -3,13 +3,13 @@ package urlbuilders
import "strconv"
type MovieURLBuilder struct {
BaseURL string
BaseURL string
MovieID string
}
func NewMovieURLBuilder(baseURL string, movieID int) MovieURLBuilder {
return MovieURLBuilder{
BaseURL: baseURL,
BaseURL: baseURL,
MovieID: strconv.Itoa(movieID),
}
}
@@ -21,4 +21,3 @@ func (b MovieURLBuilder) GetMovieFrontImageURL() string {
func (b MovieURLBuilder) GetMovieBackImageURL() string {
return b.BaseURL + "/movie/" + b.MovieID + "/backimage"
}

View File

@@ -135,3 +135,24 @@ type ScrapedSceneTag struct {
ID *string `graphql:"id" json:"id"`
Name string `graphql:"name" json:"name"`
}
// ScrapedMovie is a movie record produced by a scraping operation. All
// fields are pointers because a scraper may populate only a subset of them.
type ScrapedMovie struct {
	Name     *string             `graphql:"name" json:"name"`
	Aliases  *string             `graphql:"aliases" json:"aliases"`
	Duration *string             `graphql:"duration" json:"duration"`
	Date     *string             `graphql:"date" json:"date"`
	Rating   *string             `graphql:"rating" json:"rating"`
	Director *string             `graphql:"director" json:"director"`
	Studio   *ScrapedMovieStudio `graphql:"studio" json:"studio"`
	Synopsis *string             `graphql:"synopsis" json:"synopsis"`
	URL      *string             `graphql:"url" json:"url"`
	// FrontImage/BackImage hold either a source URL or, after
	// post-processing, base64-encoded image data.
	FrontImage *string `graphql:"front_image" json:"front_image"`
	BackImage  *string `graphql:"back_image" json:"back_image"`
}

// ScrapedMovieStudio is the studio reference attached to a scraped movie.
type ScrapedMovieStudio struct {
	// Set if studio matched against an existing studio in the database
	ID   *string `graphql:"id" json:"id"`
	Name string  `graphql:"name" json:"name"`
	URL  *string `graphql:"url" json:"url"`
}

View File

@@ -39,6 +39,8 @@ type scraper interface {
scrapeSceneByFragment(scene models.SceneUpdateInput) (*models.ScrapedScene, error)
scrapeSceneByURL(url string) (*models.ScrapedScene, error)
scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
}
func getScraper(scraper scraperTypeConfig, config config, globalConfig GlobalConfig) scraper {

View File

@@ -35,6 +35,9 @@ type config struct {
// Configuration for querying a scene by a URL
SceneByURL []*scrapeByURLConfig `yaml:"sceneByURL"`
// Configuration for querying a movie by a URL
MovieByURL []*scrapeByURLConfig `yaml:"movieByURL"`
// Scraper debugging options
DebugOptions *scraperDebugOptions `yaml:"debug"`
@@ -86,6 +89,12 @@ func (c config) validate() error {
}
}
for _, s := range c.MovieByURL {
if err := s.validate(); err != nil {
return err
}
}
return nil
}
@@ -225,6 +234,18 @@ func (c config) toScraper() *models.Scraper {
ret.Scene = &scene
}
movie := models.ScraperSpec{}
if len(c.MovieByURL) > 0 {
movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.MovieByURL {
movie.Urls = append(movie.Urls, v.URL...)
}
}
if len(movie.SupportedScrapes) > 0 {
ret.Movie = &movie
}
return &ret
}
@@ -297,6 +318,20 @@ func (c config) matchesSceneURL(url string) bool {
return false
}
// supportsMovies reports whether this scraper config has at least one
// movieByURL entry, i.e. whether it can scrape movies at all.
func (c config) supportsMovies() bool {
	return len(c.MovieByURL) > 0
}
// matchesMovieURL reports whether any movieByURL entry of this scraper
// config matches the given URL.
func (c config) matchesMovieURL(url string) bool {
	for _, cfg := range c.MovieByURL {
		if !cfg.matchesURL(url) {
			continue
		}
		return true
	}
	return false
}
func (c config) ScrapeScene(scene models.SceneUpdateInput, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
if c.SceneByFragment != nil {
s := getScraper(*c.SceneByFragment, c, globalConfig)
@@ -323,3 +358,21 @@ func (c config) ScrapeSceneURL(url string, globalConfig GlobalConfig) (*models.S
return nil, nil
}
// ScrapeMovieURL runs the first movieByURL scraper whose URL patterns match
// the given URL and returns its result. Returns (nil, nil) when no matching
// configuration produces a movie.
func (c config) ScrapeMovieURL(url string, globalConfig GlobalConfig) (*models.ScrapedMovie, error) {
	for _, cfg := range c.MovieByURL {
		if !cfg.matchesURL(url) {
			continue
		}

		movie, err := getScraper(cfg.scraperTypeConfig, c, globalConfig).scrapeMovieByURL(url)
		if err != nil {
			return nil, err
		}

		// a matching config may still yield nothing; keep trying the rest
		if movie != nil {
			return movie, nil
		}
	}

	return nil, nil
}

View File

@@ -47,6 +47,40 @@ func setSceneImage(s *models.ScrapedScene, globalConfig GlobalConfig) error {
return nil
}
// setMovieFrontImage downloads the scraped front image when the field holds
// an http(s) URL and replaces the field with the downloaded image data.
// A nil movie or a non-URL value is left untouched.
func setMovieFrontImage(m *models.ScrapedMovie, globalConfig GlobalConfig) error {
	if m == nil || m.FrontImage == nil {
		// nothing to do
		return nil
	}

	// only attempt a download when the value looks like a URL
	if !strings.HasPrefix(*m.FrontImage, "http") {
		return nil
	}

	img, err := getImage(*m.FrontImage, globalConfig)
	if err != nil {
		return err
	}

	m.FrontImage = img
	return nil
}
// setMovieBackImage downloads the scraped back image when the field holds
// an http(s) URL and replaces the field with the downloaded image data.
// A nil movie or a non-URL value is left untouched.
func setMovieBackImage(m *models.ScrapedMovie, globalConfig GlobalConfig) error {
	if m == nil || m.BackImage == nil {
		// nothing to do
		return nil
	}

	// only attempt a download when the value looks like a URL
	if !strings.HasPrefix(*m.BackImage, "http") {
		return nil
	}

	img, err := getImage(*m.BackImage, globalConfig)
	if err != nil {
		return err
	}

	m.BackImage = img
	return nil
}
func getImage(url string, globalConfig GlobalConfig) (*string, error) {
client := &http.Client{
Timeout: imageGetTimeout,

View File

@@ -88,6 +88,16 @@ func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error)
return scraper.scrapeScene(q)
}
// scrapeMovieByURL fetches the given URL, builds a JSON query over the
// response body, and runs the mapped movie scraper configuration against it.
func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
	doc, scraper, err := s.scrapeURL(url)
	if err != nil {
		return nil, err
	}
	q := s.getJsonQuery(doc)
	return scraper.scrapeMovie(q)
}
func (s *jsonScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
scraper := s.getJsonScraper()

View File

@@ -161,6 +161,58 @@ func (s *mappedPerformerScraperConfig) UnmarshalYAML(unmarshal func(interface{})
return unmarshal(&s.mappedConfig)
}
// mappedMovieScraperConfig is the mapped scraper configuration for movies.
// The embedded mappedConfig holds the top-level movie field mappings, while
// Studio holds the nested studio field mappings.
type mappedMovieScraperConfig struct {
	mappedConfig

	Studio mappedConfig `yaml:"Studio"`
}

// _mappedMovieScraperConfig is an alias used by UnmarshalYAML below to avoid
// infinite recursion into the custom unmarshaller.
type _mappedMovieScraperConfig mappedMovieScraperConfig

const (
	// mappedScraperConfigMovieStudio is the YAML key of the nested studio mapping.
	mappedScraperConfigMovieStudio = "Studio"
)
// UnmarshalYAML decodes a movie scraper mapping. The known nested sub-field
// (Studio) must be separated from the arbitrary top-level movie field
// mappings, which is done by round-tripping through an intermediate map.
func (s *mappedMovieScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// HACK - unmarshal to map first, then remove known movie sub-fields, then
	// remarshal to yaml and pass that down to the base map
	parentMap := make(map[string]interface{})
	if err := unmarshal(parentMap); err != nil {
		return err
	}

	// move the known sub-fields to a separate map
	thisMap := make(map[string]interface{})

	thisMap[mappedScraperConfigMovieStudio] = parentMap[mappedScraperConfigMovieStudio]
	delete(parentMap, mappedScraperConfigMovieStudio)

	// re-unmarshal the sub-fields
	yml, err := yaml.Marshal(thisMap)
	if err != nil {
		return err
	}

	// needs to be a different type to prevent infinite recursion
	c := _mappedMovieScraperConfig{}
	if err := yaml.Unmarshal(yml, &c); err != nil {
		return err
	}

	*s = mappedMovieScraperConfig(c)

	// what remains in parentMap are the free-form movie field mappings;
	// feed them to the embedded mappedConfig
	yml, err = yaml.Marshal(parentMap)
	if err != nil {
		return err
	}

	if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
		return err
	}

	return nil
}
type mappedRegexConfig struct {
Regex string `yaml:"regex"`
With string `yaml:"with"`
@@ -454,6 +506,7 @@ type mappedScraper struct {
Common commonMappedConfig `yaml:"common"`
Scene *mappedSceneScraperConfig `yaml:"scene"`
Performer *mappedPerformerScraperConfig `yaml:"performer"`
Movie *mappedMovieScraperConfig `yaml:"movie"`
}
type mappedResult map[string]string
@@ -598,3 +651,33 @@ func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error)
return &ret, nil
}
// scrapeMovie applies this scraper's movie mapping to the query results and
// returns the scraped movie (including the nested studio, when mapped).
// Returns (nil, nil) when no movie mapping is configured.
func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) {
	var ret models.ScrapedMovie

	movieScraperConfig := s.Movie
	// guard against a scraper yaml that routes to scrapeMovie (e.g. via
	// movieByURL) without declaring a movie mapping section; accessing the
	// embedded mappedConfig through a nil pointer would panic
	if movieScraperConfig == nil {
		return nil, nil
	}

	movieMap := movieScraperConfig.mappedConfig
	if movieMap == nil {
		return nil, nil
	}

	movieStudioMap := movieScraperConfig.Studio

	results := movieMap.process(q, s.Common)
	if len(results) > 0 {
		// only the first result is used for a single-movie scrape
		results[0].apply(&ret)

		if movieStudioMap != nil {
			logger.Debug(`Processing movie studio:`)
			studioResults := movieStudioMap.process(q, s.Common)

			if len(studioResults) > 0 {
				studio := &models.ScrapedMovieStudio{}
				studioResults[0].apply(studio)
				ret.Studio = studio
			}
		}
	}

	return &ret, nil
}

View File

@@ -132,6 +132,20 @@ func (c Cache) ListSceneScrapers() []*models.Scraper {
return ret
}
// ListMovieScrapers returns a list of scrapers that are capable of
// scraping movies.
func (c Cache) ListMovieScrapers() []*models.Scraper {
	var ret []*models.Scraper
	for _, s := range c.scrapers {
		// filter on type
		if s.supportsMovies() {
			ret = append(ret, s.toScraper())
		}
	}

	return ret
}
func (c Cache) findScraper(scraperID string) *config {
for _, s := range c.scrapers {
if s.ID == scraperID {
@@ -360,3 +374,55 @@ func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
return nil, nil
}
// matchMovieStudio looks up the scraped studio by name in the database and,
// when a studio is found, stores its id on the scraped studio. A missing
// studio is not an error; the id is simply left unset.
func matchMovieStudio(s *models.ScrapedMovieStudio) error {
	qb := models.NewStudioQueryBuilder()

	studio, err := qb.FindByName(s.Name, nil, true)
	if err != nil {
		return err
	}

	if studio != nil {
		matchedID := strconv.Itoa(studio.ID)
		s.ID = &matchedID
	}

	return nil
}
// ScrapeMovieURL uses the first scraper it finds that matches the URL
// provided to scrape a movie. If no scrapers are found that matches
// the URL, then nil is returned.
func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
	for _, s := range c.scrapers {
		if !s.matchesMovieURL(url) {
			continue
		}

		ret, err := s.ScrapeMovieURL(url, c.globalConfig)
		if err != nil {
			return nil, err
		}

		// a matching scraper may still return no movie (nil, nil);
		// without this guard the studio/image post-processing below
		// would dereference a nil pointer
		if ret == nil {
			return nil, nil
		}

		if ret.Studio != nil {
			if err := matchMovieStudio(ret.Studio); err != nil {
				return nil, err
			}
		}

		// post-process - set the images if applicable
		if err := setMovieFrontImage(ret, c.globalConfig); err != nil {
			logger.Warnf("Could not set front image using URL %s: %s", *ret.FrontImage, err.Error())
		}
		if err := setMovieBackImage(ret, c.globalConfig); err != nil {
			logger.Warnf("Could not set back image using URL %s: %s", *ret.BackImage, err.Error())
		}

		return ret, nil
	}

	return nil, nil
}

View File

@@ -146,3 +146,13 @@ func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, erro
return &ret, err
}
// scrapeMovieByURL invokes the scraper script with a JSON fragment carrying
// the URL and decodes the script's output into a ScrapedMovie.
func (s *scriptScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
	// NOTE(review): the URL is interpolated without JSON escaping, matching
	// the existing scene/performer script scrapers; a URL containing a
	// double quote or backslash would produce invalid JSON.
	inString := `{"url": "` + url + `"}`

	var ret models.ScrapedMovie

	// inString is already a string; no conversion needed
	err := s.runScraperScript(inString, &ret)

	return &ret, err
}

View File

@@ -192,6 +192,10 @@ func (s *stashScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error
return nil, errors.New("scrapeSceneByURL not supported for stash scraper")
}
// scrapeMovieByURL is unsupported for stash-backed scrapers and always
// returns an error.
func (s *stashScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
	return nil, errors.New("scrapeMovieByURL not supported for stash scraper")
}
func sceneFromUpdateFragment(scene models.SceneUpdateInput) (*models.Scene, error) {
qb := models.NewSceneQueryBuilder()
id, err := strconv.Atoi(scene.ID)

View File

@@ -69,6 +69,16 @@ func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error
return scraper.scrapeScene(q)
}
// scrapeMovieByURL fetches the given URL, builds an XPath query over the
// parsed document, and runs the mapped movie scraper configuration against it.
func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
	doc, scraper, err := s.scrapeURL(url)
	if err != nil {
		return nil, err
	}
	q := s.getXPathQuery(doc)
	return scraper.scrapeMovie(q)
}
func (s *xpathScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
scraper := s.getXpathScraper()

View File

@@ -5,6 +5,7 @@ const markup = `
#### 💥 **Note: After upgrading, the next scan will populate all scenes with oshash hashes. MD5 calculation can be disabled after populating the oshash for all scenes. See \`Hashing Algorithms\` in the \`Configuration\` section of the manual for details. **
### ✨ New Features
* Add support for scraping movie details.
* Add support for JSON scrapers.
* Add support for plugin tasks.
* Add oshash algorithm for hashing scene video files. Enabled by default on new systems.

View File

@@ -7,6 +7,8 @@ import {
useMovieUpdate,
useMovieCreate,
useMovieDestroy,
queryScrapeMovieURL,
useListMovieScrapers,
} from "src/core/StashService";
import { useParams, useHistory } from "react-router-dom";
import {
@@ -14,6 +16,7 @@ import {
LoadingIndicator,
Modal,
StudioSelect,
Icon,
} from "src/components/Shared";
import { useToast } from "src/hooks";
import { Table, Form, Modal as BSModal, Button } from "react-bootstrap";
@@ -22,8 +25,10 @@ import {
ImageUtils,
EditableTextUtils,
TextUtils,
DurationUtils,
} from "src/utils";
import { MovieScenesPanel } from "./MovieScenesPanel";
import { MovieScrapeDialog } from "./MovieScrapeDialog";
export const Movie: React.FC = () => {
const history = useHistory();
@@ -64,12 +69,18 @@ export const Movie: React.FC = () => {
// Network state
const { data, error, loading } = useFindMovie(id);
const [isLoading, setIsLoading] = useState(false);
const [updateMovie] = useMovieUpdate(getMovieInput() as GQL.MovieUpdateInput);
const [createMovie] = useMovieCreate(getMovieInput() as GQL.MovieCreateInput);
const [deleteMovie] = useMovieDestroy(
getMovieInput() as GQL.MovieDestroyInput
);
const Scrapers = useListMovieScrapers();
const [scrapedMovie, setScrapedMovie] = useState<
GQL.ScrapedMovie | undefined
>();
const intl = useIntl();
// set up hotkeys
@@ -290,6 +301,139 @@ export const Movie: React.FC = () => {
);
}
function updateMovieEditStateFromScraper(
state: Partial<GQL.ScrapedMovieDataFragment>
) {
if (state.name) {
setName(state.name);
}
if (state.aliases) {
setAliases(state.aliases ?? undefined);
}
if (state.duration) {
setDuration(DurationUtils.stringToSeconds(state.duration) ?? undefined);
}
if (state.date) {
setDate(state.date ?? undefined);
}
if (state.studio && state.studio.id) {
setStudioId(state.studio.id ?? undefined);
}
if (state.director) {
setDirector(state.director ?? undefined);
}
if (state.synopsis) {
setSynopsis(state.synopsis ?? undefined);
}
if (state.url) {
setUrl(state.url ?? undefined);
}
// image is a base64 string
// #404: don't overwrite image if it has been modified by the user
// overwrite if not new since it came from a dialog
// otherwise follow existing behaviour
if (
(!isNew || frontImage === undefined) &&
(state as GQL.ScrapedMovieDataFragment).front_image !== undefined
) {
const imageStr = (state as GQL.ScrapedMovieDataFragment).front_image;
setFrontImage(imageStr ?? undefined);
setImagePreview(imageStr ?? undefined);
}
if (
(!isNew || backImage === undefined) &&
(state as GQL.ScrapedMovieDataFragment).back_image !== undefined
) {
const imageStr = (state as GQL.ScrapedMovieDataFragment).back_image;
setBackImage(imageStr ?? undefined);
setBackImagePreview(imageStr ?? undefined);
}
}
async function onScrapeMovieURL() {
if (!url) return;
setIsLoading(true);
try {
const result = await queryScrapeMovieURL(url);
if (!result.data || !result.data.scrapeMovieURL) {
return;
}
// if this is a new movie, just dump the data
if (isNew) {
updateMovieEditStateFromScraper(result.data.scrapeMovieURL);
} else {
setScrapedMovie(result.data.scrapeMovieURL);
}
} catch (e) {
Toast.error(e);
} finally {
setIsLoading(false);
}
}
function urlScrapable(scrapedUrl: string) {
return (
!!scrapedUrl &&
(Scrapers?.data?.listMovieScrapers ?? []).some((s) =>
(s?.movie?.urls ?? []).some((u) => scrapedUrl.includes(u))
)
);
}
function maybeRenderScrapeButton() {
if (!url || !isEditing || !urlScrapable(url)) {
return undefined;
}
return (
<Button
className="minimal scrape-url-button"
onClick={() => onScrapeMovieURL()}
>
<Icon icon="file-upload" />
</Button>
);
}
function maybeRenderScrapeDialog() {
if (!scrapedMovie) {
return;
}
const currentMovie = getMovieInput();
// Get image paths for scrape gui
currentMovie.front_image = movie.front_image_path;
currentMovie.back_image = movie.back_image_path;
return (
<MovieScrapeDialog
movie={currentMovie}
scraped={scrapedMovie}
onClose={(m) => {
onScrapeDialogClosed(m);
}}
/>
);
}
function onScrapeDialogClosed(p?: GQL.ScrapedMovieDataFragment) {
if (p) {
updateMovieEditStateFromScraper(p);
}
setScrapedMovie(undefined);
}
if (isLoading) return <LoadingIndicator />;
// TODO: CSS class
return (
<div className="row">
@@ -363,7 +507,7 @@ export const Movie: React.FC = () => {
</Table>
<Form.Group controlId="url">
<Form.Label>URL</Form.Label>
<Form.Label>URL {maybeRenderScrapeButton()}</Form.Label>
<div>
{EditableTextUtils.renderInputGroup({
isEditing,
@@ -405,6 +549,7 @@ export const Movie: React.FC = () => {
)}
{renderDeleteAlert()}
{renderImageAlert()}
{maybeRenderScrapeDialog()}
</div>
);
};

View File

@@ -0,0 +1,204 @@
import React, { useState } from "react";
import * as GQL from "src/core/generated-graphql";
import {
ScrapeDialog,
ScrapeResult,
ScrapedInputGroupRow,
ScrapedImageRow,
ScrapeDialogRow,
} from "src/components/Shared/ScrapeDialog";
import { StudioSelect } from "src/components/Shared";
import { DurationUtils } from "src/utils";
// Renders a studio picker for one column of the scrape dialog. The original
// (left) column is disabled; the scraped (right) column is editable and
// reports the chosen studio id through onChange.
function renderScrapedStudio(
  result: ScrapeResult<string>,
  isNew?: boolean,
  onChange?: (value: string) => void
) {
  const selectedId = isNew ? result.newValue : result.originalValue;
  const selectedIds = selectedId ? [selectedId] : [];

  return (
    <StudioSelect
      className="form-control react-select"
      isDisabled={!isNew}
      onSelect={(items) => {
        onChange?.(items[0]?.id);
      }}
      ids={selectedIds}
    />
  );
}
// Wraps the two studio pickers (current vs. scraped) in a standard scrape
// dialog row titled "Studio".
function renderScrapedStudioRow(
  result: ScrapeResult<string>,
  onChange: (value: ScrapeResult<string>) => void
) {
  const originalField = () => renderScrapedStudio(result);
  const newField = () =>
    renderScrapedStudio(result, true, (value) =>
      onChange(result.cloneWithValue(value))
    );

  return (
    <ScrapeDialogRow
      title="Studio"
      result={result}
      renderOriginalField={originalField}
      renderNewField={newField}
      onChange={onChange}
    />
  );
}
interface IMovieScrapeDialogProps {
  // current movie values; shown in the "original" column of the dialog
  movie: Partial<GQL.MovieUpdateInput>;
  // values returned by the scraper; shown in the "new" column
  scraped: GQL.ScrapedMovie;
  // called with the merged result when applied, or undefined on cancel
  onClose: (scrapedMovie?: GQL.ScrapedMovie) => void;
}
// Dialog comparing the current movie values against freshly scraped ones,
// letting the user pick per-field which value to keep. On apply, the merged
// result is passed back through props.onClose.
export const MovieScrapeDialog: React.FC<IMovieScrapeDialogProps> = (
  props: IMovieScrapeDialogProps
) => {
  const [name, setName] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.name, props.scraped.name)
  );
  const [aliases, setAliases] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.aliases, props.scraped.aliases)
  );
  // duration is stored as seconds on the movie but edited as a string; the
  // conversion keeps the ScrapeResult<string> type compatible with
  // ScrapedInputGroupRow
  const [duration, setDuration] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(
      DurationUtils.secondsToString(props.movie.duration || 0),
      props.scraped.duration
    )
  );
  const [date, setDate] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.date, props.scraped.date)
  );
  const [director, setDirector] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.director, props.scraped.director)
  );
  const [synopsis, setSynopsis] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.synopsis, props.scraped.synopsis)
  );
  const [studio, setStudio] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.studio_id, props.scraped.studio?.id)
  );
  const [url, setURL] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.url, props.scraped.url)
  );
  const [frontImage, setFrontImage] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.front_image, props.scraped.front_image)
  );
  const [backImage, setBackImage] = useState<ScrapeResult<string>>(
    new ScrapeResult<string>(props.movie.back_image, props.scraped.back_image)
  );

  const allFields = [
    name,
    aliases,
    duration,
    date,
    director,
    synopsis,
    studio,
    url,
    frontImage,
    backImage,
  ];

  // don't show the dialog if nothing was scraped
  if (allFields.every((r) => !r.scraped)) {
    props.onClose();
    return <></>;
  }

  // Builds the merged movie from the per-field selections made in the dialog.
  function makeNewScrapedItem(): GQL.ScrapedMovie {
    const newStudio = studio.getNewValue();
    const durationString = duration.getNewValue();

    return {
      name: name.getNewValue(),
      aliases: aliases.getNewValue(),
      duration: durationString,
      date: date.getNewValue(),
      director: director.getNewValue(),
      synopsis: synopsis.getNewValue(),
      studio: newStudio
        ? {
            id: newStudio,
            name: "",
          }
        : undefined,
      url: url.getNewValue(),
      front_image: frontImage.getNewValue(),
      back_image: backImage.getNewValue(),
    };
  }

  function renderScrapeRows() {
    return (
      <>
        <ScrapedInputGroupRow
          title="Name"
          result={name}
          onChange={(value) => setName(value)}
        />
        <ScrapedInputGroupRow
          title="Aliases"
          result={aliases}
          onChange={(value) => setAliases(value)}
        />
        <ScrapedInputGroupRow
          title="Duration"
          result={duration}
          onChange={(value) => setDuration(value)}
        />
        <ScrapedInputGroupRow
          title="Date"
          result={date}
          onChange={(value) => setDate(value)}
        />
        <ScrapedInputGroupRow
          title="Director"
          result={director}
          onChange={(value) => setDirector(value)}
        />
        <ScrapedInputGroupRow
          title="Synopsis"
          result={synopsis}
          onChange={(value) => setSynopsis(value)}
        />
        {renderScrapedStudioRow(studio, (value) => setStudio(value))}
        <ScrapedInputGroupRow
          title="URL"
          result={url}
          onChange={(value) => setURL(value)}
        />
        <ScrapedImageRow
          title="Front Image"
          className="front-image"
          result={frontImage}
          onChange={(value) => setFrontImage(value)}
        />
        {/* fixed copy-paste bug: back image row previously reused the
            "front-image" class */}
        <ScrapedImageRow
          title="Back Image"
          className="back-image"
          result={backImage}
          onChange={(value) => setBackImage(value)}
        />
      </>
    );
  }

  return (
    <ScrapeDialog
      title="Movie Scrape Results"
      renderScrapeRows={renderScrapeRows}
      onClose={(apply) => {
        props.onClose(apply ? makeNewScrapedItem() : undefined);
      }}
    />
  );
};

View File

@@ -168,6 +168,8 @@ export const useScrapePerformer = (
export const useListSceneScrapers = () => GQL.useListSceneScrapersQuery();
export const useListMovieScrapers = () => GQL.useListMovieScrapersQuery();
export const useScrapeFreeonesPerformers = (q: string) =>
GQL.useScrapeFreeonesPerformersQuery({ variables: { q } });
@@ -433,6 +435,14 @@ export const queryScrapeSceneURL = (url: string) =>
},
});
// Executes the scrapeMovieURL GraphQL query, scraping a movie from the URL.
export const queryScrapeMovieURL = (url: string) =>
  client.query<GQL.ScrapeMovieUrlQuery>({
    query: GQL.ScrapeMovieUrlDocument,
    variables: {
      url,
    },
  });
export const queryScrapeScene = (
scraperId: string,
scene: GQL.SceneUpdateInput

View File

@@ -1,6 +1,6 @@
# Metadata Scraping
Stash supports scraping of performer and scene details.
Stash supports scraping of performer, scene and movie details.
Stash includes a freeones.xxx performer scraper built in.
@@ -23,6 +23,8 @@ Performer details can be scraped from the new/edit Performer page in two differe
Scene details can be scraped using URL as above, or via the `Scrape With...` button, which scrapes using the current scene metadata.
Movie details can currently only be scraped using URL as above.
# Community Scrapers
The stash community maintains a number of custom scraper configuration files that can be found [here](https://github.com/stashapp/CommunityScrapers).
@@ -42,6 +44,8 @@ sceneByFragment:
<single scraper config>
sceneByURL:
<multiple scraper URL configs>
movieByURL:
<multiple scraper URL configs>
<other configurations>
```
@@ -57,6 +61,7 @@ The scraping types and their required fields are outlined in the following table
| Scrape performer from URL | Valid `performerByURL` configuration with matching URL. |
| Scraper in `Scrape...` dropdown button in Scene Edit page | Valid `sceneByFragment` configuration. |
| Scrape scene from URL | Valid `sceneByURL` configuration with matching URL. |
| Scrape movie from URL | Valid `movieByURL` configuration with matching URL. |
URL-based scraping accepts multiple scrape configurations, and each configuration requires a `url` field. stash iterates through these configurations, attempting to match the entered URL against the `url` fields in the configuration. It executes the first scraping configuration where the entered URL contains the value of the `url` field.
@@ -87,6 +92,7 @@ The script is sent input and expects output based on the scraping type, as detai
| `performerByURL` | `{"url": "<url>"}` | JSON-encoded performer fragment |
| `sceneByFragment` | JSON-encoded scene fragment | JSON-encoded scene fragment |
| `sceneByURL` | `{"url": "<url>"}` | JSON-encoded scene fragment |
| `movieByURL` | `{"url": "<url>"}` | JSON-encoded movie fragment |
For `performerByName`, only `name` is required in the returned performer fragments. One entire object is sent back to `performerByFragment` to scrape a specific performer, so the other fields may be included to assist in scraping a performer. For example, the `url` field may be filled in for the specific performer page, then `performerByFragment` can extract by using its value.
@@ -547,8 +553,11 @@ Duration
Date
Rating
Director
Studio
Synopsis
URL
FrontImage
BackImage
```
### Stash
@@ -572,7 +581,7 @@ stashServer:
```
### Debugging support
To print the received html from a scraper request to the log file, add the following to your scraper yml file:
To print the received html/json from a scraper request to the log file, add the following to your scraper yml file:
```
debug:
printHTML: true