Stashbox tagger reorder (#2840)

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
JackDawson94
2022-09-30 03:26:37 +02:00
committed by GitHub
parent 25bc750295
commit c63c06de1c
3 changed files with 151 additions and 9 deletions

View File

@@ -6,13 +6,14 @@ import { FormattedMessage, useIntl } from "react-intl";
import { Icon, LoadingIndicator } from "src/components/Shared"; import { Icon, LoadingIndicator } from "src/components/Shared";
import { OperationButton } from "src/components/Shared/OperationButton"; import { OperationButton } from "src/components/Shared/OperationButton";
import { TaggerStateContext } from "../context"; import { IScrapedScene, TaggerStateContext } from "../context";
import Config from "./Config"; import Config from "./Config";
import { TaggerScene } from "./TaggerScene"; import { TaggerScene } from "./TaggerScene";
import { SceneTaggerModals } from "./sceneTaggerModals"; import { SceneTaggerModals } from "./sceneTaggerModals";
import { SceneSearchResults } from "./StashSearchResult"; import { SceneSearchResults } from "./StashSearchResult";
import { ConfigurationContext } from "src/hooks/Config"; import { ConfigurationContext } from "src/hooks/Config";
import { faCog } from "@fortawesome/free-solid-svg-icons"; import { faCog } from "@fortawesome/free-solid-svg-icons";
import { distance } from "src/utils/hamming";
interface ITaggerProps { interface ITaggerProps {
scenes: GQL.SlimSceneDataFragment[]; scenes: GQL.SlimSceneDataFragment[];
@@ -90,6 +91,139 @@ export const Tagger: React.FC<ITaggerProps> = ({ scenes, queue }) => {
); );
} }
// Returns the smallest distance (as computed by the hamming utility) between
// `hash` and the "phash" fingerprint of any file attached to `stashScene`.
// Files without a phash fingerprint are ignored; if none of the files has one,
// the sentinel 9999 is returned.
function minDistance(hash: string, stashScene: GQL.SlimSceneDataFragment) {
  let closest = 9999;
  for (const file of stashScene.files) {
    // an exact match cannot be improved upon, so stop scanning
    if (closest === 0) break;

    const phash = file.fingerprints.find((fp) => fp.type === "phash");
    if (phash) {
      const candidate = distance(hash, phash.value);
      if (candidate < closest) {
        closest = candidate;
      }
    }
  }
  return closest;
}
// Scores how well the PHASH fingerprints of `scrapedScene` line up with the
// phashes of `stashScene`.
// Returns a pair [matchCount, matchRatio]:
//   matchCount - number of scraped PHASH fingerprints whose minimum distance
//                to the stash scene is at most 8
//   matchRatio - matchCount divided by the total number of scraped PHASH
//                fingerprints
// When the scraped scene carries no PHASH fingerprints the result is [0, 0].
function calculatePhashComparisonScore(
  stashScene: GQL.SlimSceneDataFragment,
  scrapedScene: IScrapedScene
) {
  const candidates = (scrapedScene.fingerprints ?? []).filter(
    (fp) => fp.algorithm === "PHASH"
  );
  // nothing to compare against; also avoids dividing by zero below
  if (candidates.length === 0) return [0, 0];

  let matched = 0;
  for (const fp of candidates) {
    if (minDistance(fp.hash, stashScene) <= 8) {
      matched += 1;
    }
  }
  return [matched, matched / candidates.length];
}
// Returns the smallest absolute difference between `duration` and the
// duration of any file attached to `stashScene`.
// The result is capped at the sentinel 9999, which is also what comes back
// when the scene has no files at all.
function minDurationDiff(
  stashScene: GQL.SlimSceneDataFragment,
  duration: number
) {
  return stashScene.files.reduce((smallest, file) => {
    const gap = Math.abs(duration - file.duration);
    // strict comparison keeps the running value when gap is not a number
    return gap < smallest ? gap : smallest;
  }, 9999);
}
// Scores how well the fingerprint durations of `scrapedScene` line up with
// the file durations of `stashScene`.
// Returns a triple [matchCount, matchRatio, minDiff]:
//   matchCount - number of scraped fingerprints whose closest stash file
//                duration is within 5 (presumably seconds - TODO confirm)
//   matchRatio - matchCount divided by the number of scraped fingerprints
//   minDiff    - the smallest duration difference over all fingerprints
// When the scraped scene has no fingerprints the result is [0, 0, 0].
function calculateDurationComparisonScore(
  stashScene: GQL.SlimSceneDataFragment,
  scrapedScene: IScrapedScene
) {
  const fingerprints = scrapedScene.fingerprints;
  if (!fingerprints || !(fingerprints.length > 0)) {
    return [0, 0, 0];
  }

  // one entry per scraped fingerprint: distance to the closest stash file
  const gaps = fingerprints.map((fp) => minDurationDiff(stashScene, fp.duration));
  const withinTolerance = gaps.filter((gap) => gap <= 5).length;

  return [withinTolerance, withinTolerance / gaps.length, Math.min(...gaps)];
}
// Sort comparator ranking two scraped scenes by how closely each resembles
// `stashScene`. A negative result places sceneA first, a positive result
// places sceneB first.
// Tie-breakers, in order of priority:
//   1. scenes that have fingerprints come before scenes that have none
//   2. higher number of phash matches
//   3. higher number of duration matches
//   4. higher ratio of duration matches
//   5. higher ratio of phash matches
//   6. smaller minimum duration difference
function compareScenesForSort(
  stashScene: GQL.SlimSceneDataFragment,
  sceneA: IScrapedScene,
  sceneB: IScrapedScene
) {
  // scenes without any fingerprints sink to the end
  const aHasFingerprints = Boolean(sceneA.fingerprints?.length);
  const bHasFingerprints = Boolean(sceneB.fingerprints?.length);
  if (!aHasFingerprints && bHasFingerprints) {
    return 1;
  }
  if (!bHasFingerprints && aHasFingerprints) {
    return -1;
  }

  const [phashCountA, phashRatioA] = calculatePhashComparisonScore(
    stashScene,
    sceneA
  );
  const [phashCountB, phashRatioB] = calculatePhashComparisonScore(
    stashScene,
    sceneB
  );

  // more phash matches wins outright
  const phashCountDelta = phashCountB - phashCountA;
  if (phashCountDelta !== 0) {
    return phashCountDelta;
  }

  // phash counts tie: only now work out the duration scores
  const [
    durationCountA,
    durationRatioA,
    smallestGapA,
  ] = calculateDurationComparisonScore(stashScene, sceneA);
  const [
    durationCountB,
    durationRatioB,
    smallestGapB,
  ] = calculateDurationComparisonScore(stashScene, sceneB);

  const durationCountDelta = durationCountB - durationCountA;
  if (durationCountDelta !== 0) {
    return durationCountDelta;
  }

  // equal match counts: prefer the higher share of matching durations
  const durationRatioDelta = durationRatioB - durationRatioA;
  if (durationRatioDelta !== 0) {
    return durationRatioDelta;
  }

  // still level: prefer the higher share of matching phashes
  const phashRatioDelta = phashRatioB - phashRatioA;
  if (phashRatioDelta !== 0) {
    return phashRatioDelta;
  }

  // last resort: the smaller duration difference ranks first
  return smallestGapA - smallestGapB;
}
function renderScenes() { function renderScenes() {
const filteredScenes = !hideUnmatched const filteredScenes = !hideUnmatched
? scenes ? scenes
@@ -105,6 +239,14 @@ export const Tagger: React.FC<ITaggerProps> = ({ scenes, queue }) => {
errorMessage = intl.formatMessage({ errorMessage = intl.formatMessage({
id: "component_tagger.results.match_failed_no_result", id: "component_tagger.results.match_failed_no_result",
}); });
} else if (
searchResult &&
searchResult.results &&
searchResult.results?.length >= 2
) {
searchResult.results?.sort((scrapedSceneA, scrapedSceneB) =>
compareScenesForSort(scene, scrapedSceneA, scrapedSceneB)
);
} }
return ( return (

View File

@@ -37,9 +37,7 @@ const getDurationStatus = (
?.map((f) => f.duration) ?.map((f) => f.duration)
.map((d) => Math.abs(d - stashDuration)) ?? []; .map((d) => Math.abs(d - stashDuration)) ?? [];
const sceneDuration = scene.duration ?? 0; if (!scene.duration && durations.length === 0) return "";
if (!sceneDuration && durations.length === 0) return "";
const matchCount = durations.filter((duration) => duration <= 5).length; const matchCount = durations.filter((duration) => duration <= 5).length;
@@ -51,7 +49,7 @@ const getDurationStatus = (
values={{ matchCount, durationsLength: durations.length }} values={{ matchCount, durationsLength: durations.length }}
/> />
); );
else if (Math.abs(sceneDuration - stashDuration) < 5) else if (scene.duration && Math.abs(scene.duration - stashDuration) < 5)
match = <FormattedMessage id="component_tagger.results.fp_matches" />; match = <FormattedMessage id="component_tagger.results.fp_matches" />;
if (match) if (match)
@@ -62,10 +60,11 @@ const getDurationStatus = (
</div> </div>
); );
const minDiff = Math.min( let minDiff = Math.min(...durations);
Math.abs(sceneDuration - stashDuration), if (scene.duration) {
...durations minDiff = Math.min(minDiff, Math.abs(scene.duration - stashDuration));
); }
return ( return (
<FormattedMessage <FormattedMessage
id="component_tagger.results.duration_off" id="component_tagger.results.duration_off"

View File

@@ -13,6 +13,7 @@ After migrating, please run a scan on your entire library to populate missing da
* Added release notes dialog. ([#2726](https://github.com/stashapp/stash/pull/2726)) * Added release notes dialog. ([#2726](https://github.com/stashapp/stash/pull/2726))
### 🎨 Improvements ### 🎨 Improvements
* Improve matching scene order in the tagger to prioritise matching phashes and durations. ([#2840](https://github.com/stashapp/stash/pull/2840))
* Encode reserved characters in query URLs. ([#2899](https://github.com/stashapp/stash/pull/2899)) * Encode reserved characters in query URLs. ([#2899](https://github.com/stashapp/stash/pull/2899))
* Object titles are now displayed as the file basename if the title is not explicitly set. The `Don't include file extension as part of the title` scan flag is no longer supported. * Object titles are now displayed as the file basename if the title is not explicitly set. The `Don't include file extension as part of the title` scan flag is no longer supported.
* `Set name, date, details from embedded file metadata` scan flag is no longer supported. This functionality may be implemented as a built-in scraper in the future. * `Set name, date, details from embedded file metadata` scan flag is no longer supported. This functionality may be implemented as a built-in scraper in the future.