Limit duplicate matching to files that have approximately the same duration (#3663)

* Limit duplicate matching to files that have approximately the same duration
* Add UI for the duration difference
---------
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
puc9
2023-05-02 22:01:59 -07:00
committed by GitHub
parent 002b71bd67
commit 899d1b9395
11 changed files with 177 additions and 75 deletions

View File

@@ -1,6 +1,7 @@
package utils
import (
"math"
"strconv"
"github.com/corona10/goimagehash"
@@ -8,21 +9,28 @@ import (
)
// Phash holds one scene's perceptual hash together with the per-scene data
// that FindDuplicates reads and updates while comparing scenes.
//
// NOTE(review): this span comes from a rendered diff, so the SceneID and Hash
// lines appear twice (apparently the pre-change and post-change versions of
// the same two fields) — confirm against the actual file before relying on it.
type Phash struct {
SceneID int `db:"id"`
Hash int64 `db:"phash"`
SceneID int `db:"id"` // scene identifier, tagged for the "id" database column
Hash int64 `db:"phash"` // hash value, tagged for the "phash" column; FindDuplicates converts it to uint64 for goimagehash
Duration float64 `db:"duration"` // tagged for the "duration" column; FindDuplicates only compares durations when both are > 0
Neighbors []int // indices into the slice passed to FindDuplicates, appended for each scene within the allowed distance
Bucket int // not referenced in the visible part of FindDuplicates
}
func FindDuplicates(hashes []*Phash, distance int) [][]int {
func FindDuplicates(hashes []*Phash, distance int, durationDiff float64) [][]int {
for i, scene := range hashes {
sceneHash := goimagehash.NewImageHash(uint64(scene.Hash), goimagehash.PHash)
for j, neighbor := range hashes {
if i != j && scene.SceneID != neighbor.SceneID {
neighborHash := goimagehash.NewImageHash(uint64(neighbor.Hash), goimagehash.PHash)
neighborDistance, _ := sceneHash.Distance(neighborHash)
if neighborDistance <= distance {
scene.Neighbors = append(scene.Neighbors, j)
neighbourDurationDistance := 0.
if scene.Duration > 0 && neighbor.Duration > 0 {
neighbourDurationDistance = math.Abs(scene.Duration - neighbor.Duration)
}
if (neighbourDurationDistance <= durationDiff) || (durationDiff < 0) {
neighborHash := goimagehash.NewImageHash(uint64(neighbor.Hash), goimagehash.PHash)
neighborDistance, _ := sceneHash.Distance(neighborHash)
if neighborDistance <= distance {
scene.Neighbors = append(scene.Neighbors, j)
}
}
}
}