mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 12:24:38 +03:00
Fix a bunch of scanning / tagging bugs (#3154)
* Fix possible infinite loop/stack overflow with weird/broken zip files
* Fix path length calculation using bytes instead of characters (runes)
* Fix bug where oshash gets buffers with size not actually a multiple of 8
* Add oshash tests

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
@@ -125,7 +125,12 @@ func walkDir(f FS, path string, d fs.DirEntry, walkDirFn fs.WalkDirFunc) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, d1 := range dirs {
|
for _, d1 := range dirs {
|
||||||
path1 := filepath.Join(path, d1.Name())
|
name := d1.Name()
|
||||||
|
// Prevent infinite loops; this can happen with certain FS implementations (e.g. ZipFS).
|
||||||
|
if name == "" || name == "." {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
path1 := filepath.Join(path, name)
|
||||||
if err := walkDir(f, path1, d1, walkDirFn); err != nil {
|
if err := walkDir(f, path1, d1, walkDirFn); err != nil {
|
||||||
if errors.Is(err, fs.SkipDir) {
|
if errors.Is(err, fs.SkipDir) {
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -46,15 +46,16 @@ func oshash(size int64, head []byte, tail []byte) (string, error) {
|
|||||||
return fmt.Sprintf("%016x", result), nil
|
return fmt.Sprintf("%016x", result), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// FromFilePath calculates the hash reading from src.
|
// FromReader calculates the hash reading from src.
|
||||||
func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
|
func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
|
||||||
if fileSize <= 0 {
|
if fileSize <= 8 {
|
||||||
return "", fmt.Errorf("cannot calculate oshash for empty file (size %d)", fileSize)
|
return "", fmt.Errorf("cannot calculate oshash where size < 8 (%d)", fileSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
fileChunkSize := chunkSize
|
fileChunkSize := chunkSize
|
||||||
if fileSize < fileChunkSize {
|
if fileSize < fileChunkSize {
|
||||||
fileChunkSize = fileSize
|
// Must be a multiple of 8.
|
||||||
|
fileChunkSize = (fileSize / 8) * 8
|
||||||
}
|
}
|
||||||
|
|
||||||
head := make([]byte, fileChunkSize)
|
head := make([]byte, fileChunkSize)
|
||||||
@@ -67,7 +68,7 @@ func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// seek to the end of the file - the chunk size
|
// seek to the end of the file - the chunk size
|
||||||
_, err = src.Seek(-fileChunkSize, 2)
|
_, err = src.Seek(-fileChunkSize, io.SeekEnd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,75 +0,0 @@
|
|||||||
package oshash
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math/rand"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Note that the public API returns "" instead.
|
|
||||||
func TestOshashEmpty(t *testing.T) {
|
|
||||||
var size int64
|
|
||||||
head := make([]byte, chunkSize)
|
|
||||||
tail := make([]byte, chunkSize)
|
|
||||||
want := "0000000000000000"
|
|
||||||
got, err := oshash(size, head, tail)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("TestOshashEmpty: Error from oshash: %v", err)
|
|
||||||
}
|
|
||||||
if got != want {
|
|
||||||
t.Errorf("TestOshashEmpty: oshash(0, 0, 0) = %q; want %q", got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// As oshash sums byte values, causing collisions is trivial.
|
|
||||||
func TestOshashCollisions(t *testing.T) {
|
|
||||||
buf1 := []byte("this is dumb")
|
|
||||||
buf2 := []byte("dumb is this")
|
|
||||||
size := int64(len(buf1))
|
|
||||||
head := make([]byte, chunkSize)
|
|
||||||
|
|
||||||
tail1 := make([]byte, chunkSize)
|
|
||||||
copy(tail1[len(tail1)-len(buf1):], buf1)
|
|
||||||
hash1, err := oshash(size, head, tail1)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
tail2 := make([]byte, chunkSize)
|
|
||||||
copy(tail2[len(tail2)-len(buf2):], buf2)
|
|
||||||
hash2, err := oshash(size, head, tail2)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if hash1 != hash2 {
|
|
||||||
t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) =! oshash(n, k, ... %v)", buf1, buf2)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkOsHash(b *testing.B) {
|
|
||||||
src := rand.NewSource(9999)
|
|
||||||
r := rand.New(src)
|
|
||||||
|
|
||||||
size := int64(1234567890)
|
|
||||||
|
|
||||||
head := make([]byte, 1024*64)
|
|
||||||
_, err := r.Read(head)
|
|
||||||
if err != nil {
|
|
||||||
b.Errorf("unable to generate head array: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
tail := make([]byte, 1024*64)
|
|
||||||
_, err = r.Read(tail)
|
|
||||||
if err != nil {
|
|
||||||
b.Errorf("unable to generate tail array: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
for n := 0; n < b.N; n++ {
|
|
||||||
_, err := oshash(size, head, tail)
|
|
||||||
if err != nil {
|
|
||||||
b.Errorf("unexpected error: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
111
pkg/hash/oshash/oshash_test.go
Normal file
111
pkg/hash/oshash/oshash_test.go
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
package oshash
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"math/rand"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkOsHash(b *testing.B) {
|
||||||
|
src := rand.NewSource(9999)
|
||||||
|
r := rand.New(src)
|
||||||
|
|
||||||
|
size := int64(1234567890)
|
||||||
|
|
||||||
|
head := make([]byte, 1024*64)
|
||||||
|
_, err := r.Read(head)
|
||||||
|
if err != nil {
|
||||||
|
b.Errorf("unable to generate head array: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tail := make([]byte, 1024*64)
|
||||||
|
_, err = r.Read(tail)
|
||||||
|
if err != nil {
|
||||||
|
b.Errorf("unable to generate tail array: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
_, err := oshash(size, head, tail)
|
||||||
|
if err != nil {
|
||||||
|
b.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFromReader(t *testing.T) {
|
||||||
|
makeByteArray := func(base []byte, mag int) []byte {
|
||||||
|
ret := base
|
||||||
|
for i := 0; i < mag; i++ {
|
||||||
|
ret = append(ret, ret...)
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
makeTailArray := func(base []byte, tail []byte) []byte {
|
||||||
|
ret := base
|
||||||
|
t := make([]byte, chunkSize)
|
||||||
|
copy(t[len(t)-len(tail):], tail)
|
||||||
|
ret = append(ret, t...)
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
data []byte
|
||||||
|
want string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"empty",
|
||||||
|
[]byte{},
|
||||||
|
"",
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"regular",
|
||||||
|
makeByteArray([]byte("this is a test"), 15),
|
||||||
|
"6a0eba04654d0b9b",
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"< chunk size",
|
||||||
|
[]byte("hello world"),
|
||||||
|
"d3e392dee38cd4df",
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"< 8",
|
||||||
|
[]byte("hello"),
|
||||||
|
"",
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"identical #1",
|
||||||
|
makeTailArray(make([]byte, chunkSize), []byte("this is dumb")),
|
||||||
|
"d5d6ddd820756920",
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"identical #2",
|
||||||
|
makeTailArray(make([]byte, chunkSize), []byte("dumb is this")),
|
||||||
|
"d5d6ddd820756920",
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
r := bytes.NewReader(tt.data)
|
||||||
|
|
||||||
|
got, err := FromReader(r, int64(len(tt.data)))
|
||||||
|
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("FromReader() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("FromReader() = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/gallery"
|
"github.com/stashapp/stash/pkg/gallery"
|
||||||
"github.com/stashapp/stash/pkg/image"
|
"github.com/stashapp/stash/pkg/image"
|
||||||
@@ -77,7 +78,7 @@ func getPathWords(path string, trimExt bool) []string {
|
|||||||
// remove any single letter words
|
// remove any single letter words
|
||||||
var ret []string
|
var ret []string
|
||||||
for _, w := range words {
|
for _, w := range words {
|
||||||
if len(w) > 1 {
|
if utf8.RuneCountInString(w) > 1 {
|
||||||
// #1450 - we need to open up the criteria for matching so that we
|
// #1450 - we need to open up the criteria for matching so that we
|
||||||
// can match where path has no space between subject names -
|
// can match where path has no space between subject names -
|
||||||
// ie name = "foo bar" - path = "foobar"
|
// ie name = "foo bar" - path = "foobar"
|
||||||
|
|||||||
Reference in New Issue
Block a user