Fix a bunch of scanning / tagging bugs (#3154)

* Fix possible infinite loop/stack overflow with weird/broken zip files
* Fix path length calculation using bytes instead of characters (runes)
* Fix bug where oshash gets buffers with size not actually multiple of 8
* Add oshash tests

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2025-12-17 12:24:38 +03:00 · 2022-12-01 15:48:04 +10:00
parent e614ca8d26
commit 87cea80e7b
5 changed files with 125 additions and 82 deletions
--- a/pkg/file/walk.go
+++ b/pkg/file/walk.go
@@ -125,7 +125,12 @@ func walkDir(f FS, path string, d fs.DirEntry, walkDirFn fs.WalkDirFunc) error {
 	}
 	for _, d1 := range dirs {
-		path1 := filepath.Join(path, d1.Name())
+		name := d1.Name()
 		// Prevent infinite loops; this can happen with certain FS implementations (e.g. ZipFS).
 		if name == "" || name == "." {
 			continue
 		}
 		path1 := filepath.Join(path, name)
 		if err := walkDir(f, path1, d1, walkDirFn); err != nil {
 			if errors.Is(err, fs.SkipDir) {
 				break
--- a/pkg/hash/oshash/oshash.go
+++ b/pkg/hash/oshash/oshash.go
@@ -46,15 +46,16 @@ func oshash(size int64, head []byte, tail []byte) (string, error) {
 	return fmt.Sprintf("%016x", result), nil
 }
-// FromFilePath calculates the hash reading from src.
+// FromReader calculates the hash reading from src.
 func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
-	if fileSize <= 0 {
+	if fileSize <= 8 {
-		return "", fmt.Errorf("cannot calculate oshash for empty file (size %d)", fileSize)
+		return "", fmt.Errorf("cannot calculate oshash where size < 8 (%d)", fileSize)
 	}
 	fileChunkSize := chunkSize
 	if fileSize < fileChunkSize {
-		fileChunkSize = fileSize
+		// Must be a multiple of 8.
 		fileChunkSize = (fileSize / 8) * 8
 	}
 	head := make([]byte, fileChunkSize)
@@ -67,7 +68,7 @@ func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
 	}
 	// seek to the end of the file - the chunk size
-	_, err = src.Seek(-fileChunkSize, 2)
+	_, err = src.Seek(-fileChunkSize, io.SeekEnd)
 	if err != nil {
 		return "", err
 	}
--- a/pkg/hash/oshash/oshash_internal_test.go
+++ b/pkg/hash/oshash/oshash_internal_test.go
@@ -1,75 +0,0 @@
 package oshash
 import (
 	"math/rand"
 	"testing"
 )
 // TestOshashEmpty feeds the internal oshash helper a zero size together with
 // zero-filled head and tail buffers and expects the all-zero digest.
 // Note that the public API returns "" instead.
 func TestOshashEmpty(t *testing.T) {
 	const expected = "0000000000000000"
 	zeroHead := make([]byte, chunkSize)
 	zeroTail := make([]byte, chunkSize)
 	actual, err := oshash(0, zeroHead, zeroTail)
 	if err != nil {
 		t.Errorf("TestOshashEmpty: Error from oshash: %v", err)
 	}
 	if actual != expected {
 		t.Errorf("TestOshashEmpty: oshash(0, 0, 0) = %q; want %q", actual, expected)
 	}
 }
 // TestOshashCollisions checks that two tails containing the same words in a
 // different order are reported as the same hash.
 // As oshash sums byte values, causing collisions is trivial.
 func TestOshashCollisions(t *testing.T) {
 	buf1 := []byte("this is dumb")
 	buf2 := []byte("dumb is this")
 	size := int64(len(buf1))
 	head := make([]byte, chunkSize)
 	// Each buffer is copied to the very end of an otherwise zeroed tail chunk.
 	tail1 := make([]byte, chunkSize)
 	copy(tail1[len(tail1)-len(buf1):], buf1)
 	hash1, err := oshash(size, head, tail1)
 	if err != nil {
 		t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
 	}
 	tail2 := make([]byte, chunkSize)
 	copy(tail2[len(tail2)-len(buf2):], buf2)
 	hash2, err := oshash(size, head, tail2)
 	if err != nil {
 		t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
 	}
 	// The test fails when the expected collision does not happen.
 	if hash1 != hash2 {
 		t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) != oshash(n, k, ... %v)", buf1, buf2)
 	}
 }
 // BenchmarkOsHash times the internal oshash helper on head and tail buffers
 // filled from a random source created with a fixed seed.
 func BenchmarkOsHash(b *testing.B) {
 	const (
 		seed     = 9999
 		bufLen   = 1024 * 64
 		fileSize = int64(1234567890)
 	)
 	rng := rand.New(rand.NewSource(seed))
 	head := make([]byte, bufLen)
 	if _, err := rng.Read(head); err != nil {
 		b.Errorf("unable to generate head array: %v", err)
 	}
 	tail := make([]byte, bufLen)
 	if _, err := rng.Read(tail); err != nil {
 		b.Errorf("unable to generate tail array: %v", err)
 	}
 	// Keep buffer generation out of the measured time.
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		if _, err := oshash(fileSize, head, tail); err != nil {
 			b.Errorf("unexpected error: %v", err)
 		}
 	}
 }
--- a/pkg/hash/oshash/oshash_test.go
+++ b/pkg/hash/oshash/oshash_test.go
@@ -0,0 +1,111 @@
 package oshash
 import (
 	"bytes"
 	"math/rand"
 	"testing"
 )
 // BenchmarkOsHash measures repeated calls to the internal oshash helper using
 // two pseudo-random buffers produced from a fixed seed.
 func BenchmarkOsHash(b *testing.B) {
 	rng := rand.New(rand.NewSource(9999))
 	// randomChunk allocates a 1024*64 byte buffer and fills it from rng,
 	// reporting a benchmark error that names the buffer if the read fails.
 	randomChunk := func(label string) []byte {
 		chunk := make([]byte, 1024*64)
 		if _, err := rng.Read(chunk); err != nil {
 			b.Errorf("unable to generate %s array: %v", label, err)
 		}
 		return chunk
 	}
 	var fileSize int64 = 1234567890
 	head := randomChunk("head")
 	tail := randomChunk("tail")
 	// Only the hashing loop below should be timed.
 	b.ResetTimer()
 	for iter := 0; iter < b.N; iter++ {
 		if _, err := oshash(fileSize, head, tail); err != nil {
 			b.Errorf("unexpected error: %v", err)
 		}
 	}
 }
 // TestFromReader runs FromReader over in-memory byte slices and compares the
 // returned hash and error state with the expectation recorded for each case.
 func TestFromReader(t *testing.T) {
 	// repeatDoubling appends the slice to itself doublings times, so the
 	// result is len(seed) * 2^doublings bytes long.
 	repeatDoubling := func(seed []byte, doublings int) []byte {
 		out := seed
 		for remaining := doublings; remaining > 0; remaining-- {
 			out = append(out, out...)
 		}
 		return out
 	}
 	// withTail appends one chunkSize block to prefix. The block is zero-filled
 	// except for suffix, which occupies its final bytes.
 	withTail := func(prefix []byte, suffix []byte) []byte {
 		block := make([]byte, chunkSize)
 		copy(block[len(block)-len(suffix):], suffix)
 		return append(prefix, block...)
 	}
 	type testCase struct {
 		name    string
 		data    []byte
 		want    string
 		wantErr bool
 	}
 	cases := []testCase{
 		{name: "empty", data: []byte{}, want: "", wantErr: true},
 		{name: "regular", data: repeatDoubling([]byte("this is a test"), 15), want: "6a0eba04654d0b9b", wantErr: false},
 		{name: "< chunk size", data: []byte("hello world"), want: "d3e392dee38cd4df", wantErr: false},
 		{name: "< 8", data: []byte("hello"), want: "", wantErr: true},
 		// The next two inputs differ only in word order and expect the same hash.
 		{name: "identical #1", data: withTail(make([]byte, chunkSize), []byte("this is dumb")), want: "d5d6ddd820756920", wantErr: false},
 		{name: "identical #2", data: withTail(make([]byte, chunkSize), []byte("dumb is this")), want: "d5d6ddd820756920", wantErr: false},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			got, err := FromReader(bytes.NewReader(tc.data), int64(len(tc.data)))
 			if failed := err != nil; failed != tc.wantErr {
 				t.Errorf("FromReader() error = %v, wantErr %v", err, tc.wantErr)
 				return
 			}
 			if got == tc.want {
 				return
 			}
 			t.Errorf("FromReader() = %v, want %v", got, tc.want)
 		})
 	}
 }
--- a/pkg/match/path.go
+++ b/pkg/match/path.go
@@ -7,6 +7,7 @@ import (
 	"regexp"
 	"strings"
 	"unicode"
 	"unicode/utf8"
 	"github.com/stashapp/stash/pkg/gallery"
 	"github.com/stashapp/stash/pkg/image"
@@ -77,7 +78,7 @@ func getPathWords(path string, trimExt bool) []string {
 	// remove any single letter words
 	var ret []string
 	for _, w := range words {
-		if len(w) > 1 {
+		if utf8.RuneCountInString(w) > 1 {
 			// #1450 - we need to open up the criteria for matching so that we
 			// can match where path has no space between subject names -
 			// ie name = "foo bar" - path = "foobar"