mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 04:14:39 +03:00
Handle unicode characters in autotag (#2336)
This commit is contained in:
@@ -22,6 +22,13 @@ func getPathQueryRegex(name string) string {
|
||||
const separator = `[` + separatorChars + `]`
|
||||
|
||||
ret := strings.ReplaceAll(name, " ", separator+"*")
|
||||
|
||||
// \p{L} is specifically omitted here because of the performance hit when
|
||||
// including it. It does mean that paths where the name is bounded by
|
||||
// unicode letters will be returned. However, the results should be tested
|
||||
// by nameMatchesPath which does include \p{L}. The improvement in query
|
||||
// performance should outweigh the performance hit of testing any extra
|
||||
// results.
|
||||
ret = `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])`
|
||||
return ret
|
||||
}
|
||||
@@ -36,7 +43,7 @@ func getPathWords(path string) []string {
|
||||
}
|
||||
|
||||
// handle path separators
|
||||
const separator = `(?:_|[^\w\d])+`
|
||||
const separator = `(?:_|[^\p{L}\w\d])+`
|
||||
re := regexp.MustCompile(separator)
|
||||
retStr = re.ReplaceAllString(retStr, " ")
|
||||
|
||||
@@ -52,7 +59,9 @@ func getPathWords(path string) []string {
|
||||
// we post-match afterwards, so we can afford to be a little loose
|
||||
// with the query
|
||||
// just use the first two characters
|
||||
ret = append(ret, w[0:2])
|
||||
// #2293 - need to convert to unicode runes for the substring, otherwise
|
||||
// the resulting string is corrupted.
|
||||
ret = append(ret, string([]rune(w)[0:2]))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +81,7 @@ func nameMatchesPath(name, path string) int {
|
||||
const separator = `[` + separatorChars + `]`
|
||||
|
||||
reStr := strings.ReplaceAll(name, " ", separator+"*")
|
||||
reStr = `(?:^|_|[^\w\d])` + reStr + `(?:$|_|[^\w\d])`
|
||||
reStr = `(?:^|_|[^\p{L}\w\d])` + reStr + `(?:$|_|[^\p{L}\w\d])`
|
||||
|
||||
re := regexp.MustCompile(reStr)
|
||||
found := re.FindAllStringIndex(path, -1)
|
||||
|
||||
@@ -4,71 +4,90 @@ import "testing"
|
||||
|
||||
func Test_nameMatchesPath(t *testing.T) {
|
||||
const name = "first last"
|
||||
const unicodeName = "伏字"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
want int
|
||||
testName string
|
||||
name string
|
||||
path string
|
||||
want int
|
||||
}{
|
||||
{
|
||||
"exact",
|
||||
name,
|
||||
name,
|
||||
0,
|
||||
},
|
||||
{
|
||||
"partial",
|
||||
name,
|
||||
"first",
|
||||
-1,
|
||||
},
|
||||
{
|
||||
"separator",
|
||||
name,
|
||||
"first.last",
|
||||
0,
|
||||
},
|
||||
{
|
||||
"separator",
|
||||
name,
|
||||
"first-last",
|
||||
0,
|
||||
},
|
||||
{
|
||||
"separator",
|
||||
name,
|
||||
"first_last",
|
||||
0,
|
||||
},
|
||||
{
|
||||
"separators",
|
||||
name,
|
||||
"first.-_ last",
|
||||
0,
|
||||
},
|
||||
{
|
||||
"within string",
|
||||
name,
|
||||
"before_first last/after",
|
||||
6,
|
||||
},
|
||||
{
|
||||
"not within string",
|
||||
name,
|
||||
"beforefirst last/after",
|
||||
-1,
|
||||
},
|
||||
{
|
||||
"not within string",
|
||||
name,
|
||||
"before/first lastafter",
|
||||
-1,
|
||||
},
|
||||
{
|
||||
"not within string",
|
||||
name,
|
||||
"first last1",
|
||||
-1,
|
||||
},
|
||||
{
|
||||
"not within string",
|
||||
name,
|
||||
"1first last",
|
||||
-1,
|
||||
},
|
||||
{
|
||||
"unicode",
|
||||
unicodeName,
|
||||
unicodeName,
|
||||
0,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := nameMatchesPath(name, tt.path); got != tt.want {
|
||||
t.Run(tt.testName, func(t *testing.T) {
|
||||
if got := nameMatchesPath(tt.name, tt.path); got != tt.want {
|
||||
t.Errorf("nameMatchesPath() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user