mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 04:14:39 +03:00
Handle unicode characters in autotag (#2336)
This commit is contained in:
@@ -22,6 +22,13 @@ func getPathQueryRegex(name string) string {
|
|||||||
const separator = `[` + separatorChars + `]`
|
const separator = `[` + separatorChars + `]`
|
||||||
|
|
||||||
ret := strings.ReplaceAll(name, " ", separator+"*")
|
ret := strings.ReplaceAll(name, " ", separator+"*")
|
||||||
|
|
||||||
|
// \p{L} is specifically omitted here because of the performance hit when
|
||||||
|
// including it. It does mean that paths where the name is bounded by
|
||||||
|
// unicode letters will be returned. However, the results should be tested
|
||||||
|
// by nameMatchesPath which does include \p{L}. The improvement in query
|
||||||
|
// performance should outweigh the performance hit of testing any extra
|
||||||
|
// results.
|
||||||
ret = `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])`
|
ret = `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])`
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
@@ -36,7 +43,7 @@ func getPathWords(path string) []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// handle path separators
|
// handle path separators
|
||||||
const separator = `(?:_|[^\w\d])+`
|
const separator = `(?:_|[^\p{L}\w\d])+`
|
||||||
re := regexp.MustCompile(separator)
|
re := regexp.MustCompile(separator)
|
||||||
retStr = re.ReplaceAllString(retStr, " ")
|
retStr = re.ReplaceAllString(retStr, " ")
|
||||||
|
|
||||||
@@ -52,7 +59,9 @@ func getPathWords(path string) []string {
|
|||||||
// we post-match afterwards, so we can afford to be a little loose
|
// we post-match afterwards, so we can afford to be a little loose
|
||||||
// with the query
|
// with the query
|
||||||
// just use the first two characters
|
// just use the first two characters
|
||||||
ret = append(ret, w[0:2])
|
// #2293 - need to convert to unicode runes for the substring, otherwise
|
||||||
|
// the resulting string is corrupted.
|
||||||
|
ret = append(ret, string([]rune(w)[0:2]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,7 +81,7 @@ func nameMatchesPath(name, path string) int {
|
|||||||
const separator = `[` + separatorChars + `]`
|
const separator = `[` + separatorChars + `]`
|
||||||
|
|
||||||
reStr := strings.ReplaceAll(name, " ", separator+"*")
|
reStr := strings.ReplaceAll(name, " ", separator+"*")
|
||||||
reStr = `(?:^|_|[^\w\d])` + reStr + `(?:$|_|[^\w\d])`
|
reStr = `(?:^|_|[^\p{L}\w\d])` + reStr + `(?:$|_|[^\p{L}\w\d])`
|
||||||
|
|
||||||
re := regexp.MustCompile(reStr)
|
re := regexp.MustCompile(reStr)
|
||||||
found := re.FindAllStringIndex(path, -1)
|
found := re.FindAllStringIndex(path, -1)
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ import "testing"
|
|||||||
|
|
||||||
func Test_nameMatchesPath(t *testing.T) {
|
func Test_nameMatchesPath(t *testing.T) {
|
||||||
const name = "first last"
|
const name = "first last"
|
||||||
|
const unicodeName = "伏字"
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
|
testName string
|
||||||
name string
|
name string
|
||||||
path string
|
path string
|
||||||
want int
|
want int
|
||||||
@@ -13,62 +15,79 @@ func Test_nameMatchesPath(t *testing.T) {
|
|||||||
{
|
{
|
||||||
"exact",
|
"exact",
|
||||||
name,
|
name,
|
||||||
|
name,
|
||||||
0,
|
0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"partial",
|
"partial",
|
||||||
|
name,
|
||||||
"first",
|
"first",
|
||||||
-1,
|
-1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"separator",
|
"separator",
|
||||||
|
name,
|
||||||
"first.last",
|
"first.last",
|
||||||
0,
|
0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"separator",
|
"separator",
|
||||||
|
name,
|
||||||
"first-last",
|
"first-last",
|
||||||
0,
|
0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"separator",
|
"separator",
|
||||||
|
name,
|
||||||
"first_last",
|
"first_last",
|
||||||
0,
|
0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"separators",
|
"separators",
|
||||||
|
name,
|
||||||
"first.-_ last",
|
"first.-_ last",
|
||||||
0,
|
0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"within string",
|
"within string",
|
||||||
|
name,
|
||||||
"before_first last/after",
|
"before_first last/after",
|
||||||
6,
|
6,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"not within string",
|
"not within string",
|
||||||
|
name,
|
||||||
"beforefirst last/after",
|
"beforefirst last/after",
|
||||||
-1,
|
-1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"not within string",
|
"not within string",
|
||||||
|
name,
|
||||||
"before/first lastafter",
|
"before/first lastafter",
|
||||||
-1,
|
-1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"not within string",
|
"not within string",
|
||||||
|
name,
|
||||||
"first last1",
|
"first last1",
|
||||||
-1,
|
-1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"not within string",
|
"not within string",
|
||||||
|
name,
|
||||||
"1first last",
|
"1first last",
|
||||||
-1,
|
-1,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"unicode",
|
||||||
|
unicodeName,
|
||||||
|
unicodeName,
|
||||||
|
0,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.testName, func(t *testing.T) {
|
||||||
if got := nameMatchesPath(name, tt.path); got != tt.want {
|
if got := nameMatchesPath(tt.name, tt.path); got != tt.want {
|
||||||
t.Errorf("nameMatchesPath() = %v, want %v", got, tt.want)
|
t.Errorf("nameMatchesPath() = %v, want %v", got, tt.want)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ WHERE performers_tags.tag_id = ?
|
|||||||
GROUP BY performers_tags.performer_id
|
GROUP BY performers_tags.performer_id
|
||||||
`
|
`
|
||||||
|
|
||||||
|
const singleFirstCharacterRegex = `^[\w\p{L}][.\-_ ]`
|
||||||
|
|
||||||
type performerQueryBuilder struct {
|
type performerQueryBuilder struct {
|
||||||
repository
|
repository
|
||||||
}
|
}
|
||||||
@@ -184,7 +186,7 @@ func (qb *performerQueryBuilder) QueryForAutoTag(words []string) ([]*models.Perf
|
|||||||
var args []interface{}
|
var args []interface{}
|
||||||
|
|
||||||
whereClauses = append(whereClauses, "name regexp ?")
|
whereClauses = append(whereClauses, "name regexp ?")
|
||||||
args = append(args, "^[\\w][.\\-_ ]")
|
args = append(args, singleFirstCharacterRegex)
|
||||||
|
|
||||||
for _, w := range words {
|
for _, w := range words {
|
||||||
whereClauses = append(whereClauses, "name like ?")
|
whereClauses = append(whereClauses, "name like ?")
|
||||||
|
|||||||
@@ -171,6 +171,8 @@ func (r *repository) runSumQuery(query string, args []interface{}) (float64, err
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *repository) queryFunc(query string, args []interface{}, single bool, f func(rows *sqlx.Rows) error) error {
|
func (r *repository) queryFunc(query string, args []interface{}, single bool, f func(rows *sqlx.Rows) error) error {
|
||||||
|
logger.Tracef("SQL: %s, args: %v", query, args)
|
||||||
|
|
||||||
rows, err := r.tx.Queryx(query, args...)
|
rows, err := r.tx.Queryx(query, args...)
|
||||||
|
|
||||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||||
|
|||||||
@@ -145,7 +145,6 @@ func (qb *studioQueryBuilder) QueryForAutoTag(words []string) ([]*models.Studio,
|
|||||||
var args []interface{}
|
var args []interface{}
|
||||||
|
|
||||||
// always include names that begin with a single character
|
// always include names that begin with a single character
|
||||||
singleFirstCharacterRegex := "^[\\w][.\\-_ ]"
|
|
||||||
whereClauses = append(whereClauses, "studios.name regexp ? OR COALESCE(studio_aliases.alias, '') regexp ?")
|
whereClauses = append(whereClauses, "studios.name regexp ? OR COALESCE(studio_aliases.alias, '') regexp ?")
|
||||||
args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex)
|
args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex)
|
||||||
|
|
||||||
|
|||||||
@@ -236,7 +236,6 @@ func (qb *tagQueryBuilder) QueryForAutoTag(words []string) ([]*models.Tag, error
|
|||||||
var args []interface{}
|
var args []interface{}
|
||||||
|
|
||||||
// always include names that begin with a single character
|
// always include names that begin with a single character
|
||||||
singleFirstCharacterRegex := "^[\\w][.\\-_ ]"
|
|
||||||
whereClauses = append(whereClauses, "tags.name regexp ? OR COALESCE(tag_aliases.alias, '') regexp ?")
|
whereClauses = append(whereClauses, "tags.name regexp ? OR COALESCE(tag_aliases.alias, '') regexp ?")
|
||||||
args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex)
|
args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user