mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 04:14:39 +03:00
Remove single unicode character from autotag query (#2363)
* Remove single unicode character from autotag query * Compile regex once where possible * Fix CPU profiling * Only match unicode characters if in path
This commit is contained in:
5
main.go
5
main.go
@@ -35,10 +35,11 @@ func main() {
|
|||||||
manager.Initialize()
|
manager.Initialize()
|
||||||
api.Start(uiBox, loginUIBox)
|
api.Start(uiBox, loginUIBox)
|
||||||
|
|
||||||
// stop any profiling at exit
|
|
||||||
defer pprof.StopCPUProfile()
|
|
||||||
blockForever()
|
blockForever()
|
||||||
|
|
||||||
|
// stop any profiling at exit
|
||||||
|
pprof.StopCPUProfile()
|
||||||
|
|
||||||
manager.GetInstance().Shutdown(0)
|
manager.GetInstance().Shutdown(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/gallery"
|
"github.com/stashapp/stash/pkg/gallery"
|
||||||
"github.com/stashapp/stash/pkg/image"
|
"github.com/stashapp/stash/pkg/image"
|
||||||
@@ -12,7 +13,12 @@ import (
|
|||||||
"github.com/stashapp/stash/pkg/scene"
|
"github.com/stashapp/stash/pkg/scene"
|
||||||
)
|
)
|
||||||
|
|
||||||
const separatorChars = `.\-_ `
|
// Building blocks for the regular expressions used to match names in paths.
const (
|
||||||
|
// separatorChars lists the characters accepted between the words of a name.
// It is written to be embedded inside a regexp character class, hence the
// escaped hyphen.
separatorChars = `.\-_ `
|
||||||
|
|
||||||
|
// reNotLetterWordUnicode matches a single character that is neither a
// unicode letter (\p{L}) nor a word character or digit (\w, \d).
reNotLetterWordUnicode = `[^\p{L}\w\d]`
|
||||||
|
// reNotLetterWord is the variant without \p{L}: it matches a single
// character that is not a word character or digit (\w, \d).
reNotLetterWord = `[^\w\d]`
|
||||||
|
)
|
||||||
|
|
||||||
func getPathQueryRegex(name string) string {
|
func getPathQueryRegex(name string) string {
|
||||||
// escape specific regex characters
|
// escape specific regex characters
|
||||||
@@ -68,22 +74,22 @@ func getPathWords(path string) []string {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// allASCII reports whether every byte of s is at most unicode.MaxASCII.
// It returns true for the empty string, since the loop body never runs.
// https://stackoverflow.com/a/53069799
|
||||||
|
func allASCII(s string) bool {
|
||||||
|
// iterate by byte index rather than by rune; no decoding is performed
for i := 0; i < len(s); i++ {
|
||||||
|
// stop at the first byte above the ASCII range
if s[i] > unicode.MaxASCII {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// nameMatchesPath returns the index in the path for the right-most match.
|
// nameMatchesPath returns the index in the path for the right-most match.
|
||||||
// Returns -1 if not found.
|
// Returns -1 if not found.
|
||||||
func nameMatchesPath(name, path string) int {
|
func nameMatchesPath(name, path string) int {
|
||||||
// escape specific regex characters
|
// #2363 - optimisation: only use unicode character regexp if path contains
|
||||||
name = regexp.QuoteMeta(name)
|
// unicode characters
|
||||||
|
re := nameToRegexp(name, !allASCII(path))
|
||||||
name = strings.ToLower(name)
|
|
||||||
path = strings.ToLower(path)
|
|
||||||
|
|
||||||
// handle path separators
|
|
||||||
const separator = `[` + separatorChars + `]`
|
|
||||||
|
|
||||||
reStr := strings.ReplaceAll(name, " ", separator+"*")
|
|
||||||
reStr = `(?:^|_|[^\p{L}\w\d])` + reStr + `(?:$|_|[^\p{L}\w\d])`
|
|
||||||
|
|
||||||
re := regexp.MustCompile(reStr)
|
|
||||||
found := re.FindAllStringIndex(path, -1)
|
found := re.FindAllStringIndex(path, -1)
|
||||||
|
|
||||||
if found == nil {
|
if found == nil {
|
||||||
@@ -93,6 +99,39 @@ func nameMatchesPath(name, path string) int {
|
|||||||
return found[len(found)-1][0]
|
return found[len(found)-1][0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nameToRegexp compiles a regexp pattern to match paths from the given name.
// The name is regexp-quoted and lowercased, so the returned regexp is meant
// to be run against lowercased input.
|
||||||
|
// Set useUnicode to true if this regexp is to be used on any strings with unicode characters.
// The pattern is compiled with regexp.MustCompile, which panics if it cannot
// be parsed.
|
||||||
|
func nameToRegexp(name string, useUnicode bool) *regexp.Regexp {
|
||||||
|
// escape specific regex characters
|
||||||
|
name = regexp.QuoteMeta(name)
|
||||||
|
|
||||||
|
name = strings.ToLower(name)
|
||||||
|
|
||||||
|
// handle path separators
|
||||||
|
const separator = `[` + separatorChars + `]`
|
||||||
|
|
||||||
|
// performance optimisation: only use \p{L} if useUnicode is true
|
||||||
|
notWord := reNotLetterWord
|
||||||
|
if useUnicode {
|
||||||
|
notWord = reNotLetterWordUnicode
|
||||||
|
}
|
||||||
|
|
||||||
|
// each space in the name may be any run (including none) of separator characters
reStr := strings.ReplaceAll(name, " ", separator+"*")
|
||||||
|
// the name must be delimited on each side by the start/end of the string,
// an underscore, or a character matched by notWord; the underscore is listed
// explicitly because \w includes it
reStr = `(?:^|_|` + notWord + `)` + reStr + `(?:$|_|` + notWord + `)`
|
||||||
|
|
||||||
|
re := regexp.MustCompile(reStr)
|
||||||
|
return re
|
||||||
|
}
|
||||||
|
|
||||||
|
// regexpMatchesPath returns the start index of the right-most match of r in
// the lowercased path. Returns -1 if not found.
// NOTE(review): the index refers to the lowercased copy of path; confirm that
// callers needing an offset into the original string account for this.
func regexpMatchesPath(r *regexp.Regexp, path string) int {
|
||||||
|
// lowercase the path before matching
path = strings.ToLower(path)
|
||||||
|
// collect every match (-1 means no limit)
found := r.FindAllStringIndex(path, -1)
|
||||||
|
if found == nil {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
// last entry is the right-most match; element 0 is its start offset
return found[len(found)-1][0]
|
||||||
|
}
|
||||||
|
|
||||||
func PathToPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) {
|
func PathToPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) {
|
||||||
words := getPathWords(path)
|
words := getPathWords(path)
|
||||||
performers, err := performerReader.QueryForAutoTag(words)
|
performers, err := performerReader.QueryForAutoTag(words)
|
||||||
@@ -208,8 +247,13 @@ func PathToScenes(name string, paths []string, sceneReader models.SceneReader) (
|
|||||||
}
|
}
|
||||||
|
|
||||||
var ret []*models.Scene
|
var ret []*models.Scene
|
||||||
|
|
||||||
|
// paths may have unicode characters
|
||||||
|
const useUnicode = true
|
||||||
|
|
||||||
|
r := nameToRegexp(name, useUnicode)
|
||||||
for _, p := range scenes {
|
for _, p := range scenes {
|
||||||
if nameMatchesPath(name, p.Path) != -1 {
|
if regexpMatchesPath(r, p.Path) != -1 {
|
||||||
ret = append(ret, p)
|
ret = append(ret, p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -240,8 +284,13 @@ func PathToImages(name string, paths []string, imageReader models.ImageReader) (
|
|||||||
}
|
}
|
||||||
|
|
||||||
var ret []*models.Image
|
var ret []*models.Image
|
||||||
|
|
||||||
|
// paths may have unicode characters
|
||||||
|
const useUnicode = true
|
||||||
|
|
||||||
|
r := nameToRegexp(name, useUnicode)
|
||||||
for _, p := range images {
|
for _, p := range images {
|
||||||
if nameMatchesPath(name, p.Path) != -1 {
|
if regexpMatchesPath(r, p.Path) != -1 {
|
||||||
ret = append(ret, p)
|
ret = append(ret, p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -272,8 +321,13 @@ func PathToGalleries(name string, paths []string, galleryReader models.GalleryRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
var ret []*models.Gallery
|
var ret []*models.Gallery
|
||||||
|
|
||||||
|
// paths may have unicode characters
|
||||||
|
const useUnicode = true
|
||||||
|
|
||||||
|
r := nameToRegexp(name, useUnicode)
|
||||||
for _, p := range gallerys {
|
for _, p := range gallerys {
|
||||||
if nameMatchesPath(name, p.Path.String) != -1 {
|
if regexpMatchesPath(r, p.Path.String) != -1 {
|
||||||
ret = append(ret, p)
|
ret = append(ret, p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,11 @@ WHERE performers_tags.tag_id = ?
|
|||||||
GROUP BY performers_tags.performer_id
|
GROUP BY performers_tags.performer_id
|
||||||
`
|
`
|
||||||
|
|
||||||
const singleFirstCharacterRegex = `^[\w\p{L}][.\-_ ]`
|
// KNOWN ISSUE: using \p{L} to find single unicode character names results in
|
||||||
|
// very slow queries.
|
||||||
|
// Suggested solution will be to cache single-character names and not include them
|
||||||
|
// in the autotag query.
|
||||||
|
const singleFirstCharacterRegex = `^[\w][.\-_ ]`
|
||||||
|
|
||||||
type performerQueryBuilder struct {
|
type performerQueryBuilder struct {
|
||||||
repository
|
repository
|
||||||
|
|||||||
Reference in New Issue
Block a user