diff --git a/pkg/autotag/gallery.go b/pkg/autotag/gallery.go index d35b0b05f..603e3e36a 100644 --- a/pkg/autotag/gallery.go +++ b/pkg/autotag/gallery.go @@ -2,21 +2,23 @@ package autotag import ( "github.com/stashapp/stash/pkg/gallery" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" ) -func getGalleryFileTagger(s *models.Gallery) tagger { +func getGalleryFileTagger(s *models.Gallery, cache *match.Cache) tagger { return tagger{ - ID: s.ID, - Type: "gallery", - Name: s.GetTitle(), - Path: s.Path.String, + ID: s.ID, + Type: "gallery", + Name: s.GetTitle(), + Path: s.Path.String, + cache: cache, } } // GalleryPerformers tags the provided gallery with performers whose name matches the gallery's path. -func GalleryPerformers(s *models.Gallery, rw models.GalleryReaderWriter, performerReader models.PerformerReader) error { - t := getGalleryFileTagger(s) +func GalleryPerformers(s *models.Gallery, rw models.GalleryReaderWriter, performerReader models.PerformerReader, cache *match.Cache) error { + t := getGalleryFileTagger(s, cache) return t.tagPerformers(performerReader, func(subjectID, otherID int) (bool, error) { return gallery.AddPerformer(rw, subjectID, otherID) @@ -26,13 +28,13 @@ func GalleryPerformers(s *models.Gallery, rw models.GalleryReaderWriter, perform // GalleryStudios tags the provided gallery with the first studio whose name matches the gallery's path. // // Gallerys will not be tagged if studio is already set. 
-func GalleryStudios(s *models.Gallery, rw models.GalleryReaderWriter, studioReader models.StudioReader) error { +func GalleryStudios(s *models.Gallery, rw models.GalleryReaderWriter, studioReader models.StudioReader, cache *match.Cache) error { if s.StudioID.Valid { // don't modify return nil } - t := getGalleryFileTagger(s) + t := getGalleryFileTagger(s, cache) return t.tagStudios(studioReader, func(subjectID, otherID int) (bool, error) { return addGalleryStudio(rw, subjectID, otherID) @@ -40,8 +42,8 @@ func GalleryStudios(s *models.Gallery, rw models.GalleryReaderWriter, studioRead } // GalleryTags tags the provided gallery with tags whose name matches the gallery's path. -func GalleryTags(s *models.Gallery, rw models.GalleryReaderWriter, tagReader models.TagReader) error { - t := getGalleryFileTagger(s) +func GalleryTags(s *models.Gallery, rw models.GalleryReaderWriter, tagReader models.TagReader, cache *match.Cache) error { + t := getGalleryFileTagger(s, cache) return t.tagTags(tagReader, func(subjectID, otherID int) (bool, error) { return gallery.AddTag(rw, subjectID, otherID) diff --git a/pkg/autotag/gallery_test.go b/pkg/autotag/gallery_test.go index f2f498519..6d744400a 100644 --- a/pkg/autotag/gallery_test.go +++ b/pkg/autotag/gallery_test.go @@ -37,6 +37,7 @@ func TestGalleryPerformers(t *testing.T) { mockPerformerReader := &mocks.PerformerReaderWriter{} mockGalleryReader := &mocks.GalleryReaderWriter{} + mockPerformerReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockPerformerReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Performer{&performer, &reversedPerformer}, nil).Once() if test.Matches { @@ -48,7 +49,7 @@ func TestGalleryPerformers(t *testing.T) { ID: galleryID, Path: models.NullString(test.Path), } - err := GalleryPerformers(&gallery, mockGalleryReader, mockPerformerReader) + err := GalleryPerformers(&gallery, mockGalleryReader, mockPerformerReader, nil) assert.Nil(err) mockPerformerReader.AssertExpectations(t) 
@@ -92,7 +93,7 @@ func TestGalleryStudios(t *testing.T) { ID: galleryID, Path: models.NullString(test.Path), } - err := GalleryStudios(&gallery, mockGalleryReader, mockStudioReader) + err := GalleryStudios(&gallery, mockGalleryReader, mockStudioReader, nil) assert.Nil(err) mockStudioReader.AssertExpectations(t) @@ -103,6 +104,7 @@ func TestGalleryStudios(t *testing.T) { mockStudioReader := &mocks.StudioReaderWriter{} mockGalleryReader := &mocks.GalleryReaderWriter{} + mockStudioReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockStudioReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Studio{&studio, &reversedStudio}, nil).Once() mockStudioReader.On("GetAliases", mock.Anything).Return([]string{}, nil).Maybe() @@ -117,6 +119,7 @@ func TestGalleryStudios(t *testing.T) { mockStudioReader := &mocks.StudioReaderWriter{} mockGalleryReader := &mocks.GalleryReaderWriter{} + mockStudioReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockStudioReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Studio{&studio, &reversedStudio}, nil).Once() mockStudioReader.On("GetAliases", studioID).Return([]string{ studioName, @@ -159,7 +162,7 @@ func TestGalleryTags(t *testing.T) { ID: galleryID, Path: models.NullString(test.Path), } - err := GalleryTags(&gallery, mockGalleryReader, mockTagReader) + err := GalleryTags(&gallery, mockGalleryReader, mockTagReader, nil) assert.Nil(err) mockTagReader.AssertExpectations(t) @@ -170,6 +173,7 @@ func TestGalleryTags(t *testing.T) { mockTagReader := &mocks.TagReaderWriter{} mockGalleryReader := &mocks.GalleryReaderWriter{} + mockTagReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockTagReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Tag{&tag, &reversedTag}, nil).Once() mockTagReader.On("GetAliases", mock.Anything).Return([]string{}, nil).Maybe() @@ -183,6 +187,7 @@ func TestGalleryTags(t *testing.T) { mockTagReader := &mocks.TagReaderWriter{} 
mockGalleryReader := &mocks.GalleryReaderWriter{} + mockTagReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockTagReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Tag{&tag, &reversedTag}, nil).Once() mockTagReader.On("GetAliases", tagID).Return([]string{ tagName, diff --git a/pkg/autotag/image.go b/pkg/autotag/image.go index 21745897c..516f30181 100644 --- a/pkg/autotag/image.go +++ b/pkg/autotag/image.go @@ -2,21 +2,23 @@ package autotag import ( "github.com/stashapp/stash/pkg/image" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" ) -func getImageFileTagger(s *models.Image) tagger { +func getImageFileTagger(s *models.Image, cache *match.Cache) tagger { return tagger{ - ID: s.ID, - Type: "image", - Name: s.GetTitle(), - Path: s.Path, + ID: s.ID, + Type: "image", + Name: s.GetTitle(), + Path: s.Path, + cache: cache, } } // ImagePerformers tags the provided image with performers whose name matches the image's path. -func ImagePerformers(s *models.Image, rw models.ImageReaderWriter, performerReader models.PerformerReader) error { - t := getImageFileTagger(s) +func ImagePerformers(s *models.Image, rw models.ImageReaderWriter, performerReader models.PerformerReader, cache *match.Cache) error { + t := getImageFileTagger(s, cache) return t.tagPerformers(performerReader, func(subjectID, otherID int) (bool, error) { return image.AddPerformer(rw, subjectID, otherID) @@ -26,13 +28,13 @@ func ImagePerformers(s *models.Image, rw models.ImageReaderWriter, performerRead // ImageStudios tags the provided image with the first studio whose name matches the image's path. // // Images will not be tagged if studio is already set. 
-func ImageStudios(s *models.Image, rw models.ImageReaderWriter, studioReader models.StudioReader) error { +func ImageStudios(s *models.Image, rw models.ImageReaderWriter, studioReader models.StudioReader, cache *match.Cache) error { if s.StudioID.Valid { // don't modify return nil } - t := getImageFileTagger(s) + t := getImageFileTagger(s, cache) return t.tagStudios(studioReader, func(subjectID, otherID int) (bool, error) { return addImageStudio(rw, subjectID, otherID) @@ -40,8 +42,8 @@ func ImageStudios(s *models.Image, rw models.ImageReaderWriter, studioReader mod } // ImageTags tags the provided image with tags whose name matches the image's path. -func ImageTags(s *models.Image, rw models.ImageReaderWriter, tagReader models.TagReader) error { - t := getImageFileTagger(s) +func ImageTags(s *models.Image, rw models.ImageReaderWriter, tagReader models.TagReader, cache *match.Cache) error { + t := getImageFileTagger(s, cache) return t.tagTags(tagReader, func(subjectID, otherID int) (bool, error) { return image.AddTag(rw, subjectID, otherID) diff --git a/pkg/autotag/image_test.go b/pkg/autotag/image_test.go index 68b33ab74..130ce51af 100644 --- a/pkg/autotag/image_test.go +++ b/pkg/autotag/image_test.go @@ -37,6 +37,7 @@ func TestImagePerformers(t *testing.T) { mockPerformerReader := &mocks.PerformerReaderWriter{} mockImageReader := &mocks.ImageReaderWriter{} + mockPerformerReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockPerformerReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Performer{&performer, &reversedPerformer}, nil).Once() if test.Matches { @@ -48,7 +49,7 @@ func TestImagePerformers(t *testing.T) { ID: imageID, Path: test.Path, } - err := ImagePerformers(&image, mockImageReader, mockPerformerReader) + err := ImagePerformers(&image, mockImageReader, mockPerformerReader, nil) assert.Nil(err) mockPerformerReader.AssertExpectations(t) @@ -92,7 +93,7 @@ func TestImageStudios(t *testing.T) { ID: imageID, Path: test.Path, } - 
err := ImageStudios(&image, mockImageReader, mockStudioReader) + err := ImageStudios(&image, mockImageReader, mockStudioReader, nil) assert.Nil(err) mockStudioReader.AssertExpectations(t) @@ -103,6 +104,7 @@ func TestImageStudios(t *testing.T) { mockStudioReader := &mocks.StudioReaderWriter{} mockImageReader := &mocks.ImageReaderWriter{} + mockStudioReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockStudioReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Studio{&studio, &reversedStudio}, nil).Once() mockStudioReader.On("GetAliases", mock.Anything).Return([]string{}, nil).Maybe() @@ -117,6 +119,7 @@ func TestImageStudios(t *testing.T) { mockStudioReader := &mocks.StudioReaderWriter{} mockImageReader := &mocks.ImageReaderWriter{} + mockStudioReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockStudioReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Studio{&studio, &reversedStudio}, nil).Once() mockStudioReader.On("GetAliases", studioID).Return([]string{ studioName, @@ -159,7 +162,7 @@ func TestImageTags(t *testing.T) { ID: imageID, Path: test.Path, } - err := ImageTags(&image, mockImageReader, mockTagReader) + err := ImageTags(&image, mockImageReader, mockTagReader, nil) assert.Nil(err) mockTagReader.AssertExpectations(t) @@ -170,6 +173,7 @@ func TestImageTags(t *testing.T) { mockTagReader := &mocks.TagReaderWriter{} mockImageReader := &mocks.ImageReaderWriter{} + mockTagReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockTagReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Tag{&tag, &reversedTag}, nil).Once() mockTagReader.On("GetAliases", mock.Anything).Return([]string{}, nil).Maybe() @@ -184,6 +188,7 @@ func TestImageTags(t *testing.T) { mockTagReader := &mocks.TagReaderWriter{} mockImageReader := &mocks.ImageReaderWriter{} + mockTagReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockTagReader.On("QueryForAutoTag", 
mock.Anything).Return([]*models.Tag{&tag, &reversedTag}, nil).Once() mockTagReader.On("GetAliases", tagID).Return([]string{ tagName, diff --git a/pkg/autotag/integration_test.go b/pkg/autotag/integration_test.go index 588124509..d288c9e28 100644 --- a/pkg/autotag/integration_test.go +++ b/pkg/autotag/integration_test.go @@ -361,7 +361,7 @@ func TestParsePerformerScenes(t *testing.T) { for _, p := range performers { if err := withTxn(func(r models.Repository) error { - return PerformerScenes(p, nil, r.Scene()) + return PerformerScenes(p, nil, r.Scene(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -413,7 +413,7 @@ func TestParseStudioScenes(t *testing.T) { return err } - return StudioScenes(s, nil, aliases, r.Scene()) + return StudioScenes(s, nil, aliases, r.Scene(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -469,7 +469,7 @@ func TestParseTagScenes(t *testing.T) { return err } - return TagScenes(s, nil, aliases, r.Scene()) + return TagScenes(s, nil, aliases, r.Scene(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -516,7 +516,7 @@ func TestParsePerformerImages(t *testing.T) { for _, p := range performers { if err := withTxn(func(r models.Repository) error { - return PerformerImages(p, nil, r.Image()) + return PerformerImages(p, nil, r.Image(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -568,7 +568,7 @@ func TestParseStudioImages(t *testing.T) { return err } - return StudioImages(s, nil, aliases, r.Image()) + return StudioImages(s, nil, aliases, r.Image(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -624,7 +624,7 @@ func TestParseTagImages(t *testing.T) { return err } - return TagImages(s, nil, aliases, r.Image()) + return TagImages(s, nil, aliases, r.Image(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -671,7 +671,7 @@ func TestParsePerformerGalleries(t *testing.T) 
{ for _, p := range performers { if err := withTxn(func(r models.Repository) error { - return PerformerGalleries(p, nil, r.Gallery()) + return PerformerGalleries(p, nil, r.Gallery(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -723,7 +723,7 @@ func TestParseStudioGalleries(t *testing.T) { return err } - return StudioGalleries(s, nil, aliases, r.Gallery()) + return StudioGalleries(s, nil, aliases, r.Gallery(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } @@ -779,7 +779,7 @@ func TestParseTagGalleries(t *testing.T) { return err } - return TagGalleries(s, nil, aliases, r.Gallery()) + return TagGalleries(s, nil, aliases, r.Gallery(), nil) }); err != nil { t.Errorf("Error auto-tagging performers: %s", err) } diff --git a/pkg/autotag/performer.go b/pkg/autotag/performer.go index 77ec0f558..a6c89466a 100644 --- a/pkg/autotag/performer.go +++ b/pkg/autotag/performer.go @@ -3,21 +3,23 @@ package autotag import ( "github.com/stashapp/stash/pkg/gallery" "github.com/stashapp/stash/pkg/image" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/scene" ) -func getPerformerTagger(p *models.Performer) tagger { +func getPerformerTagger(p *models.Performer, cache *match.Cache) tagger { return tagger{ - ID: p.ID, - Type: "performer", - Name: p.Name.String, + ID: p.ID, + Type: "performer", + Name: p.Name.String, + cache: cache, } } // PerformerScenes searches for scenes whose path matches the provided performer name and tags the scene with the performer. 
-func PerformerScenes(p *models.Performer, paths []string, rw models.SceneReaderWriter) error { - t := getPerformerTagger(p) +func PerformerScenes(p *models.Performer, paths []string, rw models.SceneReaderWriter, cache *match.Cache) error { + t := getPerformerTagger(p, cache) return t.tagScenes(paths, rw, func(subjectID, otherID int) (bool, error) { return scene.AddPerformer(rw, otherID, subjectID) @@ -25,8 +27,8 @@ func PerformerScenes(p *models.Performer, paths []string, rw models.SceneReaderW } // PerformerImages searches for images whose path matches the provided performer name and tags the image with the performer. -func PerformerImages(p *models.Performer, paths []string, rw models.ImageReaderWriter) error { - t := getPerformerTagger(p) +func PerformerImages(p *models.Performer, paths []string, rw models.ImageReaderWriter, cache *match.Cache) error { + t := getPerformerTagger(p, cache) return t.tagImages(paths, rw, func(subjectID, otherID int) (bool, error) { return image.AddPerformer(rw, otherID, subjectID) @@ -34,8 +36,8 @@ func PerformerImages(p *models.Performer, paths []string, rw models.ImageReaderW } // PerformerGalleries searches for galleries whose path matches the provided performer name and tags the gallery with the performer. 
-func PerformerGalleries(p *models.Performer, paths []string, rw models.GalleryReaderWriter) error { - t := getPerformerTagger(p) +func PerformerGalleries(p *models.Performer, paths []string, rw models.GalleryReaderWriter, cache *match.Cache) error { + t := getPerformerTagger(p, cache) return t.tagGalleries(paths, rw, func(subjectID, otherID int) (bool, error) { return gallery.AddPerformer(rw, otherID, subjectID) diff --git a/pkg/autotag/performer_test.go b/pkg/autotag/performer_test.go index 0dc616de5..31befd76a 100644 --- a/pkg/autotag/performer_test.go +++ b/pkg/autotag/performer_test.go @@ -21,15 +21,15 @@ func TestPerformerScenes(t *testing.T) { performerNames := []test{ { "performer name", - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { "performer + name", - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { `performer + name\`, - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\p{L}\d])`, }, } @@ -81,7 +81,7 @@ func testPerformerScenes(t *testing.T, performerName, expectedRegex string) { mockSceneReader.On("UpdatePerformers", sceneID, []int{performerID}).Return(nil).Once() } - err := PerformerScenes(&performer, nil, mockSceneReader) + err := PerformerScenes(&performer, nil, mockSceneReader, nil) assert := assert.New(t) @@ -100,11 +100,11 @@ func TestPerformerImages(t *testing.T) { performerNames := []test{ { "performer name", - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { "performer + name", - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, } @@ -156,7 +156,7 @@ func testPerformerImages(t *testing.T, 
performerName, expectedRegex string) { mockImageReader.On("UpdatePerformers", imageID, []int{performerID}).Return(nil).Once() } - err := PerformerImages(&performer, nil, mockImageReader) + err := PerformerImages(&performer, nil, mockImageReader, nil) assert := assert.New(t) @@ -175,11 +175,11 @@ func TestPerformerGalleries(t *testing.T) { performerNames := []test{ { "performer name", - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { "performer + name", - `(?i)(?:^|_|[^\w\d])performer[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])performer[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, } @@ -230,7 +230,7 @@ func testPerformerGalleries(t *testing.T, performerName, expectedRegex string) { mockGalleryReader.On("UpdatePerformers", galleryID, []int{performerID}).Return(nil).Once() } - err := PerformerGalleries(&performer, nil, mockGalleryReader) + err := PerformerGalleries(&performer, nil, mockGalleryReader, nil) assert := assert.New(t) diff --git a/pkg/autotag/scene.go b/pkg/autotag/scene.go index aca523cb9..cfdcaf393 100644 --- a/pkg/autotag/scene.go +++ b/pkg/autotag/scene.go @@ -1,22 +1,24 @@ package autotag import ( + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/scene" ) -func getSceneFileTagger(s *models.Scene) tagger { +func getSceneFileTagger(s *models.Scene, cache *match.Cache) tagger { return tagger{ - ID: s.ID, - Type: "scene", - Name: s.GetTitle(), - Path: s.Path, + ID: s.ID, + Type: "scene", + Name: s.GetTitle(), + Path: s.Path, + cache: cache, } } // ScenePerformers tags the provided scene with performers whose name matches the scene's path. 
-func ScenePerformers(s *models.Scene, rw models.SceneReaderWriter, performerReader models.PerformerReader) error { - t := getSceneFileTagger(s) +func ScenePerformers(s *models.Scene, rw models.SceneReaderWriter, performerReader models.PerformerReader, cache *match.Cache) error { + t := getSceneFileTagger(s, cache) return t.tagPerformers(performerReader, func(subjectID, otherID int) (bool, error) { return scene.AddPerformer(rw, subjectID, otherID) @@ -26,13 +28,13 @@ func ScenePerformers(s *models.Scene, rw models.SceneReaderWriter, performerRead // SceneStudios tags the provided scene with the first studio whose name matches the scene's path. // // Scenes will not be tagged if studio is already set. -func SceneStudios(s *models.Scene, rw models.SceneReaderWriter, studioReader models.StudioReader) error { +func SceneStudios(s *models.Scene, rw models.SceneReaderWriter, studioReader models.StudioReader, cache *match.Cache) error { if s.StudioID.Valid { // don't modify return nil } - t := getSceneFileTagger(s) + t := getSceneFileTagger(s, cache) return t.tagStudios(studioReader, func(subjectID, otherID int) (bool, error) { return addSceneStudio(rw, subjectID, otherID) @@ -40,8 +42,8 @@ func SceneStudios(s *models.Scene, rw models.SceneReaderWriter, studioReader mod } // SceneTags tags the provided scene with tags whose name matches the scene's path. 
-func SceneTags(s *models.Scene, rw models.SceneReaderWriter, tagReader models.TagReader) error { - t := getSceneFileTagger(s) +func SceneTags(s *models.Scene, rw models.SceneReaderWriter, tagReader models.TagReader, cache *match.Cache) error { + t := getSceneFileTagger(s, cache) return t.tagTags(tagReader, func(subjectID, otherID int) (bool, error) { return scene.AddTag(rw, subjectID, otherID) diff --git a/pkg/autotag/scene_test.go b/pkg/autotag/scene_test.go index 67e727a1b..190b16b8e 100644 --- a/pkg/autotag/scene_test.go +++ b/pkg/autotag/scene_test.go @@ -172,6 +172,7 @@ func TestScenePerformers(t *testing.T) { mockPerformerReader := &mocks.PerformerReaderWriter{} mockSceneReader := &mocks.SceneReaderWriter{} + mockPerformerReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockPerformerReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Performer{&performer, &reversedPerformer}, nil).Once() if test.Matches { @@ -183,7 +184,7 @@ func TestScenePerformers(t *testing.T) { ID: sceneID, Path: test.Path, } - err := ScenePerformers(&scene, mockSceneReader, mockPerformerReader) + err := ScenePerformers(&scene, mockSceneReader, mockPerformerReader, nil) assert.Nil(err) mockPerformerReader.AssertExpectations(t) @@ -227,7 +228,7 @@ func TestSceneStudios(t *testing.T) { ID: sceneID, Path: test.Path, } - err := SceneStudios(&scene, mockSceneReader, mockStudioReader) + err := SceneStudios(&scene, mockSceneReader, mockStudioReader, nil) assert.Nil(err) mockStudioReader.AssertExpectations(t) @@ -238,6 +239,7 @@ func TestSceneStudios(t *testing.T) { mockStudioReader := &mocks.StudioReaderWriter{} mockSceneReader := &mocks.SceneReaderWriter{} + mockStudioReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockStudioReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Studio{&studio, &reversedStudio}, nil).Once() mockStudioReader.On("GetAliases", mock.Anything).Return([]string{}, nil).Maybe() @@ -252,6 +254,7 @@ func 
TestSceneStudios(t *testing.T) { mockStudioReader := &mocks.StudioReaderWriter{} mockSceneReader := &mocks.SceneReaderWriter{} + mockStudioReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockStudioReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Studio{&studio, &reversedStudio}, nil).Once() mockStudioReader.On("GetAliases", studioID).Return([]string{ studioName, @@ -294,7 +297,7 @@ func TestSceneTags(t *testing.T) { ID: sceneID, Path: test.Path, } - err := SceneTags(&scene, mockSceneReader, mockTagReader) + err := SceneTags(&scene, mockSceneReader, mockTagReader, nil) assert.Nil(err) mockTagReader.AssertExpectations(t) @@ -305,6 +308,7 @@ func TestSceneTags(t *testing.T) { mockTagReader := &mocks.TagReaderWriter{} mockSceneReader := &mocks.SceneReaderWriter{} + mockTagReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockTagReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Tag{&tag, &reversedTag}, nil).Once() mockTagReader.On("GetAliases", mock.Anything).Return([]string{}, nil).Maybe() @@ -319,6 +323,7 @@ func TestSceneTags(t *testing.T) { mockTagReader := &mocks.TagReaderWriter{} mockSceneReader := &mocks.SceneReaderWriter{} + mockTagReader.On("Query", mock.Anything, mock.Anything).Return(nil, 0, nil) mockTagReader.On("QueryForAutoTag", mock.Anything).Return([]*models.Tag{&tag, &reversedTag}, nil).Once() mockTagReader.On("GetAliases", tagID).Return([]string{ tagName, diff --git a/pkg/autotag/studio.go b/pkg/autotag/studio.go index 635050df7..4a02e7305 100644 --- a/pkg/autotag/studio.go +++ b/pkg/autotag/studio.go @@ -3,6 +3,7 @@ package autotag import ( "database/sql" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" ) @@ -78,11 +79,12 @@ func addGalleryStudio(galleryWriter models.GalleryReaderWriter, galleryID, studi return true, nil } -func getStudioTagger(p *models.Studio, aliases []string) []tagger { +func getStudioTagger(p *models.Studio, aliases []string, cache 
*match.Cache) []tagger { ret := []tagger{{ - ID: p.ID, - Type: "studio", - Name: p.Name.String, + ID: p.ID, + Type: "studio", + Name: p.Name.String, + cache: cache, }} for _, a := range aliases { @@ -97,8 +99,8 @@ func getStudioTagger(p *models.Studio, aliases []string) []tagger { } // StudioScenes searches for scenes whose path matches the provided studio name and tags the scene with the studio, if studio is not already set on the scene. -func StudioScenes(p *models.Studio, paths []string, aliases []string, rw models.SceneReaderWriter) error { - t := getStudioTagger(p, aliases) +func StudioScenes(p *models.Studio, paths []string, aliases []string, rw models.SceneReaderWriter, cache *match.Cache) error { + t := getStudioTagger(p, aliases, cache) for _, tt := range t { if err := tt.tagScenes(paths, rw, func(subjectID, otherID int) (bool, error) { @@ -112,8 +114,8 @@ func StudioScenes(p *models.Studio, paths []string, aliases []string, rw models. } // StudioImages searches for images whose path matches the provided studio name and tags the image with the studio, if studio is not already set on the image. -func StudioImages(p *models.Studio, paths []string, aliases []string, rw models.ImageReaderWriter) error { - t := getStudioTagger(p, aliases) +func StudioImages(p *models.Studio, paths []string, aliases []string, rw models.ImageReaderWriter, cache *match.Cache) error { + t := getStudioTagger(p, aliases, cache) for _, tt := range t { if err := tt.tagImages(paths, rw, func(subjectID, otherID int) (bool, error) { @@ -127,8 +129,8 @@ func StudioImages(p *models.Studio, paths []string, aliases []string, rw models. } // StudioGalleries searches for galleries whose path matches the provided studio name and tags the gallery with the studio, if studio is not already set on the gallery. 
-func StudioGalleries(p *models.Studio, paths []string, aliases []string, rw models.GalleryReaderWriter) error { - t := getStudioTagger(p, aliases) +func StudioGalleries(p *models.Studio, paths []string, aliases []string, rw models.GalleryReaderWriter, cache *match.Cache) error { + t := getStudioTagger(p, aliases, cache) for _, tt := range t { if err := tt.tagGalleries(paths, rw, func(subjectID, otherID int) (bool, error) { diff --git a/pkg/autotag/studio_test.go b/pkg/autotag/studio_test.go index ca6a1a9ff..76d7e7db5 100644 --- a/pkg/autotag/studio_test.go +++ b/pkg/autotag/studio_test.go @@ -20,39 +20,39 @@ type testStudioCase struct { var testStudioCases = []testStudioCase{ { "studio name", - `(?i)(?:^|_|[^\w\d])studio[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])studio[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "", "", }, { "studio + name", - `(?i)(?:^|_|[^\w\d])studio[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])studio[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "", "", }, { `studio + name\`, - `(?i)(?:^|_|[^\w\d])studio[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])studio[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\p{L}\d])`, "", "", }, { "studio name", - `(?i)(?:^|_|[^\w\d])studio[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])studio[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "alias name", - `(?i)(?:^|_|[^\w\d])alias[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])alias[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { "studio + name", - `(?i)(?:^|_|[^\w\d])studio[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])studio[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "alias + name", - `(?i)(?:^|_|[^\w\d])alias[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])alias[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { `studio + name\`, - `(?i)(?:^|_|[^\w\d])studio[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])studio[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\p{L}\d])`, `alias + name\`, - `(?i)(?:^|_|[^\w\d])alias[.\-_ 
]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])alias[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\p{L}\d])`, }, } @@ -142,7 +142,7 @@ func testStudioScenes(t *testing.T, tc testStudioCase) { }).Return(nil, nil).Once() } - err := StudioScenes(&studio, nil, aliases, mockSceneReader) + err := StudioScenes(&studio, nil, aliases, mockSceneReader, nil) assert := assert.New(t) @@ -234,7 +234,7 @@ func testStudioImages(t *testing.T, tc testStudioCase) { }).Return(nil, nil).Once() } - err := StudioImages(&studio, nil, aliases, mockImageReader) + err := StudioImages(&studio, nil, aliases, mockImageReader, nil) assert := assert.New(t) @@ -324,7 +324,7 @@ func testStudioGalleries(t *testing.T, tc testStudioCase) { }).Return(nil, nil).Once() } - err := StudioGalleries(&studio, nil, aliases, mockGalleryReader) + err := StudioGalleries(&studio, nil, aliases, mockGalleryReader, nil) assert := assert.New(t) diff --git a/pkg/autotag/tag.go b/pkg/autotag/tag.go index 78e12b766..f0d080871 100644 --- a/pkg/autotag/tag.go +++ b/pkg/autotag/tag.go @@ -3,22 +3,25 @@ package autotag import ( "github.com/stashapp/stash/pkg/gallery" "github.com/stashapp/stash/pkg/image" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/scene" ) -func getTagTaggers(p *models.Tag, aliases []string) []tagger { +func getTagTaggers(p *models.Tag, aliases []string, cache *match.Cache) []tagger { ret := []tagger{{ - ID: p.ID, - Type: "tag", - Name: p.Name, + ID: p.ID, + Type: "tag", + Name: p.Name, + cache: cache, }} for _, a := range aliases { ret = append(ret, tagger{ - ID: p.ID, - Type: "tag", - Name: a, + ID: p.ID, + Type: "tag", + Name: a, + cache: cache, }) } @@ -26,8 +29,8 @@ func getTagTaggers(p *models.Tag, aliases []string) []tagger { } // TagScenes searches for scenes whose path matches the provided tag name and tags the scene with the tag. 
-func TagScenes(p *models.Tag, paths []string, aliases []string, rw models.SceneReaderWriter) error { - t := getTagTaggers(p, aliases) +func TagScenes(p *models.Tag, paths []string, aliases []string, rw models.SceneReaderWriter, cache *match.Cache) error { + t := getTagTaggers(p, aliases, cache) for _, tt := range t { if err := tt.tagScenes(paths, rw, func(subjectID, otherID int) (bool, error) { @@ -40,8 +43,8 @@ func TagScenes(p *models.Tag, paths []string, aliases []string, rw models.SceneR } // TagImages searches for images whose path matches the provided tag name and tags the image with the tag. -func TagImages(p *models.Tag, paths []string, aliases []string, rw models.ImageReaderWriter) error { - t := getTagTaggers(p, aliases) +func TagImages(p *models.Tag, paths []string, aliases []string, rw models.ImageReaderWriter, cache *match.Cache) error { + t := getTagTaggers(p, aliases, cache) for _, tt := range t { if err := tt.tagImages(paths, rw, func(subjectID, otherID int) (bool, error) { @@ -54,8 +57,8 @@ func TagImages(p *models.Tag, paths []string, aliases []string, rw models.ImageR } // TagGalleries searches for galleries whose path matches the provided tag name and tags the gallery with the tag. 
-func TagGalleries(p *models.Tag, paths []string, aliases []string, rw models.GalleryReaderWriter) error { - t := getTagTaggers(p, aliases) +func TagGalleries(p *models.Tag, paths []string, aliases []string, rw models.GalleryReaderWriter, cache *match.Cache) error { + t := getTagTaggers(p, aliases, cache) for _, tt := range t { if err := tt.tagGalleries(paths, rw, func(subjectID, otherID int) (bool, error) { diff --git a/pkg/autotag/tag_test.go b/pkg/autotag/tag_test.go index 3bc9c4cca..a1eed1eab 100644 --- a/pkg/autotag/tag_test.go +++ b/pkg/autotag/tag_test.go @@ -20,39 +20,39 @@ type testTagCase struct { var testTagCases = []testTagCase{ { "tag name", - `(?i)(?:^|_|[^\w\d])tag[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])tag[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "", "", }, { "tag + name", - `(?i)(?:^|_|[^\w\d])tag[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])tag[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "", "", }, { `tag + name\`, - `(?i)(?:^|_|[^\w\d])tag[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])tag[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\p{L}\d])`, "", "", }, { "tag name", - `(?i)(?:^|_|[^\w\d])tag[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])tag[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "alias name", - `(?i)(?:^|_|[^\w\d])alias[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])alias[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { "tag + name", - `(?i)(?:^|_|[^\w\d])tag[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])tag[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, "alias + name", - `(?i)(?:^|_|[^\w\d])alias[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])alias[.\-_ ]*\+[.\-_ ]*name(?:$|_|[^\p{L}\d])`, }, { `tag + name\`, - `(?i)(?:^|_|[^\w\d])tag[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])tag[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\p{L}\d])`, `alias + name\`, - `(?i)(?:^|_|[^\w\d])alias[.\-_ ]*\+[.\-_ ]*name\\(?:$|_|[^\w\d])`, + `(?i)(?:^|_|[^\p{L}\d])alias[.\-_ ]*\+[.\-_ 
]*name\\(?:$|_|[^\p{L}\d])`, }, } @@ -137,7 +137,7 @@ func testTagScenes(t *testing.T, tc testTagCase) { mockSceneReader.On("UpdateTags", sceneID, []int{tagID}).Return(nil).Once() } - err := TagScenes(&tag, nil, aliases, mockSceneReader) + err := TagScenes(&tag, nil, aliases, mockSceneReader, nil) assert := assert.New(t) @@ -225,7 +225,7 @@ func testTagImages(t *testing.T, tc testTagCase) { mockImageReader.On("UpdateTags", imageID, []int{tagID}).Return(nil).Once() } - err := TagImages(&tag, nil, aliases, mockImageReader) + err := TagImages(&tag, nil, aliases, mockImageReader, nil) assert := assert.New(t) @@ -312,7 +312,7 @@ func testTagGalleries(t *testing.T, tc testTagCase) { mockGalleryReader.On("UpdateTags", galleryID, []int{tagID}).Return(nil).Once() } - err := TagGalleries(&tag, nil, aliases, mockGalleryReader) + err := TagGalleries(&tag, nil, aliases, mockGalleryReader, nil) assert := assert.New(t) diff --git a/pkg/autotag/tagger.go b/pkg/autotag/tagger.go index 74ea86a41..624d29f5a 100644 --- a/pkg/autotag/tagger.go +++ b/pkg/autotag/tagger.go @@ -26,6 +26,8 @@ type tagger struct { Type string Name string Path string + + cache *match.Cache } type addLinkFunc func(subjectID, otherID int) (bool, error) @@ -39,7 +41,7 @@ func (t *tagger) addLog(otherType, otherName string) { } func (t *tagger) tagPerformers(performerReader models.PerformerReader, addFunc addLinkFunc) error { - others, err := match.PathToPerformers(t.Path, performerReader) + others, err := match.PathToPerformers(t.Path, performerReader, t.cache) if err != nil { return err } @@ -60,7 +62,7 @@ func (t *tagger) tagPerformers(performerReader models.PerformerReader, addFunc a } func (t *tagger) tagStudios(studioReader models.StudioReader, addFunc addLinkFunc) error { - studio, err := match.PathToStudio(t.Path, studioReader) + studio, err := match.PathToStudio(t.Path, studioReader, t.cache) if err != nil { return err } @@ -81,7 +83,7 @@ func (t *tagger) tagStudios(studioReader models.StudioReader, 
addFunc addLinkFun } func (t *tagger) tagTags(tagReader models.TagReader, addFunc addLinkFunc) error { - others, err := match.PathToTags(t.Path, tagReader) + others, err := match.PathToTags(t.Path, tagReader, t.cache) if err != nil { return err } diff --git a/pkg/database/functions.go b/pkg/database/functions.go index 69dc8c0fc..2971f1e22 100644 --- a/pkg/database/functions.go +++ b/pkg/database/functions.go @@ -1,15 +1,10 @@ package database import ( - "regexp" "strconv" "strings" ) -func regexFn(re, s string) (bool, error) { - return regexp.MatchString(re, s) -} - func durationToTinyIntFn(str string) (int64, error) { splits := strings.Split(str, ":") diff --git a/pkg/database/regex.go b/pkg/database/regex.go new file mode 100644 index 000000000..dc7b5feb5 --- /dev/null +++ b/pkg/database/regex.go @@ -0,0 +1,42 @@ +package database + +import ( + "regexp" + + lru "github.com/hashicorp/golang-lru" +) + +// size of the regex LRU cache in elements. +// A small number was chosen because its most likely use is for a +// single query - this function gets called for every row in the (filtered) +// results. It's likely to need no more than 1 or 2 in any given query. +// After that point, it's just sitting in the cache and is unlikely to be used +// again. +const regexCacheSize = 10 + +var regexCache *lru.Cache + +func init() { + regexCache, _ = lru.New(regexCacheSize) +} + +// regexFn is registered as an SQLite function as "regexp" +// It uses an LRU cache to cache recent regex patterns to reduce CPU load over +// identical patterns. 
+func regexFn(re, s string) (bool, error) { + entry, ok := regexCache.Get(re) + var compiled *regexp.Regexp + + if !ok { + var err error + compiled, err = regexp.Compile(re) + if err != nil { + return false, err + } + regexCache.Add(re, compiled) + } else { + compiled = entry.(*regexp.Regexp) + } + + return compiled.MatchString(s), nil +} diff --git a/pkg/manager/task_autotag.go b/pkg/manager/task_autotag.go index 421eff709..1fc15b39a 100644 --- a/pkg/manager/task_autotag.go +++ b/pkg/manager/task_autotag.go @@ -7,11 +7,13 @@ import ( "strconv" "strings" "sync" + "time" "github.com/stashapp/stash/pkg/autotag" "github.com/stashapp/stash/pkg/image" "github.com/stashapp/stash/pkg/job" "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/scene" ) @@ -19,9 +21,13 @@ import ( type autoTagJob struct { txnManager models.TransactionManager input models.AutoTagMetadataInput + + cache match.Cache } func (j *autoTagJob) Execute(ctx context.Context, progress *job.Progress) { + begin := time.Now() + input := j.input if j.isFileBasedAutoTag(input) { // doing file-based auto-tag @@ -30,6 +36,8 @@ func (j *autoTagJob) Execute(ctx context.Context, progress *job.Progress) { // doing specific performer/studio/tag auto-tag j.autoTagSpecific(ctx, progress) } + + logger.Infof("Finished autotag after %s", time.Since(begin).String()) } func (j *autoTagJob) isFileBasedAutoTag(input models.AutoTagMetadataInput) bool { @@ -50,6 +58,7 @@ func (j *autoTagJob) autoTagFiles(ctx context.Context, progress *job.Progress, p ctx: ctx, progress: progress, txnManager: j.txnManager, + cache: &j.cache, } t.process() @@ -105,8 +114,6 @@ func (j *autoTagJob) autoTagSpecific(ctx context.Context, progress *job.Progress j.autoTagPerformers(ctx, progress, input.Paths, performerIds) j.autoTagStudios(ctx, progress, input.Paths, studioIds) j.autoTagTags(ctx, progress, input.Paths, tagIds) - - logger.Info("Finished 
autotag") } func (j *autoTagJob) autoTagPerformers(ctx context.Context, progress *job.Progress, paths []string, performerIds []string) { @@ -150,13 +157,13 @@ func (j *autoTagJob) autoTagPerformers(ctx context.Context, progress *job.Progre } if err := j.txnManager.WithTxn(context.TODO(), func(r models.Repository) error { - if err := autotag.PerformerScenes(performer, paths, r.Scene()); err != nil { + if err := autotag.PerformerScenes(performer, paths, r.Scene(), &j.cache); err != nil { return err } - if err := autotag.PerformerImages(performer, paths, r.Image()); err != nil { + if err := autotag.PerformerImages(performer, paths, r.Image(), &j.cache); err != nil { return err } - if err := autotag.PerformerGalleries(performer, paths, r.Gallery()); err != nil { + if err := autotag.PerformerGalleries(performer, paths, r.Gallery(), &j.cache); err != nil { return err } @@ -222,13 +229,13 @@ func (j *autoTagJob) autoTagStudios(ctx context.Context, progress *job.Progress, return err } - if err := autotag.StudioScenes(studio, paths, aliases, r.Scene()); err != nil { + if err := autotag.StudioScenes(studio, paths, aliases, r.Scene(), &j.cache); err != nil { return err } - if err := autotag.StudioImages(studio, paths, aliases, r.Image()); err != nil { + if err := autotag.StudioImages(studio, paths, aliases, r.Image(), &j.cache); err != nil { return err } - if err := autotag.StudioGalleries(studio, paths, aliases, r.Gallery()); err != nil { + if err := autotag.StudioGalleries(studio, paths, aliases, r.Gallery(), &j.cache); err != nil { return err } @@ -288,13 +295,13 @@ func (j *autoTagJob) autoTagTags(ctx context.Context, progress *job.Progress, pa return err } - if err := autotag.TagScenes(tag, paths, aliases, r.Scene()); err != nil { + if err := autotag.TagScenes(tag, paths, aliases, r.Scene(), &j.cache); err != nil { return err } - if err := autotag.TagImages(tag, paths, aliases, r.Image()); err != nil { + if err := autotag.TagImages(tag, paths, aliases, r.Image(), 
&j.cache); err != nil { return err } - if err := autotag.TagGalleries(tag, paths, aliases, r.Gallery()); err != nil { + if err := autotag.TagGalleries(tag, paths, aliases, r.Gallery(), &j.cache); err != nil { return err } @@ -323,6 +330,7 @@ type autoTagFilesTask struct { ctx context.Context progress *job.Progress txnManager models.TransactionManager + cache *match.Cache } func (t *autoTagFilesTask) makeSceneFilter() *models.SceneFilterType { @@ -469,6 +477,7 @@ func (t *autoTagFilesTask) processScenes(r models.ReaderRepository) error { performers: t.performers, studios: t.studios, tags: t.tags, + cache: t.cache, } var wg sync.WaitGroup @@ -483,6 +492,10 @@ func (t *autoTagFilesTask) processScenes(r models.ReaderRepository) error { more = false } else { *findFilter.Page++ + + if *findFilter.Page%10 == 1 { + logger.Infof("Processed %d scenes...", (*findFilter.Page-1)*batchSize) + } } } @@ -517,6 +530,7 @@ func (t *autoTagFilesTask) processImages(r models.ReaderRepository) error { performers: t.performers, studios: t.studios, tags: t.tags, + cache: t.cache, } var wg sync.WaitGroup @@ -531,6 +545,10 @@ func (t *autoTagFilesTask) processImages(r models.ReaderRepository) error { more = false } else { *findFilter.Page++ + + if *findFilter.Page%10 == 1 { + logger.Infof("Processed %d images...", (*findFilter.Page-1)*batchSize) + } } } @@ -565,6 +583,7 @@ func (t *autoTagFilesTask) processGalleries(r models.ReaderRepository) error { performers: t.performers, studios: t.studios, tags: t.tags, + cache: t.cache, } var wg sync.WaitGroup @@ -579,6 +598,10 @@ func (t *autoTagFilesTask) processGalleries(r models.ReaderRepository) error { more = false } else { *findFilter.Page++ + + if *findFilter.Page%10 == 1 { + logger.Infof("Processed %d galleries...", (*findFilter.Page-1)*batchSize) + } } } @@ -596,14 +619,17 @@ func (t *autoTagFilesTask) process() { logger.Infof("Starting autotag of %d files", total) + logger.Info("Autotagging scenes...") if err := t.processScenes(r); err != 
nil { return err } + logger.Info("Autotagging images...") if err := t.processImages(r); err != nil { return err } + logger.Info("Autotagging galleries...") if err := t.processGalleries(r); err != nil { return err } @@ -616,8 +642,6 @@ func (t *autoTagFilesTask) process() { }); err != nil { logger.Error(err.Error()) } - - logger.Info("Finished autotag") } type autoTagSceneTask struct { @@ -627,23 +651,25 @@ type autoTagSceneTask struct { performers bool studios bool tags bool + + cache *match.Cache } func (t *autoTagSceneTask) Start(wg *sync.WaitGroup) { defer wg.Done() if err := t.txnManager.WithTxn(context.TODO(), func(r models.Repository) error { if t.performers { - if err := autotag.ScenePerformers(t.scene, r.Scene(), r.Performer()); err != nil { + if err := autotag.ScenePerformers(t.scene, r.Scene(), r.Performer(), t.cache); err != nil { return fmt.Errorf("error tagging scene performers for %s: %v", t.scene.Path, err) } } if t.studios { - if err := autotag.SceneStudios(t.scene, r.Scene(), r.Studio()); err != nil { + if err := autotag.SceneStudios(t.scene, r.Scene(), r.Studio(), t.cache); err != nil { return fmt.Errorf("error tagging scene studio for %s: %v", t.scene.Path, err) } } if t.tags { - if err := autotag.SceneTags(t.scene, r.Scene(), r.Tag()); err != nil { + if err := autotag.SceneTags(t.scene, r.Scene(), r.Tag(), t.cache); err != nil { return fmt.Errorf("error tagging scene tags for %s: %v", t.scene.Path, err) } } @@ -661,23 +687,25 @@ type autoTagImageTask struct { performers bool studios bool tags bool + + cache *match.Cache } func (t *autoTagImageTask) Start(wg *sync.WaitGroup) { defer wg.Done() if err := t.txnManager.WithTxn(context.TODO(), func(r models.Repository) error { if t.performers { - if err := autotag.ImagePerformers(t.image, r.Image(), r.Performer()); err != nil { + if err := autotag.ImagePerformers(t.image, r.Image(), r.Performer(), t.cache); err != nil { return fmt.Errorf("error tagging image performers for %s: %v", t.image.Path, err) 
} } if t.studios { - if err := autotag.ImageStudios(t.image, r.Image(), r.Studio()); err != nil { + if err := autotag.ImageStudios(t.image, r.Image(), r.Studio(), t.cache); err != nil { return fmt.Errorf("error tagging image studio for %s: %v", t.image.Path, err) } } if t.tags { - if err := autotag.ImageTags(t.image, r.Image(), r.Tag()); err != nil { + if err := autotag.ImageTags(t.image, r.Image(), r.Tag(), t.cache); err != nil { return fmt.Errorf("error tagging image tags for %s: %v", t.image.Path, err) } } @@ -695,23 +723,25 @@ type autoTagGalleryTask struct { performers bool studios bool tags bool + + cache *match.Cache } func (t *autoTagGalleryTask) Start(wg *sync.WaitGroup) { defer wg.Done() if err := t.txnManager.WithTxn(context.TODO(), func(r models.Repository) error { if t.performers { - if err := autotag.GalleryPerformers(t.gallery, r.Gallery(), r.Performer()); err != nil { + if err := autotag.GalleryPerformers(t.gallery, r.Gallery(), r.Performer(), t.cache); err != nil { return fmt.Errorf("error tagging gallery performers for %s: %v", t.gallery.Path.String, err) } } if t.studios { - if err := autotag.GalleryStudios(t.gallery, r.Gallery(), r.Studio()); err != nil { + if err := autotag.GalleryStudios(t.gallery, r.Gallery(), r.Studio(), t.cache); err != nil { return fmt.Errorf("error tagging gallery studio for %s: %v", t.gallery.Path.String, err) } } if t.tags { - if err := autotag.GalleryTags(t.gallery, r.Gallery(), r.Tag()); err != nil { + if err := autotag.GalleryTags(t.gallery, r.Gallery(), r.Tag(), t.cache); err != nil { return fmt.Errorf("error tagging gallery tags for %s: %v", t.gallery.Path.String, err) } } diff --git a/pkg/match/cache.go b/pkg/match/cache.go new file mode 100644 index 000000000..6d7238809 --- /dev/null +++ b/pkg/match/cache.go @@ -0,0 +1,120 @@ +package match + +import "github.com/stashapp/stash/pkg/models" + +const singleFirstCharacterRegex = `^[\p{L}][.\-_ ]` + +// Cache is used to cache queries that should not change across an 
autotag process. +type Cache struct { + singleCharPerformers []*models.Performer + singleCharStudios []*models.Studio + singleCharTags []*models.Tag +} + +// getSingleLetterPerformers returns all performers with names that start with single character words. +// The autotag query splits the words into two-character words to query +// against. This means that performers with single-letter words in their names could potentially +// be missed. +// This query is expensive, so it's queried once and cached, if the cache is provided. +func getSingleLetterPerformers(c *Cache, reader models.PerformerReader) ([]*models.Performer, error) { + if c == nil { + c = &Cache{} + } + + if c.singleCharPerformers == nil { + pp := -1 + performers, _, err := reader.Query(&models.PerformerFilterType{ + Name: &models.StringCriterionInput{ + Value: singleFirstCharacterRegex, + Modifier: models.CriterionModifierMatchesRegex, + }, + }, &models.FindFilterType{ + PerPage: &pp, + }) + + if err != nil { + return nil, err + } + + if len(performers) == 0 { + // make singleCharPerformers not nil + c.singleCharPerformers = make([]*models.Performer, 0) + } else { + c.singleCharPerformers = performers + } + } + + return c.singleCharPerformers, nil +} + +// getSingleLetterStudios returns all studios with names that start with single character words. +// See getSingleLetterPerformers for details. 
+func getSingleLetterStudios(c *Cache, reader models.StudioReader) ([]*models.Studio, error) { + if c == nil { + c = &Cache{} + } + + if c.singleCharStudios == nil { + pp := -1 + studios, _, err := reader.Query(&models.StudioFilterType{ + Name: &models.StringCriterionInput{ + Value: singleFirstCharacterRegex, + Modifier: models.CriterionModifierMatchesRegex, + }, + }, &models.FindFilterType{ + PerPage: &pp, + }) + + if err != nil { + return nil, err + } + + if len(studios) == 0 { + // make singleCharStudios not nil + c.singleCharStudios = make([]*models.Studio, 0) + } else { + c.singleCharStudios = studios + } + } + + return c.singleCharStudios, nil +} + +// getSingleLetterTags returns all tags with names or aliases that start with single character words. +// See getSingleLetterPerformers for details. +func getSingleLetterTags(c *Cache, reader models.TagReader) ([]*models.Tag, error) { + if c == nil { + c = &Cache{} + } + + if c.singleCharTags == nil { + pp := -1 + tags, _, err := reader.Query(&models.TagFilterType{ + Name: &models.StringCriterionInput{ + Value: singleFirstCharacterRegex, + Modifier: models.CriterionModifierMatchesRegex, + }, + Or: &models.TagFilterType{ + Aliases: &models.StringCriterionInput{ + Value: singleFirstCharacterRegex, + Modifier: models.CriterionModifierMatchesRegex, + }, + }, + }, &models.FindFilterType{ + PerPage: &pp, + }) + + if err != nil { + return nil, err + } + + if len(tags) == 0 { + // make singleCharTags not nil + c.singleCharTags = make([]*models.Tag, 0) + } else { + c.singleCharTags = tags + } + } + + return c.singleCharTags, nil +} diff --git a/pkg/match/path.go b/pkg/match/path.go index 5ae799de9..6eb166e1e 100644 --- a/pkg/match/path.go +++ b/pkg/match/path.go @@ -14,12 +14,15 @@ import ( ) const ( - separatorChars = `.\-_ ` + separatorChars = `.\-_ ` + separatorPattern = `(?:_|[^\p{L}\w\d])+` reNotLetterWordUnicode = `[^\p{L}\w\d]` reNotLetterWord = `[^\w\d]` ) +var separatorRE = regexp.MustCompile(separatorPattern) + func 
getPathQueryRegex(name string) string { // escape specific regex characters name = regexp.QuoteMeta(name) @@ -29,13 +32,7 @@ func getPathQueryRegex(name string) string { ret := strings.ReplaceAll(name, " ", separator+"*") - // \p{L} is specifically omitted here because of the performance hit when - // including it. It does mean that paths where the name is bounded by - // unicode letters will be returned. However, the results should be tested - // by nameMatchesPath which does include \p{L}. The improvement in query - // performance should be outweigh the performance hit of testing any extra - // results. - ret = `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])` + ret = `(?:^|_|[^\p{L}\d])` + ret + `(?:$|_|[^\p{L}\d])` return ret } @@ -49,9 +46,7 @@ func getPathWords(path string) []string { } // handle path separators - const separator = `(?:_|[^\p{L}\w\d])+` - re := regexp.MustCompile(separator) - retStr = re.ReplaceAllString(retStr, " ") + retStr = separatorRE.ReplaceAllString(retStr, " ") words := strings.Split(retStr, " ") @@ -132,10 +127,24 @@ func regexpMatchesPath(r *regexp.Regexp, path string) int { return found[len(found)-1][0] } -func PathToPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) { - words := getPathWords(path) +func getPerformers(words []string, performerReader models.PerformerReader, cache *Cache) ([]*models.Performer, error) { performers, err := performerReader.QueryForAutoTag(words) + if err != nil { + return nil, err + } + swPerformers, err := getSingleLetterPerformers(cache, performerReader) + if err != nil { + return nil, err + } + + return append(performers, swPerformers...), nil +} + +func PathToPerformers(path string, reader models.PerformerReader, cache *Cache) ([]*models.Performer, error) { + words := getPathWords(path) + + performers, err := getPerformers(words, reader, cache) if err != nil { return nil, err } @@ -151,12 +160,26 @@ func PathToPerformers(path string, performerReader 
models.PerformerReader) ([]*m return ret, nil } +func getStudios(words []string, reader models.StudioReader, cache *Cache) ([]*models.Studio, error) { + studios, err := reader.QueryForAutoTag(words) + if err != nil { + return nil, err + } + + swStudios, err := getSingleLetterStudios(cache, reader) + if err != nil { + return nil, err + } + + return append(studios, swStudios...), nil +} + // PathToStudio returns the Studio that matches the given path. // Where multiple matching studios are found, the one that matches the latest // position in the path is returned. -func PathToStudio(path string, reader models.StudioReader) (*models.Studio, error) { +func PathToStudio(path string, reader models.StudioReader, cache *Cache) (*models.Studio, error) { words := getPathWords(path) - candidates, err := reader.QueryForAutoTag(words) + candidates, err := getStudios(words, reader, cache) if err != nil { return nil, err @@ -188,9 +211,23 @@ func PathToStudio(path string, reader models.StudioReader) (*models.Studio, erro return ret, nil } -func PathToTags(path string, tagReader models.TagReader) ([]*models.Tag, error) { +func getTags(words []string, reader models.TagReader, cache *Cache) ([]*models.Tag, error) { + tags, err := reader.QueryForAutoTag(words) + if err != nil { + return nil, err + } + + swTags, err := getSingleLetterTags(cache, reader) + if err != nil { + return nil, err + } + + return append(tags, swTags...), nil +} + +func PathToTags(path string, reader models.TagReader, cache *Cache) ([]*models.Tag, error) { words := getPathWords(path) - tags, err := tagReader.QueryForAutoTag(words) + tags, err := getTags(words, reader, cache) if err != nil { return nil, err @@ -204,7 +241,7 @@ func PathToTags(path string, tagReader models.TagReader) ([]*models.Tag, error) } if !matches { - aliases, err := tagReader.GetAliases(t.ID) + aliases, err := reader.GetAliases(t.ID) if err != nil { return nil, err } diff --git a/pkg/scraper/autotag.go b/pkg/scraper/autotag.go index 
ce128b080..20940fce2 100644 --- a/pkg/scraper/autotag.go +++ b/pkg/scraper/autotag.go @@ -22,7 +22,7 @@ type autotagScraper struct { } func autotagMatchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) { - p, err := match.PathToPerformers(path, performerReader) + p, err := match.PathToPerformers(path, performerReader, nil) if err != nil { return nil, fmt.Errorf("error matching performers: %w", err) } @@ -46,7 +46,7 @@ func autotagMatchPerformers(path string, performerReader models.PerformerReader) } func autotagMatchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) { - studio, err := match.PathToStudio(path, studioReader) + studio, err := match.PathToStudio(path, studioReader, nil) if err != nil { return nil, fmt.Errorf("error matching studios: %w", err) } @@ -63,7 +63,7 @@ func autotagMatchStudio(path string, studioReader models.StudioReader) (*models. } func autotagMatchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) { - t, err := match.PathToTags(path, tagReader) + t, err := match.PathToTags(path, tagReader, nil) if err != nil { return nil, fmt.Errorf("error matching tags: %w", err) } diff --git a/pkg/sqlite/gallery.go b/pkg/sqlite/gallery.go index 7c8aca107..640006318 100644 --- a/pkg/sqlite/gallery.go +++ b/pkg/sqlite/gallery.go @@ -486,16 +486,13 @@ func galleryAverageResolutionCriterionHandler(qb *galleryQueryBuilder, resolutio } func (qb *galleryQueryBuilder) getGallerySort(findFilter *models.FindFilterType) string { - var sort string - var direction string - if findFilter == nil { - sort = "path" - direction = "ASC" - } else { - sort = findFilter.GetSort("path") - direction = findFilter.GetDirection() + if findFilter == nil || findFilter.Sort == nil || *findFilter.Sort == "" { + return "" } + sort := findFilter.GetSort("path") + direction := findFilter.GetDirection() + switch sort { case "images_count": return getCountSort(galleryTable, 
galleriesImagesTable, galleryIDColumn, direction) diff --git a/pkg/sqlite/image.go b/pkg/sqlite/image.go index 0e84be497..6ff57e5a9 100644 --- a/pkg/sqlite/image.go +++ b/pkg/sqlite/image.go @@ -517,8 +517,8 @@ INNER JOIN (` + valuesClause + `) t ON t.column2 = pt.tag_id } func (qb *imageQueryBuilder) getImageSort(findFilter *models.FindFilterType) string { - if findFilter == nil { - return " ORDER BY images.path ASC " + if findFilter == nil || findFilter.Sort == nil || *findFilter.Sort == "" { + return "" } sort := findFilter.GetSort("title") direction := findFilter.GetDirection() diff --git a/pkg/sqlite/performer.go b/pkg/sqlite/performer.go index b256d7d66..2994c5dba 100644 --- a/pkg/sqlite/performer.go +++ b/pkg/sqlite/performer.go @@ -21,12 +21,6 @@ WHERE performers_tags.tag_id = ? GROUP BY performers_tags.performer_id ` -// KNOWN ISSUE: using \p{L} to find single unicode character names results in -// very slow queries. -// Suggested solution will be to cache single-character names and not include it -// in the autotag query. 
-const singleFirstCharacterRegex = `^[\w][.\-_ ]` - type performerQueryBuilder struct { repository } @@ -189,9 +183,6 @@ func (qb *performerQueryBuilder) QueryForAutoTag(words []string) ([]*models.Perf var whereClauses []string var args []interface{} - whereClauses = append(whereClauses, "name regexp ?") - args = append(args, singleFirstCharacterRegex) - for _, w := range words { whereClauses = append(whereClauses, "name like ?") args = append(args, w+"%") diff --git a/pkg/sqlite/scene.go b/pkg/sqlite/scene.go index 2649a8322..6215e9f7b 100644 --- a/pkg/sqlite/scene.go +++ b/pkg/sqlite/scene.go @@ -760,8 +760,7 @@ func (qb *sceneQueryBuilder) getDefaultSceneSort() string { } func (qb *sceneQueryBuilder) setSceneSort(query *queryBuilder, findFilter *models.FindFilterType) { - if findFilter == nil { - query.sortAndPagination += qb.getDefaultSceneSort() + if findFilter == nil || findFilter.Sort == nil || *findFilter.Sort == "" { return } sort := findFilter.GetSort("title") diff --git a/pkg/sqlite/studio.go b/pkg/sqlite/studio.go index 6eac885cd..6b58ab0f8 100644 --- a/pkg/sqlite/studio.go +++ b/pkg/sqlite/studio.go @@ -144,10 +144,6 @@ func (qb *studioQueryBuilder) QueryForAutoTag(words []string) ([]*models.Studio, var whereClauses []string var args []interface{} - // always include names that begin with a single character - whereClauses = append(whereClauses, "studios.name regexp ? 
OR COALESCE(studio_aliases.alias, '') regexp ?") - args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex) - for _, w := range words { ww := w + "%" whereClauses = append(whereClauses, "studios.name like ?") diff --git a/pkg/sqlite/tag.go b/pkg/sqlite/tag.go index 57514d751..2187ff08e 100644 --- a/pkg/sqlite/tag.go +++ b/pkg/sqlite/tag.go @@ -235,10 +235,6 @@ func (qb *tagQueryBuilder) QueryForAutoTag(words []string) ([]*models.Tag, error var whereClauses []string var args []interface{} - // always include names that begin with a single character - whereClauses = append(whereClauses, "tags.name regexp ? OR COALESCE(tag_aliases.alias, '') regexp ?") - args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex) - for _, w := range words { ww := w + "%" whereClauses = append(whereClauses, "tags.name like ?") diff --git a/ui/v2.5/src/components/Changelog/Changelog.tsx b/ui/v2.5/src/components/Changelog/Changelog.tsx index ffd212a0c..66b2339d4 100644 --- a/ui/v2.5/src/components/Changelog/Changelog.tsx +++ b/ui/v2.5/src/components/Changelog/Changelog.tsx @@ -16,6 +16,7 @@ import V0100 from "./versions/v0100.md"; import V0110 from "./versions/v0110.md"; import V0120 from "./versions/v0120.md"; import V0130 from "./versions/v0130.md"; +import V0140 from "./versions/v0140.md"; import { MarkdownPage } from "../Shared/MarkdownPage"; // to avoid use of explicit any @@ -56,7 +57,7 @@ const Changelog: React.FC = () => { // then update the current fields. 
const currentVersion = stashVersion || "v0.13.0"; const currentDate = buildDate; - const currentPage = V0130; + const currentPage = V0140; const releases: IStashRelease[] = [ { @@ -65,6 +66,11 @@ const Changelog: React.FC = () => { page: currentPage, defaultOpen: true, }, + { + version: "v0.13.0", + date: "2022-03-08", + page: V0130, + }, { version: "v0.12.0", date: "2021-12-29", diff --git a/ui/v2.5/src/components/Changelog/versions/v0140.md b/ui/v2.5/src/components/Changelog/versions/v0140.md new file mode 100644 index 000000000..e6e0d81ac --- /dev/null +++ b/ui/v2.5/src/components/Changelog/versions/v0140.md @@ -0,0 +1,2 @@ +### 🎨 Improvements +* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))