From 3e526a49a4a7a6ada75a5faafc672c3dccd0e88e Mon Sep 17 00:00:00 2001 From: gitgiggety <79809426+gitgiggety@users.noreply.github.com> Date: Tue, 21 Sep 2021 03:48:52 +0200 Subject: [PATCH] Add indexes for path and checksum to images (#1740) * Add indexes for path and checksum to images The scenes table has unique indexes/constraints on path and checksum columns. The images table doesn't, which doesn't really make sense, as scanning uses these columns extensively which warrants an index, and both should be unique as well. Adding these indexes thus heavily improves the scanning tasks performance. On a database containing 4700 images a (re)scan of those 4700 files, which thus shouldn't do anything, took 1.2 seconds, with the indexes added this only takes 0.4 seconds. Taking the same test on a generated database containing 4M images + the actual 4700 images took 26 minutes for a rescan, and with the index existing also only takes 0.4 seconds. * Add images.checksum unique constraint in code with fallback Work around the issue where in some cases duplicate images (/checksums on images) might exist. This as discussed in #1740 by creating the index on startup and in case of an error logging the duplicates. This so the users where this scenario exists can correct the database (by searching on the logged checksum(s) and removing the duplicates) and after a restart the unique index / constraint will still be created. In case when creating the unique index fails a "normal" / non-unique index is created as surrogate so the user will still get the performance benefit (for example during scanning) without being forced to remove the duplicates and restart beforehand. This surrogate is also automatically cleaned up after the unique index is successfully created. 
---
 pkg/database/custom_migrations.go             | 87 +++++++++++++++++++
 pkg/database/database.go                      |  6 +-
 .../migrations/28_images_indexes.up.sql       |  3 +
 .../components/Changelog/versions/v0100.md    |  3 +
 4 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 pkg/database/custom_migrations.go
 create mode 100644 pkg/database/migrations/28_images_indexes.up.sql

diff --git a/pkg/database/custom_migrations.go b/pkg/database/custom_migrations.go
new file mode 100644
index 000000000..d3c234548
--- /dev/null
+++ b/pkg/database/custom_migrations.go
@@ -0,0 +1,87 @@
+package database
+
+import (
+	"database/sql"
+	"strings"
+
+	"github.com/jmoiron/sqlx"
+	"github.com/stashapp/stash/pkg/logger"
+)
+
+// runCustomMigrations runs the migrations that are implemented in code rather
+// than in the SQL migration files, returning the first error encountered.
+func runCustomMigrations() error {
+	if err := createImagesChecksumIndex(); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// createImagesChecksumIndex ensures that a unique index exists on
+// images.checksum.
+//
+// When the unique index cannot be created, a non-unique surrogate index is
+// created instead and any duplicate checksums are logged. The surrogate is
+// dropped once the unique index has been created successfully. Only a failure
+// to look up the existing index is returned as an error; every later failure
+// is logged and nil is returned.
+func createImagesChecksumIndex() error {
+	return WithTxn(func(tx *sqlx.Tx) error {
+		// Scan, not Row.Err, is what reports sql.ErrNoRows; found stays false
+		// when the index does not exist yet.
+		var found bool
+		err := tx.QueryRow("SELECT 1 AS found FROM sqlite_master WHERE type = 'index' AND name = 'images_checksum_unique'").Scan(&found)
+		if err != nil && err != sql.ErrNoRows {
+			return err
+		}
+
+		if found {
+			return nil
+		}
+
+		_, err = tx.Exec("CREATE UNIQUE INDEX images_checksum_unique ON images (checksum)")
+		if err == nil {
+			// The surrogate index is redundant once the unique index exists.
+			_, err = tx.Exec("DROP INDEX IF EXISTS index_images_checksum")
+			if err != nil {
+				logger.Errorf("Failed to remove surrogate images.checksum index: %s", err)
+			}
+			logger.Info("Created unique constraint on images table")
+			return nil
+		}
+
+		// Log the cause; it is not necessarily a duplicate checksum.
+		logger.Warnf("Unable to create unique constraint on images.checksum: %s", err)
+
+		// Fall back to a non-unique index so lookups by checksum stay fast.
+		_, err = tx.Exec("CREATE INDEX IF NOT EXISTS index_images_checksum ON images (checksum)")
+		if err != nil {
+			logger.Errorf("Unable to create index on images.checksum: %s", err)
+		}
+
+		var result []struct {
+			Checksum string `db:"checksum"`
+		}
+
+		err = tx.Select(&result, "SELECT checksum FROM images GROUP BY checksum HAVING COUNT(1) > 1")
+		if err != nil && err != sql.ErrNoRows {
+			logger.Errorf("Unable to determine non-unique image checksums: %s", err)
+			return nil
+		}
+
+		// Nothing to report; avoid warning about an empty list of duplicates.
+		if len(result) == 0 {
+			return nil
+		}
+
+		checksums := make([]string, len(result))
+		for i, res := range result {
+			checksums[i] = res.Checksum
+		}
+
+		logger.Warnf("The following duplicate image checksums have been found. Please remove the duplicates and restart. %s", strings.Join(checksums, ", "))
+
+		return nil
+	})
+}
diff --git a/pkg/database/database.go b/pkg/database/database.go
index f2aa58734..017a3497a 100644
--- a/pkg/database/database.go
+++ b/pkg/database/database.go
@@ -23,7 +23,7 @@ import (
 var DB *sqlx.DB
 var WriteMu *sync.Mutex
 var dbPath string
-var appSchemaVersion uint = 27
+var appSchemaVersion uint = 28
 var databaseSchemaVersion uint
 
 var (
@@ -86,6 +86,10 @@ func Initialize(databasePath string) error {
 	DB = open(databasePath, disableForeignKeys)
 	WriteMu = &sync.Mutex{}
 
+	if err := runCustomMigrations(); err != nil {
+		return err
+	}
+
 	return nil
 }
 
diff --git a/pkg/database/migrations/28_images_indexes.up.sql b/pkg/database/migrations/28_images_indexes.up.sql
new file mode 100644
index 000000000..1fbb1cfe2
--- /dev/null
+++ b/pkg/database/migrations/28_images_indexes.up.sql
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS `images_path_unique`;
+
+CREATE UNIQUE INDEX `images_path_unique` ON `images` (`path`);
diff --git a/ui/v2.5/src/components/Changelog/versions/v0100.md b/ui/v2.5/src/components/Changelog/versions/v0100.md
index cd6b2aa1b..a332998d3 100644
--- a/ui/v2.5/src/components/Changelog/versions/v0100.md
+++ b/ui/v2.5/src/components/Changelog/versions/v0100.md
@@ -1,3 +1,5 @@
+#### 💥 Note: Please check your logs after migrating to this release. A log warning will be generated on startup if duplicate image checksums exist in your system. Search for the images using the logged checksums, and remove the unwanted ones.
+
 ### ✨ New Features
 * Added options to generate webp and static preview files for markers. ([#1604](https://github.com/stashapp/stash/pull/1604))
 * Added sort by option for gallery rating.
([#1720](https://github.com/stashapp/stash/pull/1720)) @@ -10,6 +12,7 @@ * Support filtering Movies by Performers. ([#1675](https://github.com/stashapp/stash/pull/1675)) ### 🎨 Improvements +* Added missing image table indexes, resulting in a significant performance improvement. ([#1740](https://github.com/stashapp/stash/pull/1740)) * Support scraper script logging to specific log levels. ([#1648](https://github.com/stashapp/stash/pull/1648)) * Added sv-SE language option. ([#1691](https://github.com/stashapp/stash/pull/1691))