mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 20:34:37 +03:00
Add indexes for path and checksum to images (#1740)
* Add indexes for path and checksum to images The scenes table has unique indexes/constraints on the path and checksum columns. The images table doesn't, which doesn't really make sense, as scanning uses these columns extensively, which warrants an index, and both should be unique as well. Adding these indexes thus heavily improves the scanning task's performance. On a database containing 4700 images a (re)scan of those 4700 files, which thus shouldn't do anything, took 1.2 seconds; with the indexes added this only takes 0.4 seconds. Running the same test on a generated database containing 4M images + the actual 4700 images took 26 minutes for a rescan, and with the index existing it also only takes 0.4 seconds. * Add images.checksum unique constraint in code with fallback Work around the issue where in some cases duplicate images (/checksums on images) might exist. This is done, as discussed in #1740, by creating the index on startup and, in case of an error, logging the duplicates. This is so that users for whom this scenario exists can correct the database (by searching on the logged checksum(s) and removing the duplicates), and after a restart the unique index / constraint will still be created. In case creating the unique index fails, a "normal" / non-unique index is created as a surrogate so the user will still get the performance benefit (for example during scanning) without being forced to remove the duplicates and restart beforehand. This surrogate is also automatically cleaned up after the unique index is successfully created.
This commit is contained in:
69
pkg/database/custom_migrations.go
Normal file
69
pkg/database/custom_migrations.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"strings"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
)
|
||||
|
||||
func runCustomMigrations() error {
|
||||
if err := createImagesChecksumIndex(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createImagesChecksumIndex() error {
|
||||
return WithTxn(func(tx *sqlx.Tx) error {
|
||||
row := tx.QueryRow("SELECT 1 AS found FROM sqlite_master WHERE type = 'index' AND name = 'images_checksum_unique'")
|
||||
err := row.Err()
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return err
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
var found bool
|
||||
row.Scan(&found)
|
||||
if found {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
_, err = tx.Exec("CREATE UNIQUE INDEX images_checksum_unique ON images (checksum)")
|
||||
if err == nil {
|
||||
_, err = tx.Exec("DROP INDEX IF EXISTS index_images_checksum")
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to remove surrogate images.checksum index: %s", err)
|
||||
}
|
||||
logger.Info("Created unique constraint on images table")
|
||||
return nil
|
||||
}
|
||||
|
||||
_, err = tx.Exec("CREATE INDEX IF NOT EXISTS index_images_checksum ON images (checksum)")
|
||||
if err != nil {
|
||||
logger.Errorf("Unable to create index on images.checksum: %s", err)
|
||||
}
|
||||
|
||||
var result []struct {
|
||||
Checksum string `db:"checksum"`
|
||||
}
|
||||
|
||||
err = tx.Select(&result, "SELECT checksum FROM images GROUP BY checksum HAVING COUNT(1) > 1")
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
logger.Errorf("Unable to determine non-unique image checksums: %s", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
checksums := make([]string, len(result))
|
||||
for i, res := range result {
|
||||
checksums[i] = res.Checksum
|
||||
}
|
||||
|
||||
logger.Warnf("The following duplicate image checksums have been found. Please remove the duplicates and restart. %s", strings.Join(checksums, ", "))
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
@@ -23,7 +23,7 @@ import (
|
||||
var DB *sqlx.DB
|
||||
var WriteMu *sync.Mutex
|
||||
var dbPath string
|
||||
var appSchemaVersion uint = 27
|
||||
var appSchemaVersion uint = 28
|
||||
var databaseSchemaVersion uint
|
||||
|
||||
var (
|
||||
@@ -86,6 +86,10 @@ func Initialize(databasePath string) error {
|
||||
DB = open(databasePath, disableForeignKeys)
|
||||
WriteMu = &sync.Mutex{}
|
||||
|
||||
if err := runCustomMigrations(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
3
pkg/database/migrations/28_images_indexes.up.sql
Normal file
3
pkg/database/migrations/28_images_indexes.up.sql
Normal file
@@ -0,0 +1,3 @@
|
||||
-- Remove any index already named images_path_unique; a no-op when none exists.
DROP INDEX IF EXISTS `images_path_unique`;
|
||||
|
||||
-- Index images.path and enforce that every image path is unique.
CREATE UNIQUE INDEX `images_path_unique` ON `images` (`path`);
|
||||
@@ -1,3 +1,5 @@
|
||||
#### 💥 Note: Please check your logs after migrating to this release. A log warning will be generated on startup if duplicate image checksums exist in your system. Search for the images using the logged checksums, and remove the unwanted ones.
|
||||
|
||||
### ✨ New Features
|
||||
* Added options to generate webp and static preview files for markers. ([#1604](https://github.com/stashapp/stash/pull/1604))
|
||||
* Added sort by option for gallery rating. ([#1720](https://github.com/stashapp/stash/pull/1720))
|
||||
@@ -10,6 +12,7 @@
|
||||
* Support filtering Movies by Performers. ([#1675](https://github.com/stashapp/stash/pull/1675))
|
||||
|
||||
### 🎨 Improvements
|
||||
* Added missing image table indexes, resulting in a significant performance improvement. ([#1740](https://github.com/stashapp/stash/pull/1740))
|
||||
* Support scraper script logging to specific log levels. ([#1648](https://github.com/stashapp/stash/pull/1648))
|
||||
* Added sv-SE language option. ([#1691](https://github.com/stashapp/stash/pull/1691))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user