From 9e08edc76fa606ce113053fef11ff755f9081411 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 7 Sep 2022 14:21:10 +1000 Subject: [PATCH] [Files Refactor] Don't require fingerprint calculation post-migrate (#2892) --- internal/manager/task_import.go | 17 ++--- pkg/file/scan.go | 106 +++++++++++++++++++++++--- pkg/models/jsonschema/file_folder.go | 4 +- pkg/sqlite/migrations/32_files.up.sql | 39 +++++----- 4 files changed, 125 insertions(+), 41 deletions(-) diff --git a/internal/manager/task_import.go b/internal/manager/task_import.go index c27e83bdb..bd887b2e1 100644 --- a/internal/manager/task_import.go +++ b/internal/manager/task_import.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "path/filepath" "time" @@ -196,7 +195,7 @@ func (t *ImportTask) ImportPerformers(ctx context.Context) { logger.Info("[performers] importing") path := t.json.json.Performers - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[performers] failed to read performers directory: %v", err) @@ -239,7 +238,7 @@ func (t *ImportTask) ImportStudios(ctx context.Context) { logger.Info("[studios] importing") path := t.json.json.Studios - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[studios] failed to read studios directory: %v", err) @@ -328,7 +327,7 @@ func (t *ImportTask) ImportMovies(ctx context.Context) { logger.Info("[movies] importing") path := t.json.json.Movies - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[movies] failed to read movies directory: %v", err) @@ -373,7 +372,7 @@ func (t *ImportTask) ImportFiles(ctx context.Context) { logger.Info("[files] importing") path := t.json.json.Files - files, err := ioutil.ReadDir(path) + files, err := 
os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[files] failed to read files directory: %v", err) @@ -463,7 +462,7 @@ func (t *ImportTask) ImportGalleries(ctx context.Context) { logger.Info("[galleries] importing") path := t.json.json.Galleries - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[galleries] failed to read galleries directory: %v", err) @@ -515,7 +514,7 @@ func (t *ImportTask) ImportTags(ctx context.Context) { logger.Info("[tags] importing") path := t.json.json.Tags - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[tags] failed to read tags directory: %v", err) @@ -650,7 +649,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) { logger.Info("[scenes] importing") path := t.json.json.Scenes - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[scenes] failed to read scenes directory: %v", err) @@ -727,7 +726,7 @@ func (t *ImportTask) ImportImages(ctx context.Context) { logger.Info("[images] importing") path := t.json.json.Images - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Errorf("[images] failed to read images directory: %v", err) diff --git a/pkg/file/scan.go b/pkg/file/scan.go index c54555849..f9c972bcc 100644 --- a/pkg/file/scan.go +++ b/pkg/file/scan.go @@ -343,8 +343,10 @@ func (s *scanJob) processQueue(ctx context.Context) error { return nil } -func (s *scanJob) incrementProgress() { - if s.ProgressReports != nil { +func (s *scanJob) incrementProgress(f scanFile) { + // don't increment for files inside zip files since these aren't + // counted during the initial walking + if s.ProgressReports != nil && f.zipFile == nil { s.ProgressReports.Increment() } } @@ 
-418,7 +420,7 @@ func (s *scanJob) handleFolder(ctx context.Context, file scanFile) error { path := file.Path return s.withTxn(ctx, func(ctx context.Context) error { - defer s.incrementProgress() + defer s.incrementProgress(file) // determine if folder already exists in data store (by path) f, err := s.Repository.FolderStore.FindByPath(ctx, path) @@ -579,7 +581,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) { // add this file to the queue to be created later if s.retrying { // if we're retrying and the folder still doesn't exist, then it's a problem - s.incrementProgress() + s.incrementProgress(f) return nil, fmt.Errorf("parent folder for %q doesn't exist", path) } @@ -591,7 +593,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) { zipFileID, err := s.getZipFileID(ctx, f.zipFile) if err != nil { - s.incrementProgress() + s.incrementProgress(f) return nil, err } @@ -601,7 +603,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) { fp, err := s.calculateFingerprints(f.fs, baseFile, path) if err != nil { - s.incrementProgress() + s.incrementProgress(f) return nil, err } @@ -609,7 +611,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) { file, err := s.fireDecorators(ctx, f.fs, baseFile) if err != nil { - s.incrementProgress() + s.incrementProgress(f) return nil, err } @@ -617,7 +619,7 @@ func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (File, error) { // do this after decoration so that missing fields can be populated renamed, err := s.handleRename(ctx, file, fp) if err != nil { - s.incrementProgress() + s.incrementProgress(f) return nil, err } @@ -785,6 +787,63 @@ func (s *scanJob) isHandlerRequired(ctx context.Context, f File) bool { return accept } +// isMissingMetadata returns true if the provided file is missing metadata. +// Missing metadata should only occur after the 32 schema migration. +// Looks for special values. 
For numbers, this will be -1. For strings, this +// will be 'unset'. +// Missing metadata includes the following: +// - file size +// - image format, width or height +// - video codec, audio codec, format, width, height, framerate or bitrate +func (s *scanJob) isMissingMetadata(existing File) bool { + const ( + unsetString = "unset" + unsetNumber = -1 + ) + + if existing.Base().Size == unsetNumber { + return true + } + + switch f := existing.(type) { + case *ImageFile: + return f.Format == unsetString || f.Width == unsetNumber || f.Height == unsetNumber + case *VideoFile: + return f.VideoCodec == unsetString || f.AudioCodec == unsetString || + f.Format == unsetString || f.Width == unsetNumber || + f.Height == unsetNumber || f.FrameRate == unsetNumber || + f.BitRate == unsetNumber + } + + return false +} + +func (s *scanJob) setMissingMetadata(ctx context.Context, f scanFile, existing File) (File, error) { + path := existing.Base().Path + logger.Infof("Setting missing metadata for %s", path) + + existing.Base().Size = f.Size + + var err error + existing, err = s.fireDecorators(ctx, f.fs, existing) + if err != nil { + return nil, err + } + + // queue file for update + if err := s.withTxn(ctx, func(ctx context.Context) error { + if err := s.Repository.Update(ctx, existing); err != nil { + return fmt.Errorf("updating file %q: %w", path, err) + } + + return nil + }); err != nil { + return nil, err + } + + return existing, nil +} + // returns a file only if it was updated func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File) (File, error) { base := existing.Base() @@ -794,6 +853,16 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File) updated := !fileModTime.Equal(base.ModTime) if !updated { + isMissingMetadata := s.isMissingMetadata(existing) + // set missing information + if isMissingMetadata { + var err error + existing, err = s.setMissingMetadata(ctx, f, existing) + if err != nil { + return nil, err + } + } + 
handlerRequired := false if err := s.withDB(ctx, func(ctx context.Context) error { // check if the handler needs to be run @@ -804,7 +873,14 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File) } if !handlerRequired { - s.incrementProgress() + s.incrementProgress(f) + + // if this file is a zip file, then we need to rescan the contents + // as well. We do this by returning the file, instead of nil. + if isMissingMetadata { + return existing, nil + } + return nil, nil } @@ -813,12 +889,18 @@ return err } - s.incrementProgress() + s.incrementProgress(f) return nil }); err != nil { return nil, err } + // if this file is a zip file, then we need to rescan the contents + // as well. We do this by returning the file, instead of nil. + if isMissingMetadata { + return existing, nil + } + return nil, nil } @@ -830,7 +912,7 @@ func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing File) // calculate and update fingerprints for the file fp, err := s.calculateFingerprints(f.fs, base, path) if err != nil { - s.incrementProgress() + s.incrementProgress(f) return nil, err } @@ -838,7 +920,7 @@ existing, err = s.fireDecorators(ctx, f.fs, existing) if err != nil { - s.incrementProgress() + s.incrementProgress(f) return nil, err } diff --git a/pkg/models/jsonschema/file_folder.go b/pkg/models/jsonschema/file_folder.go index 1e4675d84..dfe581f78 100644 --- a/pkg/models/jsonschema/file_folder.go +++ b/pkg/models/jsonschema/file_folder.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "io" "os" "path/filepath" "strings" @@ -104,7 +104,7 @@ func LoadFileFile(filePath string) (DirEntry, error) { } defer r.Close() - data, err := ioutil.ReadAll(r) + data, err := io.ReadAll(r) if err != nil { return nil, err } diff --git a/pkg/sqlite/migrations/32_files.up.sql 
b/pkg/sqlite/migrations/32_files.up.sql index 708034bd7..8e76b0d37 100644 --- a/pkg/sqlite/migrations/32_files.up.sql +++ b/pkg/sqlite/migrations/32_files.up.sql @@ -170,9 +170,9 @@ INSERT INTO `files` SELECT `path`, 1, - COALESCE(`size`, 0), - -- set mod time to epoch so that it the format/size is calculated on scan - '1970-01-01 00:00:00', + -- special value if null so that it is recalculated + COALESCE(`size`, -1), + COALESCE(`file_mod_time`, '1970-01-01 00:00:00'), `created_at`, `updated_at` FROM `images`; @@ -186,9 +186,10 @@ INSERT INTO `image_files` ) SELECT `files`.`id`, - '', - COALESCE(`images`.`width`, 0), - COALESCE(`images`.`height`, 0) + -- special values so that they are recalculated + 'unset', + COALESCE(`images`.`width`, -1), + COALESCE(`images`.`height`, -1) FROM `images` INNER JOIN `files` ON `images`.`path` = `files`.`basename` AND `files`.`parent_folder_id` = 1; INSERT INTO `images_files` @@ -280,8 +281,9 @@ INSERT INTO `files` SELECT `path`, 1, - 0, - '1970-01-01 00:00:00', -- set to placeholder so that size is updated + -- special value so that it is recalculated + -1, + COALESCE(`file_mod_time`, '1970-01-01 00:00:00'), `created_at`, `updated_at` FROM `galleries` @@ -433,9 +435,9 @@ INSERT INTO `files` SELECT `path`, 1, - COALESCE(`size`, 0), - -- set mod time to epoch so that it the format/size is calculated on scan - '1970-01-01 00:00:00', + -- special value if null so that it is recalculated + COALESCE(`size`, -1), + COALESCE(`file_mod_time`, '1970-01-01 00:00:00'), `created_at`, `updated_at` FROM `scenes`; @@ -457,13 +459,14 @@ INSERT INTO `video_files` SELECT `files`.`id`, `scenes`.`duration`, - COALESCE(`scenes`.`video_codec`, ''), - COALESCE(`scenes`.`format`, ''), - COALESCE(`scenes`.`audio_codec`, ''), - COALESCE(`scenes`.`width`, 0), - COALESCE(`scenes`.`height`, 0), - COALESCE(`scenes`.`framerate`, 0), - COALESCE(`scenes`.`bitrate`, 0), + -- special values for unset to be updated during scan + COALESCE(`scenes`.`video_codec`, 
'unset'), + COALESCE(`scenes`.`format`, 'unset'), + COALESCE(`scenes`.`audio_codec`, 'unset'), + COALESCE(`scenes`.`width`, -1), + COALESCE(`scenes`.`height`, -1), + COALESCE(`scenes`.`framerate`, -1), + COALESCE(`scenes`.`bitrate`, -1), `scenes`.`interactive`, `scenes`.`interactive_speed` FROM `scenes` INNER JOIN `files` ON `scenes`.`path` = `files`.`basename` AND `files`.`parent_folder_id` = 1;