From 39fdde273dd051ef7d5a4c41274f90058a45e676 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Fri, 15 Oct 2021 10:39:48 +1100 Subject: [PATCH] Scan refactor (#1816) * Add file scanner * Scan scene changes * Split scan files * Generalise scan * Refactor ffprobe * Refactor ffmpeg encoder * Move scene scan code to scene package * Move matchExtension to utils * Refactor gallery scanning * Refactor image scanning * Prevent race conditions on identical hashes * Refactor image thumbnail generation * Perform count concurrently * Allow progress increment before total set * Make progress updates more frequent --- pkg/api/routes_image.go | 2 +- pkg/api/routes_scene.go | 12 +- pkg/ffmpeg/encoder.go | 26 +- pkg/ffmpeg/encoder_marker.go | 4 +- pkg/ffmpeg/encoder_scene_preview_chunk.go | 6 +- pkg/ffmpeg/encoder_screenshot.go | 2 +- pkg/ffmpeg/encoder_sprite_screenshot.go | 2 +- pkg/ffmpeg/ffprobe.go | 7 +- pkg/ffmpeg/image.go | 35 + pkg/ffmpeg/stream.go | 2 +- pkg/file/file.go | 31 + pkg/file/hash.go | 17 + pkg/file/scan.go | 175 +++ pkg/file/zip.go | 64 + pkg/gallery/export_test.go | 6 +- pkg/gallery/scan.go | 225 +++ pkg/image/image.go | 45 +- pkg/image/scan.go | 192 +++ pkg/image/thumbnail.go | 93 +- pkg/image/vips.go | 48 + pkg/job/manager.go | 2 +- pkg/job/progress.go | 6 +- pkg/manager/exclude_files.go | 12 - pkg/manager/generator.go | 2 +- pkg/manager/generator_phash.go | 2 +- pkg/manager/generator_preview.go | 2 +- pkg/manager/generator_sprite.go | 2 +- pkg/manager/manager.go | 10 +- pkg/manager/manager_tasks.go | 6 +- pkg/manager/scene.go | 3 +- pkg/manager/screenshot.go | 2 +- pkg/manager/task_clean.go | 6 +- pkg/manager/task_generate_markers.go | 8 +- pkg/manager/task_generate_phash.go | 4 +- pkg/manager/task_generate_preview.go | 4 +- pkg/manager/task_generate_screenshot.go | 4 +- pkg/manager/task_generate_sprite.go | 4 +- pkg/manager/task_migrate_hash.go | 3 +- pkg/manager/task_scan.go | 1328 ++--------------- pkg/manager/task_scan_gallery.go | 170 +++ pkg/manager/task_scan_image.go | 166 +++ pkg/manager/task_scan_scene.go | 58 + pkg/manager/task_transcode.go | 7 +- pkg/models/model_file.go | 30 + pkg/models/model_gallery.go | 35 + pkg/models/model_image.go | 55 +- pkg/models/model_scene.go | 92 +- pkg/{manager => scene}/migrate_hash.go | 29 +- pkg/scene/scan.go | 335 +++++ pkg/scene/screenshot.go | 23 + pkg/utils/file.go | 13 + pkg/utils/mutex.go | 64 + pkg/utils/mutex_test.go | 50 + pkg/utils/oshash.go | 67 +- .../components/Changelog/versions/v0110.md | 3 + 55 files changed, 2172 insertions(+), 1429 deletions(-) create mode 100644 pkg/ffmpeg/image.go create mode 100644 pkg/file/file.go create mode 100644 pkg/file/hash.go create mode 100644 pkg/file/scan.go create mode 100644 pkg/file/zip.go create mode 100644 pkg/gallery/scan.go create mode 100644 pkg/image/scan.go create mode 100644 pkg/image/vips.go create mode 100644 pkg/manager/task_scan_gallery.go create mode 100644 pkg/manager/task_scan_image.go create mode 100644 pkg/manager/task_scan_scene.go create mode 100644 pkg/models/model_file.go rename pkg/{manager => scene}/migrate_hash.go (67%) create mode 100644 pkg/scene/scan.go create mode 100644 pkg/scene/screenshot.go create mode 100644 pkg/utils/mutex.go create mode 100644 pkg/utils/mutex_test.go diff --git a/pkg/api/routes_image.go b/pkg/api/routes_image.go index 9cdaa6653..092d77668 100644 --- a/pkg/api/routes_image.go +++ b/pkg/api/routes_image.go @@ -43,7 +43,7 @@ func (rs imageRoutes) Thumbnail(w http.ResponseWriter, r 
*http.Request) { if exists { http.ServeFile(w, r, filepath) } else { - encoder := image.NewThumbnailEncoder(manager.GetInstance().FFMPEGPath) + encoder := image.NewThumbnailEncoder(manager.GetInstance().FFMPEG) data, err := encoder.GetThumbnail(img, models.DefaultGthumbWidth) if err != nil { logger.Errorf("error generating thumbnail for image: %s", err.Error()) diff --git a/pkg/api/routes_scene.go b/pkg/api/routes_scene.go index a029b79a9..56071e27c 100644 --- a/pkg/api/routes_scene.go +++ b/pkg/api/routes_scene.go @@ -57,7 +57,8 @@ func getSceneFileContainer(scene *models.Scene) ffmpeg.Container { container = ffmpeg.Container(scene.Format.String) } else { // container isn't in the DB // shouldn't happen, fallback to ffprobe - tmpVideoFile, err := ffmpeg.NewVideoFile(manager.GetInstance().FFProbePath, scene.Path, false) + ffprobe := manager.GetInstance().FFProbe + tmpVideoFile, err := ffprobe.NewVideoFile(scene.Path, false) if err != nil { logger.Errorf("[transcode] error reading video file: %v", err) return ffmpeg.Container("") @@ -105,7 +106,8 @@ func (rs sceneRoutes) StreamMp4(w http.ResponseWriter, r *http.Request) { func (rs sceneRoutes) StreamHLS(w http.ResponseWriter, r *http.Request) { scene := r.Context().Value(sceneKey).(*models.Scene) - videoFile, err := ffmpeg.NewVideoFile(manager.GetInstance().FFProbePath, scene.Path, false) + ffprobe := manager.GetInstance().FFProbe + videoFile, err := ffprobe.NewVideoFile(scene.Path, false) if err != nil { logger.Errorf("[stream] error reading video file: %v", err) return @@ -142,8 +144,8 @@ func (rs sceneRoutes) streamTranscode(w http.ResponseWriter, r *http.Request, vi scene := r.Context().Value(sceneKey).(*models.Scene) // needs to be transcoded - - videoFile, err := ffmpeg.NewVideoFile(manager.GetInstance().FFProbePath, scene.Path, false) + ffprobe := manager.GetInstance().FFProbe + videoFile, err := ffprobe.NewVideoFile(scene.Path, false) if err != nil { logger.Errorf("[stream] error reading video file: %v", err) return @@ -171,7 +173,7 @@ func (rs sceneRoutes) streamTranscode(w http.ResponseWriter, r *http.Request, vi options.MaxTranscodeSize = models.StreamingResolutionEnum(requestedSize) } - encoder := ffmpeg.NewEncoder(manager.GetInstance().FFMPEGPath) + encoder := manager.GetInstance().FFMPEG stream, err = encoder.GetTranscodeStream(options) if err != nil { diff --git a/pkg/ffmpeg/encoder.go b/pkg/ffmpeg/encoder.go index ed9c6e31f..2334808ed 100644 --- a/pkg/ffmpeg/encoder.go +++ b/pkg/ffmpeg/encoder.go @@ -12,21 +12,13 @@ import ( "github.com/stashapp/stash/pkg/logger" ) -type Encoder struct { - Path string -} +type Encoder string var ( runningEncoders = make(map[string][]*os.Process) runningEncodersMutex = sync.RWMutex{} ) -func NewEncoder(ffmpegPath string) Encoder { - return Encoder{ - Path: ffmpegPath, - } -} - func registerRunningEncoder(path string, process *os.Process) { runningEncodersMutex.Lock() processes := runningEncoders[path] @@ -86,7 +78,7 @@ func KillRunningEncoders(path string) { // FFmpeg runner with progress output, used for transcodes func (e *Encoder) runTranscode(probeResult VideoFile, args []string) (string, error) { - cmd := exec.Command(e.Path, args...) + cmd := exec.Command(string(*e), args...) stderr, err := cmd.StderrPipe() if err != nil { @@ -141,19 +133,25 @@ func (e *Encoder) runTranscode(probeResult VideoFile, args []string) (string, er return stdoutString, nil } -func (e *Encoder) run(probeResult VideoFile, args []string) (string, error) { - cmd := exec.Command(e.Path, args...) 
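A minimal usage sketch, not part of the patch: Encoder is now a plain string holding the ffmpeg binary path (NewEncoder is removed), and the run helper just below is reworked to take a source path and an optional stdin reader, which the ImageThumbnail method added later in this patch relies on. The binary path and input image below are assumptions.

package main

import (
	"bytes"
	"fmt"
	"os"

	"github.com/stashapp/stash/pkg/ffmpeg"
)

func main() {
	// Encoder is now just the ffmpeg binary path; was: ffmpeg.NewEncoder(ffmpegPath)
	encoder := ffmpeg.Encoder("/usr/local/bin/ffmpeg") // assumed path

	data, err := os.ReadFile("example.jpg") // hypothetical input image
	if err != nil {
		fmt.Println(err)
		return
	}

	format := "jpeg"
	// ImageThumbnail (added in pkg/ffmpeg/image.go in this patch) pipes the
	// image through stdin, which the reworked run() supports.
	thumb, err := encoder.ImageThumbnail(bytes.NewBuffer(data), &format, 512, "example.jpg")
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("thumbnail: %d bytes\n", len(thumb))
}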
+func (e *Encoder) run(sourcePath string, args []string, stdin io.Reader) (string, error) { + cmd := exec.Command(string(*e), args...) var stdout, stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr + cmd.Stdin = stdin if err := cmd.Start(); err != nil { return "", err } - registerRunningEncoder(probeResult.Path, cmd.Process) - err := waitAndDeregister(probeResult.Path, cmd) + var err error + if sourcePath != "" { + registerRunningEncoder(sourcePath, cmd.Process) + err = waitAndDeregister(sourcePath, cmd) + } else { + err = cmd.Wait() + } if err != nil { // error message should be in the stderr stream diff --git a/pkg/ffmpeg/encoder_marker.go b/pkg/ffmpeg/encoder_marker.go index 73ef8e790..540b8e6c3 100644 --- a/pkg/ffmpeg/encoder_marker.go +++ b/pkg/ffmpeg/encoder_marker.go @@ -34,7 +34,7 @@ func (e *Encoder) SceneMarkerVideo(probeResult VideoFile, options SceneMarkerOpt "-strict", "-2", options.OutputPath, } - _, err := e.run(probeResult, args) + _, err := e.run(probeResult.Path, args, nil) return err } @@ -55,6 +55,6 @@ func (e *Encoder) SceneMarkerImage(probeResult VideoFile, options SceneMarkerOpt "-an", options.OutputPath, } - _, err := e.run(probeResult, args) + _, err := e.run(probeResult.Path, args, nil) return err } diff --git a/pkg/ffmpeg/encoder_scene_preview_chunk.go b/pkg/ffmpeg/encoder_scene_preview_chunk.go index a0b53a93f..3d8ea6062 100644 --- a/pkg/ffmpeg/encoder_scene_preview_chunk.go +++ b/pkg/ffmpeg/encoder_scene_preview_chunk.go @@ -89,7 +89,7 @@ func (e *Encoder) ScenePreviewVideoChunk(probeResult VideoFile, options ScenePre args3 = append(args3, argsAudio...) finalArgs := append(args3, options.OutputPath) - _, err := e.run(probeResult, finalArgs) + _, err := e.run(probeResult.Path, finalArgs, nil) return err } @@ -102,7 +102,7 @@ func (e *Encoder) ScenePreviewVideoChunkCombine(probeResult VideoFile, concatFil "-c", "copy", outputPath, } - _, err := e.run(probeResult, args) + _, err := e.run(probeResult.Path, args, nil) return err } @@ -122,6 +122,6 @@ func (e *Encoder) ScenePreviewVideoToImage(probeResult VideoFile, width int, vid "-an", outputPath, } - _, err := e.run(probeResult, args) + _, err := e.run(probeResult.Path, args, nil) return err } diff --git a/pkg/ffmpeg/encoder_screenshot.go b/pkg/ffmpeg/encoder_screenshot.go index bd52273c8..636092ac0 100644 --- a/pkg/ffmpeg/encoder_screenshot.go +++ b/pkg/ffmpeg/encoder_screenshot.go @@ -28,7 +28,7 @@ func (e *Encoder) Screenshot(probeResult VideoFile, options ScreenshotOptions) e "-f", "image2", options.OutputPath, } - _, err := e.run(probeResult, args) + _, err := e.run(probeResult.Path, args, nil) return err } diff --git a/pkg/ffmpeg/encoder_sprite_screenshot.go b/pkg/ffmpeg/encoder_sprite_screenshot.go index c1a87788e..cba560430 100644 --- a/pkg/ffmpeg/encoder_sprite_screenshot.go +++ b/pkg/ffmpeg/encoder_sprite_screenshot.go @@ -22,7 +22,7 @@ func (e *Encoder) SpriteScreenshot(probeResult VideoFile, options SpriteScreensh "-f", "rawvideo", "-", } - data, err := e.run(probeResult, args) + data, err := e.run(probeResult.Path, args, nil) if err != nil { return nil, err } diff --git a/pkg/ffmpeg/ffprobe.go b/pkg/ffmpeg/ffprobe.go index ed276f14a..b8cdfbbbe 100644 --- a/pkg/ffmpeg/ffprobe.go +++ b/pkg/ffmpeg/ffprobe.go @@ -221,14 +221,17 @@ type VideoFile struct { AudioCodec string } +// FFProbe +type FFProbe string + // Execute exec command and bind result to struct. 
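A minimal call-site sketch, not part of the patch, for the refactored FFProbe type introduced above: the binary path moves into the FFProbe string and NewVideoFile becomes a method, matching the updated callers in routes_scene.go and the generator tasks elsewhere in this patch. The paths used here are assumptions.

package main

import (
	"fmt"

	"github.com/stashapp/stash/pkg/ffmpeg"
)

func main() {
	// FFProbe now holds the ffprobe binary path, and NewVideoFile is a
	// method on it rather than a package-level function.
	ffprobe := ffmpeg.FFProbe("/usr/local/bin/ffprobe") // assumed path

	// was: ffmpeg.NewVideoFile(ffprobePath, videoPath, false)
	videoFile, err := ffprobe.NewVideoFile("example.mp4", false) // hypothetical file
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println(videoFile.AudioCodec)
}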
-func NewVideoFile(ffprobePath string, videoPath string, stripExt bool) (*VideoFile, error) { +func (f *FFProbe) NewVideoFile(videoPath string, stripExt bool) (*VideoFile, error) { args := []string{"-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", "-show_error", videoPath} //// Extremely slow on windows for some reason //if runtime.GOOS != "windows" { // args = append(args, "-count_frames") //} - out, err := exec.Command(ffprobePath, args...).Output() + out, err := exec.Command(string(*f), args...).Output() if err != nil { return nil, fmt.Errorf("FFProbe encountered an error with <%s>.\nError JSON:\n%s\nError: %s", videoPath, string(out), err.Error()) diff --git a/pkg/ffmpeg/image.go b/pkg/ffmpeg/image.go new file mode 100644 index 000000000..85fb0e646 --- /dev/null +++ b/pkg/ffmpeg/image.go @@ -0,0 +1,35 @@ +package ffmpeg + +import ( + "bytes" + "errors" + "fmt" +) + +func (e *Encoder) ImageThumbnail(image *bytes.Buffer, format *string, maxDimensions int, path string) ([]byte, error) { + // ffmpeg spends a long sniffing image format when data is piped through stdio, so we pass the format explicitly instead + ffmpegformat := "" + if format != nil && *format == "jpeg" { + ffmpegformat = "mjpeg" + } else if format != nil && *format == "png" { + ffmpegformat = "png_pipe" + } else if format != nil && *format == "webp" { + ffmpegformat = "webp_pipe" + } else { + return nil, errors.New("unsupported image format") + } + + args := []string{ + "-f", ffmpegformat, + "-i", "-", + "-vf", fmt.Sprintf("scale=%v:%v:force_original_aspect_ratio=decrease", maxDimensions, maxDimensions), + "-c:v", "mjpeg", + "-q:v", "5", + "-f", "image2pipe", + "-", + } + + data, err := e.run(path, args, image) + + return []byte(data), err +} diff --git a/pkg/ffmpeg/stream.go b/pkg/ffmpeg/stream.go index f99c3c95c..1f4d4960e 100644 --- a/pkg/ffmpeg/stream.go +++ b/pkg/ffmpeg/stream.go @@ -205,7 +205,7 @@ func (e *Encoder) GetTranscodeStream(options TranscodeStreamOptions) (*Stream, e func (e *Encoder) stream(probeResult VideoFile, options TranscodeStreamOptions) (*Stream, error) { args := options.getStreamArgs() - cmd := exec.Command(e.Path, args...) + cmd := exec.Command(string(*e), args...) 
logger.Debugf("Streaming via: %s", strings.Join(cmd.Args, " ")) stdout, err := cmd.StdoutPipe() diff --git a/pkg/file/file.go b/pkg/file/file.go new file mode 100644 index 000000000..397dabd6d --- /dev/null +++ b/pkg/file/file.go @@ -0,0 +1,31 @@ +package file + +import ( + "io" + "io/fs" + "os" +) + +type fsFile struct { + path string + info fs.FileInfo +} + +func (f *fsFile) Open() (io.ReadCloser, error) { + return os.Open(f.path) +} + +func (f *fsFile) Path() string { + return f.path +} + +func (f *fsFile) FileInfo() fs.FileInfo { + return f.info +} + +func FSFile(path string, info fs.FileInfo) SourceFile { + return &fsFile{ + path: path, + info: info, + } +} diff --git a/pkg/file/hash.go b/pkg/file/hash.go new file mode 100644 index 000000000..630ffcb6f --- /dev/null +++ b/pkg/file/hash.go @@ -0,0 +1,17 @@ +package file + +import ( + "io" + + "github.com/stashapp/stash/pkg/utils" +) + +type FSHasher struct{} + +func (h *FSHasher) OSHash(src io.ReadSeeker, size int64) (string, error) { + return utils.OSHashFromReader(src, size) +} + +func (h *FSHasher) MD5(src io.Reader) (string, error) { + return utils.MD5FromReader(src) +} diff --git a/pkg/file/scan.go b/pkg/file/scan.go new file mode 100644 index 000000000..0d8ad4632 --- /dev/null +++ b/pkg/file/scan.go @@ -0,0 +1,175 @@ +package file + +import ( + "fmt" + "io" + "io/fs" + "strconv" + "time" + + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/models" +) + +type SourceFile interface { + Open() (io.ReadCloser, error) + Path() string + FileInfo() fs.FileInfo +} + +type FileBased interface { + File() models.File +} + +type Hasher interface { + OSHash(src io.ReadSeeker, size int64) (string, error) + MD5(src io.Reader) (string, error) +} + +type Scanned struct { + Old *models.File + New *models.File +} + +// FileUpdated returns true if both old and new files are present and not equal. +func (s Scanned) FileUpdated() bool { + if s.Old == nil || s.New == nil { + return false + } + + return !s.Old.Equal(*s.New) +} + +// ContentsChanged returns true if both old and new files are present and the file content is different. 
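A minimal sketch, not part of the patch, of how the new pieces of pkg/file fit together: FSFile and FSHasher above, plus the Scanner defined just below, are combined much as the gallery and image scanners later in this patch do via their FileScanner helpers. The example path is an assumption.

package main

import (
	"fmt"
	"os"

	"github.com/stashapp/stash/pkg/file"
)

func main() {
	path := "example.mp4" // hypothetical file

	info, err := os.Stat(path)
	if err != nil {
		fmt.Println(err)
		return
	}

	// Scanner hashes a SourceFile and returns a models.File; FSFile and
	// FSHasher are the plain-filesystem implementations added in this patch.
	scanner := file.Scanner{
		Hasher:          &file.FSHasher{},
		CalculateOSHash: true,
		CalculateMD5:    true,
	}

	f, err := scanner.ScanNew(file.FSFile(path, info))
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println(f.Path, f.Size, f.OSHash, f.Checksum)
}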
+func (s Scanned) ContentsChanged() bool { + if s.Old == nil || s.New == nil { + return false + } + + if s.Old.Checksum != s.New.Checksum { + return true + } + + if s.Old.OSHash != s.New.OSHash { + return true + } + + return false +} + +type Scanner struct { + Hasher Hasher + + CalculateMD5 bool + CalculateOSHash bool +} + +func (o Scanner) ScanExisting(existing FileBased, file SourceFile) (h *Scanned, err error) { + info := file.FileInfo() + h = &Scanned{} + + existingFile := existing.File() + h.Old = &existingFile + + updatedFile := existingFile + h.New = &updatedFile + + // update existing data if needed + // truncate to seconds, since we don't store beyond that in the database + updatedFile.FileModTime = info.ModTime().Truncate(time.Second) + + modTimeChanged := !existingFile.FileModTime.Equal(updatedFile.FileModTime) + + // regenerate hash(es) if missing or file mod time changed + if _, err = o.generateHashes(&updatedFile, file, modTimeChanged); err != nil { + return nil, err + } + + // notify of changes as needed + // object exists, no further processing required + return +} + +func (o Scanner) ScanNew(file SourceFile) (*models.File, error) { + info := file.FileInfo() + sizeStr := strconv.FormatInt(info.Size(), 10) + modTime := info.ModTime() + f := models.File{ + Path: file.Path(), + Size: sizeStr, + FileModTime: modTime, + } + + if _, err := o.generateHashes(&f, file, true); err != nil { + return nil, err + } + + return &f, nil +} + +// generateHashes regenerates and sets the hashes in the provided File. +// It will not recalculate unless specified. +func (o Scanner) generateHashes(f *models.File, file SourceFile, regenerate bool) (changed bool, err error) { + existing := *f + + var src io.ReadCloser + if o.CalculateOSHash && (regenerate || f.OSHash == "") { + logger.Infof("Calculating oshash for %s ...", f.Path) + + src, err = file.Open() + if err != nil { + return false, err + } + defer src.Close() + + seekSrc, valid := src.(io.ReadSeeker) + if !valid { + return false, fmt.Errorf("invalid source file type: %s", file.Path()) + } + + // regenerate hash + var oshash string + oshash, err = o.Hasher.OSHash(seekSrc, file.FileInfo().Size()) + if err != nil { + return false, fmt.Errorf("error generating oshash for %s: %w", file.Path(), err) + } + + f.OSHash = oshash + + // reset reader to start of file + _, err = seekSrc.Seek(0, io.SeekStart) + if err != nil { + return false, fmt.Errorf("error seeking to start of file in %s: %w", file.Path(), err) + } + } + + // always generate if MD5 is nil + // only regenerate MD5 if: + // - OSHash was not calculated, or + // - existing OSHash is different to generated one + // or if it was different to the previous version + if o.CalculateMD5 && (f.Checksum == "" || (regenerate && (!o.CalculateOSHash || existing.OSHash != f.OSHash))) { + logger.Infof("Calculating checksum for %s...", f.Path) + + if src == nil { + src, err = file.Open() + if err != nil { + return false, err + } + defer src.Close() + } + + // regenerate checksum + var checksum string + checksum, err = o.Hasher.MD5(src) + if err != nil { + return + } + + f.Checksum = checksum + } + + changed = (o.CalculateOSHash && (f.OSHash != existing.OSHash)) || (o.CalculateMD5 && (f.Checksum != existing.Checksum)) + + return +} diff --git a/pkg/file/zip.go b/pkg/file/zip.go new file mode 100644 index 000000000..4ae69f27b --- /dev/null +++ b/pkg/file/zip.go @@ -0,0 +1,64 @@ +package file + +import ( + "archive/zip" + "io" + "io/fs" + "strings" +) + +const zipSeparator = "\x00" + +type zipFile struct { 
+ zipPath string + zf *zip.File +} + +func (f *zipFile) Open() (io.ReadCloser, error) { + return f.zf.Open() +} + +func (f *zipFile) Path() string { + // TODO - fix this + return ZipFilename(f.zipPath, f.zf.Name) +} + +func (f *zipFile) FileInfo() fs.FileInfo { + return f.zf.FileInfo() +} + +func ZipFile(zipPath string, zf *zip.File) SourceFile { + return &zipFile{ + zipPath: zipPath, + zf: zf, + } +} + +func ZipFilename(zipFilename, filenameInZip string) string { + return zipFilename + zipSeparator + filenameInZip +} + +// IsZipPath returns true if the path includes the zip separator byte, +// indicating it is within a zip file. +func IsZipPath(p string) bool { + return strings.Contains(p, zipSeparator) +} + +// ZipPathDisplayName converts an zip path for display. It translates the zip +// file separator character into '/', since this character is also used for +// path separators within zip files. It returns the original provided path +// if it does not contain the zip file separator character. +func ZipPathDisplayName(path string) string { + return strings.Replace(path, zipSeparator, "/", -1) +} + +func ZipFilePath(path string) (zipFilename, filename string) { + nullIndex := strings.Index(path, zipSeparator) + if nullIndex != -1 { + zipFilename = path[0:nullIndex] + filename = path[nullIndex+1:] + } else { + filename = path + } + return +} diff --git a/pkg/gallery/export_test.go b/pkg/gallery/export_test.go index ac46871fa..5dad35975 100644 --- a/pkg/gallery/export_test.go +++ b/pkg/gallery/export_test.go @@ -25,7 +25,7 @@ const ( const ( path = "path" - zip = true + isZip = true url = "url" checksum = "checksum" title = "title" @@ -48,7 +48,7 @@ func createFullGallery(id int) models.Gallery { return models.Gallery{ ID: id, Path: models.NullString(path), - Zip: zip, + Zip: isZip, Title: models.NullString(title), Checksum: checksum, Date: models.SQLiteDate{ @@ -72,7 +72,7 @@ func createFullJSONGallery() *jsonschema.Gallery { return &jsonschema.Gallery{ Title: title, Path: path, - Zip: zip, + Zip: isZip, Checksum: checksum, Date: date, Details: details, diff --git a/pkg/gallery/scan.go b/pkg/gallery/scan.go new file mode 100644 index 000000000..bc3a7dd27 --- /dev/null +++ b/pkg/gallery/scan.go @@ -0,0 +1,225 @@ +package gallery + +import ( + "archive/zip" + "context" + "database/sql" + "fmt" + "strings" + "time" + + "github.com/stashapp/stash/pkg/file" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager/paths" + "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/plugin" + "github.com/stashapp/stash/pkg/utils" +) + +const mutexType = "gallery" + +type Scanner struct { + file.Scanner + + ImageExtensions []string + StripFileExtension bool + Ctx context.Context + CaseSensitiveFs bool + TxnManager models.TransactionManager + Paths *paths.Paths + PluginCache *plugin.Cache + MutexManager *utils.MutexManager +} + +func FileScanner(hasher file.Hasher) file.Scanner { + return file.Scanner{ + Hasher: hasher, + CalculateMD5: true, + } +} + +func (scanner *Scanner) ScanExisting(existing file.FileBased, file file.SourceFile) (retGallery *models.Gallery, scanImages bool, err error) { + scanned, err := scanner.Scanner.ScanExisting(existing, file) + if err != nil { + return nil, false, err + } + + retGallery = existing.(*models.Gallery) + + path := scanned.New.Path + + changed := false + + if scanned.ContentsChanged() { + logger.Infof("%s has been updated: rescanning", path) + + retGallery.SetFile(*scanned.New) + changed = true + } else if 
scanned.FileUpdated() { + logger.Infof("Updated gallery file %s", path) + + retGallery.SetFile(*scanned.New) + changed = true + } + + if changed { + scanImages = true + logger.Infof("%s has been updated: rescanning", path) + + retGallery.UpdatedAt = models.SQLiteTimestamp{Timestamp: time.Now()} + + // we are operating on a checksum now, so grab a mutex on the checksum + done := make(chan struct{}) + scanner.MutexManager.Claim(mutexType, scanned.New.Checksum, done) + + if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + // free the mutex once transaction is complete + defer close(done) + + // ensure no clashes of hashes + if scanned.New.Checksum != "" && scanned.Old.Checksum != scanned.New.Checksum { + dupe, _ := r.Gallery().FindByChecksum(retGallery.Checksum) + if dupe != nil { + return fmt.Errorf("MD5 for file %s is the same as that of %s", path, dupe.Path.String) + } + } + + retGallery, err = r.Gallery().Update(*retGallery) + return err + }); err != nil { + return nil, false, err + } + + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, retGallery.ID, plugin.GalleryUpdatePost, nil, nil) + } + + return +} + +func (scanner *Scanner) ScanNew(file file.SourceFile) (retGallery *models.Gallery, scanImages bool, err error) { + scanned, err := scanner.Scanner.ScanNew(file) + if err != nil { + return nil, false, err + } + + path := file.Path() + checksum := scanned.Checksum + isNewGallery := false + isUpdatedGallery := false + var g *models.Gallery + + // grab a mutex on the checksum + done := make(chan struct{}) + scanner.MutexManager.Claim(mutexType, checksum, done) + defer close(done) + + if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + qb := r.Gallery() + + g, _ = qb.FindByChecksum(checksum) + if g != nil { + exists, _ := utils.FileExists(g.Path.String) + if !scanner.CaseSensitiveFs { + // #1426 - if file exists but is a case-insensitive match for the + // original filename, then treat it as a move + if exists && strings.EqualFold(path, g.Path.String) { + exists = false + } + } + + if exists { + logger.Infof("%s already exists. Duplicate of %s ", path, g.Path.String) + } else { + logger.Infof("%s already exists. Updating path...", path) + g.Path = sql.NullString{ + String: path, + Valid: true, + } + g, err = qb.Update(*g) + if err != nil { + return err + } + + isUpdatedGallery = true + } + } else { + // don't create gallery if it has no images + if scanner.hasImages(path) { + currentTime := time.Now() + + g = &models.Gallery{ + Zip: true, + Title: sql.NullString{ + String: utils.GetNameFromPath(path, scanner.StripFileExtension), + Valid: true, + }, + CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, + UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, + } + + g.SetFile(*scanned) + + // only warn when creating the gallery + ok, err := utils.IsZipFileUncompressed(path) + if err == nil && !ok { + logger.Warnf("%s is using above store (0) level compression.", path) + } + + logger.Infof("%s doesn't exist. 
Creating new item...", path) + g, err = qb.Create(*g) + if err != nil { + return err + } + + scanImages = true + isNewGallery = true + } + } + + return nil + }); err != nil { + return nil, false, err + } + + if isNewGallery { + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, g.ID, plugin.GalleryCreatePost, nil, nil) + } else if isUpdatedGallery { + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, g.ID, plugin.GalleryUpdatePost, nil, nil) + } + + scanImages = isNewGallery + retGallery = g + + return +} + +func (scanner *Scanner) isImage(pathname string) bool { + return utils.MatchExtension(pathname, scanner.ImageExtensions) +} + +func (scanner *Scanner) hasImages(path string) bool { + readCloser, err := zip.OpenReader(path) + if err != nil { + logger.Warnf("Error while walking gallery zip: %v", err) + return false + } + defer readCloser.Close() + + for _, file := range readCloser.File { + if file.FileInfo().IsDir() { + continue + } + + if strings.Contains(file.Name, "__MACOSX") { + continue + } + + if !scanner.isImage(file.Name) { + continue + } + + return true + } + + return false +} diff --git a/pkg/image/image.go b/pkg/image/image.go index 486c65ef4..392c9124a 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -12,14 +12,13 @@ import ( "strings" "time" + "github.com/stashapp/stash/pkg/file" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/utils" _ "golang.org/x/image/webp" ) -const zipSeparator = "\x00" - func GetSourceImage(i *models.Image) (image.Image, error) { f, err := openSourceImage(i.Path) if err != nil { @@ -67,17 +66,6 @@ func FileExists(path string) bool { return true } -func ZipFilename(zipFilename, filenameInZip string) string { - return zipFilename + zipSeparator + filenameInZip -} - -// IsZipPath returns true if the path includes the zip separator byte, -// indicating it is within a zip file. -// TODO - this should be moved to utils -func IsZipPath(p string) bool { - return strings.Contains(p, zipSeparator) -} - type imageReadCloser struct { src io.ReadCloser zrc *zip.ReadCloser @@ -102,7 +90,7 @@ func (i *imageReadCloser) Close() error { func openSourceImage(path string) (io.ReadCloser, error) { // may need to read from a zip file - zipFilename, filename := getFilePath(path) + zipFilename, filename := file.ZipFilePath(path) if zipFilename != "" { r, err := zip.OpenReader(zipFilename) if err != nil { @@ -134,17 +122,6 @@ func openSourceImage(path string) (io.ReadCloser, error) { return os.Open(filename) } -func getFilePath(path string) (zipFilename, filename string) { - nullIndex := strings.Index(path, zipSeparator) - if nullIndex != -1 { - zipFilename = path[0:nullIndex] - filename = path[nullIndex+1:] - } else { - filename = path - } - return -} - // GetFileDetails returns a pointer to an Image object with the // width, height and size populated. func GetFileDetails(path string) (*models.Image, error) { @@ -203,7 +180,7 @@ func GetFileModTime(path string) (time.Time, error) { func stat(path string) (os.FileInfo, error) { // may need to read from a zip file - zipFilename, filename := getFilePath(path) + zipFilename, filename := file.ZipFilePath(path) if zipFilename != "" { r, err := zip.OpenReader(zipFilename) if err != nil { @@ -224,16 +201,8 @@ func stat(path string) (os.FileInfo, error) { return os.Stat(filename) } -// PathDisplayName converts an image path for display. 
It translates the zip -// file separator character into '/', since this character is also used for -// path separators within zip files. It returns the original provided path -// if it does not contain the zip file separator character. -func PathDisplayName(path string) string { - return strings.Replace(path, zipSeparator, "/", -1) -} - func Serve(w http.ResponseWriter, r *http.Request, path string) { - zipFilename, _ := getFilePath(path) + zipFilename, _ := file.ZipFilePath(path) w.Header().Add("Cache-Control", "max-age=604800000") // 1 Week if zipFilename == "" { http.ServeFile(w, r, path) @@ -259,7 +228,7 @@ func Serve(w http.ResponseWriter, r *http.Request, path string) { } func IsCover(img *models.Image) bool { - _, fn := getFilePath(img.Path) + _, fn := file.ZipFilePath(img.Path) return strings.HasSuffix(fn, "cover.jpg") } @@ -268,13 +237,13 @@ func GetTitle(s *models.Image) string { return s.Title.String } - _, fn := getFilePath(s.Path) + _, fn := file.ZipFilePath(s.Path) return filepath.Base(fn) } // GetFilename gets the base name of the image file // If stripExt is set the file extension is omitted from the name func GetFilename(s *models.Image, stripExt bool) string { - _, fn := getFilePath(s.Path) + _, fn := file.ZipFilePath(s.Path) return utils.GetNameFromPath(fn, stripExt) } diff --git a/pkg/image/scan.go b/pkg/image/scan.go new file mode 100644 index 000000000..15424f29a --- /dev/null +++ b/pkg/image/scan.go @@ -0,0 +1,192 @@ +package image + +import ( + "context" + "fmt" + "os" + "strings" + "time" + + "github.com/stashapp/stash/pkg/file" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager/paths" + "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/plugin" + "github.com/stashapp/stash/pkg/utils" +) + +const mutexType = "image" + +type Scanner struct { + file.Scanner + + StripFileExtension bool + + Ctx context.Context + CaseSensitiveFs bool + TxnManager models.TransactionManager + Paths *paths.Paths + PluginCache *plugin.Cache + MutexManager *utils.MutexManager +} + +func FileScanner(hasher file.Hasher) file.Scanner { + return file.Scanner{ + Hasher: hasher, + CalculateMD5: true, + } +} + +func (scanner *Scanner) ScanExisting(existing file.FileBased, file file.SourceFile) (retImage *models.Image, err error) { + scanned, err := scanner.Scanner.ScanExisting(existing, file) + if err != nil { + return nil, err + } + + i := existing.(*models.Image) + + path := scanned.New.Path + oldChecksum := i.Checksum + changed := false + + if scanned.ContentsChanged() { + logger.Infof("%s has been updated: rescanning", path) + + // regenerate the file details as well + if err := SetFileDetails(i); err != nil { + return nil, err + } + + changed = true + } else if scanned.FileUpdated() { + logger.Infof("Updated image file %s", path) + + changed = true + } + + if changed { + i.SetFile(*scanned.New) + i.UpdatedAt = models.SQLiteTimestamp{Timestamp: time.Now()} + + // we are operating on a checksum now, so grab a mutex on the checksum + done := make(chan struct{}) + scanner.MutexManager.Claim(mutexType, scanned.New.Checksum, done) + + if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + // free the mutex once transaction is complete + defer close(done) + var err error + + // ensure no clashes of hashes + if scanned.New.Checksum != "" && scanned.Old.Checksum != scanned.New.Checksum { + dupe, _ := r.Image().FindByChecksum(i.Checksum) + if dupe != nil { + return fmt.Errorf("MD5 for file %s is the same as that of 
%s", path, dupe.Path) + } + } + + retImage, err = r.Image().UpdateFull(*i) + return err + }); err != nil { + return nil, err + } + + // remove the old thumbnail if the checksum changed - we'll regenerate it + if oldChecksum != scanned.New.Checksum { + // remove cache dir of gallery + err = os.Remove(scanner.Paths.Generated.GetThumbnailPath(oldChecksum, models.DefaultGthumbWidth)) + if err != nil { + logger.Errorf("Error deleting thumbnail image: %s", err) + } + } + + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, retImage.ID, plugin.ImageUpdatePost, nil, nil) + } + + return +} + +func (scanner *Scanner) ScanNew(f file.SourceFile) (retImage *models.Image, err error) { + scanned, err := scanner.Scanner.ScanNew(f) + if err != nil { + return nil, err + } + + path := f.Path() + checksum := scanned.Checksum + + // grab a mutex on the checksum + done := make(chan struct{}) + scanner.MutexManager.Claim(mutexType, checksum, done) + defer close(done) + + // check for image by checksum + var existingImage *models.Image + if err := scanner.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + var err error + existingImage, err = r.Image().FindByChecksum(checksum) + return err + }); err != nil { + return nil, err + } + + pathDisplayName := file.ZipPathDisplayName(path) + + if existingImage != nil { + exists := FileExists(existingImage.Path) + if !scanner.CaseSensitiveFs { + // #1426 - if file exists but is a case-insensitive match for the + // original filename, then treat it as a move + if exists && strings.EqualFold(path, existingImage.Path) { + exists = false + } + } + + if exists { + logger.Infof("%s already exists. Duplicate of %s ", pathDisplayName, file.ZipPathDisplayName(existingImage.Path)) + return nil, nil + } else { + logger.Infof("%s already exists. Updating path...", pathDisplayName) + imagePartial := models.ImagePartial{ + ID: existingImage.ID, + Path: &path, + } + + if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + retImage, err = r.Image().Update(imagePartial) + return err + }); err != nil { + return nil, err + } + + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, existingImage.ID, plugin.ImageUpdatePost, nil, nil) + } + } else { + logger.Infof("%s doesn't exist. 
Creating new item...", pathDisplayName) + currentTime := time.Now() + newImage := models.Image{ + CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, + UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, + } + newImage.SetFile(*scanned) + newImage.Title.String = GetFilename(&newImage, scanner.StripFileExtension) + newImage.Title.Valid = true + + if err := SetFileDetails(&newImage); err != nil { + logger.Error(err.Error()) + return nil, err + } + + if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + var err error + retImage, err = r.Image().Create(newImage) + return err + }); err != nil { + return nil, err + } + + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, retImage.ID, plugin.ImageCreatePost, nil, nil) + } + + return +} diff --git a/pkg/image/thumbnail.go b/pkg/image/thumbnail.go index bb4cac743..6cc49b990 100644 --- a/pkg/image/thumbnail.go +++ b/pkg/image/thumbnail.go @@ -2,14 +2,11 @@ package image import ( "bytes" - "errors" - "fmt" "os/exec" "runtime" - "strings" "sync" - "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/models" ) @@ -17,8 +14,8 @@ var vipsPath string var once sync.Once type ThumbnailEncoder struct { - FFMPEGPath string - VipsPath string + ffmpeg ffmpeg.Encoder + vips *vipsEncoder } func GetVipsPath() string { @@ -28,11 +25,18 @@ func GetVipsPath() string { return vipsPath } -func NewThumbnailEncoder(ffmpegPath string) ThumbnailEncoder { - return ThumbnailEncoder{ - FFMPEGPath: ffmpegPath, - VipsPath: GetVipsPath(), +func NewThumbnailEncoder(ffmpegEncoder ffmpeg.Encoder) ThumbnailEncoder { + ret := ThumbnailEncoder{ + ffmpeg: ffmpegEncoder, } + + vipsPath := GetVipsPath() + if vipsPath != "" { + vipsEncoder := vipsEncoder(vipsPath) + ret.vips = &vipsEncoder + } + + return ret } // GetThumbnail returns the thumbnail image of the provided image resized to @@ -60,72 +64,9 @@ func (e *ThumbnailEncoder) GetThumbnail(img *models.Image, maxSize int) ([]byte, } // vips has issues loading files from stdin on Windows - if e.VipsPath != "" && runtime.GOOS != "windows" { - return e.getVipsThumbnail(buf, maxSize) + if e.vips != nil && runtime.GOOS != "windows" { + return e.vips.ImageThumbnail(buf, maxSize) } else { - return e.getFFMPEGThumbnail(buf, format, maxSize, img.Path) + return e.ffmpeg.ImageThumbnail(buf, format, maxSize, img.Path) } } - -func (e *ThumbnailEncoder) getVipsThumbnail(image *bytes.Buffer, maxSize int) ([]byte, error) { - args := []string{ - "thumbnail_source", - "[descriptor=0]", - ".jpg[Q=70,strip]", - fmt.Sprint(maxSize), - "--size", "down", - } - data, err := e.run(e.VipsPath, args, image) - - return []byte(data), err -} - -func (e *ThumbnailEncoder) getFFMPEGThumbnail(image *bytes.Buffer, format *string, maxDimensions int, path string) ([]byte, error) { - // ffmpeg spends a long sniffing image format when data is piped through stdio, so we pass the format explicitly instead - ffmpegformat := "" - if format != nil && *format == "jpeg" { - ffmpegformat = "mjpeg" - } else if format != nil && *format == "png" { - ffmpegformat = "png_pipe" - } else if format != nil && *format == "webp" { - ffmpegformat = "webp_pipe" - } else { - return nil, errors.New("unsupported image format") - } - - args := []string{ - "-f", ffmpegformat, - "-i", "-", - "-vf", fmt.Sprintf("scale=%v:%v:force_original_aspect_ratio=decrease", maxDimensions, maxDimensions), - "-c:v", "mjpeg", - "-q:v", "5", - "-f", "image2pipe", - "-", - } - data, err := e.run(e.FFMPEGPath, 
args, image) - - return []byte(data), err -} - -func (e *ThumbnailEncoder) run(path string, args []string, stdin *bytes.Buffer) (string, error) { - cmd := exec.Command(path, args...) - - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - cmd.Stdin = stdin - - if err := cmd.Start(); err != nil { - return "", err - } - - err := cmd.Wait() - - if err != nil { - // error message should be in the stderr stream - logger.Errorf("image encoder error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderr.String()) - return stdout.String(), err - } - - return stdout.String(), nil -} diff --git a/pkg/image/vips.go b/pkg/image/vips.go new file mode 100644 index 000000000..061afa5f8 --- /dev/null +++ b/pkg/image/vips.go @@ -0,0 +1,48 @@ +package image + +import ( + "bytes" + "fmt" + "os/exec" + "strings" + + "github.com/stashapp/stash/pkg/logger" +) + +type vipsEncoder string + +func (e *vipsEncoder) ImageThumbnail(image *bytes.Buffer, maxSize int) ([]byte, error) { + args := []string{ + "thumbnail_source", + "[descriptor=0]", + ".jpg[Q=70,strip]", + fmt.Sprint(maxSize), + "--size", "down", + } + data, err := e.run(args, image) + + return []byte(data), err +} + +func (e *vipsEncoder) run(args []string, stdin *bytes.Buffer) (string, error) { + cmd := exec.Command(string(*e), args...) + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + cmd.Stdin = stdin + + if err := cmd.Start(); err != nil { + return "", err + } + + err := cmd.Wait() + + if err != nil { + // error message should be in the stderr stream + logger.Errorf("image encoder error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderr.String()) + return stdout.String(), err + } + + return stdout.String(), nil +} diff --git a/pkg/job/manager.go b/pkg/job/manager.go index 233818483..ed4dda133 100644 --- a/pkg/job/manager.go +++ b/pkg/job/manager.go @@ -9,7 +9,7 @@ import ( ) const maxGraveyardSize = 10 -const defaultThrottleLimit = time.Second +const defaultThrottleLimit = 100 * time.Millisecond // Manager maintains a queue of jobs. Jobs are executed one at a time. type Manager struct { diff --git a/pkg/job/progress.go b/pkg/job/progress.go index 4dd8928e9..0c1d3c5cb 100644 --- a/pkg/job/progress.go +++ b/pkg/job/progress.go @@ -92,13 +92,13 @@ func (p *Progress) SetPercent(percent float64) { p.updated() } -// Increment increments the number of processed work units, if this does not -// exceed the total units. This is used to calculate the percentage. +// Increment increments the number of processed work units. This is used to calculate the percentage. +// If total is set already, then the number of processed work units will not exceed the total. 
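A minimal sketch, not part of the patch, of why Increment is relaxed here (and why the throttle above drops to 100ms): the scan job now counts files concurrently and only calls SetTotal once counting finishes, while worker goroutines may already be incrementing. countAndScan and its items slice are hypothetical placeholders for that flow.

package main

import (
	"github.com/stashapp/stash/pkg/job"
)

// countAndScan mirrors the reworked scan job: counting runs in a goroutine
// that calls SetTotal when done, while the main loop increments immediately.
func countAndScan(progress *job.Progress, items []string) {
	go func() {
		// the total is only known after counting finishes
		total := 0
		for range items {
			total++
		}
		progress.SetTotal(total)
	}()

	for range items {
		// work units can now be counted before the total is set
		progress.Increment()
	}
}

func main() {}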
func (p *Progress) Increment() { p.mutex.Lock() defer p.mutex.Unlock() - if p.processed < p.total { + if p.total <= 0 || p.processed < p.total { p.processed++ p.calculatePercent() } diff --git a/pkg/manager/exclude_files.go b/pkg/manager/exclude_files.go index acc390a8e..333a3b151 100644 --- a/pkg/manager/exclude_files.go +++ b/pkg/manager/exclude_files.go @@ -1,7 +1,6 @@ package manager import ( - "path/filepath" "regexp" "strings" @@ -86,14 +85,3 @@ func matchFileSimple(file string, regExps []*regexp.Regexp) bool { } return false } - -func matchExtension(path string, extensions []string) bool { - ext := filepath.Ext(path) - for _, e := range extensions { - if strings.EqualFold(ext, "."+e) { - return true - } - } - - return false -} diff --git a/pkg/manager/generator.go b/pkg/manager/generator.go index 4deec7bf6..c7aed9716 100644 --- a/pkg/manager/generator.go +++ b/pkg/manager/generator.go @@ -71,7 +71,7 @@ func (g *GeneratorInfo) calculateFrameRate(videoStream *ffmpeg.FFProbeStream) er args = append(args, "/dev/null") } - command := exec.Command(instance.FFMPEGPath, args...) + command := exec.Command(string(instance.FFMPEG), args...) var stdErrBuffer bytes.Buffer command.Stderr = &stdErrBuffer // Frames go to stderr rather than stdout if err := command.Run(); err == nil { diff --git a/pkg/manager/generator_phash.go b/pkg/manager/generator_phash.go index 5ea390452..367dc8a0c 100644 --- a/pkg/manager/generator_phash.go +++ b/pkg/manager/generator_phash.go @@ -42,7 +42,7 @@ func NewPhashGenerator(videoFile ffmpeg.VideoFile, checksum string) (*PhashGener } func (g *PhashGenerator) Generate() (*uint64, error) { - encoder := ffmpeg.NewEncoder(instance.FFMPEGPath) + encoder := instance.FFMPEG sprite, err := g.generateSprite(&encoder) if err != nil { diff --git a/pkg/manager/generator_preview.go b/pkg/manager/generator_preview.go index 899e91df9..56ad6725c 100644 --- a/pkg/manager/generator_preview.go +++ b/pkg/manager/generator_preview.go @@ -57,7 +57,7 @@ func (g *PreviewGenerator) Generate() error { return err } - encoder := ffmpeg.NewEncoder(instance.FFMPEGPath) + encoder := instance.FFMPEG if g.GenerateVideo { if err := g.generateVideo(&encoder, false); err != nil { logger.Warnf("[generator] failed generating scene preview, trying fallback") diff --git a/pkg/manager/generator_sprite.go b/pkg/manager/generator_sprite.go index 2aebb827e..764df5033 100644 --- a/pkg/manager/generator_sprite.go +++ b/pkg/manager/generator_sprite.go @@ -53,7 +53,7 @@ func NewSpriteGenerator(videoFile ffmpeg.VideoFile, videoChecksum string, imageO } func (g *SpriteGenerator) Generate() error { - encoder := ffmpeg.NewEncoder(instance.FFMPEGPath) + encoder := instance.FFMPEG if err := g.generateSpriteImage(&encoder); err != nil { return err diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index a65eb7809..29519ae81 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -30,8 +30,8 @@ type singleton struct { Paths *paths.Paths - FFMPEGPath string - FFProbePath string + FFMPEG ffmpeg.Encoder + FFProbe ffmpeg.FFProbe SessionStore *session.Store @@ -184,8 +184,8 @@ func initFFMPEG() error { } } - instance.FFMPEGPath = ffmpegPath - instance.FFProbePath = ffprobePath + instance.FFMPEG = ffmpeg.Encoder(ffmpegPath) + instance.FFProbe = ffmpeg.FFProbe(ffprobePath) } return nil @@ -346,7 +346,7 @@ func (s *singleton) Setup(ctx context.Context, input models.SetupInput) error { } func (s *singleton) validateFFMPEG() error { - if s.FFMPEGPath == "" || s.FFProbePath == "" { + if s.FFMPEG == "" || 
s.FFProbe == "" { return errors.New("missing ffmpeg and/or ffprobe") } diff --git a/pkg/manager/manager_tasks.go b/pkg/manager/manager_tasks.go index 4f6bedf63..0dc697279 100644 --- a/pkg/manager/manager_tasks.go +++ b/pkg/manager/manager_tasks.go @@ -19,17 +19,17 @@ import ( func isGallery(pathname string) bool { gExt := config.GetInstance().GetGalleryExtensions() - return matchExtension(pathname, gExt) + return utils.MatchExtension(pathname, gExt) } func isVideo(pathname string) bool { vidExt := config.GetInstance().GetVideoExtensions() - return matchExtension(pathname, vidExt) + return utils.MatchExtension(pathname, vidExt) } func isImage(pathname string) bool { imgExt := config.GetInstance().GetImageExtensions() - return matchExtension(pathname, imgExt) + return utils.MatchExtension(pathname, imgExt) } func getScanPaths(inputPaths []string) []*models.StashConfig { diff --git a/pkg/manager/scene.go b/pkg/manager/scene.go index 2a2ce4ab9..ffbb98c27 100644 --- a/pkg/manager/scene.go +++ b/pkg/manager/scene.go @@ -192,7 +192,8 @@ func GetSceneFileContainer(scene *models.Scene) (ffmpeg.Container, error) { container = ffmpeg.Container(scene.Format.String) } else { // container isn't in the DB // shouldn't happen, fallback to ffprobe - tmpVideoFile, err := ffmpeg.NewVideoFile(GetInstance().FFProbePath, scene.Path, false) + ffprobe := GetInstance().FFProbe + tmpVideoFile, err := ffprobe.NewVideoFile(scene.Path, false) if err != nil { return ffmpeg.Container(""), fmt.Errorf("error reading video file: %v", err) } diff --git a/pkg/manager/screenshot.go b/pkg/manager/screenshot.go index 739162491..9167d1d81 100644 --- a/pkg/manager/screenshot.go +++ b/pkg/manager/screenshot.go @@ -6,7 +6,7 @@ import ( ) func makeScreenshot(probeResult ffmpeg.VideoFile, outputPath string, quality int, width int, time float64) { - encoder := ffmpeg.NewEncoder(instance.FFMPEGPath) + encoder := instance.FFMPEG options := ffmpeg.ScreenshotOptions{ OutputPath: outputPath, Quality: quality, diff --git a/pkg/manager/task_clean.go b/pkg/manager/task_clean.go index b4b969647..e1235443b 100644 --- a/pkg/manager/task_clean.go +++ b/pkg/manager/task_clean.go @@ -294,7 +294,7 @@ func (j *cleanJob) shouldCleanScene(s *models.Scene) bool { } config := config.GetInstance() - if !matchExtension(s.Path, config.GetVideoExtensions()) { + if !utils.MatchExtension(s.Path, config.GetVideoExtensions()) { logger.Infof("File extension does not match video extensions. Marking to clean: \"%s\"", s.Path) return true } @@ -325,7 +325,7 @@ func (j *cleanJob) shouldCleanGallery(g *models.Gallery) bool { } config := config.GetInstance() - if !matchExtension(path, config.GetGalleryExtensions()) { + if !utils.MatchExtension(path, config.GetGalleryExtensions()) { logger.Infof("File extension does not match gallery extensions. Marking to clean: \"%s\"", path) return true } @@ -355,7 +355,7 @@ func (j *cleanJob) shouldCleanImage(s *models.Image) bool { } config := config.GetInstance() - if !matchExtension(s.Path, config.GetImageExtensions()) { + if !utils.MatchExtension(s.Path, config.GetImageExtensions()) { logger.Infof("File extension does not match image extensions. 
Marking to clean: \"%s\"", s.Path) return true } diff --git a/pkg/manager/task_generate_markers.go b/pkg/manager/task_generate_markers.go index a89a5e538..4c8a37adf 100644 --- a/pkg/manager/task_generate_markers.go +++ b/pkg/manager/task_generate_markers.go @@ -43,7 +43,8 @@ func (t *GenerateMarkersTask) Start() { return } - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + ffprobe := instance.FFProbe + videoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("error reading video file: %s", err.Error()) return @@ -68,7 +69,8 @@ func (t *GenerateMarkersTask) generateSceneMarkers() { return } - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + ffprobe := instance.FFProbe + videoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("error reading video file: %s", err.Error()) return @@ -106,7 +108,7 @@ func (t *GenerateMarkersTask) generateMarker(videoFile *ffmpeg.VideoFile, scene Width: 640, } - encoder := ffmpeg.NewEncoder(instance.FFMPEGPath) + encoder := instance.FFMPEG if t.Overwrite || !videoExists { videoFilename := baseFilename + ".mp4" diff --git a/pkg/manager/task_generate_phash.go b/pkg/manager/task_generate_phash.go index 9d2fa172f..4cf202f28 100644 --- a/pkg/manager/task_generate_phash.go +++ b/pkg/manager/task_generate_phash.go @@ -4,7 +4,6 @@ import ( "context" "database/sql" - "github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/models" ) @@ -21,7 +20,8 @@ func (t *GeneratePhashTask) Start() { return } - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + ffprobe := instance.FFProbe + videoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("error reading video file: %s", err.Error()) return diff --git a/pkg/manager/task_generate_preview.go b/pkg/manager/task_generate_preview.go index 01a68f006..172fc97d5 100644 --- a/pkg/manager/task_generate_preview.go +++ b/pkg/manager/task_generate_preview.go @@ -1,7 +1,6 @@ package manager import ( - "github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/manager/config" "github.com/stashapp/stash/pkg/models" @@ -27,7 +26,8 @@ func (t *GeneratePreviewTask) Start() { return } - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + ffprobe := instance.FFProbe + videoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("error reading video file: %s", err.Error()) return diff --git a/pkg/manager/task_generate_screenshot.go b/pkg/manager/task_generate_screenshot.go index 8694c2357..ea8d34213 100644 --- a/pkg/manager/task_generate_screenshot.go +++ b/pkg/manager/task_generate_screenshot.go @@ -7,7 +7,6 @@ import ( "os" "time" - "github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/models" ) @@ -21,7 +20,8 @@ type GenerateScreenshotTask struct { func (t *GenerateScreenshotTask) Start() { scenePath := t.Scene.Path - probeResult, err := ffmpeg.NewVideoFile(instance.FFProbePath, scenePath, false) + ffprobe := instance.FFProbe + probeResult, err := ffprobe.NewVideoFile(scenePath, false) if err != nil { logger.Error(err.Error()) diff --git a/pkg/manager/task_generate_sprite.go b/pkg/manager/task_generate_sprite.go index 0a21b6011..0c124a5c9 100644 --- a/pkg/manager/task_generate_sprite.go +++ b/pkg/manager/task_generate_sprite.go @@ -1,7 
+1,6 @@ package manager import ( - "github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/utils" @@ -18,7 +17,8 @@ func (t *GenerateSpriteTask) Start() { return } - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + ffprobe := instance.FFProbe + videoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("error reading video file: %s", err.Error()) return diff --git a/pkg/manager/task_migrate_hash.go b/pkg/manager/task_migrate_hash.go index 3ecdb54d4..e0c7c1131 100644 --- a/pkg/manager/task_migrate_hash.go +++ b/pkg/manager/task_migrate_hash.go @@ -2,6 +2,7 @@ package manager import ( "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/scene" ) // MigrateHashTask renames generated files between oshash and MD5 based on the @@ -28,5 +29,5 @@ func (t *MigrateHashTask) Start() { newHash = oshash } - MigrateHash(oldHash, newHash) + scene.MigrateHash(instance.Paths, oldHash, newHash) } diff --git a/pkg/manager/task_scan.go b/pkg/manager/task_scan.go index 3ab4b7844..d54ad1066 100644 --- a/pkg/manager/task_scan.go +++ b/pkg/manager/task_scan.go @@ -1,142 +1,110 @@ package manager import ( - "archive/zip" "context" - "database/sql" "errors" "fmt" "os" "path/filepath" - "strconv" - "strings" "time" "github.com/remeh/sizedwaitgroup" - "github.com/stashapp/stash/pkg/ffmpeg" - "github.com/stashapp/stash/pkg/gallery" - "github.com/stashapp/stash/pkg/image" + "github.com/stashapp/stash/pkg/file" "github.com/stashapp/stash/pkg/job" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/manager/config" "github.com/stashapp/stash/pkg/models" - "github.com/stashapp/stash/pkg/plugin" - "github.com/stashapp/stash/pkg/scene" "github.com/stashapp/stash/pkg/utils" ) +const scanQueueSize = 200000 + type ScanJob struct { txnManager models.TransactionManager input models.ScanMetadataInput subscriptions *subscriptionManager } +type scanFile struct { + path string + info os.FileInfo + caseSensitiveFs bool +} + func (j *ScanJob) Execute(ctx context.Context, progress *job.Progress) { input := j.input paths := getScanPaths(input.Paths) - var total *int - var newFiles *int - progress.ExecuteTask("Counting files to scan...", func() { - total, newFiles = j.neededScan(ctx, paths) - }) - if job.IsCancelled(ctx) { logger.Info("Stopping due to user request") return } - if total == nil || newFiles == nil { - logger.Infof("Taking too long to count content. Skipping...") - logger.Infof("Starting scan") - } else { - logger.Infof("Starting scan of %d files. %d New files found", *total, *newFiles) - } - start := time.Now() config := config.GetInstance() parallelTasks := config.GetParallelTasksWithAutoDetection() - logger.Infof("Scan started with %d parallel tasks", parallelTasks) - wg := sizedwaitgroup.New(parallelTasks) - if total != nil { - progress.SetTotal(*total) - } + logger.Infof("Scan started with %d parallel tasks", parallelTasks) + + fileQueue := make(chan scanFile, scanQueueSize) + go func() { + total, newFiles := j.queueFiles(ctx, paths, fileQueue, parallelTasks) + + if !job.IsCancelled(ctx) { + progress.SetTotal(total) + logger.Infof("Finished counting files. 
Total files to scan: %d, %d new files found", total, newFiles) + } + }() + + wg := sizedwaitgroup.New(parallelTasks) fileNamingAlgo := config.GetVideoFileNamingAlgorithm() calculateMD5 := config.IsCalculateMD5() - stoppingErr := errors.New("stopping") var err error var galleries []string - for _, sp := range paths { - csFs, er := utils.IsFsPathCaseSensitive(sp.Path) - if er != nil { - logger.Warnf("Cannot determine fs case sensitivity: %s", er.Error()) - } + mutexManager := utils.NewMutexManager() - err = walkFilesToScan(sp, func(path string, info os.FileInfo, err error) error { - if job.IsCancelled(ctx) { - return stoppingErr - } - - // #1756 - skip zero length files and directories - if info.IsDir() { - return nil - } - - if info.Size() == 0 { - logger.Infof("Skipping zero-length file: %s", path) - return nil - } - - if isGallery(path) { - galleries = append(galleries, path) - } - - if err := instance.Paths.Generated.EnsureTmpDir(); err != nil { - logger.Warnf("couldn't create temporary directory: %v", err) - } - - wg.Add() - task := ScanTask{ - TxnManager: j.txnManager, - FilePath: path, - UseFileMetadata: utils.IsTrue(input.UseFileMetadata), - StripFileExtension: utils.IsTrue(input.StripFileExtension), - fileNamingAlgorithm: fileNamingAlgo, - calculateMD5: calculateMD5, - GeneratePreview: utils.IsTrue(input.ScanGeneratePreviews), - GenerateImagePreview: utils.IsTrue(input.ScanGenerateImagePreviews), - GenerateSprite: utils.IsTrue(input.ScanGenerateSprites), - GeneratePhash: utils.IsTrue(input.ScanGeneratePhashes), - GenerateThumbnails: utils.IsTrue(input.ScanGenerateThumbnails), - progress: progress, - CaseSensitiveFs: csFs, - ctx: ctx, - } - - go func() { - task.Start(ctx) - wg.Done() - progress.Increment() - }() - - return nil - }) - - if errors.Is(err, stoppingErr) { - logger.Info("Stopping due to user request") + for f := range fileQueue { + if job.IsCancelled(ctx) { break } - if err != nil { - logger.Errorf("Error encountered scanning files: %s", err.Error()) - break + if isGallery(f.path) { + galleries = append(galleries, f.path) } + + if err := instance.Paths.Generated.EnsureTmpDir(); err != nil { + logger.Warnf("couldn't create temporary directory: %v", err) + } + + wg.Add() + task := ScanTask{ + TxnManager: j.txnManager, + file: file.FSFile(f.path, f.info), + UseFileMetadata: utils.IsTrue(input.UseFileMetadata), + StripFileExtension: utils.IsTrue(input.StripFileExtension), + fileNamingAlgorithm: fileNamingAlgo, + calculateMD5: calculateMD5, + GeneratePreview: utils.IsTrue(input.ScanGeneratePreviews), + GenerateImagePreview: utils.IsTrue(input.ScanGenerateImagePreviews), + GenerateSprite: utils.IsTrue(input.ScanGenerateSprites), + GeneratePhash: utils.IsTrue(input.ScanGeneratePhashes), + GenerateThumbnails: utils.IsTrue(input.ScanGenerateThumbnails), + progress: progress, + CaseSensitiveFs: f.caseSensitiveFs, + ctx: ctx, + mutexManager: mutexManager, + } + + go func() { + task.Start(ctx) + wg.Done() + progress.Increment() + }() } wg.Wait() @@ -148,7 +116,12 @@ func (j *ScanJob) Execute(ctx context.Context, progress *job.Progress) { elapsed := time.Since(start) logger.Info(fmt.Sprintf("Scan finished (%s)", elapsed)) - if job.IsCancelled(ctx) || err != nil { + if job.IsCancelled(ctx) { + logger.Info("Stopping due to user request") + return + } + + if err != nil { return } @@ -157,7 +130,7 @@ func (j *ScanJob) Execute(ctx context.Context, progress *job.Progress) { wg.Add() task := ScanTask{ TxnManager: j.txnManager, - FilePath: path, + file: file.FSFile(path, nil), // hopefully 
info is not needed UseFileMetadata: false, } @@ -170,60 +143,102 @@ func (j *ScanJob) Execute(ctx context.Context, progress *job.Progress) { j.subscriptions.notify() } -func (j *ScanJob) neededScan(ctx context.Context, paths []*models.StashConfig) (total *int, newFiles *int) { - const timeout = 90 * time.Second +func (j *ScanJob) queueFiles(ctx context.Context, paths []*models.StashConfig, scanQueue chan<- scanFile, parallelTasks int) (total int, newFiles int) { + defer close(scanQueue) - // create a control channel through which to signal the counting loop when the timeout is reached - chTimeout := time.After(timeout) - - logger.Infof("Counting files to scan...") - - t := 0 - n := 0 - - timeoutErr := errors.New("timed out") + wg := sizedwaitgroup.New(parallelTasks) for _, sp := range paths { + csFs, er := utils.IsFsPathCaseSensitive(sp.Path) + if er != nil { + logger.Warnf("Cannot determine fs case sensitivity: %s", er.Error()) + } + err := walkFilesToScan(sp, func(path string, info os.FileInfo, err error) error { - t++ - task := ScanTask{FilePath: path, TxnManager: j.txnManager} - if !task.doesPathExist() { - n++ - } - - //check for timeout - select { - case <-chTimeout: - return timeoutErr - default: - } - // check stop if job.IsCancelled(ctx) { - return timeoutErr + return context.Canceled } + wg.Add() + + go func() { + defer wg.Done() + + // #1756 - skip zero length files and directories + if info.IsDir() { + return + } + + if info.Size() == 0 { + logger.Infof("Skipping zero-length file: %s", path) + return + } + + total++ + if !j.doesPathExist(path) { + newFiles++ + } + + scanQueue <- scanFile{ + path: path, + info: info, + caseSensitiveFs: csFs, + } + }() + return nil }) - if errors.Is(err, timeoutErr) { - // timeout should return nil counts - return nil, nil - } + wg.Wait() - if err != nil { - logger.Errorf("Error encountered counting files to scan: %s", err.Error()) - return nil, nil + if err != nil && !errors.Is(err, context.Canceled) { + logger.Errorf("Error encountered queuing files to scan: %s", err.Error()) + return } } - return &t, &n + return +} + +func (j *ScanJob) doesPathExist(path string) bool { + config := config.GetInstance() + vidExt := config.GetVideoExtensions() + imgExt := config.GetImageExtensions() + gExt := config.GetGalleryExtensions() + + ret := false + txnErr := j.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if utils.MatchExtension(path, gExt) { + gallery, _ := r.Gallery().FindByPath(path) + if gallery != nil { + ret = true + } + } else if utils.MatchExtension(path, vidExt) { + s, _ := r.Scene().FindByPath(path) + if s != nil { + ret = true + } + } else if utils.MatchExtension(path, imgExt) { + i, _ := r.Image().FindByPath(path) + if i != nil { + ret = true + } + } + + return nil + }) + if txnErr != nil { + logger.Warnf("error checking if file exists in database: %v", txnErr) + } + + return ret } type ScanTask struct { ctx context.Context TxnManager models.TransactionManager - FilePath string + file file.SourceFile UseFileMetadata bool StripFileExtension bool calculateMD5 bool @@ -236,17 +251,19 @@ type ScanTask struct { zipGallery *models.Gallery progress *job.Progress CaseSensitiveFs bool + + mutexManager *utils.MutexManager } func (t *ScanTask) Start(ctx context.Context) { var s *models.Scene - - t.progress.ExecuteTask("Scanning "+t.FilePath, func() { - if isGallery(t.FilePath) { + path := t.file.Path() + t.progress.ExecuteTask("Scanning "+path, func() { + if isGallery(path) { t.scanGallery(ctx) - } else if 
isVideo(t.FilePath) { + } else if isVideo(path) { s = t.scanScene() - } else if isImage(t.FilePath) { + } else if isImage(path) { t.scanImage() } }) @@ -257,7 +274,7 @@ func (t *ScanTask) Start(ctx context.Context) { if t.GenerateSprite { iwg.Add() - go t.progress.ExecuteTask(fmt.Sprintf("Generating sprites for %s", t.FilePath), func() { + go t.progress.ExecuteTask(fmt.Sprintf("Generating sprites for %s", path), func() { taskSprite := GenerateSpriteTask{ Scene: *s, Overwrite: false, @@ -271,7 +288,7 @@ func (t *ScanTask) Start(ctx context.Context) { if t.GeneratePhash { iwg.Add() - go t.progress.ExecuteTask(fmt.Sprintf("Generating phash for %s", t.FilePath), func() { + go t.progress.ExecuteTask(fmt.Sprintf("Generating phash for %s", path), func() { taskPhash := GeneratePhashTask{ Scene: *s, fileNamingAlgorithm: t.fileNamingAlgorithm, @@ -285,7 +302,7 @@ func (t *ScanTask) Start(ctx context.Context) { if t.GeneratePreview { iwg.Add() - go t.progress.ExecuteTask(fmt.Sprintf("Generating preview for %s", t.FilePath), func() { + go t.progress.ExecuteTask(fmt.Sprintf("Generating preview for %s", path), func() { config := config.GetInstance() var previewSegmentDuration = config.GetPreviewSegmentDuration() var previewSegments = config.GetPreviewSegments() @@ -318,1047 +335,6 @@ func (t *ScanTask) Start(ctx context.Context) { } } -func (t *ScanTask) scanGallery(ctx context.Context) { - var g *models.Gallery - images := 0 - scanImages := false - - if err := t.TxnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { - var err error - g, err = r.Gallery().FindByPath(t.FilePath) - - if g != nil && err != nil { - images, err = r.Image().CountByGalleryID(g.ID) - if err != nil { - return fmt.Errorf("error getting images for zip gallery %s: %s", t.FilePath, err.Error()) - } - } - - return err - }); err != nil { - logger.Error(err.Error()) - return - } - - fileModTime, err := t.getFileModTime() - if err != nil { - logger.Error(err.Error()) - return - } - - if g != nil { - // We already have this item in the database, keep going - - // if file mod time is not set, set it now - if !g.FileModTime.Valid { - // we will also need to rescan the zip contents - scanImages = true - logger.Infof("setting file modification time on %s", t.FilePath) - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Gallery() - if _, err := gallery.UpdateFileModTime(qb, g.ID, models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }); err != nil { - return err - } - - // update our copy of the gallery - var err error - g, err = qb.Find(g.ID) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - } - - // if the mod time of the zip file is different than that of the associated - // gallery, then recalculate the checksum - modified := t.isFileModified(fileModTime, g.FileModTime) - if modified { - scanImages = true - logger.Infof("%s has been updated: rescanning", t.FilePath) - - // update the checksum and the modification time - checksum, err := t.calculateChecksum() - if err != nil { - logger.Error(err.Error()) - return - } - - currentTime := time.Now() - galleryPartial := models.GalleryPartial{ - ID: g.ID, - Checksum: &checksum, - FileModTime: &models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }, - UpdatedAt: &models.SQLiteTimestamp{Timestamp: currentTime}, - } - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - _, err := r.Gallery().UpdatePartial(galleryPartial) - return err - }); err != nil { 
- logger.Error(err.Error()) - return - } - } - - // scan the zip files if the gallery has no images - scanImages = scanImages || images == 0 - } else { - checksum, err := t.calculateChecksum() - if err != nil { - logger.Error(err.Error()) - return - } - - isNewGallery := false - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Gallery() - g, _ = qb.FindByChecksum(checksum) - if g != nil { - exists, _ := utils.FileExists(g.Path.String) - if !t.CaseSensitiveFs { - // #1426 - if file exists but is a case-insensitive match for the - // original filename, then treat it as a move - if exists && strings.EqualFold(t.FilePath, g.Path.String) { - exists = false - } - } - - if exists { - logger.Infof("%s already exists. Duplicate of %s ", t.FilePath, g.Path.String) - } else { - logger.Infof("%s already exists. Updating path...", t.FilePath) - g.Path = sql.NullString{ - String: t.FilePath, - Valid: true, - } - g, err = qb.Update(*g) - if err != nil { - return err - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, g.ID, plugin.GalleryUpdatePost, nil, nil) - } - } else { - currentTime := time.Now() - - newGallery := models.Gallery{ - Checksum: checksum, - Zip: true, - Path: sql.NullString{ - String: t.FilePath, - Valid: true, - }, - FileModTime: models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }, - Title: sql.NullString{ - String: utils.GetNameFromPath(t.FilePath, t.StripFileExtension), - Valid: true, - }, - CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - } - - // don't create gallery if it has no images - if countImagesInZip(t.FilePath) > 0 { - // only warn when creating the gallery - ok, err := utils.IsZipFileUncompressed(t.FilePath) - if err == nil && !ok { - logger.Warnf("%s is using above store (0) level compression.", t.FilePath) - } - - logger.Infof("%s doesn't exist. 
Creating new item...", t.FilePath) - g, err = qb.Create(newGallery) - if err != nil { - return err - } - scanImages = true - - isNewGallery = true - } - } - - return nil - }); err != nil { - logger.Error(err.Error()) - return - } - - if isNewGallery { - GetInstance().PluginCache.ExecutePostHooks(t.ctx, g.ID, plugin.GalleryCreatePost, nil, nil) - } - } - - if g != nil { - if scanImages { - t.scanZipImages(g) - } else { - // in case thumbnails have been deleted, regenerate them - t.regenerateZipImages(g) - } - } -} - -func (t *ScanTask) getFileModTime() (time.Time, error) { - fi, err := os.Stat(t.FilePath) - if err != nil { - return time.Time{}, fmt.Errorf("error performing stat on %s: %s", t.FilePath, err.Error()) - } - - ret := fi.ModTime() - // truncate to seconds, since we don't store beyond that in the database - ret = ret.Truncate(time.Second) - - return ret, nil -} - -func (t *ScanTask) getInteractive() bool { - _, err := os.Stat(utils.GetFunscriptPath(t.FilePath)) - return err == nil - -} - -func (t *ScanTask) isFileModified(fileModTime time.Time, modTime models.NullSQLiteTimestamp) bool { - return !modTime.Timestamp.Equal(fileModTime) -} - -// associates a gallery to a scene with the same basename -func (t *ScanTask) associateGallery(wg *sizedwaitgroup.SizedWaitGroup) { - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Gallery() - sqb := r.Scene() - g, err := qb.FindByPath(t.FilePath) - if err != nil { - return err - } - - if g == nil { - // associate is run after scan is finished - // should only happen if gallery is a directory or an io error occurs during hashing - logger.Warnf("associate: gallery %s not found in DB", t.FilePath) - return nil - } - - basename := strings.TrimSuffix(t.FilePath, filepath.Ext(t.FilePath)) - var relatedFiles []string - vExt := config.GetInstance().GetVideoExtensions() - // make a list of media files that can be related to the gallery - for _, ext := range vExt { - related := basename + "." 
+ ext - // exclude gallery extensions from the related files - if !isGallery(related) { - relatedFiles = append(relatedFiles, related) - } - } - for _, scenePath := range relatedFiles { - scene, _ := sqb.FindByPath(scenePath) - // found related Scene - if scene != nil { - sceneGalleries, _ := sqb.FindByGalleryID(g.ID) // check if gallery is already associated to the scene - isAssoc := false - for _, sg := range sceneGalleries { - if scene.ID == sg.ID { - isAssoc = true - break - } - } - if !isAssoc { - logger.Infof("associate: Gallery %s is related to scene: %d", t.FilePath, scene.ID) - if err := sqb.UpdateGalleries(scene.ID, []int{g.ID}); err != nil { - return err - } - } - } - } - return nil - }); err != nil { - logger.Error(err.Error()) - } - wg.Done() -} - -func (t *ScanTask) scanScene() *models.Scene { - logError := func(err error) *models.Scene { - logger.Error(err.Error()) - return nil - } - - var retScene *models.Scene - var s *models.Scene - - if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - var err error - s, err = r.Scene().FindByPath(t.FilePath) - return err - }); err != nil { - logger.Error(err.Error()) - return nil - } - - fileModTime, err := t.getFileModTime() - if err != nil { - return logError(err) - } - interactive := t.getInteractive() - - if s != nil { - // if file mod time is not set, set it now - if !s.FileModTime.Valid { - logger.Infof("setting file modification time on %s", t.FilePath) - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Scene() - if _, err := scene.UpdateFileModTime(qb, s.ID, models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }); err != nil { - return err - } - - // update our copy of the scene - var err error - s, err = qb.Find(s.ID) - return err - }); err != nil { - return logError(err) - } - } - - // if the mod time of the file is different than that of the associated - // scene, then recalculate the checksum and regenerate the thumbnail - modified := t.isFileModified(fileModTime, s.FileModTime) - config := config.GetInstance() - if modified || !s.Size.Valid { - oldHash := s.GetHash(config.GetVideoFileNamingAlgorithm()) - s, err = t.rescanScene(s, fileModTime) - if err != nil { - return logError(err) - } - - // Migrate any generated files if the hash has changed - newHash := s.GetHash(config.GetVideoFileNamingAlgorithm()) - if newHash != oldHash { - MigrateHash(oldHash, newHash) - } - } - - // We already have this item in the database - // check for thumbnails,screenshots - t.makeScreenshots(nil, s.GetHash(t.fileNamingAlgorithm)) - - // check for container - if !s.Format.Valid { - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath, t.StripFileExtension) - if err != nil { - return logError(err) - } - container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath) - logger.Infof("Adding container %s to file %s", container, t.FilePath) - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - _, err := scene.UpdateFormat(r.Scene(), s.ID, string(container)) - return err - }); err != nil { - return logError(err) - } - } - - // check if oshash is set - if !s.OSHash.Valid { - logger.Infof("Calculating oshash for existing file %s ...", t.FilePath) - oshash, err := utils.OSHashFromFilePath(t.FilePath) - if err != nil { - return nil - } - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Scene() - // check if oshash clashes with existing scene - dupe, _ := 
qb.FindByOSHash(oshash) - if dupe != nil { - return fmt.Errorf("OSHash for file %s is the same as that of %s", t.FilePath, dupe.Path) - } - - _, err := scene.UpdateOSHash(qb, s.ID, oshash) - return err - }); err != nil { - return logError(err) - } - } - - // check if MD5 is set, if calculateMD5 is true - if t.calculateMD5 && !s.Checksum.Valid { - checksum, err := t.calculateChecksum() - if err != nil { - return logError(err) - } - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Scene() - // check if checksum clashes with existing scene - dupe, _ := qb.FindByChecksum(checksum) - if dupe != nil { - return fmt.Errorf("MD5 for file %s is the same as that of %s", t.FilePath, dupe.Path) - } - - _, err := scene.UpdateChecksum(qb, s.ID, checksum) - return err - }); err != nil { - return logError(err) - } - } - - if s.Interactive != interactive { - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Scene() - scenePartial := models.ScenePartial{ - ID: s.ID, - Interactive: &interactive, - } - _, err := qb.Update(scenePartial) - return err - }); err != nil { - return logError(err) - } - } - - return nil - } - - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath, t.StripFileExtension) - if err != nil { - logger.Error(err.Error()) - return nil - } - container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath) - - // Override title to be filename if UseFileMetadata is false - if !t.UseFileMetadata { - videoFile.SetTitleFromPath(t.StripFileExtension) - } - - var checksum string - - logger.Infof("%s not found. Calculating oshash...", t.FilePath) - oshash, err := utils.OSHashFromFilePath(t.FilePath) - if err != nil { - return logError(err) - } - - if t.fileNamingAlgorithm == models.HashAlgorithmMd5 || t.calculateMD5 { - checksum, err = t.calculateChecksum() - if err != nil { - return logError(err) - } - } - - // check for scene by checksum and oshash - MD5 should be - // redundant, but check both - txnErr := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - qb := r.Scene() - if checksum != "" { - s, _ = qb.FindByChecksum(checksum) - } - - if s == nil { - s, _ = qb.FindByOSHash(oshash) - } - - return nil - }) - if txnErr != nil { - logger.Warnf("error in read transaction: %v", txnErr) - } - - sceneHash := oshash - - if t.fileNamingAlgorithm == models.HashAlgorithmMd5 { - sceneHash = checksum - } - - t.makeScreenshots(videoFile, sceneHash) - - if s != nil { - exists, _ := utils.FileExists(s.Path) - if !t.CaseSensitiveFs { - // #1426 - if file exists but is a case-insensitive match for the - // original filename, then treat it as a move - if exists && strings.EqualFold(t.FilePath, s.Path) { - exists = false - } - } - - if exists { - logger.Infof("%s already exists. Duplicate of %s", t.FilePath, s.Path) - } else { - logger.Infof("%s already exists. Updating path...", t.FilePath) - scenePartial := models.ScenePartial{ - ID: s.ID, - Path: &t.FilePath, - Interactive: &interactive, - } - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - _, err := r.Scene().Update(scenePartial) - return err - }); err != nil { - return logError(err) - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, s.ID, plugin.SceneUpdatePost, nil, nil) - } - } else { - logger.Infof("%s doesn't exist. 
Creating new item...", t.FilePath) - currentTime := time.Now() - newScene := models.Scene{ - Checksum: sql.NullString{String: checksum, Valid: checksum != ""}, - OSHash: sql.NullString{String: oshash, Valid: oshash != ""}, - Path: t.FilePath, - Title: sql.NullString{String: videoFile.Title, Valid: true}, - Duration: sql.NullFloat64{Float64: videoFile.Duration, Valid: true}, - VideoCodec: sql.NullString{String: videoFile.VideoCodec, Valid: true}, - AudioCodec: sql.NullString{String: videoFile.AudioCodec, Valid: true}, - Format: sql.NullString{String: string(container), Valid: true}, - Width: sql.NullInt64{Int64: int64(videoFile.Width), Valid: true}, - Height: sql.NullInt64{Int64: int64(videoFile.Height), Valid: true}, - Framerate: sql.NullFloat64{Float64: videoFile.FrameRate, Valid: true}, - Bitrate: sql.NullInt64{Int64: videoFile.Bitrate, Valid: true}, - Size: sql.NullString{String: strconv.FormatInt(videoFile.Size, 10), Valid: true}, - FileModTime: models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }, - CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - Interactive: interactive, - } - - if t.UseFileMetadata { - newScene.Details = sql.NullString{String: videoFile.Comment, Valid: true} - newScene.Date = models.SQLiteDate{String: videoFile.CreationTime.Format("2006-01-02")} - } - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - var err error - retScene, err = r.Scene().Create(newScene) - return err - }); err != nil { - return logError(err) - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, retScene.ID, plugin.SceneCreatePost, nil, nil) - } - - return retScene -} - -func (t *ScanTask) rescanScene(s *models.Scene, fileModTime time.Time) (*models.Scene, error) { - logger.Infof("%s has been updated: rescanning", t.FilePath) - - // update the oshash/checksum and the modification time - logger.Infof("Calculating oshash for existing file %s ...", t.FilePath) - oshash, err := utils.OSHashFromFilePath(t.FilePath) - if err != nil { - return nil, err - } - - var checksum *sql.NullString - if t.calculateMD5 { - cs, err := t.calculateChecksum() - if err != nil { - return nil, err - } - - checksum = &sql.NullString{ - String: cs, - Valid: true, - } - } - - // regenerate the file details as well - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath, t.StripFileExtension) - if err != nil { - return nil, err - } - container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath) - - currentTime := time.Now() - scenePartial := models.ScenePartial{ - ID: s.ID, - Checksum: checksum, - OSHash: &sql.NullString{ - String: oshash, - Valid: true, - }, - Duration: &sql.NullFloat64{Float64: videoFile.Duration, Valid: true}, - VideoCodec: &sql.NullString{String: videoFile.VideoCodec, Valid: true}, - AudioCodec: &sql.NullString{String: videoFile.AudioCodec, Valid: true}, - Format: &sql.NullString{String: string(container), Valid: true}, - Width: &sql.NullInt64{Int64: int64(videoFile.Width), Valid: true}, - Height: &sql.NullInt64{Int64: int64(videoFile.Height), Valid: true}, - Framerate: &sql.NullFloat64{Float64: videoFile.FrameRate, Valid: true}, - Bitrate: &sql.NullInt64{Int64: videoFile.Bitrate, Valid: true}, - Size: &sql.NullString{String: strconv.FormatInt(videoFile.Size, 10), Valid: true}, - FileModTime: &models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }, - UpdatedAt: &models.SQLiteTimestamp{Timestamp: currentTime}, - } - - var ret 
*models.Scene - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - var err error - ret, err = r.Scene().Update(scenePartial) - return err - }); err != nil { - logger.Error(err.Error()) - return nil, err - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, ret.ID, plugin.SceneUpdatePost, nil, nil) - - // leave the generated files as is - the scene file may have been moved - // elsewhere - - return ret, nil -} -func (t *ScanTask) makeScreenshots(probeResult *ffmpeg.VideoFile, checksum string) { - thumbPath := instance.Paths.Scene.GetThumbnailScreenshotPath(checksum) - normalPath := instance.Paths.Scene.GetScreenshotPath(checksum) - - thumbExists, _ := utils.FileExists(thumbPath) - normalExists, _ := utils.FileExists(normalPath) - - if thumbExists && normalExists { - return - } - - if probeResult == nil { - var err error - probeResult, err = ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath, t.StripFileExtension) - - if err != nil { - logger.Error(err.Error()) - return - } - logger.Infof("Regenerating images for %s", t.FilePath) - } - - at := float64(probeResult.Duration) * 0.2 - - if !thumbExists { - logger.Debugf("Creating thumbnail for %s", t.FilePath) - makeScreenshot(*probeResult, thumbPath, 5, 320, at) - } - - if !normalExists { - logger.Debugf("Creating screenshot for %s", t.FilePath) - makeScreenshot(*probeResult, normalPath, 2, probeResult.Width, at) - } -} - -func (t *ScanTask) scanZipImages(zipGallery *models.Gallery) { - err := walkGalleryZip(zipGallery.Path.String, func(file *zip.File) error { - // copy this task and change the filename - subTask := *t - - // filepath is the zip file and the internal file name, separated by a null byte - subTask.FilePath = image.ZipFilename(zipGallery.Path.String, file.Name) - subTask.zipGallery = zipGallery - - // run the subtask and wait for it to complete - subTask.Start(context.TODO()) - return nil - }) - if err != nil { - logger.Warnf("failed to scan zip file images for %s: %s", zipGallery.Path.String, err.Error()) - } -} - -func (t *ScanTask) regenerateZipImages(zipGallery *models.Gallery) { - var images []*models.Image - if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - iqb := r.Image() - - var err error - images, err = iqb.FindByGalleryID(zipGallery.ID) - return err - }); err != nil { - logger.Warnf("failed to find gallery images: %s", err.Error()) - return - } - - for _, img := range images { - t.generateThumbnail(img) - } -} - -func (t *ScanTask) scanImage() { - var i *models.Image - - if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - var err error - i, err = r.Image().FindByPath(t.FilePath) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - - fileModTime, err := image.GetFileModTime(t.FilePath) - if err != nil { - logger.Error(err.Error()) - return - } - - if i != nil { - // if file mod time is not set, set it now - if !i.FileModTime.Valid { - logger.Infof("setting file modification time on %s", t.FilePath) - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - qb := r.Image() - if _, err := image.UpdateFileModTime(qb, i.ID, models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }); err != nil { - return err - } - - // update our copy of the gallery - var err error - i, err = qb.Find(i.ID) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - } - - // if the mod time of the file is different than that of the associated - 
// image, then recalculate the checksum and regenerate the thumbnail - modified := t.isFileModified(fileModTime, i.FileModTime) - if modified { - i, err = t.rescanImage(i, fileModTime) - if err != nil { - logger.Error(err.Error()) - return - } - } - - // We already have this item in the database - // check for thumbnails - t.generateThumbnail(i) - } else { - var checksum string - - logger.Infof("%s not found. Calculating checksum...", t.FilePath) - checksum, err = t.calculateImageChecksum() - if err != nil { - logger.Errorf("error calculating checksum for %s: %s", t.FilePath, err.Error()) - return - } - - // check for scene by checksum and oshash - MD5 should be - // redundant, but check both - if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - var err error - i, err = r.Image().FindByChecksum(checksum) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - - if i != nil { - exists := image.FileExists(i.Path) - if !t.CaseSensitiveFs { - // #1426 - if file exists but is a case-insensitive match for the - // original filename, then treat it as a move - if exists && strings.EqualFold(t.FilePath, i.Path) { - exists = false - } - } - - if exists { - logger.Infof("%s already exists. Duplicate of %s ", image.PathDisplayName(t.FilePath), image.PathDisplayName(i.Path)) - } else { - logger.Infof("%s already exists. Updating path...", image.PathDisplayName(t.FilePath)) - imagePartial := models.ImagePartial{ - ID: i.ID, - Path: &t.FilePath, - } - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - _, err := r.Image().Update(imagePartial) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, i.ID, plugin.ImageUpdatePost, nil, nil) - } - } else { - logger.Infof("%s doesn't exist. 
Creating new item...", image.PathDisplayName(t.FilePath)) - currentTime := time.Now() - newImage := models.Image{ - Checksum: checksum, - Path: t.FilePath, - FileModTime: models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }, - CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - } - newImage.Title.String = image.GetFilename(&newImage, t.StripFileExtension) - newImage.Title.Valid = true - - if err := image.SetFileDetails(&newImage); err != nil { - logger.Error(err.Error()) - return - } - - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - var err error - i, err = r.Image().Create(newImage) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, i.ID, plugin.ImageCreatePost, nil, nil) - } - - if t.zipGallery != nil { - // associate with gallery - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - return gallery.AddImage(r.Gallery(), t.zipGallery.ID, i.ID) - }); err != nil { - logger.Error(err.Error()) - return - } - } else if config.GetInstance().GetCreateGalleriesFromFolders() { - // create gallery from folder or associate with existing gallery - logger.Infof("Associating image %s with folder gallery", i.Path) - var galleryID int - var isNewGallery bool - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - var err error - galleryID, isNewGallery, err = t.associateImageWithFolderGallery(i.ID, r.Gallery()) - return err - }); err != nil { - logger.Error(err.Error()) - return - } - - if isNewGallery { - GetInstance().PluginCache.ExecutePostHooks(t.ctx, galleryID, plugin.GalleryCreatePost, nil, nil) - } - } - } - - if i != nil { - t.generateThumbnail(i) - } -} - -func (t *ScanTask) rescanImage(i *models.Image, fileModTime time.Time) (*models.Image, error) { - logger.Infof("%s has been updated: rescanning", t.FilePath) - - oldChecksum := i.Checksum - - // update the checksum and the modification time - checksum, err := t.calculateImageChecksum() - if err != nil { - return nil, err - } - - // regenerate the file details as well - fileDetails, err := image.GetFileDetails(t.FilePath) - if err != nil { - return nil, err - } - - currentTime := time.Now() - imagePartial := models.ImagePartial{ - ID: i.ID, - Checksum: &checksum, - Width: &fileDetails.Width, - Height: &fileDetails.Height, - Size: &fileDetails.Size, - FileModTime: &models.NullSQLiteTimestamp{ - Timestamp: fileModTime, - Valid: true, - }, - UpdatedAt: &models.SQLiteTimestamp{Timestamp: currentTime}, - } - - var ret *models.Image - if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { - var err error - ret, err = r.Image().Update(imagePartial) - return err - }); err != nil { - return nil, err - } - - // remove the old thumbnail if the checksum changed - we'll regenerate it - if oldChecksum != checksum { - err = os.Remove(GetInstance().Paths.Generated.GetThumbnailPath(oldChecksum, models.DefaultGthumbWidth)) // remove cache dir of gallery - if err != nil { - logger.Errorf("Error deleting thumbnail image: %s", err) - } - } - - GetInstance().PluginCache.ExecutePostHooks(t.ctx, ret.ID, plugin.ImageUpdatePost, nil, nil) - - return ret, nil -} - -func (t *ScanTask) associateImageWithFolderGallery(imageID int, qb models.GalleryReaderWriter) (galleryID int, isNew bool, err error) { - // find a gallery with the path specified - path := filepath.Dir(t.FilePath) - var g 
*models.Gallery - g, err = qb.FindByPath(path) - if err != nil { - return - } - - if g == nil { - checksum := utils.MD5FromString(path) - - // create the gallery - currentTime := time.Now() - - newGallery := models.Gallery{ - Checksum: checksum, - Path: sql.NullString{ - String: path, - Valid: true, - }, - CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, - Title: sql.NullString{ - String: utils.GetNameFromPath(path, false), - Valid: true, - }, - } - - logger.Infof("Creating gallery for folder %s", path) - g, err = qb.Create(newGallery) - if err != nil { - return 0, false, err - } - - isNew = true - } - - // associate image with gallery - err = gallery.AddImage(qb, g.ID, imageID) - galleryID = g.ID - return -} - -func (t *ScanTask) generateThumbnail(i *models.Image) { - if !t.GenerateThumbnails { - return - } - - thumbPath := GetInstance().Paths.Generated.GetThumbnailPath(i.Checksum, models.DefaultGthumbWidth) - exists, _ := utils.FileExists(thumbPath) - if exists { - return - } - - config, _, err := image.DecodeSourceImage(i) - if err != nil { - logger.Errorf("error reading image %s: %s", i.Path, err.Error()) - return - } - - if config.Height > models.DefaultGthumbWidth || config.Width > models.DefaultGthumbWidth { - encoder := image.NewThumbnailEncoder(instance.FFMPEGPath) - data, err := encoder.GetThumbnail(i, models.DefaultGthumbWidth) - - if err != nil { - logger.Errorf("error getting thumbnail for image %s: %s", i.Path, err.Error()) - return - } - - err = utils.WriteFile(thumbPath, data) - if err != nil { - logger.Errorf("error writing thumbnail for image %s: %s", i.Path, err) - } - } -} - -func (t *ScanTask) calculateChecksum() (string, error) { - logger.Infof("Calculating checksum for %s...", t.FilePath) - checksum, err := utils.MD5FromFilePath(t.FilePath) - if err != nil { - return "", err - } - logger.Debugf("Checksum calculated: %s", checksum) - return checksum, nil -} - -func (t *ScanTask) calculateImageChecksum() (string, error) { - logger.Infof("Calculating checksum for %s...", image.PathDisplayName(t.FilePath)) - // uses image.CalculateMD5 to read files in zips - checksum, err := image.CalculateMD5(t.FilePath) - if err != nil { - return "", err - } - logger.Debugf("Checksum calculated: %s", checksum) - return checksum, nil -} - -func (t *ScanTask) doesPathExist() bool { - config := config.GetInstance() - vidExt := config.GetVideoExtensions() - imgExt := config.GetImageExtensions() - gExt := config.GetGalleryExtensions() - - ret := false - txnErr := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - if matchExtension(t.FilePath, gExt) { - gallery, _ := r.Gallery().FindByPath(t.FilePath) - if gallery != nil { - ret = true - } - } else if matchExtension(t.FilePath, vidExt) { - s, _ := r.Scene().FindByPath(t.FilePath) - if s != nil { - ret = true - } - } else if matchExtension(t.FilePath, imgExt) { - i, _ := r.Image().FindByPath(t.FilePath) - if i != nil { - ret = true - } - } - - return nil - }) - if txnErr != nil { - logger.Warnf("error while executing read transaction: %v", txnErr) - } - - return ret -} - func walkFilesToScan(s *models.StashConfig, f filepath.WalkFunc) error { config := config.GetInstance() vidExt := config.GetVideoExtensions() @@ -1368,7 +344,7 @@ func walkFilesToScan(s *models.StashConfig, f filepath.WalkFunc) error { excludeImgRegex := generateRegexps(config.GetImageExcludes()) // don't scan zip images directly - if image.IsZipPath(s.Path) { + if 
file.IsZipPath(s.Path) { logger.Warnf("Cannot rescan zip image %s. Rescan zip gallery instead.", s.Path) return nil } @@ -1397,12 +373,12 @@ func walkFilesToScan(s *models.StashConfig, f filepath.WalkFunc) error { return nil } - if !s.ExcludeVideo && matchExtension(path, vidExt) && !matchFileRegex(path, excludeVidRegex) { + if !s.ExcludeVideo && utils.MatchExtension(path, vidExt) && !matchFileRegex(path, excludeVidRegex) { return f(path, info, err) } if !s.ExcludeImage { - if (matchExtension(path, imgExt) || matchExtension(path, gExt)) && !matchFileRegex(path, excludeImgRegex) { + if (utils.MatchExtension(path, imgExt) || utils.MatchExtension(path, gExt)) && !matchFileRegex(path, excludeImgRegex) { return f(path, info, err) } } diff --git a/pkg/manager/task_scan_gallery.go b/pkg/manager/task_scan_gallery.go new file mode 100644 index 000000000..751e6a0f3 --- /dev/null +++ b/pkg/manager/task_scan_gallery.go @@ -0,0 +1,170 @@ +package manager + +import ( + "archive/zip" + "context" + "fmt" + "path/filepath" + "strings" + + "github.com/remeh/sizedwaitgroup" + "github.com/stashapp/stash/pkg/file" + "github.com/stashapp/stash/pkg/gallery" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager/config" + "github.com/stashapp/stash/pkg/models" +) + +func (t *ScanTask) scanGallery(ctx context.Context) { + var g *models.Gallery + path := t.file.Path() + images := 0 + scanImages := false + + if err := t.TxnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { + var err error + g, err = r.Gallery().FindByPath(path) + + if g != nil && err != nil { + images, err = r.Image().CountByGalleryID(g.ID) + if err != nil { + return fmt.Errorf("error getting images for zip gallery %s: %s", path, err.Error()) + } + } + + return err + }); err != nil { + logger.Error(err.Error()) + return + } + + scanner := gallery.Scanner{ + Scanner: gallery.FileScanner(&file.FSHasher{}), + ImageExtensions: instance.Config.GetImageExtensions(), + StripFileExtension: t.StripFileExtension, + Ctx: t.ctx, + CaseSensitiveFs: t.CaseSensitiveFs, + TxnManager: t.TxnManager, + Paths: instance.Paths, + PluginCache: instance.PluginCache, + MutexManager: t.mutexManager, + } + + var err error + if g != nil { + g, scanImages, err = scanner.ScanExisting(g, t.file) + if err != nil { + logger.Error(err.Error()) + return + } + + // scan the zip files if the gallery has no images + scanImages = scanImages || images == 0 + } else { + g, scanImages, err = scanner.ScanNew(t.file) + if err != nil { + logger.Error(err.Error()) + } + } + + if g != nil { + if scanImages { + t.scanZipImages(g) + } else { + // in case thumbnails have been deleted, regenerate them + t.regenerateZipImages(g) + } + } +} + +// associates a gallery to a scene with the same basename +func (t *ScanTask) associateGallery(wg *sizedwaitgroup.SizedWaitGroup) { + path := t.file.Path() + if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + qb := r.Gallery() + sqb := r.Scene() + g, err := qb.FindByPath(path) + if err != nil { + return err + } + + if g == nil { + // associate is run after scan is finished + // should only happen if gallery is a directory or an io error occurs during hashing + logger.Warnf("associate: gallery %s not found in DB", path) + return nil + } + + basename := strings.TrimSuffix(path, filepath.Ext(path)) + var relatedFiles []string + vExt := config.GetInstance().GetVideoExtensions() + // make a list of media files that can be related to the gallery + for _, ext := range vExt { + related := basename + 
"." + ext + // exclude gallery extensions from the related files + if !isGallery(related) { + relatedFiles = append(relatedFiles, related) + } + } + for _, scenePath := range relatedFiles { + scene, _ := sqb.FindByPath(scenePath) + // found related Scene + if scene != nil { + sceneGalleries, _ := sqb.FindByGalleryID(g.ID) // check if gallery is already associated to the scene + isAssoc := false + for _, sg := range sceneGalleries { + if scene.ID == sg.ID { + isAssoc = true + break + } + } + if !isAssoc { + logger.Infof("associate: Gallery %s is related to scene: %d", path, scene.ID) + if err := sqb.UpdateGalleries(scene.ID, []int{g.ID}); err != nil { + return err + } + } + } + } + return nil + }); err != nil { + logger.Error(err.Error()) + } + wg.Done() +} + +func (t *ScanTask) scanZipImages(zipGallery *models.Gallery) { + err := walkGalleryZip(zipGallery.Path.String, func(f *zip.File) error { + // copy this task and change the filename + subTask := *t + + // filepath is the zip file and the internal file name, separated by a null byte + subTask.file = file.ZipFile(zipGallery.Path.String, f) + subTask.zipGallery = zipGallery + + // run the subtask and wait for it to complete + subTask.Start(context.TODO()) + return nil + }) + if err != nil { + logger.Warnf("failed to scan zip file images for %s: %s", zipGallery.Path.String, err.Error()) + } +} + +func (t *ScanTask) regenerateZipImages(zipGallery *models.Gallery) { + var images []*models.Image + if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + iqb := r.Image() + + var err error + images, err = iqb.FindByGalleryID(zipGallery.ID) + return err + }); err != nil { + logger.Warnf("failed to find gallery images: %s", err.Error()) + return + } + + for _, img := range images { + t.generateThumbnail(img) + } +} diff --git a/pkg/manager/task_scan_image.go b/pkg/manager/task_scan_image.go new file mode 100644 index 000000000..d48e5f5c7 --- /dev/null +++ b/pkg/manager/task_scan_image.go @@ -0,0 +1,166 @@ +package manager + +import ( + "context" + "database/sql" + "path/filepath" + "time" + + "github.com/stashapp/stash/pkg/file" + "github.com/stashapp/stash/pkg/gallery" + "github.com/stashapp/stash/pkg/image" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager/config" + "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/plugin" + "github.com/stashapp/stash/pkg/utils" +) + +func (t *ScanTask) scanImage() { + var i *models.Image + path := t.file.Path() + + if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + var err error + i, err = r.Image().FindByPath(path) + return err + }); err != nil { + logger.Error(err.Error()) + return + } + + scanner := image.Scanner{ + Scanner: image.FileScanner(&file.FSHasher{}), + StripFileExtension: t.StripFileExtension, + Ctx: t.ctx, + TxnManager: t.TxnManager, + Paths: GetInstance().Paths, + PluginCache: instance.PluginCache, + MutexManager: t.mutexManager, + } + + var err error + if i != nil { + i, err = scanner.ScanExisting(i, t.file) + if err != nil { + logger.Error(err.Error()) + return + } + } else { + i, err = scanner.ScanNew(t.file) + if err != nil { + logger.Error(err.Error()) + return + } + + if i != nil { + if t.zipGallery != nil { + // associate with gallery + if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + return gallery.AddImage(r.Gallery(), t.zipGallery.ID, i.ID) + }); err != nil { + logger.Error(err.Error()) + return + } + } else if 
config.GetInstance().GetCreateGalleriesFromFolders() { + // create gallery from folder or associate with existing gallery + logger.Infof("Associating image %s with folder gallery", i.Path) + var galleryID int + var isNewGallery bool + if err := t.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + var err error + galleryID, isNewGallery, err = t.associateImageWithFolderGallery(i.ID, r.Gallery()) + return err + }); err != nil { + logger.Error(err.Error()) + return + } + + if isNewGallery { + GetInstance().PluginCache.ExecutePostHooks(t.ctx, galleryID, plugin.GalleryCreatePost, nil, nil) + } + } + } + } + + if i != nil { + t.generateThumbnail(i) + } +} + +func (t *ScanTask) associateImageWithFolderGallery(imageID int, qb models.GalleryReaderWriter) (galleryID int, isNew bool, err error) { + // find a gallery with the path specified + path := filepath.Dir(t.file.Path()) + var g *models.Gallery + g, err = qb.FindByPath(path) + if err != nil { + return + } + + if g == nil { + checksum := utils.MD5FromString(path) + + // create the gallery + currentTime := time.Now() + + newGallery := models.Gallery{ + Checksum: checksum, + Path: sql.NullString{ + String: path, + Valid: true, + }, + CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, + UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime}, + Title: sql.NullString{ + String: utils.GetNameFromPath(path, false), + Valid: true, + }, + } + + logger.Infof("Creating gallery for folder %s", path) + g, err = qb.Create(newGallery) + if err != nil { + return 0, false, err + } + + isNew = true + } + + // associate image with gallery + err = gallery.AddImage(qb, g.ID, imageID) + galleryID = g.ID + return +} + +func (t *ScanTask) generateThumbnail(i *models.Image) { + if !t.GenerateThumbnails { + return + } + + thumbPath := GetInstance().Paths.Generated.GetThumbnailPath(i.Checksum, models.DefaultGthumbWidth) + exists, _ := utils.FileExists(thumbPath) + if exists { + return + } + + config, _, err := image.DecodeSourceImage(i) + if err != nil { + logger.Errorf("error reading image %s: %s", i.Path, err.Error()) + return + } + + if config.Height > models.DefaultGthumbWidth || config.Width > models.DefaultGthumbWidth { + encoder := image.NewThumbnailEncoder(instance.FFMPEG) + data, err := encoder.GetThumbnail(i, models.DefaultGthumbWidth) + + if err != nil { + logger.Errorf("error getting thumbnail for image %s: %s", i.Path, err.Error()) + return + } + + err = utils.WriteFile(thumbPath, data) + if err != nil { + logger.Errorf("error writing thumbnail for image %s: %s", i.Path, err) + } + } +} diff --git a/pkg/manager/task_scan_scene.go b/pkg/manager/task_scan_scene.go new file mode 100644 index 000000000..98798efaf --- /dev/null +++ b/pkg/manager/task_scan_scene.go @@ -0,0 +1,58 @@ +package manager + +import ( + "context" + + "github.com/stashapp/stash/pkg/file" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/scene" +) + +func (t *ScanTask) scanScene() *models.Scene { + logError := func(err error) *models.Scene { + logger.Error(err.Error()) + return nil + } + + var retScene *models.Scene + var s *models.Scene + + if err := t.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + var err error + s, err = r.Scene().FindByPath(t.file.Path()) + return err + }); err != nil { + logger.Error(err.Error()) + return nil + } + + scanner := scene.Scanner{ + Scanner: scene.FileScanner(&file.FSHasher{}, t.fileNamingAlgorithm, t.calculateMD5), + 
StripFileExtension: t.StripFileExtension, + FileNamingAlgorithm: t.fileNamingAlgorithm, + Ctx: t.ctx, + TxnManager: t.TxnManager, + Paths: GetInstance().Paths, + Screenshotter: &instance.FFMPEG, + VideoFileCreator: &instance.FFProbe, + PluginCache: instance.PluginCache, + MutexManager: t.mutexManager, + } + + if s != nil { + if err := scanner.ScanExisting(s, t.file); err != nil { + return logError(err) + } + + return nil + } + + var err error + retScene, err = scanner.ScanNew(t.file) + if err != nil { + return logError(err) + } + + return retScene +} diff --git a/pkg/manager/task_transcode.go b/pkg/manager/task_transcode.go index 7c55eaba5..807f1c2e3 100644 --- a/pkg/manager/task_transcode.go +++ b/pkg/manager/task_transcode.go @@ -20,13 +20,14 @@ func (t *GenerateTranscodeTask) Start() { return } + ffprobe := instance.FFProbe var container ffmpeg.Container if t.Scene.Format.Valid { container = ffmpeg.Container(t.Scene.Format.String) } else { // container isn't in the DB // shouldn't happen unless user hasn't scanned after updating to PR#384+ version - tmpVideoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + tmpVideoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("[transcode] error reading video file: %s", err.Error()) return @@ -45,7 +46,7 @@ func (t *GenerateTranscodeTask) Start() { return } - videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.Scene.Path, false) + videoFile, err := ffprobe.NewVideoFile(t.Scene.Path, false) if err != nil { logger.Errorf("[transcode] error reading video file: %s", err.Error()) return @@ -58,7 +59,7 @@ func (t *GenerateTranscodeTask) Start() { OutputPath: outputPath, MaxTranscodeSize: transcodeSize, } - encoder := ffmpeg.NewEncoder(instance.FFMPEGPath) + encoder := instance.FFMPEG if videoCodec == ffmpeg.H264 { // for non supported h264 files stream copy the video part if audioCodec == ffmpeg.MissingUnsupported { diff --git a/pkg/models/model_file.go b/pkg/models/model_file.go new file mode 100644 index 000000000..799cac7e9 --- /dev/null +++ b/pkg/models/model_file.go @@ -0,0 +1,30 @@ +package models + +import "time" + +type File struct { + Checksum string `db:"checksum" json:"checksum"` + OSHash string `db:"oshash" json:"oshash"` + Path string `db:"path" json:"path"` + Size string `db:"size" json:"size"` + FileModTime time.Time `db:"file_mod_time" json:"file_mod_time"` +} + +// GetHash returns the hash of the scene, based on the hash algorithm provided. If +// hash algorithm is MD5, then Checksum is returned. Otherwise, OSHash is returned. 
+func (s File) GetHash(hashAlgorithm HashAlgorithm) string { + var ret string + if hashAlgorithm == HashAlgorithmMd5 { + ret = s.Checksum + } else if hashAlgorithm == HashAlgorithmOshash { + ret = s.OSHash + } else { + panic("unknown hash algorithm") + } + + return ret +} + +func (s File) Equal(o File) bool { + return s.Path == o.Path && s.Checksum == o.Checksum && s.OSHash == o.OSHash && s.Size == o.Size && s.FileModTime.Equal(o.FileModTime) +} diff --git a/pkg/models/model_gallery.go b/pkg/models/model_gallery.go index 977df7663..e7b2b09b4 100644 --- a/pkg/models/model_gallery.go +++ b/pkg/models/model_gallery.go @@ -3,6 +3,7 @@ package models import ( "database/sql" "path/filepath" + "time" ) type Gallery struct { @@ -40,6 +41,40 @@ type GalleryPartial struct { UpdatedAt *SQLiteTimestamp `db:"updated_at" json:"updated_at"` } +func (s *Gallery) File() File { + ret := File{ + Path: s.Path.String, + } + + ret.Checksum = s.Checksum + + if s.FileModTime.Valid { + ret.FileModTime = s.FileModTime.Timestamp + } + + return ret +} + +func (s *Gallery) SetFile(f File) { + path := f.Path + s.Path = sql.NullString{ + String: path, + Valid: true, + } + + if f.Checksum != "" { + s.Checksum = f.Checksum + } + + zeroTime := time.Time{} + if f.FileModTime != zeroTime { + s.FileModTime = NullSQLiteTimestamp{ + Timestamp: f.FileModTime, + Valid: true, + } + } +} + // GetTitle returns the title of the scene. If the Title field is empty, // then the base filename is returned. func (s Gallery) GetTitle() string { diff --git a/pkg/models/model_image.go b/pkg/models/model_image.go index 6470e619d..4aae450ec 100644 --- a/pkg/models/model_image.go +++ b/pkg/models/model_image.go @@ -3,6 +3,8 @@ package models import ( "database/sql" "path/filepath" + "strconv" + "time" ) // Image stores the metadata for a single image. @@ -41,14 +43,55 @@ type ImagePartial struct { UpdatedAt *SQLiteTimestamp `db:"updated_at" json:"updated_at"` } -// GetTitle returns the title of the image. If the Title field is empty, -// then the base filename is returned. -func (s Image) GetTitle() string { - if s.Title.String != "" { - return s.Title.String +func (i *Image) File() File { + ret := File{ + Path: i.Path, } - return filepath.Base(s.Path) + ret.Checksum = i.Checksum + if i.FileModTime.Valid { + ret.FileModTime = i.FileModTime.Timestamp + } + if i.Size.Valid { + ret.Size = strconv.FormatInt(i.Size.Int64, 10) + } + + return ret +} + +func (i *Image) SetFile(f File) { + path := f.Path + i.Path = path + + if f.Checksum != "" { + i.Checksum = f.Checksum + } + zeroTime := time.Time{} + if f.FileModTime != zeroTime { + i.FileModTime = NullSQLiteTimestamp{ + Timestamp: f.FileModTime, + Valid: true, + } + } + if f.Size != "" { + size, err := strconv.ParseInt(f.Size, 10, 64) + if err == nil { + i.Size = sql.NullInt64{ + Int64: size, + Valid: true, + } + } + } +} + +// GetTitle returns the title of the image. If the Title field is empty, +// then the base filename is returned. +func (i *Image) GetTitle() string { + if i.Title.String != "" { + return i.Title.String + } + + return filepath.Base(i.Path) } // ImageFileType represents the file metadata for an image. diff --git a/pkg/models/model_scene.go b/pkg/models/model_scene.go index 2adb0a274..6ffc914c1 100644 --- a/pkg/models/model_scene.go +++ b/pkg/models/model_scene.go @@ -3,6 +3,7 @@ package models import ( "database/sql" "path/filepath" + "time" ) // Scene stores the metadata for a single video scene. 
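The hunks around this point give Gallery, Image and Scene a shared models.File value with File()/SetFile round-trip helpers, so hash selection (GetHash) and change comparison (Equal) live in one place. As a minimal sketch of how that round-trip is intended to be used — assuming the models package exactly as modified in this patch, with the path and hash values below invented purely for illustration:

package main

import (
	"fmt"
	"time"

	"github.com/stashapp/stash/pkg/models"
)

func main() {
	// Populate a Scene through the new SetFile helper instead of assigning
	// the individual sql.Null* columns by hand.
	var s models.Scene
	s.SetFile(models.File{
		Path:        "/videos/example.mp4",              // invented path
		OSHash:      "0123456789abcdef",                 // invented oshash
		Checksum:    "d41d8cd98f00b204e9800998ecf8427e", // invented MD5
		Size:        "12345",
		FileModTime: time.Now(),
	})

	// Scene.GetHash (changed below to delegate to File.GetHash) returns the
	// checksum under MD5-based naming and the oshash otherwise.
	fmt.Println(s.GetHash(models.HashAlgorithmMd5))
	fmt.Println(s.GetHash(models.HashAlgorithmOshash))

	// File.Equal compares path, hashes, size and mod time as plain values.
	fmt.Println(s.File().Equal(s.File())) // true
}

Keeping this conversion in the models package is what lets the new gallery, image and scene scanners share the generic file.Scanner while still writing back to their own sql.Null* columns.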
@@ -35,6 +36,58 @@ type Scene struct { Interactive bool `db:"interactive" json:"interactive"` } +func (s *Scene) File() File { + ret := File{ + Path: s.Path, + } + + if s.Checksum.Valid { + ret.Checksum = s.Checksum.String + } + if s.OSHash.Valid { + ret.OSHash = s.OSHash.String + } + if s.FileModTime.Valid { + ret.FileModTime = s.FileModTime.Timestamp + } + if s.Size.Valid { + ret.Size = s.Size.String + } + + return ret +} + +func (s *Scene) SetFile(f File) { + path := f.Path + s.Path = path + + if f.Checksum != "" { + s.Checksum = sql.NullString{ + String: f.Checksum, + Valid: true, + } + } + if f.OSHash != "" { + s.OSHash = sql.NullString{ + String: f.OSHash, + Valid: true, + } + } + zeroTime := time.Time{} + if f.FileModTime != zeroTime { + s.FileModTime = NullSQLiteTimestamp{ + Timestamp: f.FileModTime, + Valid: true, + } + } + if f.Size != "" { + s.Size = sql.NullString{ + String: f.Size, + Valid: true, + } + } +} + // ScenePartial represents part of a Scene object. It is used to update // the database entry. Only non-nil fields will be updated. type ScenePartial struct { @@ -66,6 +119,37 @@ type ScenePartial struct { Interactive *bool `db:"interactive" json:"interactive"` } +func (s *ScenePartial) SetFile(f File) { + path := f.Path + s.Path = &path + + if f.Checksum != "" { + s.Checksum = &sql.NullString{ + String: f.Checksum, + Valid: true, + } + } + if f.OSHash != "" { + s.OSHash = &sql.NullString{ + String: f.OSHash, + Valid: true, + } + } + zeroTime := time.Time{} + if f.FileModTime != zeroTime { + s.FileModTime = &NullSQLiteTimestamp{ + Timestamp: f.FileModTime, + Valid: true, + } + } + if f.Size != "" { + s.Size = &sql.NullString{ + String: f.Size, + Valid: true, + } + } +} + // GetTitle returns the title of the scene. If the Title field is empty, // then the base filename is returned. func (s Scene) GetTitle() string { @@ -79,13 +163,7 @@ func (s Scene) GetTitle() string { // GetHash returns the hash of the scene, based on the hash algorithm provided. If // hash algorithm is MD5, then Checksum is returned. Otherwise, OSHash is returned. 
func (s Scene) GetHash(hashAlgorithm HashAlgorithm) string { - if hashAlgorithm == HashAlgorithmMd5 { - return s.Checksum.String - } else if hashAlgorithm == HashAlgorithmOshash { - return s.OSHash.String - } - - panic("unknown hash algorithm") + return s.File().GetHash(hashAlgorithm) } func (s Scene) GetMinResolution() int64 { diff --git a/pkg/manager/migrate_hash.go b/pkg/scene/migrate_hash.go similarity index 67% rename from pkg/manager/migrate_hash.go rename to pkg/scene/migrate_hash.go index f1f37d5d3..d0a7892e5 100644 --- a/pkg/manager/migrate_hash.go +++ b/pkg/scene/migrate_hash.go @@ -1,49 +1,50 @@ -package manager +package scene import ( "os" "path/filepath" "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager/paths" "github.com/stashapp/stash/pkg/utils" ) -func MigrateHash(oldHash string, newHash string) { - oldPath := filepath.Join(instance.Paths.Generated.Markers, oldHash) - newPath := filepath.Join(instance.Paths.Generated.Markers, newHash) - migrate(oldPath, newPath) +func MigrateHash(p *paths.Paths, oldHash string, newHash string) { + oldPath := filepath.Join(p.Generated.Markers, oldHash) + newPath := filepath.Join(p.Generated.Markers, newHash) + migrateSceneFiles(oldPath, newPath) - scenePaths := GetInstance().Paths.Scene + scenePaths := p.Scene oldPath = scenePaths.GetThumbnailScreenshotPath(oldHash) newPath = scenePaths.GetThumbnailScreenshotPath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) oldPath = scenePaths.GetScreenshotPath(oldHash) newPath = scenePaths.GetScreenshotPath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) oldPath = scenePaths.GetStreamPreviewPath(oldHash) newPath = scenePaths.GetStreamPreviewPath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) oldPath = scenePaths.GetStreamPreviewImagePath(oldHash) newPath = scenePaths.GetStreamPreviewImagePath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) oldPath = scenePaths.GetTranscodePath(oldHash) newPath = scenePaths.GetTranscodePath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) oldPath = scenePaths.GetSpriteVttFilePath(oldHash) newPath = scenePaths.GetSpriteVttFilePath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) oldPath = scenePaths.GetSpriteImageFilePath(oldHash) newPath = scenePaths.GetSpriteImageFilePath(newHash) - migrate(oldPath, newPath) + migrateSceneFiles(oldPath, newPath) } -func migrate(oldName, newName string) { +func migrateSceneFiles(oldName, newName string) { oldExists, err := utils.FileExists(oldName) if err != nil && !os.IsNotExist(err) { logger.Errorf("Error checking existence of %s: %s", oldName, err.Error()) diff --git a/pkg/scene/scan.go b/pkg/scene/scan.go new file mode 100644 index 000000000..9b3c80de2 --- /dev/null +++ b/pkg/scene/scan.go @@ -0,0 +1,335 @@ +package scene + +import ( + "context" + "database/sql" + "fmt" + "os" + "strconv" + "strings" + "time" + + "github.com/stashapp/stash/pkg/ffmpeg" + "github.com/stashapp/stash/pkg/file" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager/config" + "github.com/stashapp/stash/pkg/manager/paths" + "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/plugin" + "github.com/stashapp/stash/pkg/utils" +) + +const mutexType = "scene" + +type videoFileCreator interface { + NewVideoFile(path string, stripFileExtension bool) (*ffmpeg.VideoFile, error) +} + +type Scanner struct { + file.Scanner + + 
StripFileExtension bool + UseFileMetadata bool + FileNamingAlgorithm models.HashAlgorithm + + Ctx context.Context + CaseSensitiveFs bool + TxnManager models.TransactionManager + Paths *paths.Paths + Screenshotter screenshotter + VideoFileCreator videoFileCreator + PluginCache *plugin.Cache + MutexManager *utils.MutexManager +} + +func FileScanner(hasher file.Hasher, fileNamingAlgorithm models.HashAlgorithm, calculateMD5 bool) file.Scanner { + return file.Scanner{ + Hasher: hasher, + CalculateOSHash: true, + CalculateMD5: fileNamingAlgorithm == models.HashAlgorithmMd5 || calculateMD5, + } +} + +func (scanner *Scanner) ScanExisting(existing file.FileBased, file file.SourceFile) (err error) { + scanned, err := scanner.Scanner.ScanExisting(existing, file) + if err != nil { + return err + } + + s := existing.(*models.Scene) + + path := scanned.New.Path + interactive := getInteractive(path) + + config := config.GetInstance() + oldHash := s.GetHash(scanner.FileNamingAlgorithm) + changed := false + + var videoFile *ffmpeg.VideoFile + + if scanned.ContentsChanged() { + logger.Infof("%s has been updated: rescanning", path) + + s.SetFile(*scanned.New) + + videoFile, err = scanner.VideoFileCreator.NewVideoFile(path, scanner.StripFileExtension) + if err != nil { + return err + } + + videoFileToScene(s, videoFile) + changed = true + } else if scanned.FileUpdated() || s.Interactive != interactive { + logger.Infof("Updated scene file %s", path) + + // update fields as needed + s.SetFile(*scanned.New) + changed = true + } + + // check for container + if !s.Format.Valid { + if videoFile == nil { + videoFile, err = scanner.VideoFileCreator.NewVideoFile(path, scanner.StripFileExtension) + if err != nil { + return err + } + } + container := ffmpeg.MatchContainer(videoFile.Container, path) + logger.Infof("Adding container %s to file %s", container, path) + s.Format = models.NullString(string(container)) + changed = true + } + + if changed { + // we are operating on a checksum now, so grab a mutex on the checksum + done := make(chan struct{}) + if scanned.New.OSHash != "" { + scanner.MutexManager.Claim(mutexType, scanned.New.OSHash, done) + } + if scanned.New.Checksum != "" { + scanner.MutexManager.Claim(mutexType, scanned.New.Checksum, done) + } + + if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error { + defer close(done) + qb := r.Scene() + + // ensure no clashes of hashes + if scanned.New.Checksum != "" && scanned.Old.Checksum != scanned.New.Checksum { + dupe, _ := qb.FindByChecksum(s.Checksum.String) + if dupe != nil { + return fmt.Errorf("MD5 for file %s is the same as that of %s", path, dupe.Path) + } + } + + if scanned.New.OSHash != "" && scanned.Old.OSHash != scanned.New.OSHash { + dupe, _ := qb.FindByOSHash(scanned.New.OSHash) + if dupe != nil { + return fmt.Errorf("OSHash for file %s is the same as that of %s", path, dupe.Path) + } + } + + s.UpdatedAt = models.SQLiteTimestamp{Timestamp: time.Now()} + + _, err := qb.UpdateFull(*s) + return err + }); err != nil { + return err + } + + // Migrate any generated files if the hash has changed + newHash := s.GetHash(config.GetVideoFileNamingAlgorithm()) + if newHash != oldHash { + MigrateHash(scanner.Paths, oldHash, newHash) + } + + scanner.PluginCache.ExecutePostHooks(scanner.Ctx, s.ID, plugin.SceneUpdatePost, nil, nil) + } + + // We already have this item in the database + // check for thumbnails, screenshots + scanner.makeScreenshots(path, videoFile, s.GetHash(scanner.FileNamingAlgorithm)) + + return nil +} + +func (scanner 
+
+func (scanner *Scanner) ScanNew(file file.SourceFile) (retScene *models.Scene, err error) {
+    scanned, err := scanner.Scanner.ScanNew(file)
+    if err != nil {
+        return nil, err
+    }
+
+    path := file.Path()
+    checksum := scanned.Checksum
+    oshash := scanned.OSHash
+
+    // grab a mutex on the checksum and oshash
+    done := make(chan struct{})
+    if oshash != "" {
+        scanner.MutexManager.Claim(mutexType, oshash, done)
+    }
+    if checksum != "" {
+        scanner.MutexManager.Claim(mutexType, checksum, done)
+    }
+
+    defer close(done)
+
+    // check for scene by checksum and oshash - MD5 should be
+    // redundant, but check both
+    var s *models.Scene
+    if err := scanner.TxnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
+        qb := r.Scene()
+        if checksum != "" {
+            s, _ = qb.FindByChecksum(checksum)
+        }
+
+        if s == nil {
+            s, _ = qb.FindByOSHash(oshash)
+        }
+
+        return nil
+    }); err != nil {
+        return nil, err
+    }
+
+    sceneHash := oshash
+
+    if scanner.FileNamingAlgorithm == models.HashAlgorithmMd5 {
+        sceneHash = checksum
+    }
+
+    interactive := getInteractive(file.Path())
+
+    if s != nil {
+        exists, _ := utils.FileExists(s.Path)
+        if !scanner.CaseSensitiveFs {
+            // #1426 - if file exists but is a case-insensitive match for the
+            // original filename, then treat it as a move
+            if exists && strings.EqualFold(path, s.Path) {
+                exists = false
+            }
+        }
+
+        if exists {
+            logger.Infof("%s already exists. Duplicate of %s", path, s.Path)
+        } else {
+            logger.Infof("%s already exists. Updating path...", path)
+            scenePartial := models.ScenePartial{
+                ID: s.ID,
+                Path: &path,
+                Interactive: &interactive,
+            }
+            if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error {
+                _, err := r.Scene().Update(scenePartial)
+                return err
+            }); err != nil {
+                return nil, err
+            }
+
+            scanner.makeScreenshots(path, nil, sceneHash)
+            scanner.PluginCache.ExecutePostHooks(scanner.Ctx, s.ID, plugin.SceneUpdatePost, nil, nil)
+        }
+    } else {
+        logger.Infof("%s doesn't exist. Creating new item...", path)
+        currentTime := time.Now()
+
+        videoFile, err := scanner.VideoFileCreator.NewVideoFile(path, scanner.StripFileExtension)
+        if err != nil {
+            return nil, err
+        }
+
+        // Override title to be filename if UseFileMetadata is false
+        if !scanner.UseFileMetadata {
+            videoFile.SetTitleFromPath(scanner.StripFileExtension)
+        }
+
+        newScene := models.Scene{
+            Checksum: sql.NullString{String: checksum, Valid: checksum != ""},
+            OSHash: sql.NullString{String: oshash, Valid: oshash != ""},
+            Path: path,
+            FileModTime: models.NullSQLiteTimestamp{
+                Timestamp: scanned.FileModTime,
+                Valid: true,
+            },
+            Title: sql.NullString{String: videoFile.Title, Valid: true},
+            CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
+            UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
+            Interactive: interactive,
+        }
+
+        videoFileToScene(&newScene, videoFile)
+
+        if scanner.UseFileMetadata {
+            newScene.Details = sql.NullString{String: videoFile.Comment, Valid: true}
+            newScene.Date = models.SQLiteDate{String: videoFile.CreationTime.Format("2006-01-02")}
+        }
+
+        if err := scanner.TxnManager.WithTxn(context.TODO(), func(r models.Repository) error {
+            var err error
+            retScene, err = r.Scene().Create(newScene)
+            return err
+        }); err != nil {
+            return nil, err
+        }
+
+        scanner.makeScreenshots(path, videoFile, sceneHash)
+        scanner.PluginCache.ExecutePostHooks(scanner.Ctx, retScene.ID, plugin.SceneCreatePost, nil, nil)
+    }
+
+    return retScene, nil
+}
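
The #1426 branch above avoids flagging a duplicate when the filesystem is case-insensitive and the new path only differs from the stored one by case; such a file is treated as a move and the stored path is updated. Below is a minimal standalone sketch of that decision; isMove, existingFileExists and the sample paths are illustrative names standing in for the utils.FileExists check and filesystem detection, not part of the stash code.

```go
package main

import (
	"fmt"
	"strings"
)

// isMove reports whether a newly scanned path should be treated as a rename of
// an existing scene rather than a duplicate. existingFileExists stands in for
// a stat of the stored path; caseSensitiveFs for the filesystem detection.
func isMove(newPath, storedPath string, existingFileExists, caseSensitiveFs bool) bool {
	if existingFileExists && !caseSensitiveFs && strings.EqualFold(newPath, storedPath) {
		// same file reached via different casing - treat it as a move
		return true
	}
	// the stored path no longer exists, so the file has moved
	return !existingFileExists
}

func main() {
	fmt.Println(isMove(`C:\media\Clip.MP4`, `C:\media\clip.mp4`, true, false)) // true
	fmt.Println(isMove("/media/clip.mp4", "/media/other.mp4", true, true))     // false
}
```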
+
+func videoFileToScene(s *models.Scene, videoFile *ffmpeg.VideoFile) {
+    container := ffmpeg.MatchContainer(videoFile.Container, s.Path)
+
+    s.Duration = sql.NullFloat64{Float64: videoFile.Duration, Valid: true}
+    s.VideoCodec = sql.NullString{String: videoFile.VideoCodec, Valid: true}
+    s.AudioCodec = sql.NullString{String: videoFile.AudioCodec, Valid: true}
+    s.Format = sql.NullString{String: string(container), Valid: true}
+    s.Width = sql.NullInt64{Int64: int64(videoFile.Width), Valid: true}
+    s.Height = sql.NullInt64{Int64: int64(videoFile.Height), Valid: true}
+    s.Framerate = sql.NullFloat64{Float64: videoFile.FrameRate, Valid: true}
+    s.Bitrate = sql.NullInt64{Int64: videoFile.Bitrate, Valid: true}
+    s.Size = sql.NullString{String: strconv.FormatInt(videoFile.Size, 10), Valid: true}
+}
+
+func (scanner *Scanner) makeScreenshots(path string, probeResult *ffmpeg.VideoFile, checksum string) {
+    thumbPath := scanner.Paths.Scene.GetThumbnailScreenshotPath(checksum)
+    normalPath := scanner.Paths.Scene.GetScreenshotPath(checksum)
+
+    thumbExists, _ := utils.FileExists(thumbPath)
+    normalExists, _ := utils.FileExists(normalPath)
+
+    if thumbExists && normalExists {
+        return
+    }
+
+    if probeResult == nil {
+        var err error
+        probeResult, err = scanner.VideoFileCreator.NewVideoFile(path, scanner.StripFileExtension)
+
+        if err != nil {
+            logger.Error(err.Error())
+            return
+        }
+        logger.Infof("Regenerating images for %s", path)
+    }
+
+    at := float64(probeResult.Duration) * 0.2
+
+    if !thumbExists {
+        logger.Debugf("Creating thumbnail for %s", path)
+        makeScreenshot(scanner.Screenshotter, *probeResult, thumbPath, 5, 320, at)
+    }
+
+    if !normalExists {
+        logger.Debugf("Creating screenshot for %s", path)
+        makeScreenshot(scanner.Screenshotter, *probeResult, normalPath, 2, probeResult.Width, at)
+    }
+}
+
+func getInteractive(path string) bool {
+    _, err := os.Stat(utils.GetFunscriptPath(path))
+    return err == nil
+}
diff --git a/pkg/scene/screenshot.go b/pkg/scene/screenshot.go
new file mode 100644
index 000000000..9ee0ccab0
--- /dev/null
+++ b/pkg/scene/screenshot.go
@@ -0,0 +1,23 @@
+package scene
+
+import (
+    "github.com/stashapp/stash/pkg/ffmpeg"
+    "github.com/stashapp/stash/pkg/logger"
+)
+
+type screenshotter interface {
+    Screenshot(probeResult ffmpeg.VideoFile, options ffmpeg.ScreenshotOptions) error
+}
+
+func makeScreenshot(encoder screenshotter, probeResult ffmpeg.VideoFile, outputPath string, quality int, width int, time float64) {
+    options := ffmpeg.ScreenshotOptions{
+        OutputPath: outputPath,
+        Quality: quality,
+        Time: time,
+        Width: width,
+    }
+
+    if err := encoder.Screenshot(probeResult, options); err != nil {
+        logger.Warnf("[encoder] failure to generate screenshot: %v", err)
+    }
+}
diff --git a/pkg/utils/file.go b/pkg/utils/file.go
index baeefed26..474f7b8a2 100644
--- a/pkg/utils/file.go
+++ b/pkg/utils/file.go
@@ -377,3 +377,16 @@ func FindInPaths(paths []string, baseName string) string {
 
     return ""
 }
+
+// MatchExtension returns true if the extension of the provided path
+// matches any of the provided extensions.
+func MatchExtension(path string, extensions []string) bool {
+    ext := filepath.Ext(path)
+    for _, e := range extensions {
+        if strings.EqualFold(ext, "."+e) {
+            return true
+        }
+    }
+
+    return false
+}
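
utils.MatchExtension above compares the path's extension case-insensitively against a list of bare extensions (no leading dot). A small usage sketch, assuming the stash module is importable:

```go
package main

import (
	"fmt"

	"github.com/stashapp/stash/pkg/utils"
)

func main() {
	videoExts := []string{"mp4", "mkv", "webm"}

	fmt.Println(utils.MatchExtension("/media/scene.MP4", videoExts)) // true - comparison is case-insensitive
	fmt.Println(utils.MatchExtension("/media/cover.jpg", videoExts)) // false
	fmt.Println(utils.MatchExtension("/media/archive.zip", []string{"zip"})) // true
}
```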
diff --git a/pkg/utils/mutex.go b/pkg/utils/mutex.go
new file mode 100644
index 000000000..212200214
--- /dev/null
+++ b/pkg/utils/mutex.go
@@ -0,0 +1,64 @@
+package utils
+
+// MutexManager manages access to mutexes using a mutex type and key.
+type MutexManager struct {
+    mapChan chan map[string]<-chan struct{}
+}
+
+// NewMutexManager returns a new instance of MutexManager.
+func NewMutexManager() *MutexManager {
+    ret := &MutexManager{
+        mapChan: make(chan map[string]<-chan struct{}, 1),
+    }
+
+    initial := make(map[string]<-chan struct{})
+    ret.mapChan <- initial
+
+    return ret
+}
+
+// Claim blocks until the mutex for the mutexType and key pair is available.
+// The mutex is then claimed by the calling code until the provided done
+// channel is closed.
+func (csm *MutexManager) Claim(mutexType string, key string, done <-chan struct{}) {
+    mapKey := mutexType + "_" + key
+    success := false
+
+    var existing <-chan struct{}
+    for !success {
+        // grab the map
+        m := <-csm.mapChan
+
+        // get the entry for the given key
+        newEntry := m[mapKey]
+
+        // if it's the existing entry or nil, then it's available; add our channel
+        if newEntry == nil || newEntry == existing {
+            m[mapKey] = done
+            success = true
+        }
+
+        // return the map
+        csm.mapChan <- m
+
+        // if there is an existing entry, now we can wait for it to
+        // finish, then repeat the process
+        if newEntry != nil {
+            existing = newEntry
+            <-newEntry
+        }
+    }
+
+    // spawn a goroutine to remove the entry from the map once done is closed
+    go func() {
+        <-done
+
+        m := <-csm.mapChan
+
+        if m[mapKey] == done {
+            delete(m, mapKey)
+        }
+
+        csm.mapChan <- m
+    }()
+}
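
To make the Claim semantics concrete: each (mutexType, key) pair acts as an independent lock, held from the moment Claim returns until the supplied done channel is closed. A usage sketch, assuming the stash module is importable; the type and key strings are illustrative:

```go
package main

import (
	"fmt"
	"sync"

	"github.com/stashapp/stash/pkg/utils"
)

func main() {
	mm := utils.NewMutexManager()

	var wg sync.WaitGroup
	counter := 0

	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()

			done := make(chan struct{})
			defer close(done) // closing done releases the claim

			// every goroutine contends on the same ("oshash", "abc123") pair,
			// so the increment below is serialised
			mm.Claim("oshash", "abc123", done)
			counter++
		}()
	}

	wg.Wait()
	fmt.Println(counter) // 4
}
```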
diff --git a/pkg/utils/mutex_test.go b/pkg/utils/mutex_test.go
new file mode 100644
index 000000000..f5d03091e
--- /dev/null
+++ b/pkg/utils/mutex_test.go
@@ -0,0 +1,50 @@
+package utils
+
+import (
+    "sync"
+    "testing"
+)
+
+// should be run with -race
+func TestMutexManager(t *testing.T) {
+    m := NewMutexManager()
+
+    map1 := make(map[string]bool)
+    map2 := make(map[string]bool)
+    map3 := make(map[string]bool)
+    maps := []map[string]bool{
+        map1,
+        map2,
+        map3,
+    }
+
+    types := []string{
+        "foo",
+        "foo",
+        "bar",
+    }
+
+    const key = "baz"
+
+    const workers = 8
+    const loops = 300
+    var wg sync.WaitGroup
+    for k := 0; k < workers; k++ {
+        wg.Add(1)
+        go func(wk int) {
+            defer wg.Done()
+            for l := 0; l < loops; l++ {
+                func(l int) {
+                    c := make(chan struct{})
+                    defer close(c)
+
+                    m.Claim(types[l%3], key, c)
+
+                    maps[l%3][key] = true
+                }(l)
+            }
+        }(k)
+    }
+
+    wg.Wait()
+}
diff --git a/pkg/utils/oshash.go b/pkg/utils/oshash.go
index aa7990d7c..0057e2617 100644
--- a/pkg/utils/oshash.go
+++ b/pkg/utils/oshash.go
@@ -4,6 +4,7 @@ import (
     "encoding/binary"
     "errors"
     "fmt"
+    "io"
     "os"
 )
 
@@ -41,6 +42,40 @@ func oshash(size int64, head []byte, tail []byte) (string, error) {
     return fmt.Sprintf("%016x", result), nil
 }
 
+func OSHashFromReader(src io.ReadSeeker, fileSize int64) (string, error) {
+    if fileSize == 0 {
+        return "", nil
+    }
+
+    fileChunkSize := chunkSize
+    if fileSize < fileChunkSize {
+        fileChunkSize = fileSize
+    }
+
+    head := make([]byte, fileChunkSize)
+    tail := make([]byte, fileChunkSize)
+
+    // read the head of the file into the start of the buffer
+    _, err := src.Read(head)
+    if err != nil {
+        return "", err
+    }
+
+    // seek to the end of the file - the chunk size
+    _, err = src.Seek(-fileChunkSize, 2)
+    if err != nil {
+        return "", err
+    }
+
+    // read the tail of the file
+    _, err = src.Read(tail)
+    if err != nil {
+        return "", err
+    }
+
+    return oshash(fileSize, head, tail)
+}
+
 // OSHashFromFilePath calculates the hash using the same algorithm that
 // OpenSubtitles.org uses.
 //
@@ -60,35 +95,5 @@ func OSHashFromFilePath(filePath string) (string, error) {
 
     fileSize := fi.Size()
 
-    if fileSize == 0 {
-        return "", nil
-    }
-
-    fileChunkSize := chunkSize
-    if fileSize < fileChunkSize {
-        fileChunkSize = fileSize
-    }
-
-    head := make([]byte, fileChunkSize)
-    tail := make([]byte, fileChunkSize)
-
-    // read the head of the file into the start of the buffer
-    _, err = f.Read(head)
-    if err != nil {
-        return "", err
-    }
-
-    // seek to the end of the file - the chunk size
-    _, err = f.Seek(-fileChunkSize, 2)
-    if err != nil {
-        return "", err
-    }
-
-    // read the tail of the file
-    _, err = f.Read(tail)
-    if err != nil {
-        return "", err
-    }
-
-    return oshash(fileSize, head, tail)
+    return OSHashFromReader(f, fileSize)
 }
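
Because OSHashFromReader only needs the stream size plus its first and last chunk, any io.ReadSeeker can be hashed, not just an *os.File opened by path; the second hunk above reduces OSHashFromFilePath to a thin wrapper over it. A usage sketch against a regular file, assuming the stash module is importable and that the sample path exists:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/stashapp/stash/pkg/utils"
)

func main() {
	f, err := os.Open("/media/scene.mp4")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Fatal(err)
	}

	// hashes the file size plus its first and last chunk;
	// equivalent to utils.OSHashFromFilePath("/media/scene.mp4")
	hash, err := utils.OSHashFromReader(f, fi.Size())
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(hash)
}
```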
diff --git a/ui/v2.5/src/components/Changelog/versions/v0110.md b/ui/v2.5/src/components/Changelog/versions/v0110.md
index db698cadb..e99faa25a 100644
--- a/ui/v2.5/src/components/Changelog/versions/v0110.md
+++ b/ui/v2.5/src/components/Changelog/versions/v0110.md
@@ -3,6 +3,9 @@
 * Added built-in `Auto Tag` scene scraper to match performers, studio and tags from filename - using AutoTag logic. ([#1817](https://github.com/stashapp/stash/pull/1817))
 * Added interface options to disable creating performers/studios/tags from dropdown selectors. ([#1814](https://github.com/stashapp/stash/pull/1814))
 
+### 🎨 Improvements
+* Optimised scanning process. ([#1816](https://github.com/stashapp/stash/pull/1816))
+
 ### 🐛 Bug fixes
 * Fix colour codes not outputting correctly when logging to file on Windows. ([#1846](https://github.com/stashapp/stash/pull/1846))
 * Sort directory listings using case sensitive collation. ([#1823](https://github.com/stashapp/stash/pull/1823))