File storage rewrite (#2676)

* Restructure data layer part 2 (#2599)
* Refactor and separate image model
* Refactor image query builder
* Handle relationships in image query builder
* Remove relationship management methods
* Refactor gallery model/query builder
* Add scenes to gallery model
* Convert scene model
* Refactor scene models
* Remove unused methods
* Add unit tests for gallery
* Add image tests
* Add scene tests
* Convert unnecessary scene value pointers to values
* Convert unnecessary pointer values to values
* Refactor scene partial
* Add scene partial tests
* Refactor ImagePartial
* Add image partial tests
* Refactor gallery partial update
* Add partial gallery update tests
* Use zero/null package for null values
* Add files and scan system
* Add sqlite implementation for files/folders
* Add unit tests for files/folders
* Image refactors
* Update image data layer
* Refactor gallery model and creation
* Refactor scene model
* Refactor scenes
* Don't set title from filename
* Allow galleries to freely add/remove images
* Add multiple scene file support to graphql and UI
* Add multiple file support for images in graphql/UI
* Add multiple file for galleries in graphql/UI
* Remove use of some deprecated fields
* Remove scene path usage
* Remove gallery path usage
* Remove path from image
* Move funscript to video file
* Refactor caption detection
* Migrate existing data
* Add post commit/rollback hook system
* Lint. Comment out import/export tests
* Add WithDatabase read only wrapper
* Prepend tasks to list
* Add 32 pre-migration
* Add warnings in release and migration notes
This commit is contained in:
WithoutPants
2022-07-13 16:30:54 +10:00
parent 30877c75fb
commit 5495d72849
359 changed files with 43690 additions and 16000 deletions

View File

@@ -4,327 +4,279 @@ import (
"context"
"errors"
"fmt"
"os"
"io/fs"
"path/filepath"
"regexp"
"time"
"github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/internal/manager/config"
"github.com/stashapp/stash/pkg/file"
"github.com/stashapp/stash/pkg/file/video"
"github.com/stashapp/stash/pkg/fsutil"
"github.com/stashapp/stash/pkg/gallery"
"github.com/stashapp/stash/pkg/image"
"github.com/stashapp/stash/pkg/job"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/scene"
"github.com/stashapp/stash/pkg/scene/generate"
"github.com/stashapp/stash/pkg/utils"
)
const scanQueueSize = 200000
type scanner interface {
Scan(ctx context.Context, handlers []file.Handler, options file.ScanOptions, progressReporter file.ProgressReporter)
}
type ScanJob struct {
txnManager models.Repository
scanner scanner
input ScanMetadataInput
subscriptions *subscriptionManager
}
type scanFile struct {
path string
info os.FileInfo
caseSensitiveFs bool
}
func (j *ScanJob) Execute(ctx context.Context, progress *job.Progress) {
input := j.input
paths := getScanPaths(input.Paths)
if job.IsCancelled(ctx) {
logger.Info("Stopping due to user request")
return
}
start := time.Now()
config := config.GetInstance()
parallelTasks := config.GetParallelTasksWithAutoDetection()
logger.Infof("Scan started with %d parallel tasks", parallelTasks)
fileQueue := make(chan scanFile, scanQueueSize)
go func() {
total, newFiles := j.queueFiles(ctx, paths, fileQueue, parallelTasks)
if !job.IsCancelled(ctx) {
progress.SetTotal(total)
logger.Infof("Finished counting files. Total files to scan: %d, %d new files found", total, newFiles)
}
}()
wg := sizedwaitgroup.New(parallelTasks)
fileNamingAlgo := config.GetVideoFileNamingAlgorithm()
calculateMD5 := config.IsCalculateMD5()
var err error
var galleries []string
mutexManager := utils.NewMutexManager()
for f := range fileQueue {
if job.IsCancelled(ctx) {
break
}
if isGallery(f.path) {
galleries = append(galleries, f.path)
}
if err := instance.Paths.Generated.EnsureTmpDir(); err != nil {
logger.Warnf("couldn't create temporary directory: %v", err)
}
wg.Add()
task := ScanTask{
TxnManager: j.txnManager,
file: file.FSFile(f.path, f.info),
UseFileMetadata: input.UseFileMetadata,
StripFileExtension: input.StripFileExtension,
fileNamingAlgorithm: fileNamingAlgo,
calculateMD5: calculateMD5,
GeneratePreview: input.ScanGeneratePreviews,
GenerateImagePreview: input.ScanGenerateImagePreviews,
GenerateSprite: input.ScanGenerateSprites,
GeneratePhash: input.ScanGeneratePhashes,
GenerateThumbnails: input.ScanGenerateThumbnails,
progress: progress,
CaseSensitiveFs: f.caseSensitiveFs,
mutexManager: mutexManager,
}
go func() {
task.Start(ctx)
wg.Done()
progress.Increment()
}()
sp := getScanPaths(input.Paths)
paths := make([]string, len(sp))
for i, p := range sp {
paths[i] = p.Path
}
wg.Wait()
start := time.Now()
if err := instance.Paths.Generated.EmptyTmpDir(); err != nil {
logger.Warnf("couldn't empty temporary directory: %v", err)
const taskQueueSize = 200000
taskQueue := job.NewTaskQueue(ctx, progress, taskQueueSize, instance.Config.GetParallelTasksWithAutoDetection())
j.scanner.Scan(ctx, getScanHandlers(j.input, taskQueue, progress), file.ScanOptions{
Paths: paths,
ScanFilters: []file.PathFilter{newScanFilter(instance.Config)},
ZipFileExtensions: instance.Config.GetGalleryExtensions(),
ParallelTasks: instance.Config.GetParallelTasksWithAutoDetection(),
}, progress)
taskQueue.Close()
if job.IsCancelled(ctx) {
logger.Info("Stopping due to user request")
return
}
elapsed := time.Since(start)
logger.Info(fmt.Sprintf("Scan finished (%s)", elapsed))
if job.IsCancelled(ctx) {
logger.Info("Stopping due to user request")
return
}
if err != nil {
return
}
progress.ExecuteTask("Associating galleries", func() {
for _, path := range galleries {
wg.Add()
task := ScanTask{
TxnManager: j.txnManager,
file: file.FSFile(path, nil), // hopefully info is not needed
UseFileMetadata: false,
}
go task.associateGallery(ctx, &wg)
wg.Wait()
}
logger.Info("Finished gallery association")
})
j.subscriptions.notify()
}
func (j *ScanJob) queueFiles(ctx context.Context, paths []*config.StashConfig, scanQueue chan<- scanFile, parallelTasks int) (total int, newFiles int) {
defer close(scanQueue)
var minModTime time.Time
if j.input.Filter != nil && j.input.Filter.MinModTime != nil {
minModTime = *j.input.Filter.MinModTime
}
wg := sizedwaitgroup.New(parallelTasks)
for _, sp := range paths {
csFs, er := fsutil.IsFsPathCaseSensitive(sp.Path)
if er != nil {
logger.Warnf("Cannot determine fs case sensitivity: %s", er.Error())
}
err := walkFilesToScan(sp, func(path string, info os.FileInfo, err error) error {
// check stop
if job.IsCancelled(ctx) {
return context.Canceled
}
// exit early on cutoff
if info.Mode().IsRegular() && info.ModTime().Before(minModTime) {
return nil
}
wg.Add()
go func() {
defer wg.Done()
// #1756 - skip zero length files and directories
if info.IsDir() {
return
}
if info.Size() == 0 {
logger.Infof("Skipping zero-length file: %s", path)
return
}
total++
if !j.doesPathExist(ctx, path) {
newFiles++
}
scanQueue <- scanFile{
path: path,
info: info,
caseSensitiveFs: csFs,
}
}()
return nil
})
wg.Wait()
if err != nil && !errors.Is(err, context.Canceled) {
logger.Errorf("Error encountered queuing files to scan: %s", err.Error())
return
}
}
return
type scanFilter struct {
stashPaths []*config.StashConfig
generatedPath string
vidExt []string
imgExt []string
zipExt []string
videoExcludeRegex []*regexp.Regexp
imageExcludeRegex []*regexp.Regexp
}
func (j *ScanJob) doesPathExist(ctx context.Context, path string) bool {
config := config.GetInstance()
vidExt := config.GetVideoExtensions()
imgExt := config.GetImageExtensions()
gExt := config.GetGalleryExtensions()
func newScanFilter(c *config.Instance) *scanFilter {
return &scanFilter{
stashPaths: c.GetStashPaths(),
generatedPath: c.GetGeneratedPath(),
vidExt: c.GetVideoExtensions(),
imgExt: c.GetImageExtensions(),
zipExt: c.GetGalleryExtensions(),
videoExcludeRegex: generateRegexps(c.GetExcludes()),
imageExcludeRegex: generateRegexps(c.GetImageExcludes()),
}
}
ret := false
txnErr := j.txnManager.WithTxn(ctx, func(ctx context.Context) error {
r := j.txnManager
switch {
case fsutil.MatchExtension(path, gExt):
g, _ := r.Gallery.FindByPath(ctx, path)
if g != nil {
ret = true
}
case fsutil.MatchExtension(path, vidExt):
s, _ := r.Scene.FindByPath(ctx, path)
if s != nil {
ret = true
}
case fsutil.MatchExtension(path, imgExt):
i, _ := r.Image.FindByPath(ctx, path)
if i != nil {
ret = true
}
}
return nil
})
if txnErr != nil {
logger.Warnf("error checking if file exists in database: %v", txnErr)
func (f *scanFilter) Accept(ctx context.Context, path string, info fs.FileInfo) bool {
if fsutil.IsPathInDir(f.generatedPath, path) {
return false
}
return ret
}
isVideoFile := fsutil.MatchExtension(path, f.vidExt)
isImageFile := fsutil.MatchExtension(path, f.imgExt)
isZipFile := fsutil.MatchExtension(path, f.zipExt)
type ScanTask struct {
TxnManager models.Repository
file file.SourceFile
UseFileMetadata bool
StripFileExtension bool
calculateMD5 bool
fileNamingAlgorithm models.HashAlgorithm
GenerateSprite bool
GeneratePhash bool
GeneratePreview bool
GenerateImagePreview bool
GenerateThumbnails bool
zipGallery *models.Gallery
progress *job.Progress
CaseSensitiveFs bool
// handle caption files
if fsutil.MatchExtension(path, video.CaptionExts) {
// we don't include caption files in the file scan, but we do need
// to handle them
video.AssociateCaptions(ctx, path, instance.Repository, instance.Database.File, instance.Database.File)
mutexManager *utils.MutexManager
}
return false
}
func (t *ScanTask) Start(ctx context.Context) {
var s *models.Scene
path := t.file.Path()
t.progress.ExecuteTask("Scanning "+path, func() {
switch {
case isGallery(path):
t.scanGallery(ctx)
case isVideo(path):
s = t.scanScene(ctx)
case isImage(path):
t.scanImage(ctx)
case isCaptions(path):
t.associateCaptions(ctx)
}
})
if !info.IsDir() && !isVideoFile && !isImageFile && !isZipFile {
return false
}
// #1756 - skip zero length files
if !info.IsDir() && info.Size() == 0 {
logger.Infof("Skipping zero-length file: %s", path)
return false
}
s := getStashFromDirPath(f.stashPaths, path)
if s == nil {
return
return false
}
// Handle the case of a scene
iwg := sizedwaitgroup.New(2)
// shortcut: skip the directory entirely if it matches both exclusion patterns
// add a trailing separator so that it correctly matches against patterns like path/.*
pathExcludeTest := path + string(filepath.Separator)
if (s.ExcludeVideo || matchFileRegex(pathExcludeTest, f.videoExcludeRegex)) && (s.ExcludeImage || matchFileRegex(pathExcludeTest, f.imageExcludeRegex)) {
return false
}
if t.GenerateSprite {
iwg.Add()
if isVideoFile && (s.ExcludeVideo || matchFileRegex(path, f.videoExcludeRegex)) {
return false
} else if (isImageFile || isZipFile) && s.ExcludeImage || matchFileRegex(path, f.imageExcludeRegex) {
return false
}
go t.progress.ExecuteTask(fmt.Sprintf("Generating sprites for %s", path), func() {
return true
}
type scanConfig struct {
isGenerateThumbnails bool
}
func (c *scanConfig) GetCreateGalleriesFromFolders() bool {
return instance.Config.GetCreateGalleriesFromFolders()
}
func (c *scanConfig) IsGenerateThumbnails() bool {
return c.isGenerateThumbnails
}
func getScanHandlers(options ScanMetadataInput, taskQueue *job.TaskQueue, progress *job.Progress) []file.Handler {
db := instance.Database
pluginCache := instance.PluginCache
return []file.Handler{
&file.FilteredHandler{
Filter: file.FilterFunc(imageFileFilter),
Handler: &image.ScanHandler{
CreatorUpdater: db.Image,
GalleryFinder: db.Gallery,
ThumbnailGenerator: &imageThumbnailGenerator{},
ScanConfig: &scanConfig{
isGenerateThumbnails: options.ScanGenerateThumbnails,
},
PluginCache: pluginCache,
},
},
&file.FilteredHandler{
Filter: file.FilterFunc(galleryFileFilter),
Handler: &gallery.ScanHandler{
CreatorUpdater: db.Gallery,
SceneFinderUpdater: db.Scene,
PluginCache: pluginCache,
},
},
&file.FilteredHandler{
Filter: file.FilterFunc(videoFileFilter),
Handler: &scene.ScanHandler{
CreatorUpdater: db.Scene,
PluginCache: pluginCache,
CoverGenerator: &coverGenerator{},
ScanGenerator: &sceneGenerators{
input: options,
taskQueue: taskQueue,
progress: progress,
},
},
},
}
}
type imageThumbnailGenerator struct{}
func (g *imageThumbnailGenerator) GenerateThumbnail(ctx context.Context, i *models.Image, f *file.ImageFile) error {
thumbPath := GetInstance().Paths.Generated.GetThumbnailPath(i.Checksum(), models.DefaultGthumbWidth)
exists, _ := fsutil.FileExists(thumbPath)
if exists {
return nil
}
if f.Height <= models.DefaultGthumbWidth && f.Width <= models.DefaultGthumbWidth {
return nil
}
logger.Debugf("Generating thumbnail for %s", f.Path)
encoder := image.NewThumbnailEncoder(instance.FFMPEG)
data, err := encoder.GetThumbnail(f, models.DefaultGthumbWidth)
if err != nil {
// don't log for animated images
if !errors.Is(err, image.ErrNotSupportedForThumbnail) {
return fmt.Errorf("getting thumbnail for image %s: %w", f.Path, err)
}
return nil
}
err = fsutil.WriteFile(thumbPath, data)
if err != nil {
return fmt.Errorf("writing thumbnail for image %s: %w", f.Path, err)
}
return nil
}
type sceneGenerators struct {
input ScanMetadataInput
taskQueue *job.TaskQueue
progress *job.Progress
}
func (g *sceneGenerators) Generate(ctx context.Context, s *models.Scene, f *file.VideoFile) error {
const overwrite = false
progress := g.progress
t := g.input
path := f.Path
config := instance.Config
fileNamingAlgorithm := config.GetVideoFileNamingAlgorithm()
if t.ScanGenerateSprites {
progress.AddTotal(1)
g.taskQueue.Add(fmt.Sprintf("Generating sprites for %s", path), func(ctx context.Context) {
taskSprite := GenerateSpriteTask{
Scene: *s,
Overwrite: false,
fileNamingAlgorithm: t.fileNamingAlgorithm,
Overwrite: overwrite,
fileNamingAlgorithm: fileNamingAlgorithm,
}
taskSprite.Start(ctx)
iwg.Done()
progress.Increment()
})
}
if t.GeneratePhash {
iwg.Add()
go t.progress.ExecuteTask(fmt.Sprintf("Generating phash for %s", path), func() {
if t.ScanGeneratePhashes {
progress.AddTotal(1)
g.taskQueue.Add(fmt.Sprintf("Generating phash for %s", path), func(ctx context.Context) {
taskPhash := GeneratePhashTask{
Scene: *s,
fileNamingAlgorithm: t.fileNamingAlgorithm,
txnManager: t.TxnManager,
File: f,
fileNamingAlgorithm: fileNamingAlgorithm,
txnManager: instance.Database,
fileUpdater: instance.Database.File,
Overwrite: overwrite,
}
taskPhash.Start(ctx)
iwg.Done()
progress.Increment()
})
}
if t.GeneratePreview {
iwg.Add()
go t.progress.ExecuteTask(fmt.Sprintf("Generating preview for %s", path), func() {
if t.ScanGeneratePreviews {
progress.AddTotal(1)
g.taskQueue.Add(fmt.Sprintf("Generating preview for %s", path), func(ctx context.Context) {
options := getGeneratePreviewOptions(GeneratePreviewOptionsInput{})
const overwrite = false
g := &generate.Generator{
Encoder: instance.FFMPEG,
@@ -336,73 +288,16 @@ func (t *ScanTask) Start(ctx context.Context) {
taskPreview := GeneratePreviewTask{
Scene: *s,
ImagePreview: t.GenerateImagePreview,
ImagePreview: t.ScanGenerateImagePreviews,
Options: options,
Overwrite: overwrite,
fileNamingAlgorithm: t.fileNamingAlgorithm,
fileNamingAlgorithm: fileNamingAlgorithm,
generator: g,
}
taskPreview.Start(ctx)
iwg.Done()
progress.Increment()
})
}
iwg.Wait()
}
func walkFilesToScan(s *config.StashConfig, f filepath.WalkFunc) error {
config := config.GetInstance()
vidExt := config.GetVideoExtensions()
imgExt := config.GetImageExtensions()
gExt := config.GetGalleryExtensions()
capExt := scene.CaptionExts
excludeVidRegex := generateRegexps(config.GetExcludes())
excludeImgRegex := generateRegexps(config.GetImageExcludes())
// don't scan zip images directly
if file.IsZipPath(s.Path) {
logger.Warnf("Cannot rescan zip image %s. Rescan zip gallery instead.", s.Path)
return nil
}
generatedPath := config.GetGeneratedPath()
return fsutil.SymWalk(s.Path, func(path string, info os.FileInfo, err error) error {
if err != nil {
logger.Warnf("error scanning %s: %s", path, err.Error())
return nil
}
if info.IsDir() {
// #1102 - ignore files in generated path
if fsutil.IsPathInDir(generatedPath, path) {
return filepath.SkipDir
}
// shortcut: skip the directory entirely if it matches both exclusion patterns
// add a trailing separator so that it correctly matches against patterns like path/.*
pathExcludeTest := path + string(filepath.Separator)
if (s.ExcludeVideo || matchFileRegex(pathExcludeTest, excludeVidRegex)) && (s.ExcludeImage || matchFileRegex(pathExcludeTest, excludeImgRegex)) {
return filepath.SkipDir
}
return nil
}
if !s.ExcludeVideo && fsutil.MatchExtension(path, vidExt) && !matchFileRegex(path, excludeVidRegex) {
return f(path, info, err)
}
if !s.ExcludeImage {
if (fsutil.MatchExtension(path, imgExt) || fsutil.MatchExtension(path, gExt)) && !matchFileRegex(path, excludeImgRegex) {
return f(path, info, err)
}
}
if fsutil.MatchExtension(path, capExt) {
return f(path, info, err)
}
return nil
})
return nil
}