[Files Refactor] Import export fixup (#2763)

* Adjust json schema
* Remove mappings file from export
* Import file/folder support
* Update documentation
* Make gallery filenames unique
This commit is contained in:
WithoutPants
2022-08-30 12:17:15 +10:00
parent 1222b7b87b
commit 0b534d89c6
35 changed files with 3315 additions and 3146 deletions

View File

@@ -8,13 +8,14 @@ import (
"os"
"path/filepath"
"runtime"
"strconv"
"sync"
"time"
"github.com/stashapp/stash/internal/manager/config"
"github.com/stashapp/stash/pkg/file"
"github.com/stashapp/stash/pkg/fsutil"
"github.com/stashapp/stash/pkg/gallery"
"github.com/stashapp/stash/pkg/hash/md5"
"github.com/stashapp/stash/pkg/image"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
@@ -38,7 +39,6 @@ type ExportTask struct {
baseDir string
json jsonUtils
Mappings *jsonschema.Mappings
fileNamingAlgorithm models.HashAlgorithm
scenes *exportSpec
@@ -118,8 +118,6 @@ func (t *ExportTask) Start(ctx context.Context, wg *sync.WaitGroup) {
// @manager.total = Scene.count + Gallery.count + Performer.count + Studio.count + Movie.count
workerCount := runtime.GOMAXPROCS(0) // set worker count to number of cpus available
t.Mappings = &jsonschema.Mappings{}
startTime := time.Now()
if t.full {
@@ -140,10 +138,16 @@ func (t *ExportTask) Start(ctx context.Context, wg *sync.WaitGroup) {
}()
}
if t.baseDir == "" {
logger.Errorf("baseDir must not be empty")
return
}
t.json = jsonUtils{
json: *paths.GetJSONPaths(t.baseDir),
}
paths.EmptyJSONDirs(t.baseDir)
paths.EnsureJSONDirs(t.baseDir)
txnErr := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
@@ -180,10 +184,6 @@ func (t *ExportTask) Start(ctx context.Context, wg *sync.WaitGroup) {
logger.Warnf("error while running export transaction: %v", txnErr)
}
if err := t.json.saveMappings(t.Mappings); err != nil {
logger.Errorf("[mappings] failed to save json: %s", err.Error())
}
if !t.full {
err := t.generateDownload()
if err != nil {
@@ -226,12 +226,6 @@ func (t *ExportTask) zipFiles(w io.Writer) error {
json: *paths.GetJSONPaths(""),
}
// write the mappings file
err := t.zipFile(t.json.json.MappingsFile, "", z)
if err != nil {
return err
}
walkWarn(t.json.json.Tags, t.zipWalkFunc(u.json.Tags, z))
walkWarn(t.json.json.Galleries, t.zipWalkFunc(u.json.Galleries, z))
walkWarn(t.json.json.Performers, t.zipWalkFunc(u.json.Performers, z))
@@ -380,7 +374,6 @@ func (t *ExportTask) ExportScenes(ctx context.Context, workers int, repo Reposit
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[scenes] %d of %d", index, len(scenes))
}
t.Mappings.Scenes = append(t.Mappings.Scenes, jsonschema.PathNameMapping{Path: scene.Path(), Checksum: scene.GetHash(t.fileNamingAlgorithm)})
jobCh <- scene // feed workers
}
@@ -390,6 +383,96 @@ func (t *ExportTask) ExportScenes(ctx context.Context, workers int, repo Reposit
logger.Infof("[scenes] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
// exportFile converts f to its JSON directory-entry form and writes it through
// the task's JSON helper, using the filename the entry reports for itself.
// A failed save is logged; nothing is returned to the caller.
func exportFile(f file.File, t *ExportTask) {
	entry := fileToJSON(f)
	name := entry.Filename()

	err := t.json.saveFile(name, entry)
	if err == nil {
		return
	}

	logger.Errorf("[files] <%s> failed to save json: %s", name, err.Error())
}
// fileToJSON builds the jsonschema representation of f.
//
// The common fields (timestamps, slash-separated path, size, containing zip
// path and fingerprints) are copied from f.Base(). A *file.VideoFile or
// *file.ImageFile is returned as the matching specialised schema type wrapping
// that shared base; any other file is returned as the base entry itself.
func fileToJSON(f file.File) jsonschema.DirEntry {
	src := f.Base()

	out := &jsonschema.BaseFile{
		BaseDirEntry: jsonschema.BaseDirEntry{
			Type:      jsonschema.DirEntryTypeFile,
			ModTime:   json.JSONTime{Time: src.ModTime},
			Path:      filepath.ToSlash(src.Path),
			CreatedAt: json.JSONTime{Time: src.CreatedAt},
			UpdatedAt: json.JSONTime{Time: src.UpdatedAt},
		},
		Size: src.Size,
	}

	// NOTE(review): unlike Path above, the zip path is stored without
	// filepath.ToSlash — confirm this is what the importer expects.
	if zip := src.ZipFile; zip != nil {
		out.ZipFile = zip.Base().Path
	}

	for _, print := range src.Fingerprints {
		converted := jsonschema.Fingerprint{
			Type:        print.Type,
			Fingerprint: print.Fingerprint,
		}
		out.Fingerprints = append(out.Fingerprints, converted)
	}

	if video, ok := f.(*file.VideoFile); ok {
		out.Type = jsonschema.DirEntryTypeVideo
		return jsonschema.VideoFile{
			BaseFile:         out,
			Format:           video.Format,
			Width:            video.Width,
			Height:           video.Height,
			Duration:         video.Duration,
			VideoCodec:       video.VideoCodec,
			AudioCodec:       video.AudioCodec,
			FrameRate:        video.FrameRate,
			BitRate:          video.BitRate,
			Interactive:      video.Interactive,
			InteractiveSpeed: video.InteractiveSpeed,
		}
	}

	if img, ok := f.(*file.ImageFile); ok {
		out.Type = jsonschema.DirEntryTypeImage
		return jsonschema.ImageFile{
			BaseFile: out,
			Format:   img.Format,
			Width:    img.Width,
			Height:   img.Height,
		}
	}

	// Neither video nor image: the entry keeps the plain file type.
	return out
}
// exportFolder converts f to its JSON directory-entry form and writes it
// through the task's JSON helper, using the filename the entry reports for
// itself. A failed save is logged; nothing is returned to the caller.
func exportFolder(f file.Folder, t *ExportTask) {
	entry := folderToJSON(f)
	name := entry.Filename()

	err := t.json.saveFile(name, entry)
	if err == nil {
		return
	}

	logger.Errorf("[files] <%s> failed to save json: %s", name, err.Error())
}
// folderToJSON builds the jsonschema directory entry for folder f: its type is
// set to the folder entry type, its path is written with forward slashes, and
// its timestamps are copied across. When the folder has a containing zip file,
// that file's base path is recorded as well.
func folderToJSON(f file.Folder) jsonschema.DirEntry {
	entry := &jsonschema.BaseDirEntry{
		Type:      jsonschema.DirEntryTypeFolder,
		ModTime:   json.JSONTime{Time: f.ModTime},
		Path:      filepath.ToSlash(f.Path),
		CreatedAt: json.JSONTime{Time: f.CreatedAt},
		UpdatedAt: json.JSONTime{Time: f.UpdatedAt},
	}

	if zip := f.ZipFile; zip != nil {
		entry.ZipFile = zip.Base().Path
	}

	return entry
}
func exportScene(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models.Scene, repo Repository, t *ExportTask) {
defer wg.Done()
sceneReader := repo.Scene
@@ -413,6 +496,11 @@ func exportScene(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models
continue
}
// export files
for _, f := range s.Files {
exportFile(f, t)
}
newSceneJSON.Studio, err = scene.GetStudioName(ctx, studioReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene studio name: %s", sceneHash, err.Error())
@@ -425,7 +513,7 @@ func exportScene(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models
continue
}
newSceneJSON.Galleries = gallery.GetChecksums(galleries)
newSceneJSON.Galleries = gallery.GetRefs(galleries)
performers, err := performerReader.FindBySceneID(ctx, s.ID)
if err != nil {
@@ -477,12 +565,17 @@ func exportScene(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models
t.performers.IDs = intslice.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
sceneJSON, err := t.json.getScene(sceneHash)
if err == nil && jsonschema.CompareJSON(*sceneJSON, *newSceneJSON) {
continue
pf := s.PrimaryFile()
basename := ""
hash := ""
if pf != nil {
basename = pf.Basename
hash = s.OSHash()
}
if err := t.json.saveScene(sceneHash, newSceneJSON); err != nil {
fn := newSceneJSON.Filename(basename, hash)
if err := t.json.saveScene(fn, newSceneJSON); err != nil {
logger.Errorf("[scenes] <%s> failed to save json: %s", sceneHash, err.Error())
}
}
@@ -522,7 +615,6 @@ func (t *ExportTask) ExportImages(ctx context.Context, workers int, repo Reposit
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[images] %d of %d", index, len(images))
}
t.Mappings.Images = append(t.Mappings.Images, jsonschema.PathNameMapping{Path: image.Path(), Checksum: image.Checksum()})
jobCh <- image // feed workers
}
@@ -544,6 +636,11 @@ func exportImage(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models
newImageJSON := image.ToBasicJSON(s)
// export files
for _, f := range s.Files {
exportFile(f, t)
}
var err error
newImageJSON.Studio, err = image.GetStudioName(ctx, studioReader, s)
if err != nil {
@@ -557,7 +654,7 @@ func exportImage(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models
continue
}
newImageJSON.Galleries = t.getGalleryChecksums(imageGalleries)
newImageJSON.Galleries = gallery.GetRefs(imageGalleries)
performers, err := performerReader.FindByImageID(ctx, s.ID)
if err != nil {
@@ -585,24 +682,22 @@ func exportImage(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *models
t.performers.IDs = intslice.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
imageJSON, err := t.json.getImage(imageHash)
if err == nil && jsonschema.CompareJSON(*imageJSON, *newImageJSON) {
continue
pf := s.PrimaryFile()
basename := ""
hash := ""
if pf != nil {
basename = pf.Basename
hash = s.Checksum()
}
if err := t.json.saveImage(imageHash, newImageJSON); err != nil {
fn := newImageJSON.Filename(basename, hash)
if err := t.json.saveImage(fn, newImageJSON); err != nil {
logger.Errorf("[images] <%s> failed to save json: %s", imageHash, err.Error())
}
}
}
// getGalleryChecksums returns the checksum of every gallery in galleries, in
// the same order. The result is nil when galleries is empty.
func (t *ExportTask) getGalleryChecksums(galleries []*models.Gallery) (ret []string) {
	for i := range galleries {
		ret = append(ret, galleries[i].Checksum())
	}
	return ret
}
func (t *ExportTask) ExportGalleries(ctx context.Context, workers int, repo Repository) {
var galleriesWg sync.WaitGroup
@@ -638,14 +733,6 @@ func (t *ExportTask) ExportGalleries(ctx context.Context, workers int, repo Repo
logger.Progressf("[galleries] %d of %d", index, len(galleries))
}
title := gallery.Title
path := gallery.Path()
t.Mappings.Galleries = append(t.Mappings.Galleries, jsonschema.PathNameMapping{
Path: path,
Name: title,
Checksum: gallery.Checksum(),
})
jobCh <- gallery
}
@@ -670,6 +757,27 @@ func exportGallery(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *mode
continue
}
// export files
for _, f := range g.Files {
exportFile(f, t)
}
// export folder if necessary
if g.FolderID != nil {
folder, err := repo.Folder.Find(ctx, *g.FolderID)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery folder: %v", galleryHash, err)
continue
}
if folder == nil {
logger.Errorf("[galleries] <%s> unable to find gallery folder", galleryHash)
continue
}
exportFolder(*folder, t)
}
newGalleryJSON.Studio, err = gallery.GetStudioName(ctx, studioReader, g)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery studio name: %s", galleryHash, err.Error())
@@ -701,12 +809,23 @@ func exportGallery(ctx context.Context, wg *sync.WaitGroup, jobChan <-chan *mode
t.performers.IDs = intslice.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
galleryJSON, err := t.json.getGallery(galleryHash)
if err == nil && jsonschema.CompareJSON(*galleryJSON, *newGalleryJSON) {
continue
pf := g.PrimaryFile()
basename := ""
// use the id as the hash so that galleries sharing a basename still get unique filenames
hash := strconv.Itoa(g.ID)
switch {
case pf != nil:
basename = pf.Base().Basename
case g.FolderPath != "":
basename = filepath.Base(g.FolderPath)
default:
basename = g.Title
}
if err := t.json.saveGallery(galleryHash, newGalleryJSON); err != nil {
fn := newGalleryJSON.Filename(basename, hash)
if err := t.json.saveGallery(fn, newGalleryJSON); err != nil {
logger.Errorf("[galleries] <%s> failed to save json: %s", galleryHash, err.Error())
}
}
@@ -742,7 +861,6 @@ func (t *ExportTask) ExportPerformers(ctx context.Context, workers int, repo Rep
index := i + 1
logger.Progressf("[performers] %d of %d", index, len(performers))
t.Mappings.Performers = append(t.Mappings.Performers, jsonschema.PathNameMapping{Name: performer.Name.String, Checksum: performer.Checksum})
jobCh <- performer // feed workers
}
@@ -777,14 +895,9 @@ func (t *ExportTask) exportPerformer(ctx context.Context, wg *sync.WaitGroup, jo
t.tags.IDs = intslice.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
}
performerJSON, err := t.json.getPerformer(p.Checksum)
if err != nil {
logger.Debugf("[performers] error reading performer json: %s", err.Error())
} else if jsonschema.CompareJSON(*performerJSON, *newPerformerJSON) {
continue
}
fn := newPerformerJSON.Filename()
if err := t.json.savePerformer(p.Checksum, newPerformerJSON); err != nil {
if err := t.json.savePerformer(fn, newPerformerJSON); err != nil {
logger.Errorf("[performers] <%s> failed to save json: %s", p.Checksum, err.Error())
}
}
@@ -821,7 +934,6 @@ func (t *ExportTask) ExportStudios(ctx context.Context, workers int, repo Reposi
index := i + 1
logger.Progressf("[studios] %d of %d", index, len(studios))
t.Mappings.Studios = append(t.Mappings.Studios, jsonschema.PathNameMapping{Name: studio.Name.String, Checksum: studio.Checksum})
jobCh <- studio // feed workers
}
@@ -844,12 +956,9 @@ func (t *ExportTask) exportStudio(ctx context.Context, wg *sync.WaitGroup, jobCh
continue
}
studioJSON, err := t.json.getStudio(s.Checksum)
if err == nil && jsonschema.CompareJSON(*studioJSON, *newStudioJSON) {
continue
}
fn := newStudioJSON.Filename()
if err := t.json.saveStudio(s.Checksum, newStudioJSON); err != nil {
if err := t.json.saveStudio(fn, newStudioJSON); err != nil {
logger.Errorf("[studios] <%s> failed to save json: %s", s.Checksum, err.Error())
}
}
@@ -886,10 +995,6 @@ func (t *ExportTask) ExportTags(ctx context.Context, workers int, repo Repositor
index := i + 1
logger.Progressf("[tags] %d of %d", index, len(tags))
// generate checksum on the fly by name, since we don't store it
checksum := md5.FromString(tag.Name)
t.Mappings.Tags = append(t.Mappings.Tags, jsonschema.PathNameMapping{Name: tag.Name, Checksum: checksum})
jobCh <- tag // feed workers
}
@@ -912,16 +1017,10 @@ func (t *ExportTask) exportTag(ctx context.Context, wg *sync.WaitGroup, jobChan
continue
}
// generate checksum on the fly by name, since we don't store it
checksum := md5.FromString(thisTag.Name)
fn := newTagJSON.Filename()
tagJSON, err := t.json.getTag(checksum)
if err == nil && jsonschema.CompareJSON(*tagJSON, *newTagJSON) {
continue
}
if err := t.json.saveTag(checksum, newTagJSON); err != nil {
logger.Errorf("[tags] <%s> failed to save json: %s", checksum, err.Error())
if err := t.json.saveTag(fn, newTagJSON); err != nil {
logger.Errorf("[tags] <%s> failed to save json: %s", fn, err.Error())
}
}
}
@@ -957,7 +1056,6 @@ func (t *ExportTask) ExportMovies(ctx context.Context, workers int, repo Reposit
index := i + 1
logger.Progressf("[movies] %d of %d", index, len(movies))
t.Mappings.Movies = append(t.Mappings.Movies, jsonschema.PathNameMapping{Name: movie.Name.String, Checksum: movie.Checksum})
jobCh <- movie // feed workers
}
@@ -987,15 +1085,10 @@ func (t *ExportTask) exportMovie(ctx context.Context, wg *sync.WaitGroup, jobCha
}
}
movieJSON, err := t.json.getMovie(m.Checksum)
if err != nil {
logger.Debugf("[movies] error reading movie json: %s", err.Error())
} else if jsonschema.CompareJSON(*movieJSON, *newMovieJSON) {
continue
}
fn := newMovieJSON.Filename()
if err := t.json.saveMovie(m.Checksum, newMovieJSON); err != nil {
logger.Errorf("[movies] <%s> failed to save json: %s", m.Checksum, err.Error())
if err := t.json.saveMovie(fn, newMovieJSON); err != nil {
logger.Errorf("[movies] <%s> failed to save json: %s", fn, err.Error())
}
}
}