Parallel scanning/generation, and combined scanning/preview/sprite (#820)

* Implement parallel scanning and generation, and combined scanning/preview/sprite generation.
* Added UI component for preview/sprite generation during scan, and configurable number of parallel tasks.
* Add v050 changelog entry
This commit is contained in:
JoeSmithStarkers
2020-11-25 12:45:10 +11:00
committed by GitHub
parent 502e9297ad
commit e3eb550a7d
25 changed files with 445 additions and 73 deletions

1
go.mod
View File

@@ -20,6 +20,7 @@ require (
github.com/json-iterator/go v1.1.9 github.com/json-iterator/go v1.1.9
github.com/mattn/go-sqlite3 v1.13.0 github.com/mattn/go-sqlite3 v1.13.0
github.com/natefinch/pie v0.0.0-20170715172608-9a0d72014007 github.com/natefinch/pie v0.0.0-20170715172608-9a0d72014007
github.com/remeh/sizedwaitgroup v1.0.0
github.com/rs/cors v1.6.0 github.com/rs/cors v1.6.0
github.com/shurcooL/graphql v0.0.0-20181231061246-d48a9a75455f github.com/shurcooL/graphql v0.0.0-20181231061246-d48a9a75455f
github.com/sirupsen/logrus v1.4.2 github.com/sirupsen/logrus v1.4.2

2
go.sum
View File

@@ -614,6 +614,8 @@ github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R
github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E=
github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.0.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.0.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.1.0 h1:g0fH8RicVgNl+zVZDCDfbdWxAWoAEJyI7I3TZYXFiig= github.com/rogpeppe/go-internal v1.1.0 h1:g0fH8RicVgNl+zVZDCDfbdWxAWoAEJyI7I3TZYXFiig=

View File

@@ -9,6 +9,7 @@ fragment ConfigGeneralData on ConfigGeneralResult {
cachePath cachePath
calculateMD5 calculateMD5
videoFileNamingAlgorithm videoFileNamingAlgorithm
parallelTasks
previewSegments previewSegments
previewSegmentDuration previewSegmentDuration
previewExcludeStart previewExcludeStart

View File

@@ -35,6 +35,8 @@ input ConfigGeneralInput {
calculateMD5: Boolean! calculateMD5: Boolean!
"""Hash algorithm to use for generated file naming""" """Hash algorithm to use for generated file naming"""
videoFileNamingAlgorithm: HashAlgorithm! videoFileNamingAlgorithm: HashAlgorithm!
"""Number of parallel tasks to start during scan/generate"""
parallelTasks: Int
"""Number of segments in a preview file""" """Number of segments in a preview file"""
previewSegments: Int previewSegments: Int
"""Preview segment duration, in seconds""" """Preview segment duration, in seconds"""
@@ -96,6 +98,8 @@ type ConfigGeneralResult {
calculateMD5: Boolean! calculateMD5: Boolean!
"""Hash algorithm to use for generated file naming""" """Hash algorithm to use for generated file naming"""
videoFileNamingAlgorithm: HashAlgorithm! videoFileNamingAlgorithm: HashAlgorithm!
"""Number of parallel tasks to start during scan/generate"""
parallelTasks: Int!
"""Number of segments in a preview file""" """Number of segments in a preview file"""
previewSegments: Int! previewSegments: Int!
"""Preview segment duration, in seconds""" """Preview segment duration, in seconds"""

View File

@@ -32,7 +32,14 @@ input GeneratePreviewOptionsInput {
input ScanMetadataInput { input ScanMetadataInput {
paths: [String!] paths: [String!]
"""Set name, date, details from metadata (if present)"""
useFileMetadata: Boolean! useFileMetadata: Boolean!
"""Generate previews during scan"""
scanGeneratePreviews: Boolean!
"""Generate image previews during scan"""
scanGenerateImagePreviews: Boolean!
"""Generate sprites during scan"""
scanGenerateSprites: Boolean!
} }
input AutoTagMetadataInput { input AutoTagMetadataInput {

View File

@@ -61,6 +61,9 @@ func (r *mutationResolver) ConfigureGeneral(ctx context.Context, input models.Co
config.Set(config.CalculateMD5, input.CalculateMd5) config.Set(config.CalculateMD5, input.CalculateMd5)
if input.ParallelTasks != nil {
config.Set(config.ParallelTasks, *input.ParallelTasks)
}
if input.PreviewSegments != nil { if input.PreviewSegments != nil {
config.Set(config.PreviewSegments, *input.PreviewSegments) config.Set(config.PreviewSegments, *input.PreviewSegments)
} }

View File

@@ -49,6 +49,7 @@ func makeConfigGeneralResult() *models.ConfigGeneralResult {
CachePath: config.GetCachePath(), CachePath: config.GetCachePath(),
CalculateMd5: config.IsCalculateMD5(), CalculateMd5: config.IsCalculateMD5(),
VideoFileNamingAlgorithm: config.GetVideoFileNamingAlgorithm(), VideoFileNamingAlgorithm: config.GetVideoFileNamingAlgorithm(),
ParallelTasks: config.GetParallelTasks(),
PreviewSegments: config.GetPreviewSegments(), PreviewSegments: config.GetPreviewSegments(),
PreviewSegmentDuration: config.GetPreviewSegmentDuration(), PreviewSegmentDuration: config.GetPreviewSegmentDuration(),
PreviewExcludeStart: config.GetPreviewExcludeStart(), PreviewExcludeStart: config.GetPreviewExcludeStart(),

View File

@@ -2,6 +2,7 @@ package config
import ( import (
"golang.org/x/crypto/bcrypt" "golang.org/x/crypto/bcrypt"
"runtime"
"errors" "errors"
"io/ioutil" "io/ioutil"
@@ -56,6 +57,9 @@ const PreviewPreset = "preview_preset"
const MaxTranscodeSize = "max_transcode_size" const MaxTranscodeSize = "max_transcode_size"
const MaxStreamingTranscodeSize = "max_streaming_transcode_size" const MaxStreamingTranscodeSize = "max_streaming_transcode_size"
const ParallelTasks = "parallel_tasks"
const parallelTasksDefault = 1
const PreviewSegmentDuration = "preview_segment_duration" const PreviewSegmentDuration = "preview_segment_duration"
const previewSegmentDurationDefault = 0.75 const previewSegmentDurationDefault = 0.75
@@ -297,6 +301,20 @@ func GetPreviewSegmentDuration() float64 {
return viper.GetFloat64(PreviewSegmentDuration) return viper.GetFloat64(PreviewSegmentDuration)
} }
// GetParallelTasks returns the number of parallel tasks that should be started
// by scan or generate task.
func GetParallelTasks() int {
return viper.GetInt(ParallelTasks)
}
func GetParallelTasksWithAutoDetection() int {
parallelTasks := viper.GetInt(ParallelTasks)
if parallelTasks <= 0 {
parallelTasks = (runtime.NumCPU() / 4) + 1
}
return parallelTasks
}
// GetPreviewSegments returns the amount of segments in a scene preview file. // GetPreviewSegments returns the amount of segments in a scene preview file.
func GetPreviewSegments() int { func GetPreviewSegments() int {
return viper.GetInt(PreviewSegments) return viper.GetInt(PreviewSegments)
@@ -550,6 +568,7 @@ func IsValid() bool {
} }
func setDefaultValues() { func setDefaultValues() {
viper.SetDefault(ParallelTasks, parallelTasksDefault)
viper.SetDefault(PreviewSegmentDuration, previewSegmentDurationDefault) viper.SetDefault(PreviewSegmentDuration, previewSegmentDurationDefault)
viper.SetDefault(PreviewSegments, previewSegmentsDefault) viper.SetDefault(PreviewSegments, previewSegmentsDefault)
viper.SetDefault(PreviewExcludeStart, previewExcludeStartDefault) viper.SetDefault(PreviewExcludeStart, previewExcludeStartDefault)

View File

@@ -14,6 +14,7 @@ import (
type PreviewGenerator struct { type PreviewGenerator struct {
Info *GeneratorInfo Info *GeneratorInfo
VideoChecksum string
VideoFilename string VideoFilename string
ImageFilename string ImageFilename string
OutputDirectory string OutputDirectory string
@@ -26,7 +27,7 @@ type PreviewGenerator struct {
Overwrite bool Overwrite bool
} }
func NewPreviewGenerator(videoFile ffmpeg.VideoFile, videoFilename string, imageFilename string, outputDirectory string, generateVideo bool, generateImage bool, previewPreset string) (*PreviewGenerator, error) { func NewPreviewGenerator(videoFile ffmpeg.VideoFile, videoChecksum string, videoFilename string, imageFilename string, outputDirectory string, generateVideo bool, generateImage bool, previewPreset string) (*PreviewGenerator, error) {
exists, err := utils.FileExists(videoFile.Path) exists, err := utils.FileExists(videoFile.Path)
if !exists { if !exists {
return nil, err return nil, err
@@ -39,6 +40,7 @@ func NewPreviewGenerator(videoFile ffmpeg.VideoFile, videoFilename string, image
return &PreviewGenerator{ return &PreviewGenerator{
Info: generator, Info: generator,
VideoChecksum: videoChecksum,
VideoFilename: videoFilename, VideoFilename: videoFilename,
ImageFilename: imageFilename, ImageFilename: imageFilename,
OutputDirectory: outputDirectory, OutputDirectory: outputDirectory,
@@ -87,7 +89,7 @@ func (g *PreviewGenerator) generateConcatFile() error {
w := bufio.NewWriter(f) w := bufio.NewWriter(f)
for i := 0; i < g.Info.ChunkCount; i++ { for i := 0; i < g.Info.ChunkCount; i++ {
num := fmt.Sprintf("%.3d", i) num := fmt.Sprintf("%.3d", i)
filename := "preview" + num + ".mp4" filename := "preview_" + g.VideoChecksum + "_" + num + ".mp4"
_, _ = w.WriteString(fmt.Sprintf("file '%s'\n", filename)) _, _ = w.WriteString(fmt.Sprintf("file '%s'\n", filename))
} }
return w.Flush() return w.Flush()
@@ -105,7 +107,7 @@ func (g *PreviewGenerator) generateVideo(encoder *ffmpeg.Encoder, fallback bool)
for i := 0; i < g.Info.ChunkCount; i++ { for i := 0; i < g.Info.ChunkCount; i++ {
time := offset + (float64(i) * stepSize) time := offset + (float64(i) * stepSize)
num := fmt.Sprintf("%.3d", i) num := fmt.Sprintf("%.3d", i)
filename := "preview" + num + ".mp4" filename := "preview_" + g.VideoChecksum + "_" + num + ".mp4"
chunkOutputPath := instance.Paths.Generated.GetTmpPath(filename) chunkOutputPath := instance.Paths.Generated.GetTmpPath(filename)
options := ffmpeg.ScenePreviewChunkOptions{ options := ffmpeg.ScenePreviewChunkOptions{
@@ -148,5 +150,5 @@ func (g *PreviewGenerator) generateImage(encoder *ffmpeg.Encoder) error {
} }
func (g *PreviewGenerator) getConcatFilePath() string { func (g *PreviewGenerator) getConcatFilePath() string {
return instance.Paths.Generated.GetTmpPath("files.txt") return instance.Paths.Generated.GetTmpPath(fmt.Sprintf("files_%s.txt", g.VideoChecksum))
} }

View File

@@ -20,6 +20,7 @@ import (
type SpriteGenerator struct { type SpriteGenerator struct {
Info *GeneratorInfo Info *GeneratorInfo
VideoChecksum string
ImageOutputPath string ImageOutputPath string
VTTOutputPath string VTTOutputPath string
Rows int Rows int
@@ -28,7 +29,7 @@ type SpriteGenerator struct {
Overwrite bool Overwrite bool
} }
func NewSpriteGenerator(videoFile ffmpeg.VideoFile, imageOutputPath string, vttOutputPath string, rows int, cols int) (*SpriteGenerator, error) { func NewSpriteGenerator(videoFile ffmpeg.VideoFile, videoChecksum string, imageOutputPath string, vttOutputPath string, rows int, cols int) (*SpriteGenerator, error) {
exists, err := utils.FileExists(videoFile.Path) exists, err := utils.FileExists(videoFile.Path)
if !exists { if !exists {
return nil, err return nil, err
@@ -44,6 +45,7 @@ func NewSpriteGenerator(videoFile ffmpeg.VideoFile, imageOutputPath string, vttO
return &SpriteGenerator{ return &SpriteGenerator{
Info: generator, Info: generator,
VideoChecksum: videoChecksum,
ImageOutputPath: imageOutputPath, ImageOutputPath: imageOutputPath,
VTTOutputPath: vttOutputPath, VTTOutputPath: vttOutputPath,
Rows: rows, Rows: rows,
@@ -74,7 +76,7 @@ func (g *SpriteGenerator) generateSpriteImage(encoder *ffmpeg.Encoder) error {
for i := 0; i < g.Info.ChunkCount; i++ { for i := 0; i < g.Info.ChunkCount; i++ {
time := float64(i) * stepSize time := float64(i) * stepSize
num := fmt.Sprintf("%.3d", i) num := fmt.Sprintf("%.3d", i)
filename := "thumbnail" + num + ".jpg" filename := "thumbnail_" + g.VideoChecksum + "_" + num + ".jpg"
options := ffmpeg.ScreenshotOptions{ options := ffmpeg.ScreenshotOptions{
OutputPath: instance.Paths.Generated.GetTmpPath(filename), OutputPath: instance.Paths.Generated.GetTmpPath(filename),
@@ -85,7 +87,7 @@ func (g *SpriteGenerator) generateSpriteImage(encoder *ffmpeg.Encoder) error {
} }
// Combine all of the thumbnails into a sprite image // Combine all of the thumbnails into a sprite image
globPath := filepath.Join(instance.Paths.Generated.Tmp, "thumbnail*.jpg") globPath := filepath.Join(instance.Paths.Generated.Tmp, fmt.Sprintf("thumbnail_%s_*.jpg", g.VideoChecksum))
imagePaths, _ := doublestar.Glob(globPath) imagePaths, _ := doublestar.Glob(globPath)
utils.NaturalSort(imagePaths) utils.NaturalSort(imagePaths)
var images []image.Image var images []image.Image

View File

@@ -2,11 +2,14 @@ package manager
import ( import (
"errors" "errors"
"fmt"
"os" "os"
"strconv" "strconv"
"sync" "sync"
"time" "time"
"github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/manager/config" "github.com/stashapp/stash/pkg/manager/config"
"github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/models"
@@ -176,7 +179,11 @@ func (s *singleton) Scan(input models.ScanMetadataInput) {
logger.Infof("Starting scan of %d files. %d New files found", *total, *newFiles) logger.Infof("Starting scan of %d files. %d New files found", *total, *newFiles)
} }
var wg sync.WaitGroup start := time.Now()
parallelTasks := config.GetParallelTasksWithAutoDetection()
logger.Infof("Scan started with %d parallel tasks", parallelTasks)
wg := sizedwaitgroup.New(parallelTasks)
s.Status.Progress = 0 s.Status.Progress = 0
fileNamingAlgo := config.GetVideoFileNamingAlgorithm() fileNamingAlgo := config.GetVideoFileNamingAlgorithm()
calculateMD5 := config.IsCalculateMD5() calculateMD5 := config.IsCalculateMD5()
@@ -201,10 +208,11 @@ func (s *singleton) Scan(input models.ScanMetadataInput) {
galleries = append(galleries, path) galleries = append(galleries, path)
} }
wg.Add(1) instance.Paths.Generated.EnsureTmpDir()
task := ScanTask{FilePath: path, UseFileMetadata: input.UseFileMetadata, fileNamingAlgorithm: fileNamingAlgo, calculateMD5: calculateMD5}
wg.Add()
task := ScanTask{FilePath: path, UseFileMetadata: input.UseFileMetadata, fileNamingAlgorithm: fileNamingAlgo, calculateMD5: calculateMD5, GeneratePreview: input.ScanGeneratePreviews, GenerateImagePreview: input.ScanGenerateImagePreviews, GenerateSprite: input.ScanGenerateSprites}
go task.Start(&wg) go task.Start(&wg)
wg.Wait()
return nil return nil
}) })
@@ -224,9 +232,14 @@ func (s *singleton) Scan(input models.ScanMetadataInput) {
return return
} }
logger.Info("Finished scan") wg.Wait()
instance.Paths.Generated.EmptyTmpDir()
elapsed := time.Since(start)
logger.Info(fmt.Sprintf("Scan finished (%s)", elapsed))
for _, path := range galleries { for _, path := range galleries {
wg.Add(1) wg.Add()
task := ScanTask{FilePath: path, UseFileMetadata: false} task := ScanTask{FilePath: path, UseFileMetadata: false}
go task.associateGallery(&wg) go task.associateGallery(&wg)
wg.Wait() wg.Wait()
@@ -357,8 +370,10 @@ func (s *singleton) Generate(input models.GenerateMetadataInput) {
return return
} }
delta := utils.Btoi(input.Sprites) + utils.Btoi(input.Previews) + utils.Btoi(input.Markers) + utils.Btoi(input.Transcodes) parallelTasks := config.GetParallelTasksWithAutoDetection()
var wg sync.WaitGroup
logger.Infof("Generate started with %d parallel tasks", parallelTasks)
wg := sizedwaitgroup.New(parallelTasks)
s.Status.Progress = 0 s.Status.Progress = 0
lenScenes := len(scenes) lenScenes := len(scenes)
@@ -397,6 +412,10 @@ func (s *singleton) Generate(input models.GenerateMetadataInput) {
} }
setGeneratePreviewOptionsInput(generatePreviewOptions) setGeneratePreviewOptionsInput(generatePreviewOptions)
// Start measuring how long the scan has taken. (consider moving this up)
start := time.Now()
instance.Paths.Generated.EnsureTmpDir()
for i, scene := range scenes { for i, scene := range scenes {
s.Status.setProgress(i, total) s.Status.setProgress(i, total)
if s.Status.stopping { if s.Status.stopping {
@@ -409,15 +428,9 @@ func (s *singleton) Generate(input models.GenerateMetadataInput) {
continue continue
} }
wg.Add(delta)
// Clear the tmp directory for each scene
if input.Sprites || input.Previews || input.Markers {
instance.Paths.Generated.EmptyTmpDir()
}
if input.Sprites { if input.Sprites {
task := GenerateSpriteTask{Scene: *scene, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo} task := GenerateSpriteTask{Scene: *scene, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo}
wg.Add()
go task.Start(&wg) go task.Start(&wg)
} }
@@ -429,22 +442,25 @@ func (s *singleton) Generate(input models.GenerateMetadataInput) {
Overwrite: overwrite, Overwrite: overwrite,
fileNamingAlgorithm: fileNamingAlgo, fileNamingAlgorithm: fileNamingAlgo,
} }
wg.Add()
go task.Start(&wg) go task.Start(&wg)
} }
if input.Markers { if input.Markers {
wg.Add()
task := GenerateMarkersTask{Scene: scene, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo} task := GenerateMarkersTask{Scene: scene, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo}
go task.Start(&wg) go task.Start(&wg)
} }
if input.Transcodes { if input.Transcodes {
wg.Add()
task := GenerateTranscodeTask{Scene: *scene, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo} task := GenerateTranscodeTask{Scene: *scene, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo}
go task.Start(&wg) go task.Start(&wg)
} }
wg.Wait()
} }
wg.Wait()
for i, marker := range markers { for i, marker := range markers {
s.Status.setProgress(lenScenes+i, total) s.Status.setProgress(lenScenes+i, total)
if s.Status.stopping { if s.Status.stopping {
@@ -457,13 +473,16 @@ func (s *singleton) Generate(input models.GenerateMetadataInput) {
continue continue
} }
wg.Add(1) wg.Add()
task := GenerateMarkersTask{Marker: marker, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo} task := GenerateMarkersTask{Marker: marker, Overwrite: overwrite, fileNamingAlgorithm: fileNamingAlgo}
go task.Start(&wg) go task.Start(&wg)
wg.Wait()
} }
logger.Infof("Generate finished") wg.Wait()
instance.Paths.Generated.EmptyTmpDir()
elapsed := time.Since(start)
logger.Info(fmt.Sprintf("Generate finished (%s)", elapsed))
}() }()
} }

View File

@@ -3,7 +3,8 @@ package manager
import ( import (
"path/filepath" "path/filepath"
"strconv" "strconv"
"sync"
"github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/ffmpeg"
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
@@ -18,7 +19,7 @@ type GenerateMarkersTask struct {
fileNamingAlgorithm models.HashAlgorithm fileNamingAlgorithm models.HashAlgorithm
} }
func (t *GenerateMarkersTask) Start(wg *sync.WaitGroup) { func (t *GenerateMarkersTask) Start(wg *sizedwaitgroup.SizedWaitGroup) {
defer wg.Done() defer wg.Done()
if t.Scene != nil { if t.Scene != nil {

View File

@@ -1,7 +1,7 @@
package manager package manager
import ( import (
"sync" "github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/ffmpeg"
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
@@ -19,11 +19,13 @@ type GeneratePreviewTask struct {
fileNamingAlgorithm models.HashAlgorithm fileNamingAlgorithm models.HashAlgorithm
} }
func (t *GeneratePreviewTask) Start(wg *sync.WaitGroup) { func (t *GeneratePreviewTask) Start(wg *sizedwaitgroup.SizedWaitGroup) {
defer wg.Done() defer wg.Done()
videoFilename := t.videoFilename() videoFilename := t.videoFilename()
videoChecksum := t.Scene.GetHash(t.fileNamingAlgorithm)
imageFilename := t.imageFilename() imageFilename := t.imageFilename()
if !t.Overwrite && !t.required() { if !t.Overwrite && !t.required() {
return return
} }
@@ -35,7 +37,8 @@ func (t *GeneratePreviewTask) Start(wg *sync.WaitGroup) {
} }
const generateVideo = true const generateVideo = true
generator, err := NewPreviewGenerator(*videoFile, videoFilename, imageFilename, instance.Paths.Generated.Screenshots, generateVideo, t.ImagePreview, t.Options.PreviewPreset.String()) generator, err := NewPreviewGenerator(*videoFile, videoChecksum, videoFilename, imageFilename, instance.Paths.Generated.Screenshots, generateVideo, t.ImagePreview, t.Options.PreviewPreset.String())
if err != nil { if err != nil {
logger.Errorf("error creating preview generator: %s", err.Error()) logger.Errorf("error creating preview generator: %s", err.Error())
return return

View File

@@ -1,7 +1,7 @@
package manager package manager
import ( import (
"sync" "github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/ffmpeg"
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
@@ -15,7 +15,7 @@ type GenerateSpriteTask struct {
fileNamingAlgorithm models.HashAlgorithm fileNamingAlgorithm models.HashAlgorithm
} }
func (t *GenerateSpriteTask) Start(wg *sync.WaitGroup) { func (t *GenerateSpriteTask) Start(wg *sizedwaitgroup.SizedWaitGroup) {
defer wg.Done() defer wg.Done()
if !t.Overwrite && !t.required() { if !t.Overwrite && !t.required() {
@@ -31,7 +31,8 @@ func (t *GenerateSpriteTask) Start(wg *sync.WaitGroup) {
sceneHash := t.Scene.GetHash(t.fileNamingAlgorithm) sceneHash := t.Scene.GetHash(t.fileNamingAlgorithm)
imagePath := instance.Paths.Scene.GetSpriteImageFilePath(sceneHash) imagePath := instance.Paths.Scene.GetSpriteImageFilePath(sceneHash)
vttPath := instance.Paths.Scene.GetSpriteVttFilePath(sceneHash) vttPath := instance.Paths.Scene.GetSpriteVttFilePath(sceneHash)
generator, err := NewSpriteGenerator(*videoFile, imagePath, vttPath, 9, 9) generator, err := NewSpriteGenerator(*videoFile, sceneHash, imagePath, vttPath, 9, 9)
if err != nil { if err != nil {
logger.Errorf("error creating sprite generator: %s", err.Error()) logger.Errorf("error creating sprite generator: %s", err.Error())
return return

View File

@@ -9,10 +9,10 @@ import (
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/jmoiron/sqlx" "github.com/jmoiron/sqlx"
"github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/database" "github.com/stashapp/stash/pkg/database"
"github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/ffmpeg"
@@ -24,19 +24,61 @@ import (
) )
type ScanTask struct { type ScanTask struct {
FilePath string FilePath string
UseFileMetadata bool UseFileMetadata bool
calculateMD5 bool calculateMD5 bool
fileNamingAlgorithm models.HashAlgorithm fileNamingAlgorithm models.HashAlgorithm
GenerateSprite bool
zipGallery *models.Gallery GeneratePreview bool
GenerateImagePreview bool
zipGallery *models.Gallery
} }
func (t *ScanTask) Start(wg *sync.WaitGroup) { func (t *ScanTask) Start(wg *sizedwaitgroup.SizedWaitGroup) {
if isGallery(t.FilePath) { if isGallery(t.FilePath) {
t.scanGallery() t.scanGallery()
} else if isVideo(t.FilePath) { } else if isVideo(t.FilePath) {
t.scanScene() scene := t.scanScene()
if scene != nil {
iwg := sizedwaitgroup.New(2)
if t.GenerateSprite {
iwg.Add()
taskSprite := GenerateSpriteTask{Scene: *scene, Overwrite: false, fileNamingAlgorithm: t.fileNamingAlgorithm}
go taskSprite.Start(&iwg)
}
if t.GeneratePreview {
iwg.Add()
var previewSegmentDuration = config.GetPreviewSegmentDuration()
var previewSegments = config.GetPreviewSegments()
var previewExcludeStart = config.GetPreviewExcludeStart()
var previewExcludeEnd = config.GetPreviewExcludeEnd()
var previewPresent = config.GetPreviewPreset()
// NOTE: the reuse of this model like this is painful.
previewOptions := models.GeneratePreviewOptionsInput{
PreviewSegments: &previewSegments,
PreviewSegmentDuration: &previewSegmentDuration,
PreviewExcludeStart: &previewExcludeStart,
PreviewExcludeEnd: &previewExcludeEnd,
PreviewPreset: &previewPresent,
}
taskPreview := GeneratePreviewTask{
Scene: *scene,
ImagePreview: t.GenerateImagePreview,
Options: previewOptions,
Overwrite: false,
fileNamingAlgorithm: t.fileNamingAlgorithm,
}
go taskPreview.Start(&iwg)
}
iwg.Wait()
}
} else if isImage(t.FilePath) { } else if isImage(t.FilePath) {
t.scanImage() t.scanImage()
} }
@@ -237,7 +279,7 @@ func (t *ScanTask) isFileModified(fileModTime time.Time, modTime models.NullSQLi
} }
// associates a gallery to a scene with the same basename // associates a gallery to a scene with the same basename
func (t *ScanTask) associateGallery(wg *sync.WaitGroup) { func (t *ScanTask) associateGallery(wg *sizedwaitgroup.SizedWaitGroup) {
qb := models.NewGalleryQueryBuilder() qb := models.NewGalleryQueryBuilder()
gallery, _ := qb.FindByPath(t.FilePath) gallery, _ := qb.FindByPath(t.FilePath)
if gallery == nil { if gallery == nil {
@@ -291,14 +333,14 @@ func (t *ScanTask) associateGallery(wg *sync.WaitGroup) {
wg.Done() wg.Done()
} }
func (t *ScanTask) scanScene() { func (t *ScanTask) scanScene() *models.Scene {
qb := models.NewSceneQueryBuilder() qb := models.NewSceneQueryBuilder()
scene, _ := qb.FindByPath(t.FilePath) scene, _ := qb.FindByPath(t.FilePath)
fileModTime, err := t.getFileModTime() fileModTime, err := t.getFileModTime()
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
if scene != nil { if scene != nil {
@@ -311,7 +353,7 @@ func (t *ScanTask) scanScene() {
scene, err = qb.Find(scene.ID) scene, err = qb.Find(scene.ID)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
} }
@@ -322,7 +364,7 @@ func (t *ScanTask) scanScene() {
scene, err = t.rescanScene(scene, fileModTime) scene, err = t.rescanScene(scene, fileModTime)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
} }
@@ -335,7 +377,7 @@ func (t *ScanTask) scanScene() {
videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath) videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath) container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath)
logger.Infof("Adding container %s to file %s", container, t.FilePath) logger.Infof("Adding container %s to file %s", container, t.FilePath)
@@ -357,14 +399,14 @@ func (t *ScanTask) scanScene() {
oshash, err := utils.OSHashFromFilePath(t.FilePath) oshash, err := utils.OSHashFromFilePath(t.FilePath)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
// check if oshash clashes with existing scene // check if oshash clashes with existing scene
dupe, _ := qb.FindByOSHash(oshash) dupe, _ := qb.FindByOSHash(oshash)
if dupe != nil { if dupe != nil {
logger.Errorf("OSHash for file %s is the same as that of %s", t.FilePath, dupe.Path) logger.Errorf("OSHash for file %s is the same as that of %s", t.FilePath, dupe.Path)
return return nil
} }
ctx := context.TODO() ctx := context.TODO()
@@ -373,7 +415,7 @@ func (t *ScanTask) scanScene() {
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
tx.Rollback() tx.Rollback()
return return nil
} else if err := tx.Commit(); err != nil { } else if err := tx.Commit(); err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
} }
@@ -384,14 +426,14 @@ func (t *ScanTask) scanScene() {
checksum, err := t.calculateChecksum() checksum, err := t.calculateChecksum()
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
// check if checksum clashes with existing scene // check if checksum clashes with existing scene
dupe, _ := qb.FindByChecksum(checksum) dupe, _ := qb.FindByChecksum(checksum)
if dupe != nil { if dupe != nil {
logger.Errorf("MD5 for file %s is the same as that of %s", t.FilePath, dupe.Path) logger.Errorf("MD5 for file %s is the same as that of %s", t.FilePath, dupe.Path)
return return nil
} }
ctx := context.TODO() ctx := context.TODO()
@@ -405,18 +447,18 @@ func (t *ScanTask) scanScene() {
} }
} }
return return nil
} }
// Ignore directories. // Ignore directories.
if isDir, _ := utils.DirExists(t.FilePath); isDir { if isDir, _ := utils.DirExists(t.FilePath); isDir {
return return nil
} }
videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath) videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath) container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath)
@@ -431,14 +473,14 @@ func (t *ScanTask) scanScene() {
oshash, err := utils.OSHashFromFilePath(t.FilePath) oshash, err := utils.OSHashFromFilePath(t.FilePath)
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
if t.fileNamingAlgorithm == models.HashAlgorithmMd5 || t.calculateMD5 { if t.fileNamingAlgorithm == models.HashAlgorithmMd5 || t.calculateMD5 {
checksum, err = t.calculateChecksum() checksum, err = t.calculateChecksum()
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return return nil
} }
} }
@@ -460,6 +502,8 @@ func (t *ScanTask) scanScene() {
t.makeScreenshots(videoFile, sceneHash) t.makeScreenshots(videoFile, sceneHash)
var retScene *models.Scene
ctx := context.TODO() ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil) tx := database.DB.MustBeginTx(ctx, nil)
if scene != nil { if scene != nil {
@@ -503,15 +547,21 @@ func (t *ScanTask) scanScene() {
newScene.Details = sql.NullString{String: videoFile.Comment, Valid: true} newScene.Details = sql.NullString{String: videoFile.Comment, Valid: true}
newScene.Date = models.SQLiteDate{String: videoFile.CreationTime.Format("2006-01-02")} newScene.Date = models.SQLiteDate{String: videoFile.CreationTime.Format("2006-01-02")}
} }
_, err = qb.Create(newScene, tx)
retScene, err = qb.Create(newScene, tx)
} }
if err != nil { if err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
_ = tx.Rollback() _ = tx.Rollback()
return nil
} else if err := tx.Commit(); err != nil { } else if err := tx.Commit(); err != nil {
logger.Error(err.Error()) logger.Error(err.Error())
return nil
} }
return retScene
} }
func (t *ScanTask) rescanScene(scene *models.Scene, fileModTime time.Time) (*models.Scene, error) { func (t *ScanTask) rescanScene(scene *models.Scene, fileModTime time.Time) (*models.Scene, error) {
@@ -630,9 +680,9 @@ func (t *ScanTask) scanZipImages(zipGallery *models.Gallery) {
subTask.zipGallery = zipGallery subTask.zipGallery = zipGallery
// run the subtask and wait for it to complete // run the subtask and wait for it to complete
var wg sync.WaitGroup iwg := sizedwaitgroup.New(1)
wg.Add(1) iwg.Add()
subTask.Start(&wg) subTask.Start(&iwg)
return nil return nil
}) })
if err != nil { if err != nil {

View File

@@ -1,12 +1,13 @@
package manager package manager
import ( import (
"github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/ffmpeg" "github.com/stashapp/stash/pkg/ffmpeg"
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/manager/config" "github.com/stashapp/stash/pkg/manager/config"
"github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/utils" "github.com/stashapp/stash/pkg/utils"
"sync"
) )
type GenerateTranscodeTask struct { type GenerateTranscodeTask struct {
@@ -15,7 +16,7 @@ type GenerateTranscodeTask struct {
fileNamingAlgorithm models.HashAlgorithm fileNamingAlgorithm models.HashAlgorithm
} }
func (t *GenerateTranscodeTask) Start(wg *sync.WaitGroup) { func (t *GenerateTranscodeTask) Start(wg *sizedwaitgroup.SizedWaitGroup) {
defer wg.Done() defer wg.Done()
hasTranscode := HasTranscode(&t.Scene, t.fileNamingAlgorithm) hasTranscode := HasTranscode(&t.Scene, t.fileNamingAlgorithm)

View File

@@ -7,6 +7,7 @@ import V020 from "./versions/v020.md";
import V021 from "./versions/v021.md"; import V021 from "./versions/v021.md";
import V030 from "./versions/v030.md"; import V030 from "./versions/v030.md";
import V040 from "./versions/v040.md"; import V040 from "./versions/v040.md";
import V050 from "./versions/v050.md";
import { MarkdownPage } from "../Shared/MarkdownPage"; import { MarkdownPage } from "../Shared/MarkdownPage";
const Changelog: React.FC = () => { const Changelog: React.FC = () => {
@@ -36,11 +37,20 @@ const Changelog: React.FC = () => {
<> <>
<h1 className="mb-4">Changelog:</h1> <h1 className="mb-4">Changelog:</h1>
<Version <Version
version={stashVersion || "v0.4.0"} version={stashVersion || "v0.5.0"}
date={buildDate} date={buildDate}
openState={openState} openState={openState}
setOpenState={setVersionOpenState} setOpenState={setVersionOpenState}
defaultOpen defaultOpen
>
<MarkdownPage page={V050} />
</Version>
<Version
version="v0.4.0"
date="2020-11-24"
openState={openState}
setOpenState={setVersionOpenState}
defaultOpen
> >
<MarkdownPage page={V040} /> <MarkdownPage page={V040} />
</Version> </Version>

View File

@@ -0,0 +1,3 @@
### 🎨 Improvements
* Support optional preview and sprite generation during scanning.
* Support configurable number of threads for scanning and generation.

View File

@@ -79,6 +79,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
const [videoFileNamingAlgorithm, setVideoFileNamingAlgorithm] = useState< const [videoFileNamingAlgorithm, setVideoFileNamingAlgorithm] = useState<
GQL.HashAlgorithm | undefined GQL.HashAlgorithm | undefined
>(undefined); >(undefined);
const [parallelTasks, setParallelTasks] = useState<number>(0);
const [previewSegments, setPreviewSegments] = useState<number>(0); const [previewSegments, setPreviewSegments] = useState<number>(0);
const [previewSegmentDuration, setPreviewSegmentDuration] = useState<number>( const [previewSegmentDuration, setPreviewSegmentDuration] = useState<number>(
0 0
@@ -139,6 +140,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
calculateMD5, calculateMD5,
videoFileNamingAlgorithm: videoFileNamingAlgorithm:
(videoFileNamingAlgorithm as GQL.HashAlgorithm) ?? undefined, (videoFileNamingAlgorithm as GQL.HashAlgorithm) ?? undefined,
parallelTasks,
previewSegments, previewSegments,
previewSegmentDuration, previewSegmentDuration,
previewExcludeStart, previewExcludeStart,
@@ -182,6 +184,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
setCachePath(conf.general.cachePath); setCachePath(conf.general.cachePath);
setVideoFileNamingAlgorithm(conf.general.videoFileNamingAlgorithm); setVideoFileNamingAlgorithm(conf.general.videoFileNamingAlgorithm);
setCalculateMD5(conf.general.calculateMD5); setCalculateMD5(conf.general.calculateMD5);
setParallelTasks(conf.general.parallelTasks);
setPreviewSegments(conf.general.previewSegments); setPreviewSegments(conf.general.previewSegments);
setPreviewSegmentDuration(conf.general.previewSegmentDuration); setPreviewSegmentDuration(conf.general.previewSegmentDuration);
setPreviewExcludeStart(conf.general.previewExcludeStart); setPreviewExcludeStart(conf.general.previewExcludeStart);
@@ -567,6 +570,31 @@ export const SettingsConfigurationPanel: React.FC = () => {
<hr /> <hr />
<Form.Group>
<h4>Parallel Scan/Generation</h4>
<Form.Group id="parallel-tasks">
<h6>Number of parallel task for scan/generation</h6>
<Form.Control
className="col col-sm-6 text-input"
type="number"
value={parallelTasks}
onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
setParallelTasks(
Number.parseInt(e.currentTarget.value || "0", 10)
)
}
/>
<Form.Text className="text-muted">
Set to 0 for auto-detection. Warning running more tasks than is
required to achieve 100% cpu utilisation will decrease performance
and potentially cause other issues.
</Form.Text>
</Form.Group>
</Form.Group>
<hr />
<Form.Group> <Form.Group>
<h4>Preview Generation</h4> <h4>Preview Generation</h4>

View File

@@ -31,6 +31,16 @@ export const SettingsTasksPanel: React.FC = () => {
const [isImportDialogOpen, setIsImportDialogOpen] = useState<boolean>(false); const [isImportDialogOpen, setIsImportDialogOpen] = useState<boolean>(false);
const [isScanDialogOpen, setIsScanDialogOpen] = useState<boolean>(false); const [isScanDialogOpen, setIsScanDialogOpen] = useState<boolean>(false);
const [useFileMetadata, setUseFileMetadata] = useState<boolean>(false); const [useFileMetadata, setUseFileMetadata] = useState<boolean>(false);
const [scanGeneratePreviews, setScanGeneratePreviews] = useState<boolean>(
false
);
const [scanGenerateSprites, setScanGenerateSprites] = useState<boolean>(
false
);
const [scanGenerateImagePreviews, setScanGenerateImagePreviews] = useState<
boolean
>(false);
const [status, setStatus] = useState<string>(""); const [status, setStatus] = useState<string>("");
const [progress, setProgress] = useState<number>(0); const [progress, setProgress] = useState<number>(0);
@@ -166,8 +176,11 @@ export const SettingsTasksPanel: React.FC = () => {
async function onScan(paths?: string[]) { async function onScan(paths?: string[]) {
try { try {
await mutateMetadataScan({ await mutateMetadataScan({
useFileMetadata,
paths, paths,
useFileMetadata,
scanGeneratePreviews,
scanGenerateImagePreviews,
scanGenerateSprites,
}); });
Toast.success({ content: "Started scan" }); Toast.success({ content: "Started scan" });
jobStatus.refetch(); jobStatus.refetch();
@@ -304,6 +317,31 @@ export const SettingsTasksPanel: React.FC = () => {
label="Set name, date, details from metadata (if present)" label="Set name, date, details from metadata (if present)"
onChange={() => setUseFileMetadata(!useFileMetadata)} onChange={() => setUseFileMetadata(!useFileMetadata)}
/> />
<Form.Check
id="scan-generate-previews"
checked={scanGeneratePreviews}
label="Generate previews during scan (video previews which play when hovering over a scene)"
onChange={() => setScanGeneratePreviews(!scanGeneratePreviews)}
/>
<div className="d-flex flex-row">
<div></div>
<Form.Check
id="scan-generate-image-previews"
checked={scanGenerateImagePreviews}
disabled={!scanGeneratePreviews}
label="Generate image previews during scan (animated WebP previews, only required if Preview Type is set to Animated Image)"
onChange={() =>
setScanGenerateImagePreviews(!scanGenerateImagePreviews)
}
className="ml-2 flex-grow"
/>
</div>
<Form.Check
id="scan-generate-sprites"
checked={scanGenerateSprites}
label="Generate sprites during scan (for the scene scrubber)"
onChange={() => setScanGenerateSprites(!scanGenerateSprites)}
/>
</Form.Group> </Form.Group>
<Form.Group> <Form.Group>
<Button <Button

View File

@@ -19,7 +19,7 @@ Some examples
For the config file you need the following added For the config file you need the following added
``` ```
exclude: exclude:
- "sample\\.mp4$" - "sample\\.mp4$"
- "/\\.[[:word:]]+/" - "/\\.[[:word:]]+/"
- "c:\\\\stash\\\\videos\\\\exclude" - "c:\\\\stash\\\\videos\\\\exclude"
@@ -40,11 +40,11 @@ Stash identifies video files by calculating a hash of the file. There are two al
The hash is used to name the generated files such as preview images and videos, and sprite images. The hash is used to name the generated files such as preview images and videos, and sprite images.
By default, new systems have MD5 calculation disabled for optimal performance. Existing systems that are upgraded will have the oshash populated for each scene on the next scan. By default, new systems have MD5 calculation disabled for optimal performance. Existing systems that are upgraded will have the oshash populated for each scene on the next scan.
### Changing the hashing algorithm ### Changing the hashing algorithm
To change the file naming hash to oshash, all scenes must have their oshash values populated. oshash population is done automatically when scanning. To change the file naming hash to oshash, all scenes must have their oshash values populated. oshash population is done automatically when scanning.
To change the file naming hash to `MD5`, the MD5 must be populated for all scenes. To do this, `Calculate MD5` for videos must be enabled and the library must be rescanned. To change the file naming hash to `MD5`, the MD5 must be populated for all scenes. To do this, `Calculate MD5` for videos must be enabled and the library must be rescanned.
@@ -60,7 +60,22 @@ These instructions are for existing users whose systems will be defaulted to use
2. In Settings -> Configuration page, untick `Calculate MD5` and select `oshash` as file naming hash. Save the configuration. 2. In Settings -> Configuration page, untick `Calculate MD5` and select `oshash` as file naming hash. Save the configuration.
3. In Settings -> Tasks page, click on the `Rename generated files` migration button. 3. In Settings -> Tasks page, click on the `Rename generated files` migration button.
## Scraping
## Parallel Scan/Generation
#### Number of parallel task for scan/generation
This setting controls how many sub-tasks will be run in parallel during scanning and generation tasks. (See Tasks)
Auto-detection can be enabled by setting this to zero. This will calculate the number of parallel tasks to be cpu_cores/4 + 1.
This setting can be used to increase/decrease overall CPU utilisation in two scenarios:
1) High performance 4+ core cpus.
2) Media files stored on remote/cloud filesystem.
Note: If this is set too high it will decrease overall performance and causes failures (out of memory).
## Scraping
### User Agent string ### User Agent string
@@ -68,7 +83,7 @@ Some websites require a legitimate User-Agent string when receiving requests, or
### Chrome CDP path ### Chrome CDP path
Some scrapers require a Chrome instance to function correctly. If left empty, stash will attempt to find the Chrome executable in the path environment, and will fail if it cannot find one. Some scrapers require a Chrome instance to function correctly. If left empty, stash will attempt to find the Chrome executable in the path environment, and will fail if it cannot find one.
`Chrome CDP path` can be set to a path to the chrome executable, or an http(s) address to remote chrome instance (for example: `http://localhost:9222/json/version`). `Chrome CDP path` can be set to a path to the chrome executable, or an http(s) address to remote chrome instance (for example: `http://localhost:9222/json/version`).

21
vendor/github.com/remeh/sizedwaitgroup/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2018 Rémy Mathieu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

54
vendor/github.com/remeh/sizedwaitgroup/README.md generated vendored Normal file
View File

@@ -0,0 +1,54 @@
# SizedWaitGroup
[![GoDoc](https://godoc.org/github.com/remeh/sizedwaitgroup?status.svg)](https://godoc.org/github.com/remeh/sizedwaitgroup)
`SizedWaitGroup` has the same role and API as `sync.WaitGroup` but it adds a limit of the amount of goroutines started concurrently.
`SizedWaitGroup` adds the feature of limiting the maximum number of concurrently started routines. It could for example be used to start multiples routines querying a database but without sending too much queries in order to not overload the given database.
# Example
```
package main
import (
"fmt"
"math/rand"
"time"
"github.com/remeh/sizedwaitgroup"
)
func main() {
rand.Seed(time.Now().UnixNano())
// Typical use-case:
// 50 queries must be executed as quick as possible
// but without overloading the database, so only
// 8 routines should be started concurrently.
swg := sizedwaitgroup.New(8)
for i := 0; i < 50; i++ {
swg.Add()
go func(i int) {
defer swg.Done()
query(i)
}(i)
}
swg.Wait()
}
func query(i int) {
fmt.Println(i)
ms := i + 500 + rand.Intn(500)
time.Sleep(time.Duration(ms) * time.Millisecond)
}
```
# License
MIT
# Copyright
Rémy Mathieu © 2016

View File

@@ -0,0 +1,84 @@
// Based upon sync.WaitGroup, SizedWaitGroup allows to start multiple
// routines and to wait for their end using the simple API.
// SizedWaitGroup adds the feature of limiting the maximum number of
// concurrently started routines. It could for example be used to start
// multiples routines querying a database but without sending too much
// queries in order to not overload the given database.
//
// Rémy Mathieu © 2016
package sizedwaitgroup
import (
"context"
"math"
"sync"
)
// SizedWaitGroup has the same role and close to the
// same API as the Golang sync.WaitGroup but adds a limit of
// the amount of goroutines started concurrently.
type SizedWaitGroup struct {
Size int
current chan struct{}
wg sync.WaitGroup
}
// New creates a SizedWaitGroup.
// The limit parameter is the maximum amount of
// goroutines which can be started concurrently.
func New(limit int) SizedWaitGroup {
size := math.MaxInt32 // 2^32 - 1
if limit > 0 {
size = limit
}
return SizedWaitGroup{
Size: size,
current: make(chan struct{}, size),
wg: sync.WaitGroup{},
}
}
// Add increments the internal WaitGroup counter.
// It can be blocking if the limit of spawned goroutines
// has been reached. It will stop blocking when Done is
// been called.
//
// See sync.WaitGroup documentation for more information.
func (s *SizedWaitGroup) Add() {
s.AddWithContext(context.Background())
}
// AddWithContext increments the internal WaitGroup counter.
// It can be blocking if the limit of spawned goroutines
// has been reached. It will stop blocking when Done is
// been called, or when the context is canceled. Returns nil on
// success or an error if the context is canceled before the lock
// is acquired.
//
// See sync.WaitGroup documentation for more information.
func (s *SizedWaitGroup) AddWithContext(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
case s.current <- struct{}{}:
break
}
s.wg.Add(1)
return nil
}
// Done decrements the SizedWaitGroup counter.
// See sync.WaitGroup documentation for more information.
func (s *SizedWaitGroup) Done() {
<-s.current
s.wg.Done()
}
// Wait blocks until the SizedWaitGroup counter is zero.
// See sync.WaitGroup documentation for more information.
func (s *SizedWaitGroup) Wait() {
s.wg.Wait()
}

2
vendor/modules.txt vendored
View File

@@ -230,6 +230,8 @@ github.com/natefinch/pie
github.com/pelletier/go-toml github.com/pelletier/go-toml
# github.com/pkg/errors v0.9.1 # github.com/pkg/errors v0.9.1
github.com/pkg/errors github.com/pkg/errors
# github.com/remeh/sizedwaitgroup v1.0.0
github.com/remeh/sizedwaitgroup
# github.com/pmezard/go-difflib v1.0.0 # github.com/pmezard/go-difflib v1.0.0
github.com/pmezard/go-difflib/difflib github.com/pmezard/go-difflib/difflib
# github.com/rogpeppe/go-internal v1.3.0 # github.com/rogpeppe/go-internal v1.3.0