Caption support (#2462)

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2025-12-18 04:44:37 +03:00 · 2022-05-05 20:59:28 -05:00
parent ab1b30ffb7
commit c1a096a1a6
114 changed files with 16899 additions and 17 deletions
--- a/vendor/github.com/asticode/go-astikit/pcm.go
+++ b/vendor/github.com/asticode/go-astikit/pcm.go
@@ -0,0 +1,426 @@
+package astikit
+
+import (
+	"fmt"
+	"math"
+	"sync"
+	"time"
+)
+
+// PCMLevel computes the level of PCM samples as their root mean square (RMS):
+// the square root of the mean of the squared sample values.
+// It returns 0 when samples is empty, instead of the NaN a 0/0 mean would produce.
+// https://dsp.stackexchange.com/questions/2951/loudness-of-pcm-stream
+// https://dsp.stackexchange.com/questions/290/getting-loudness-of-a-track-with-rms?noredirect=1&lq=1
+func PCMLevel(samples []int) float64 {
+	// No samples means no signal: avoid dividing by zero below
+	if len(samples) == 0 {
+		return 0
+	}
+
+	// Compute sum of square values
+	var sum float64
+	for _, s := range samples {
+		v := float64(s)
+		sum += v * v
+	}
+
+	// Square root of the mean
+	return math.Sqrt(sum / float64(len(samples)))
+}
+
+// maxPCMSample returns the biggest positive sample value for the provided bit depth:
+// half of the 2^bitDepth available values, minus one (e.g. 32767 for 16 bits).
+func maxPCMSample(bitDepth int) int {
+	numValues := math.Pow(2, float64(bitDepth))
+	return int(numValues/2.0) - 1
+}
+
+// PCMNormalize normalizes the PCM samples: every sample is scaled by the same factor
+// so that the biggest absolute sample value reaches the max value allowed by bitDepth.
+// A new slice is returned, samples is left untouched.
+// When samples is empty or only contains zeros there's nothing to scale: a copy of
+// samples is returned (nil for an empty input) instead of dividing by a zero peak.
+func PCMNormalize(samples []int, bitDepth int) (o []int) {
+	// Get the peak, i.e. the biggest absolute sample value
+	var peak int
+	for _, s := range samples {
+		if s < 0 {
+			s = -s
+		}
+		if s > peak {
+			peak = s
+		}
+	}
+
+	// Nothing to scale: dividing by a zero peak would panic
+	if peak == 0 {
+		return append(o, samples...)
+	}
+
+	// Get max for bit depth
+	limit := maxPCMSample(bitDepth)
+
+	// Scale every sample so that the peak reaches the max
+	o = make([]int, 0, len(samples))
+	for _, s := range samples {
+		o = append(o, s*limit/peak)
+	}
+	return o
+}
+
+// ConvertPCMBitDepth converts a sample from the source bit depth to the destination
+// bit depth by shifting its bits: to the left when the destination is wider, to the
+// right when it is narrower. The returned error is always nil.
+func ConvertPCMBitDepth(srcSample int, srcBitDepth, dstBitDepth int) (dstSample int, err error) {
+	switch {
+	case srcBitDepth < dstBitDepth:
+		// Widen
+		dstSample = srcSample << uint(dstBitDepth-srcBitDepth)
+	case srcBitDepth > dstBitDepth:
+		// Narrow
+		dstSample = srcSample >> uint(srcBitDepth-dstBitDepth)
+	default:
+		// Same bit depth, nothing to do
+		dstSample = srcSample
+	}
+	return dstSample, nil
+}
+
+// PCMSampleFunc is a func that can process a sample.
+// The converters of this file call it once per output sample; when it returns a
+// non-nil error they stop and return that error wrapped.
+type PCMSampleFunc func(s int) error
+
+// PCMSampleRateConverter is an object capable of converting a PCM's sample rate.
+// Samples are fed one at a time through Add and converted samples are handed to fn.
+//
+// Fields (the buffer and counters are only used when both sample rates differ):
+//   - b: buffered samples not output yet, one slice per channel
+//   - dstSampleRate: sample rate of the output
+//   - fn: func receiving every output sample
+//   - numChannels: number of channels
+//   - numChannelsProcessed: 1-based position, among the channels, of the last added sample
+//   - numSamplesOutputed: number of outputs so far, counted once for all channels
+//   - numSamplesProcessed: number of inputs so far, counted once all channels have been added
+//   - srcSampleRate: sample rate of the input
+type PCMSampleRateConverter struct {
+	b                    [][]int
+	dstSampleRate        int
+	fn                   PCMSampleFunc
+	numChannels          int
+	numChannelsProcessed int
+	numSamplesOutputed   int
+	numSamplesProcessed  int
+	srcSampleRate        int
+}
+
+// NewPCMSampleRateConverter creates a new PCMSampleRateConverter converting samples of
+// numChannels channels from srcSampleRate to dstSampleRate. Converted samples are
+// provided to fn.
+func NewPCMSampleRateConverter(srcSampleRate, dstSampleRate, numChannels int, fn PCMSampleFunc) *PCMSampleRateConverter {
+	c := &PCMSampleRateConverter{
+		dstSampleRate: dstSampleRate,
+		fn:            fn,
+		numChannels:   numChannels,
+		srcSampleRate: srcSampleRate,
+	}
+
+	// One buffer per channel
+	c.b = make([][]int, numChannels)
+	return c
+}
+
+// Reset resets the converter: buffered samples are dropped and all counters go back
+// to zero. Sample rates, number of channels and sample func are kept.
+func (c *PCMSampleRateConverter) Reset() {
+	// Drop buffered samples
+	c.b = make([][]int, c.numChannels)
+
+	// Reset counters
+	c.numChannelsProcessed, c.numSamplesOutputed, c.numSamplesProcessed = 0, 0, 0
+}
+
+// Add adds a new sample to the converter.
+// When both sample rates are equal the sample is forwarded to fn as is. Otherwise
+// samples are buffered per channel, the n-th call landing in channel n modulo
+// numChannels (presumably samples are expected interleaved by channel, verify against
+// callers), and once all channels have been added fn is either called with the
+// average of the buffered samples (source rate bigger than destination rate) or
+// called several times with the same samples (source rate smaller than destination
+// rate). Errors returned by fn are returned wrapped.
+func (c *PCMSampleRateConverter) Add(i int) (err error) {
+	// Forward sample
+	if c.srcSampleRate == c.dstSampleRate {
+		if err = c.fn(i); err != nil {
+			err = fmt.Errorf("astikit: handling sample failed: %w", err)
+			return
+		}
+		return
+	}
+
+	// Increment num channels processed
+	c.numChannelsProcessed++
+
+	// Reset num channels processed
+	// Going past the number of channels means this sample belongs to the first channel again
+	if c.numChannelsProcessed > c.numChannels {
+		c.numChannelsProcessed = 1
+	}
+
+	// Only increment num samples processed if all channels have been processed
+	if c.numChannelsProcessed == c.numChannels {
+		c.numSamplesProcessed++
+	}
+
+	// Append sample to buffer
+	// numChannelsProcessed is 1-based hence the -1
+	c.b[c.numChannelsProcessed-1] = append(c.b[c.numChannelsProcessed-1], i)
+
+	// Throw away data
+	if c.srcSampleRate > c.dstSampleRate {
+		// Make sure to always keep the first sample but do nothing until we have all channels or target sample has been
+		// reached
+		// The target is 1 + numSamplesOutputed * srcSampleRate / dstSampleRate processed samples
+		if (c.numSamplesOutputed > 0 && float64(c.numSamplesProcessed) < 1.0+float64(c.numSamplesOutputed)*float64(c.srcSampleRate)/float64(c.dstSampleRate)) || c.numChannelsProcessed < c.numChannels {
+			return
+		}
+
+		// Loop through channels
+		for idx, b := range c.b {
+			// Merge samples
+			// This is the average, using an integer division, of the samples buffered for the channel
+			var s int
+			for _, v := range b {
+				s += v
+			}
+			s /= len(b)
+
+			// Reset buffer
+			c.b[idx] = []int{}
+
+			// Custom
+			if err = c.fn(s); err != nil {
+				err = fmt.Errorf("astikit: handling sample failed: %w", err)
+				return
+			}
+		}
+
+		// Increment num samples outputted
+		c.numSamplesOutputed++
+		return
+	}
+
+	// Do nothing until we have all channels
+	if c.numChannelsProcessed < c.numChannels {
+		return
+	}
+
+	// Repeat data
+	// The first samples are always output. Afterwards, with the 1.0 on both sides cancelling out
+	// mathematically, samples are output as long as
+	// numSamplesProcessed > numSamplesOutputed * srcSampleRate / dstSampleRate
+	for c.numSamplesOutputed == 0 || float64(c.numSamplesProcessed)+1.0 > 1.0+float64(c.numSamplesOutputed)*float64(c.srcSampleRate)/float64(c.dstSampleRate) {
+		// Loop through channels
+		for _, b := range c.b {
+			// Invalid length
+			// Each channel buffer must hold exactly the one sample added since the last reset
+			if len(b) != 1 {
+				err = fmt.Errorf("astikit: invalid buffer item length %d", len(b))
+				return
+			}
+
+			// Custom
+			if err = c.fn(b[0]); err != nil {
+				err = fmt.Errorf("astikit: handling sample failed: %w", err)
+				return
+			}
+		}
+
+		// Increment num samples outputted
+		c.numSamplesOutputed++
+	}
+
+	// Reset buffer
+	// This is only reached when no error occurred above
+	c.b = make([][]int, c.numChannels)
+	return
+}
+
+// PCMChannelsConverter is an object capable of converting PCM's channels.
+// Samples are fed one at a time through Add and converted samples are handed to fn.
+//
+// Fields:
+//   - dstNumChannels: number of channels of the output
+//   - fn: func receiving every output sample
+//   - srcNumChannels: number of channels of the input
+//   - srcSamples: 1-based position, among the source channels, of the last added
+//     sample (only updated when both numbers of channels differ)
+type PCMChannelsConverter struct {
+	dstNumChannels int
+	fn             PCMSampleFunc
+	srcNumChannels int
+	srcSamples     int
+}
+
+// NewPCMChannelsConverter creates a new PCMChannelsConverter converting samples from
+// srcNumChannels channels to dstNumChannels channels. Converted samples are provided
+// to fn.
+func NewPCMChannelsConverter(srcNumChannels, dstNumChannels int, fn PCMSampleFunc) *PCMChannelsConverter {
+	c := new(PCMChannelsConverter)
+	c.srcNumChannels, c.dstNumChannels = srcNumChannels, dstNumChannels
+	c.fn = fn
+	return c
+}
+
+// Reset resets the converter: the position among the source channels goes back to
+// zero so that the next added sample is handled as the first one of its group.
+func (c *PCMChannelsConverter) Reset() {
+	c.srcSamples = 0
+}
+
+// Add adds a new sample to the converter
+func (c *PCMChannelsConverter) Add(i int) (err error) {
+	// Forward sample
+	if c.srcNumChannels == c.dstNumChannels {
+		if err = c.fn(i); err != nil {
+			err = fmt.Errorf("astikit: handling sample failed: %w", err)
+			return
+		}
+		return
+	}
+
+	// Reset
+	if c.srcSamples == c.srcNumChannels {
+		c.srcSamples = 0
+	}
+
+	// Increment src samples
+	c.srcSamples++
+
+	// Throw away data
+	if c.srcNumChannels > c.dstNumChannels {
+		// Throw away sample
+		if c.srcSamples > c.dstNumChannels {
+			return
+		}
+
+		// Custom
+		if err = c.fn(i); err != nil {
+			err = fmt.Errorf("astikit: handling sample failed: %w", err)
+			return
+		}
+		return
+	}
+
+	// Store
+	var ss []int
+	if c.srcSamples < c.srcNumChannels {
+		ss = []int{i}
+	} else {
+		// Repeat data
+		for idx := c.srcNumChannels; idx <= c.dstNumChannels; idx++ {
+			ss = append(ss, i)
+		}
+	}
+
+	// Loop through samples
+	for _, s := range ss {
+		// Custom
+		if err = c.fn(s); err != nil {
+			err = fmt.Errorf("astikit: handling sample failed: %w", err)
+			return
+		}
+	}
+	return
+}
+
+// PCMSilenceDetector represents a PCM silence detector.
+// Samples provided through Add are split into fixed-size analyses whose level tells
+// whether they are a silence.
+//
+// Fields:
+//   - analyses: analyses that have not been returned or discarded yet
+//   - buf: samples not analyzed yet
+//   - m: mutex locked by Reset and Add
+//   - minAnalysesPerSilence: number of consecutive silent analyses needed to reach
+//     the MinSilenceDuration option
+//   - o: options
+//   - samplesPerAnalysis: number of samples in one analysis, i.e. in one StepDuration
+type PCMSilenceDetector struct {
+	analyses              []pcmSilenceDetectorAnalysis
+	buf                   []int
+	m                     *sync.Mutex // Locks buf
+	minAnalysesPerSilence int
+	o                     PCMSilenceDetectorOptions
+	samplesPerAnalysis    int
+}
+
+// pcmSilenceDetectorAnalysis represents one step of the silence detection: a copy of
+// the analyzed samples along with their level as computed by PCMLevel.
+type pcmSilenceDetectorAnalysis struct {
+	level   float64
+	samples []int
+}
+
+// PCMSilenceDetectorOptions represents a PCM silence detector options.
+//
+//   - MaxSilenceLevel: samples whose level, as computed by PCMLevel, is strictly below
+//     this value are considered a silence
+//   - MinSilenceDuration: minimum duration a silence must last to be taken into
+//     account, NewPCMSilenceDetector defaults it to 1s when zero
+//   - SampleRate: number of samples per second, used to compute how many samples
+//     fit in one StepDuration
+//   - StepDuration: duration of the samples analyzed together,
+//     NewPCMSilenceDetector defaults it to 30ms when zero
+type PCMSilenceDetectorOptions struct {
+	MaxSilenceLevel    float64       `toml:"max_silence_level"`
+	MinSilenceDuration time.Duration `toml:"min_silence_duration"`
+	SampleRate         int           `toml:"sample_rate"`
+	StepDuration       time.Duration `toml:"step_duration"`
+}
+
+// NewPCMSilenceDetector creates a new silence detector.
+// A zero MinSilenceDuration defaults to 1s and a zero StepDuration defaults to 30ms.
+// The number of samples per analysis and the minimum number of analyses per silence
+// are computed once, from the options, and rounded down.
+func NewPCMSilenceDetector(o PCMSilenceDetectorOptions) (d *PCMSilenceDetector) {
+	// Default option values
+	// o is a copy, the caller's options are left untouched
+	if o.MinSilenceDuration == 0 {
+		o.MinSilenceDuration = time.Second
+	}
+	if o.StepDuration == 0 {
+		o.StepDuration = 30 * time.Millisecond
+	}
+
+	// Create
+	step := o.StepDuration.Seconds()
+	d = &PCMSilenceDetector{
+		m:                     &sync.Mutex{},
+		minAnalysesPerSilence: int(math.Floor(o.MinSilenceDuration.Seconds() / step)),
+		o:                     o,
+		samplesPerAnalysis:    int(math.Floor(float64(o.SampleRate) * step)),
+	}
+
+	// Reset
+	d.Reset()
+	return d
+}
+
+// Reset resets the silence detector: samples waiting to be analyzed as well as
+// pending analyses are dropped. Options are kept.
+func (d *PCMSilenceDetector) Reset() {
+	d.m.Lock()
+	defer d.m.Unlock()
+
+	// Both are replaced with empty, non-nil, slices
+	d.buf, d.analyses = []int{}, []pcmSilenceDetectorAnalysis{}
+}
+
+// Add adds samples to the buffer and checks whether there are valid samples between silences.
+//
+// Buffered samples are split into analyses of samplesPerAnalysis samples, and an analysis
+// whose level is strictly below the MaxSilenceLevel option is a silence. Every returned item
+// holds the samples of a leading silence, of the non-silence following it and of a trailing
+// silence, where both silences span at least minAnalysesPerSilence analyses. A leading
+// silence longer than that is trimmed, and a valid trailing silence is kept as the leading
+// silence of the next item. Samples that don't fill a whole analysis yet stay buffered until
+// the next call.
+//
+// When the options lead to a number of samples per analysis that is not positive (e.g. the
+// SampleRate option is not set), nothing can be analyzed: samples are dropped and nil is
+// returned.
+func (d *PCMSilenceDetector) Add(samples []int) (validSamples [][]int) {
+	// Lock
+	d.m.Lock()
+	defer d.m.Unlock()
+
+	// Without a positive number of samples per analysis the buffer could never be consumed:
+	// splitting it below would loop forever (zero) or slice out of range (negative)
+	if d.samplesPerAnalysis <= 0 {
+		return nil
+	}
+
+	// Append samples to buffer
+	d.buf = append(d.buf, samples...)
+
+	// Analyze samples by step
+	for len(d.buf) >= d.samplesPerAnalysis {
+		// Append analysis
+		// Samples are copied since the buffer's underlying array is written again by later appends
+		d.analyses = append(d.analyses, pcmSilenceDetectorAnalysis{
+			level:   PCMLevel(d.buf[:d.samplesPerAnalysis]),
+			samples: append([]int(nil), d.buf[:d.samplesPerAnalysis]...),
+		})
+
+		// Remove samples from buffer
+		d.buf = d.buf[d.samplesPerAnalysis:]
+	}
+
+	// Loop through analyses
+	// Counts are local: pending analyses are scanned again from the start on every call
+	var leadingSilence, inBetween, trailingSilence int
+	for i := 0; i < len(d.analyses); i++ {
+		if d.analyses[i].level < d.o.MaxSilenceLevel {
+			// This is a silence
+
+			// This is a leading silence
+			if inBetween == 0 {
+				leadingSilence++
+
+				// The leading silence is valid
+				// We can trim its useless part
+				// i is shifted as well so that it keeps pointing at the same analysis
+				if leadingSilence > d.minAnalysesPerSilence {
+					d.analyses = d.analyses[leadingSilence-d.minAnalysesPerSilence:]
+					i -= leadingSilence - d.minAnalysesPerSilence
+					leadingSilence = d.minAnalysesPerSilence
+				}
+				continue
+			}
+
+			// This is a trailing silence
+			trailingSilence++
+
+			// Trailing silence is invalid
+			if trailingSilence < d.minAnalysesPerSilence {
+				continue
+			}
+
+			// Trailing silence is valid
+			// Loop through analyses
+			var ss []int
+			for _, a := range d.analyses[:i+1] {
+				ss = append(ss, a.samples...)
+			}
+
+			// Append valid samples
+			validSamples = append(validSamples, ss)
+
+			// Remove leading silence and non silence
+			// i is shifted as well so that it keeps pointing at the same analysis
+			d.analyses = d.analyses[leadingSilence+inBetween:]
+			i -= leadingSilence + inBetween
+
+			// Reset counts
+			// The trailing silence becomes the leading silence of the next valid samples
+			leadingSilence, inBetween, trailingSilence = trailingSilence, 0, 0
+		} else {
+			// This is not a silence
+
+			// This is a leading non silence
+			// We need to remove it
+			// i is set to -1 so that the loop starts over at the first remaining analysis
+			if i == 0 {
+				d.analyses = d.analyses[1:]
+				i = -1
+				continue
+			}
+
+			// This is the first in-between
+			if inBetween == 0 {
+				// The leading silence is invalid
+				// We need to remove it as well as this first non silence
+				if leadingSilence < d.minAnalysesPerSilence {
+					d.analyses = d.analyses[i+1:]
+					i = -1
+					continue
+				}
+			}
+
+			// This non-silence was preceded by a silence not big enough to be a valid trailing silence
+			// We incorporate it in the in-between
+			if trailingSilence > 0 {
+				inBetween += trailingSilence
+				trailingSilence = 0
+			}
+
+			// This is an in-between
+			inBetween++
+		}
+	}
+	return validSamples
+}