Improve oshash code (#1829)
Reduce allocations. Don't create intermediate slices that are consumed immediately afterwards; fuse the head and tail manually and decode straight into the sum instead. Furthermore, don't go through a Reader: carve out the 8-byte chunks directly in a loop. Taken together, these two changes speed up oshash computations by a factor of 10 according to the benchmark tests. The main motivation for this change is the much lower memory allocation rate, which in turn reduces GC pressure. While here, add a benchmark for oshash computations and use it to test the performance.
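As a side note (not part of the commit), the difference between the old and the new decoding strategy can be sketched in a few lines of standalone Go. The names readerSum and directSum below are invented for illustration; the commit's actual code is in the diff that follows.

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
)

// readerSum mirrors the old approach: binary.Read fills an intermediate
// []uint64 slice through an io.Reader before the values are summed.
func readerSum(buf []byte) (uint64, error) {
    ints := make([]uint64, len(buf)/8)
    if err := binary.Read(bytes.NewReader(buf), binary.LittleEndian, &ints); err != nil {
        return 0, err
    }
    var sum uint64
    for _, v := range ints {
        sum += v
    }
    return sum, nil
}

// directSum mirrors the new approach: each 8-byte window is decoded with
// binary.LittleEndian.Uint64 and folded straight into the sum, with no
// intermediate slice and no Reader.
func directSum(buf []byte) uint64 {
    var sum uint64
    for j := 0; j+8 <= len(buf); j += 8 {
        sum += binary.LittleEndian.Uint64(buf[j : j+8])
    }
    return sum
}

func main() {
    buf := make([]byte, 64*1024) // one 64 KiB chunk, matching chunkSize in the diff
    a, _ := readerSum(buf)
    fmt.Println(a == directSum(buf)) // true: both strategies produce the same sum
}

The direct variant performs the same arithmetic but allocates nothing per call, which is where the lower allocation rate and reduced GC pressure come from.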
@@ -1,37 +1,44 @@
 package utils
 
 import (
-    "bytes"
     "encoding/binary"
+    "errors"
     "fmt"
     "os"
 )
 
 const chunkSize int64 = 64 * 1024
 
-func oshash(size int64, head []byte, tail []byte) (string, error) {
-    // put the head and tail together
-    buf := append(head, tail...)
-
-    // convert bytes into uint64
-    ints := make([]uint64, len(buf)/8)
-    reader := bytes.NewReader(buf)
-    err := binary.Read(reader, binary.LittleEndian, &ints)
-    if err != nil {
-        return "", err
-    }
-
-    // sum the integers
-    var sum uint64
-    for _, v := range ints {
-        sum += v
-    }
-
-    // add the filesize
-    sum += uint64(size)
-
+var ErrOsHashLen = errors.New("buffer is not a multiple of 8")
+
+func sumBytes(buf []byte) (uint64, error) {
+    if len(buf)%8 != 0 {
+        return 0, ErrOsHashLen
+    }
+
+    sz := len(buf) / 8
+    var sum uint64
+    for j := 0; j < sz; j++ {
+        sum += binary.LittleEndian.Uint64(buf[8*j : 8*(j+1)])
+    }
+
+    return sum, nil
+}
+
+func oshash(size int64, head []byte, tail []byte) (string, error) {
+    headSum, err := sumBytes(head)
+    if err != nil {
+        return "", fmt.Errorf("oshash head: %w", err)
+    }
+    tailSum, err := sumBytes(tail)
+    if err != nil {
+        return "", fmt.Errorf("oshash tail: %w", err)
+    }
+
+    // Compute the sum of the head, tail and file size
+    result := headSum + tailSum + uint64(size)
     // output as hex
-    return fmt.Sprintf("%016x", sum), nil
+    return fmt.Sprintf("%016x", result), nil
 }
 
 // OSHashFromFilePath calculates the hash using the same algorithm that
@@ -1,6 +1,7 @@
 package utils
 
 import (
+    "math/rand"
     "testing"
 )
 
@@ -44,3 +45,31 @@ func TestOshashCollisions(t *testing.T) {
         t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) =! oshash(n, k, ... %v)", buf1, buf2)
     }
 }
+
+func BenchmarkOsHash(b *testing.B) {
+    src := rand.NewSource(9999)
+    r := rand.New(src)
+
+    size := int64(1234567890)
+
+    head := make([]byte, 1024*64)
+    _, err := r.Read(head)
+    if err != nil {
+        b.Errorf("unable to generate head array: %v", err)
+    }
+
+    tail := make([]byte, 1024*64)
+    _, err = r.Read(tail)
+    if err != nil {
+        b.Errorf("unable to generate tail array: %v", err)
+    }
+
+    b.ResetTimer()
+
+    for n := 0; n < b.N; n++ {
+        _, err := oshash(size, head, tail)
+        if err != nil {
+            b.Errorf("unexpected error: %v", err)
+        }
+    }
+}
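With the benchmark in place, the improvement can be checked locally with something along the lines of go test -bench BenchmarkOsHash -benchmem run from the utils package directory; the -benchmem flag also reports the per-operation allocations that this change is meant to eliminate.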