Improve oshash code (#1829)

Reduce allocations. Don't create intermediary arrays which we then consume right after. Manually fuse the arrays and decode straight into the sum instead. Furthermore, don't invoke a Reader, but carve out the locations via a loop, directly. These two changes taken together speed up oshash computations by a factor of 10 according to the benchmark tests. The main reason for this change is a much lower memory allocation rate, which in turn reduces GC pressure. While here, add a benchmark for oshash computations and use it for testing the performance.
2025-12-17 12:24:38 +03:00 · 2021-10-12 02:59:51 +02:00
parent e9d48683f8
commit 38384f2c60
2 changed files with 52 additions and 16 deletions
--- a/pkg/utils/oshash_internal_test.go
+++ b/pkg/utils/oshash_internal_test.go
@@ -1,6 +1,7 @@
 package utils

 import (
+	"math/rand"
 	"testing"
 )

@@ -44,3 +45,31 @@ func TestOshashCollisions(t *testing.T) {
 		t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) =! oshash(n, k, ... %v)", buf1, buf2)
 	}
 }
+
+// BenchmarkOsHash times oshash on fixed pseudo-random 64 KiB head and tail
+// chunks and a constant size, reporting allocations per operation.
+func BenchmarkOsHash(b *testing.B) {
+	// A fixed seed keeps the input identical across runs.
+	r := rand.New(rand.NewSource(9999))
+
+	size := int64(1234567890)
+
+	head := make([]byte, 1024*64)
+	if _, err := r.Read(head); err != nil {
+		b.Fatalf("unable to generate head array: %v", err)
+	}
+
+	tail := make([]byte, 1024*64)
+	if _, err := r.Read(tail); err != nil {
+		b.Fatalf("unable to generate tail array: %v", err)
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		if _, err := oshash(size, head, tail); err != nil {
+			b.Fatalf("unexpected error: %v", err)
+		}
+	}
+}