From 65905b84ebde74f8561f4de965d343696b131e0e Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Wed, 18 Mar 2026 13:27:44 +0000 Subject: [PATCH] [SumDB] Make MapFn even faster Reduce the amount of memory allocated and avoid expensive operations unless needed. Before: ``` go test -bench=BenchmarkMapFn -benchmem goos: linux goarch: amd64 pkg: github.com/transparency-dev/incubator/vindex/cmd/sumdbindex cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkMapFn-24 354208 3442 ns/op 1041 B/op 12 allocs/op PASS ok github.com/transparency-dev/incubator/vindex/cmd/sumdbindex 1.309s ``` After: ``` go test -bench=BenchmarkMapFn -benchmem goos: linux goarch: amd64 pkg: github.com/transparency-dev/incubator/vindex/cmd/sumdbindex cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkMapFn-24 698796 1738 ns/op 130 B/op 4 allocs/op PASS ok github.com/transparency-dev/incubator/vindex/cmd/sumdbindex 1.302s ``` --- vindex/cmd/sumdbindex/main.go | 29 +++++++++++++++++------------ vindex/cmd/sumdbindex/main_test.go | 10 ++++++++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/vindex/cmd/sumdbindex/main.go b/vindex/cmd/sumdbindex/main.go index 4b6162e..603c528 100644 --- a/vindex/cmd/sumdbindex/main.go +++ b/vindex/cmd/sumdbindex/main.go @@ -18,6 +18,7 @@ package main import ( + "bytes" "context" "crypto/sha256" "errors" @@ -30,7 +31,6 @@ import ( "os/signal" "path" - "strings" "syscall" "time" @@ -219,21 +219,26 @@ func getKeyFile(path string) (string, error) { } func mapFn(data []byte) [][32]byte { - lines := strings.Split(string(data), "\n") - if len(lines) < 2 { - panic(fmt.Errorf("expected 2 lines but got %d", len(lines))) + modEnd := bytes.IndexByte(data, ' ') + if modEnd == -1 { + panic("invalid line 0: no space") } - line0Parts := strings.Fields(lines[0]) - if len(line0Parts) < 2 { - panic(fmt.Errorf("expected at least 2 parts in line 0 but got %d", len(line0Parts))) + verStart := modEnd + 1 + verLen := bytes.IndexByte(data[verStart:], ' ') + if verLen == -1 { 
panic("invalid line 0: no second space") } - line0Module, line0Version := line0Parts[0], line0Parts[1] + verBytes := data[verStart : verStart+verLen] - if module.IsPseudoVersion(line0Version) { - // Drop any emphemeral builds - return nil + // Fast path: pseudo-versions always contain a dash. + if bytes.IndexByte(verBytes, '-') != -1 { + // Only allocate a string and call IsPseudoVersion if a dash is present. + if module.IsPseudoVersion(string(verBytes)) { + // Drop any ephemeral builds + return nil + } } - return [][32]byte{sha256.Sum256([]byte(line0Module))} + return [][32]byte{sha256.Sum256(data[:modEnd])} } diff --git a/vindex/cmd/sumdbindex/main_test.go b/vindex/cmd/sumdbindex/main_test.go index c4a2997..5ebedb9 100644 --- a/vindex/cmd/sumdbindex/main_test.go +++ b/vindex/cmd/sumdbindex/main_test.go @@ -14,6 +14,9 @@ var pseudoVersionLeaf = []byte(`github.com/transparency-dev/tessera v0.0.0-20240 github.com/transparency-dev/tessera v0.0.0-20240222160914-411202e8d356/go.mod h1:T/Ym+5H1e28Qv6iMzT3w= `) +var releaseCandidateLeaf = []byte(`github.1485827954.workers.dev/aws/amazon-vpc-cni-k8s v1.7.0-rc1 h1:f1nwnVa7t5Ftd+BPef/V/Y8XxT1Sdiif0cdIo/8R9i0= +github.1485827954.workers.dev/aws/amazon-vpc-cni-k8s v1.7.0-rc1/go.mod h1:CuxOEw4CmUSK44owsXWkZ6Njh0G/gfboQoLl9hn1Voo=`) + func TestMapFn(t *testing.T) { for _, tc := range []struct { name string @@ -30,6 +33,11 @@ func TestMapFn(t *testing.T) { leaf: pseudoVersionLeaf, want: nil, }, + { + name: "release_candidate", + leaf: releaseCandidateLeaf, + want: [][32]byte{sha256.Sum256([]byte("github.1485827954.workers.dev/aws/amazon-vpc-cni-k8s"))}, + }, } { t.Run(tc.name, func(t *testing.T) { got := mapFn(tc.leaf) @@ -48,5 +56,7 @@ func TestMapFn(t *testing.T) { func BenchmarkMapFn(b *testing.B) { for b.Loop() { mapFn(exampleLeaf) + mapFn(pseudoVersionLeaf) + mapFn(releaseCandidateLeaf) } }