From 70911633249227def73c6b37ed492b2639a15983 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Wed, 18 Mar 2026 10:33:54 +0000 Subject: [PATCH] [SumDB] Improved MapFn performance 10x Added test and benchmark for MapFn, which confirmed slow performance due to regexps. Before: ``` go test -bench=BenchmarkMapFn -benchmem goos: linux goarch: amd64 pkg: github.com/transparency-dev/incubator/vindex/cmd/sumdbindex cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkMapFn-24 99729 12438 ns/op 565 B/op 10 allocs/op PASS ok github.com/transparency-dev/incubator/vindex/cmd/sumdbindex 1.329s ``` After: ``` go test -bench=BenchmarkMapFn -benchmem goos: linux goarch: amd64 pkg: github.com/transparency-dev/incubator/vindex/cmd/sumdbindex cpu: Intel(R) Xeon(R) CPU @ 2.20GHz BenchmarkMapFn-24 1361030 873.2 ns/op 288 B/op 4 allocs/op PASS ok github.com/transparency-dev/incubator/vindex/cmd/sumdbindex 1.280s ``` --- vindex/cmd/sumdbindex/main.go | 29 ++++------------- vindex/cmd/sumdbindex/main_test.go | 52 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 23 deletions(-) create mode 100644 vindex/cmd/sumdbindex/main_test.go diff --git a/vindex/cmd/sumdbindex/main.go b/vindex/cmd/sumdbindex/main.go index 73576ca..4b6162e 100644 --- a/vindex/cmd/sumdbindex/main.go +++ b/vindex/cmd/sumdbindex/main.go @@ -29,7 +29,7 @@ import ( "os" "os/signal" "path" - "regexp" + "strings" "syscall" "time" @@ -54,15 +54,6 @@ var ( listen = flag.String("listen", ":8088", "Address to set up HTTP server listening on") ) -var ( - // Example leaf: - // golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= - // golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= - // - line0RE = regexp.MustCompile(`(.*) (.*) h1:(.*)`) - line1RE = regexp.MustCompile(`(.*) (.*)/go.mod h1:(.*)`) -) - func main() { klog.InitFlags(nil) flag.Parse() @@ -233,24 +224,16 @@ func mapFn(data []byte) [][32]byte { panic(fmt.Errorf("expected 2 lines but got %d", len(lines))) } - line0Parts := line0RE.FindStringSubmatch(lines[0]) - line0Module, line0Version := line0Parts[1], line0Parts[2] - - line1Parts := line1RE.FindStringSubmatch(lines[1]) - line1Module, line1Version := line1Parts[1], line1Parts[2] - - if line0Module != line1Module { - klog.Errorf("mismatched module names: (%s, %s)", line0Module, line1Module) - } - if line0Version != line1Version { - klog.Errorf("mismatched version names: (%s, %s)", line0Version, line0Version) + line0Parts := strings.Fields(lines[0]) + if len(line0Parts) < 2 { + panic(fmt.Errorf("expected at least 2 parts in line 0 but got %d", len(line0Parts))) } + line0Module, line0Version := line0Parts[0], line0Parts[1] + if module.IsPseudoVersion(line0Version) { // Drop any emphemeral builds return nil } - klog.V(2).Infof("MapFn found: Module: %s:\t%s", line0Module, line0Version) - return [][32]byte{sha256.Sum256([]byte(line0Module))} } diff --git a/vindex/cmd/sumdbindex/main_test.go b/vindex/cmd/sumdbindex/main_test.go new file mode 100644 index 0000000..c4a2997 --- /dev/null +++ b/vindex/cmd/sumdbindex/main_test.go @@ -0,0 +1,52 @@ +package main + +import ( + "bytes" + "crypto/sha256" + "testing" +) + +var exampleLeaf = []byte(`golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +`) + +var pseudoVersionLeaf = []byte(`github.com/transparency-dev/tessera v0.0.0-20240222160914-411202e8d356 h1:4jV/qA6RzP7Z6s+/vQ0W2RjM3FjC6B2M3r8= +github.com/transparency-dev/tessera v0.0.0-20240222160914-411202e8d356/go.mod h1:T/Ym+5H1e28Qv6iMzT3w= +`) + +func TestMapFn(t *testing.T) { + for _, tc := range []struct { + name string + leaf []byte + want [][32]byte + }{ + { + name: "valid", + leaf: exampleLeaf, + want: [][32]byte{sha256.Sum256([]byte("golang.org/x/text"))}, + }, + { + name: "pseudo_version", + leaf: pseudoVersionLeaf, + want: nil, + }, + } { + t.Run(tc.name, func(t *testing.T) { + got := mapFn(tc.leaf) + if len(got) != len(tc.want) { + t.Fatalf("mapFn() returned %d keys, want %d", len(got), len(tc.want)) + } + for i := range got { + if !bytes.Equal(got[i][:], tc.want[i][:]) { + t.Errorf("mapFn()[%d] = %x, want %x", i, got[i], tc.want[i]) + } + } + }) + } +} + +func BenchmarkMapFn(b *testing.B) { + for b.Loop() { + mapFn(exampleLeaf) + } +}