From 73d34b860a7807a86ca4cea0df73243aa52b7902 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Sun, 18 Jan 2026 14:44:21 -0800 Subject: [PATCH 1/2] Simplify segment count calculation --- binaryfusefilter.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/binaryfusefilter.go b/binaryfusefilter.go index f7093b0..a452f35 100644 --- a/binaryfusefilter.go +++ b/binaryfusefilter.go @@ -52,6 +52,9 @@ type BinaryFuseBuilder struct { // buffers from the BinaryFuseBuilder if possible. For best results, the caller // should avoid having too many duplicated keys. // +// The Fingerprints slice in the resulting filter is owned by the builder; it +// is only valid until the BinaryFuseBuilder is used again. +// // The function can mutate the given keys slice to remove duplicates. // // The function may return an error if the set is empty. @@ -279,22 +282,18 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin filter.SegmentLength = 262144 } filter.SegmentLengthMask = filter.SegmentLength - 1 - sizeFactor := calculateSizeFactor(arity, size) capacity := uint32(0) if size > 1 { + sizeFactor := calculateSizeFactor(arity, size) capacity = uint32(math.Round(float64(size) * sizeFactor)) } - initSegmentCount := (capacity+filter.SegmentLength-1)/filter.SegmentLength - (arity - 1) - arrayLength := (initSegmentCount + arity - 1) * filter.SegmentLength - filter.SegmentCount = (arrayLength + filter.SegmentLength - 1) / filter.SegmentLength - if filter.SegmentCount <= arity-1 { - filter.SegmentCount = 1 - } else { - filter.SegmentCount = filter.SegmentCount - (arity - 1) + totalSegmentCount := (capacity + filter.SegmentLength - 1) / filter.SegmentLength + if totalSegmentCount < arity { + totalSegmentCount = arity } - arrayLength = (filter.SegmentCount + arity - 1) * filter.SegmentLength + filter.SegmentCount = totalSegmentCount - (arity - 1) filter.SegmentCountLength = filter.SegmentCount * filter.SegmentLength - filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(arrayLength)) + filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(totalSegmentCount*filter.SegmentLength)) } func (filter *BinaryFuse[T]) mod3(x uint8) uint8 { From de63af6c6fd85c66596ce534797ad38c94f1dbfa Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Sun, 18 Jan 2026 20:03:45 -0800 Subject: [PATCH 2/2] Allow initial sizing of BinaryFuseBuilder Add `MakeBinaryFuseBuilder` which pre-initializes a binary fuse builder to a certain initial size, guaranteeing no allocations up to that size. This avoids reallocations as the buffers grow. We also improve the `reuseBuffer` method to use an `append` pattern (relying on the internal formulas for slice growth) and isolate the unsafe hack to the single place where it is needed now (fingerprints slice). --- README.md | 6 +-- binaryfusefilter.go | 99 ++++++++++++++++++++++++---------------- binaryfusefilter_test.go | 44 +++++++++++++++++- 3 files changed, 104 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 57e1094..ddf5fac 100644 --- a/README.md +++ b/README.md @@ -86,10 +86,10 @@ When building many filters, memory can be reused (reducing allocation and GC overhead) with a `BinaryFuseBuilder`: ```Go var builder xorfilter.BinaryFuseBuilder +builder = xorfilter.MakeBinaryFuseBuilder[uint16](initialSize) // Optional for { - filter8, _ := BuildBinaryFuse[uint8](&builder, keys) - filter16, _ := BuildBinaryFuse[uint16](&builder, keys) - ... + filter16, _ := BuildBinaryFuse[uint16](&builder, keys) + ... } ``` diff --git a/binaryfusefilter.go b/binaryfusefilter.go index a452f35..e4db127 100644 --- a/binaryfusefilter.go +++ b/binaryfusefilter.go @@ -38,14 +38,44 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) { // BinaryFuseBuilder can be used to reuse memory allocations across multiple // BinaryFuse builds. +// +// An empty BinaryFuseBuilder can be used, and its internal memory will grow as +// needed over time. MakeBinaryFuseBuilder can also be used to pre-initialize +// for a certain size. type BinaryFuseBuilder struct { - alone reusableBuffer - t2hash reusableBuffer - reverseOrder reusableBuffer - t2count reusableBuffer - reverseH reusableBuffer - startPos reusableBuffer - fingerprints reusableBuffer + alone []uint32 + t2hash []uint64 + reverseOrder []uint64 + t2count []uint8 + reverseH []uint8 + startPos []uint32 + fingerprints []uint32 +} + +// MakeBinaryFuseBuilder creates a BinaryFuseBuilder with enough preallocated +// memory to allow building of binary fuse filters with fingerprint type T +// without allocations. +// +// Note that the builder can be used with a smaller fingerprint type without +// reallocations. If it is used with a larger fingerprint type, there will be +// one reallocation for the fingerprints slice. +func MakeBinaryFuseBuilder[T Unsigned](initialSize int) BinaryFuseBuilder { + var b BinaryFuseBuilder + var filter BinaryFuse[T] + size := uint32(initialSize) + filter.initializeParameters(&b, size) + capacity := uint32(len(filter.Fingerprints)) + reuseBuffer(&b.alone, capacity) + reuseBuffer(&b.t2count, capacity) + reuseBuffer(&b.reverseH, size) + + reuseBuffer(&b.t2hash, capacity) + reuseBuffer(&b.reverseOrder, size+1) + // The startPos array needs to be large enough for smaller sizes which use a + // smaller segment length. Also, we dynamically try a smaller segment length + // in some cases. + reuseBuffer(&b.startPos, 2<> blockBits) + startPos[i] = uint32((uint64(i) * uint64(size)) >> blockBits) } for _, key := range keys { hash := mixsplit(key, filter.Seed) @@ -293,7 +323,14 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin } filter.SegmentCount = totalSegmentCount - (arity - 1) filter.SegmentCountLength = filter.SegmentCount * filter.SegmentLength - filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(totalSegmentCount*filter.SegmentLength)) + + // Allocate fingerprints slice. + numFingerprints := totalSegmentCount * filter.SegmentLength + // Our backing buffer is a []uint32. Figure out how many uint32s we need + // to back a []T of the requested size. + bufSize := (numFingerprints*uint32(unsafe.Sizeof(T(0))) + 3) / 4 + buf := reuseBuffer(&b.fingerprints, bufSize) + filter.Fingerprints = unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(buf))), numFingerprints) } func (filter *BinaryFuse[T]) mod3(x uint8) uint8 { @@ -348,29 +385,11 @@ func calculateSizeFactor(arity uint32, size uint32) float64 { } } -// reusableBuffer allows reuse of a backing buffer to avoid allocations for -// slices of integers. -type reusableBuffer struct { - buf []uint64 -} - -type integer interface { - ~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 -} - -// reuseBuffer returns an empty slice of the given size, reusing the last buffer -// if possible. -func reuseBuffer[T integer](b *reusableBuffer, size int) []T { - const sizeOfUint64 = 8 - // Our backing buffer is a []uint64. Figure out how many uint64s we need - // to back a []T of the requested size. - bufSize := int((uintptr(size)*unsafe.Sizeof(T(0)) + sizeOfUint64 - 1) / sizeOfUint64) - if cap(b.buf) >= bufSize { - clear(b.buf[:bufSize]) - } else { - // We need to allocate a new buffer. Increase by at least 25% to amortize - // allocations; this is what append() does for large enough slices. - b.buf = make([]uint64, max(bufSize, cap(b.buf)+cap(b.buf)/4)) - } - return unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(b.buf))), size) +// reuseBuffer returns a zeroed slice of the given size, reusing the previous +// one if possible. +func reuseBuffer[T uint8 | uint32 | uint64](buf *[]T, size uint32) []T { + // The compiler recognizes this pattern and doesn't allocate a temporary + // slice. This pattern is used in slices.Grow(). + *buf = append((*buf)[:0], make([]T, size)...) + return *buf } diff --git a/binaryfusefilter_test.go b/binaryfusefilter_test.go index a60fe3c..e7fb7a8 100644 --- a/binaryfusefilter_test.go +++ b/binaryfusefilter_test.go @@ -350,10 +350,22 @@ func TestBinaryFuseN_Issue35(t *testing.T) { } } +// TestBinaryFuseBuilder verifies that repeated builds with the same builder +// create the exact same filter as using NewBinaryFuse. func TestBinaryFuseBuilder(t *testing.T) { - // Verify that repeated builds with the same builder create the exact same - // filter as using NewBinaryFuse. var bld BinaryFuseBuilder + // Test with and without pre-allocation. + if rand.IntN(2) == 0 { + maxSize := 1 + rand.IntN(1<