Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions arrow/compute/internal/kernels/scalar_set_lookup.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,25 @@ func (s *SetLookupState[T]) Init(opts SetLookupOptions) error {
if memoType == arrow.EXTENSION {
memoType = s.ValueSetType.(arrow.ExtensionType).StorageType().ID()
}
// FixedSizeBinary with byte-widths 1/2/4/8 takes the numeric fast-path
// in CreateSetLookupState (SetLookupState[uintN] + visitNumeric[uintN]),
// so the lookup table must be the matching TypedMemoTable[uintN], not
// the BinaryMemoTable that newMemoTable would otherwise return for
// FIXED_SIZE_BINARY.
if memoType == arrow.FIXED_SIZE_BINARY {
if fsb, ok := s.ValueSetType.(*arrow.FixedSizeBinaryType); ok {
switch fsb.ByteWidth {
case 1:
memoType = arrow.UINT8
case 2:
memoType = arrow.UINT16
case 4:
memoType = arrow.UINT32
case 8:
memoType = arrow.UINT64
}
}
}
lookup, err := newMemoTable(s.Alloc, memoType)
if err != nil {
return err
Expand Down
10 changes: 8 additions & 2 deletions arrow/compute/scalar_set_lookup.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,14 @@ func RegisterScalarSetLookup(reg FunctionRegistry) {
kn.MemAlloc = exec.MemPrealloc
kn.NullHandling = exec.NullComputedPrealloc
kn.CleanupFn = func(state exec.KernelState) error {
s := state.(*kernels.SetLookupState[[]byte])
s.Lookup.(*hashing.BinaryMemoTable).Release()
// FixedSizeBinary widths 1/2/4/8 use SetLookupState[uintN] +
// the matching numeric memo table, which carries no buffers
// to release. Only the BinaryMemoTable path needs cleanup.
if s, ok := state.(*kernels.SetLookupState[[]byte]); ok {
if memo, ok := s.Lookup.(*hashing.BinaryMemoTable); ok {
memo.Release()
}
}
return nil
}

Expand Down
32 changes: 32 additions & 0 deletions arrow/compute/scalar_set_lookup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package compute_test

import (
"context"
"encoding/base64"
"fmt"
"strings"
"sync"
Expand Down Expand Up @@ -265,6 +266,37 @@ func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinary() {
}
}

func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinaryFastPaths() {
// ByteWidths 1, 2, 4, 8 take the SetLookupState[uintN] / visitNumeric
// fast-path. Before the fix in #785 this panicked because Init asked
// newMemoTable for a FIXED_SIZE_BINARY table (BinaryMemoTable) but the
// state expected a TypedMemoTable[uintN]; afterwards the cleanup hook
// also tried to type-assert all states as SetLookupState[[]byte].
pad := func(s string, w int) string {
buf := make([]byte, w)
copy(buf, s)
return string(buf)
}
enc := func(s string) string {
// scalar_set_lookup_test uses base64-encoded JSON entries via
// FixedSizeBinary's standard JSON shape; the helpers below
// already accept JSON arrays of base64 strings.
return base64.StdEncoding.EncodeToString([]byte(s))
}
for _, w := range []int{1, 2, 4, 8} {
w := w
ss.Run(fmt.Sprintf("ByteWidth=%d", w), func() {
typ := &arrow.FixedSizeBinaryType{ByteWidth: w}
input := fmt.Sprintf(`[%q, %q, %q]`,
enc(pad("a", w)), enc(pad("z", w)), enc(pad("b", w)))
valueset := fmt.Sprintf(`[%q, %q]`,
enc(pad("a", w)), enc(pad("b", w)))
ss.checkIsInFromJSON(typ, input, valueset,
`[true, false, true]`, compute.NullMatchingMatch)
})
}
}

func (ss *ScalarSetLookupSuite) TestIsInDecimal() {
type testCase struct {
expected string
Expand Down