diff --git a/arrow/compute/internal/kernels/scalar_set_lookup.go b/arrow/compute/internal/kernels/scalar_set_lookup.go index fe9841ac..7b1caeb7 100644 --- a/arrow/compute/internal/kernels/scalar_set_lookup.go +++ b/arrow/compute/internal/kernels/scalar_set_lookup.go @@ -169,6 +169,25 @@ func (s *SetLookupState[T]) Init(opts SetLookupOptions) error { if memoType == arrow.EXTENSION { memoType = s.ValueSetType.(arrow.ExtensionType).StorageType().ID() } + // FixedSizeBinary with byte-widths 1/2/4/8 takes the numeric fast-path + // in CreateSetLookupState (SetLookupState[uintN] + visitNumeric[uintN]), + // so the lookup table must be the matching TypedMemoTable[uintN], not + // the BinaryMemoTable that newMemoTable would otherwise return for + // FIXED_SIZE_BINARY. + if memoType == arrow.FIXED_SIZE_BINARY { + if fsb, ok := s.ValueSetType.(*arrow.FixedSizeBinaryType); ok { + switch fsb.ByteWidth { + case 1: + memoType = arrow.UINT8 + case 2: + memoType = arrow.UINT16 + case 4: + memoType = arrow.UINT32 + case 8: + memoType = arrow.UINT64 + } + } + } lookup, err := newMemoTable(s.Alloc, memoType) if err != nil { return err diff --git a/arrow/compute/scalar_set_lookup.go b/arrow/compute/scalar_set_lookup.go index 84305536..9323fc0a 100644 --- a/arrow/compute/scalar_set_lookup.go +++ b/arrow/compute/scalar_set_lookup.go @@ -207,8 +207,14 @@ func RegisterScalarSetLookup(reg FunctionRegistry) { kn.MemAlloc = exec.MemPrealloc kn.NullHandling = exec.NullComputedPrealloc kn.CleanupFn = func(state exec.KernelState) error { - s := state.(*kernels.SetLookupState[[]byte]) - s.Lookup.(*hashing.BinaryMemoTable).Release() + // FixedSizeBinary widths 1/2/4/8 use SetLookupState[uintN] + + // the matching numeric memo table, which carries no buffers + // to release. Only the BinaryMemoTable path needs cleanup. + if s, ok := state.(*kernels.SetLookupState[[]byte]); ok { + if memo, ok := s.Lookup.(*hashing.BinaryMemoTable); ok { + memo.Release() + } + } return nil } diff --git a/arrow/compute/scalar_set_lookup_test.go b/arrow/compute/scalar_set_lookup_test.go index b28c3d75..010bb52c 100644 --- a/arrow/compute/scalar_set_lookup_test.go +++ b/arrow/compute/scalar_set_lookup_test.go @@ -18,6 +18,7 @@ package compute_test import ( "context" + "encoding/base64" "fmt" "strings" "sync" @@ -265,6 +266,37 @@ func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinary() { } } +func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinaryFastPaths() { + // ByteWidths 1, 2, 4, 8 take the SetLookupState[uintN] / visitNumeric + // fast-path. Before the fix in #785 this panicked because Init asked + // newMemoTable for a FIXED_SIZE_BINARY table (BinaryMemoTable) but the + // state expected a TypedMemoTable[uintN]; afterwards the cleanup hook + // also tried to type-assert all states as SetLookupState[[]byte]. + pad := func(s string, w int) string { + buf := make([]byte, w) + copy(buf, s) + return string(buf) + } + enc := func(s string) string { + // scalar_set_lookup_test uses base64-encoded JSON entries via + // FixedSizeBinary's standard JSON shape; the helpers below + // already accept JSON arrays of base64 strings. + return base64.StdEncoding.EncodeToString([]byte(s)) + } + for _, w := range []int{1, 2, 4, 8} { + w := w + ss.Run(fmt.Sprintf("ByteWidth=%d", w), func() { + typ := &arrow.FixedSizeBinaryType{ByteWidth: w} + input := fmt.Sprintf(`[%q, %q, %q]`, + enc(pad("a", w)), enc(pad("z", w)), enc(pad("b", w))) + valueset := fmt.Sprintf(`[%q, %q]`, + enc(pad("a", w)), enc(pad("b", w))) + ss.checkIsInFromJSON(typ, input, valueset, + `[true, false, true]`, compute.NullMatchingMatch) + }) + } +} + func (ss *ScalarSetLookupSuite) TestIsInDecimal() { type testCase struct { expected string