From 38e7932b6d3106e253f1d0222e8a412aef449593 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 13:34:27 +0300 Subject: [PATCH 01/21] test(rfl): consolidate non-duplicate coverage from spec migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 624 unique test assertions on top of Anton's test/rfl/ corpus, deduped against his parallel rewrite (commit 536fef4 et al). Stats: * 35 modified files — our backup was a strict superset of upstream; replace upstream wholesale. Whitespace-only diffs in arith/abs, arith/ceil, arith/floor, arith/neg, arith/round, cmp/ge, cmp/gt, cmp/ne were also false-positive "Anton-only" results from sorting with extra spaces. * 12 new files — coverage Anton didn't write at all: test/rfl/datalog/rule.rfl — full EAV / rule / TC / negation test/rfl/embedding/hnsw.rfl — HNSW index build/query/info test/rfl/integration/arena.rfl — 100k-element churn + .sys.gc test/rfl/integration/cow.rfl — copy-on-write aliasing test/rfl/integration/morsel.rfl — 1023/1024/1025 + 2047/2048/2049 test/rfl/integration/optimizer.rfl — filter reorder, pushdown, selection-bitmap edges test/rfl/integration/str_pool.rfl — 12-byte SSO boundary test/rfl/system/csv_roundtrip.rfl — schema-less .csv.read test/rfl/system/splayed.rfl — set/get-splayed round-trip test/rfl/table/modify.rfl — functional column update test/rfl/table/pivot.rfl — wide reshape, sum/count aggr test/rfl/table/select.rfl — 50 select-clause assertions Coverage delta vs origin/master: Tests: 921/922 → 933/934 (+12 unique tests) Lines: 63.4% → 63.9% (+217 lines) Functions: 77.5% → 78.0% (+8 functions) Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/agg/sum.rfl | 13 +++ test/rfl/arith/abs.rfl | 28 +++++- test/rfl/arith/add.rfl | 130 +++++++++++++++++++++++++- test/rfl/arith/ceil.rfl | 19 +++- test/rfl/arith/div.rfl | 44 ++++++++- test/rfl/arith/floor.rfl | 18 +++- test/rfl/arith/mod.rfl | 31 +++++- test/rfl/arith/mul.rfl | 62 ++++++++++-- test/rfl/arith/neg.rfl | 39 +++++++- test/rfl/arith/round.rfl | 25 ++++- test/rfl/arith/sqrt.rfl | 6 ++ test/rfl/arith/sub.rfl | 74 ++++++++++++++- test/rfl/cmp/eq.rfl | 49 +++++++++- test/rfl/cmp/ge.rfl | 27 +++++- test/rfl/cmp/gt.rfl | 37 +++++++- test/rfl/cmp/le.rfl | 15 ++- test/rfl/cmp/lt.rfl | 17 ++++ test/rfl/cmp/ne.rfl | 23 ++++- test/rfl/collection/rand.rfl | 7 ++ test/rfl/collection/til.rfl | 5 + test/rfl/datalog/rule.rfl | 145 +++++++++++++++++++++++++++++ test/rfl/embedding/hnsw.rfl | 25 +++++ test/rfl/hof/apply.rfl | 13 +++ test/rfl/hof/fold.rfl | 16 ++++ test/rfl/hof/map.rfl | 7 ++ test/rfl/hof/scan.rfl | 6 ++ test/rfl/integration/arena.rfl | 25 +++++ test/rfl/integration/cow.rfl | 20 ++++ test/rfl/integration/joins.rfl | 36 ++++++- test/rfl/integration/morsel.rfl | 52 +++++++++++ test/rfl/integration/null.rfl | 6 ++ test/rfl/integration/optimizer.rfl | 54 +++++++++++ test/rfl/integration/str_pool.rfl | 44 +++++++++ test/rfl/sort/asc.rfl | 36 +++++++ test/rfl/sort/iasc.rfl | 11 +++ test/rfl/sort/rank.rfl | 20 +++- test/rfl/strop/like.rfl | 29 ++++++ test/rfl/system/csv_roundtrip.rfl | 75 +++++++++++++++ test/rfl/system/serde.rfl | 12 +++ test/rfl/system/splayed.rfl | 32 +++++++ test/rfl/table/meta.rfl | 25 ++++- test/rfl/table/modify.rfl | 28 ++++++ test/rfl/table/pivot.rfl | 25 +++++ test/rfl/table/select.rfl | 117 +++++++++++++++++++++++ test/rfl/temporal/date.rfl | 36 +++++++ test/rfl/temporal/time.rfl | 13 +++ test/rfl/type/as.rfl | 39 ++++++++ 47 files changed, 1572 insertions(+), 44 deletions(-) create mode 100644 test/rfl/datalog/rule.rfl create mode 100644 test/rfl/embedding/hnsw.rfl create mode 100644 test/rfl/integration/arena.rfl create mode 100644 test/rfl/integration/cow.rfl create mode 100644 test/rfl/integration/morsel.rfl create mode 100644 test/rfl/integration/optimizer.rfl create mode 100644 test/rfl/integration/str_pool.rfl create mode 100644 test/rfl/system/csv_roundtrip.rfl create mode 100644 test/rfl/system/splayed.rfl create mode 100644 test/rfl/table/modify.rfl create mode 100644 test/rfl/table/pivot.rfl create mode 100644 test/rfl/table/select.rfl diff --git a/test/rfl/agg/sum.rfl b/test/rfl/agg/sum.rfl index 2925e651..01e4ba7c 100644 --- a/test/rfl/agg/sum.rfl +++ b/test/rfl/agg/sum.rfl @@ -11,3 +11,16 @@ ;; linearity: scales with constant — falsifies sum ≡ 0. (set V (- (rand 100 2000) 1000)) (* 3 (sum V)) -- (sum (* 3 V)) + +;; ────────────── error: sum is not defined for the null sentinel ────────────── +;; (sum null) refers to RAY_NULL_OBJ, not a typed-null atom; it isn't +;; reduceable and must surface as a type error. +(sum null) !- type + +;; ────────────── null skipping (null_policy: skip_nulls) ────────────── +;; Rayforce aggregations skip typed-null elements. Vector literals +;; reject mixing nulls and values across types, so use (list ...) which +;; produces a heterogeneous list that sum handles. +(sum (list 1 0Ni 3 0Ni 5)) -- 9 +(sum (list 0Ni 0Ni 0Ni)) -- 0 +(sum (list)) -- 0 diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl index 1497f697..d8bb50e9 100644 --- a/test/rfl/arith/abs.rfl +++ b/test/rfl/arith/abs.rfl @@ -1,20 +1,38 @@ ;; Invariants for `abs` (absolute value). -;; non-negative: (>= (abs a) 0) +;; non-negative (set A (- (rand 256 200000) 100000)) (count A) -- (sum (>= (abs A) 0)) -;; idempotent: (abs (abs a)) == (abs a) +;; idempotent (count A) -- (sum (== (abs (abs A)) (abs A))) ;; |x| == |-x| (count A) -- (sum (== (abs A) (abs (neg A)))) -;; abs dominates: (>= (abs a) a) and (>= (abs a) (neg a)) +;; abs dominates (count A) -- (sum (>= (abs A) A)) (count A) -- (sum (>= (abs A) (neg A))) ;; concrete (abs -5) -- 5 -(abs 5) -- 5 -(abs 0) -- 0 +(abs 5) -- 5 +(abs 0) -- 0 + +;; null atoms propagate through abs +(nil? (abs 0N)) -- true +(nil? (abs 0Ni)) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Vec + cross-type — abs widens narrow ints to i64 +;; ────────────────────────────────────────────────────────────────── + +(abs [1 -2 3 -4]) -- [1 2 3 4] +(abs [-1.5 2.5 -3.5]) -- [1.5 2.5 3.5] +(abs -5.0) -- 5.0 +(type (abs -5)) -- 'i64 +(type (abs -5.0)) -- 'f64 +(type (abs -5h)) -- 'i64 +(type (abs -5i)) -- 'i64 +(type (abs [-1h 2h])) -- 'I64 +(type (abs [-1i 2i])) -- 'I64 diff --git a/test/rfl/arith/add.rfl b/test/rfl/arith/add.rfl index d480e468..f2e9ee69 100644 --- a/test/rfl/arith/add.rfl +++ b/test/rfl/arith/add.rfl @@ -1,4 +1,4 @@ -;; Invariants for `+` on i64 vectors. +;; Invariants for `+`. ;; commutativity: (+ a b) == (+ b a) (set A (- (rand 256 200000) 100000)) @@ -18,3 +18,131 @@ ;; identity (left): (+ 0 a) == a (set A (- (rand 256 200000) 100000)) (count A) -- (sum (== (+ 0 A) A)) + +;; ────────────────────────────────────────────────────────────────── +;; Mixed numeric — atom/vec/broadcast/coercion +;; ────────────────────────────────────────────────────────────────── + +;; int+int atom +(+ 3 4) -- 7 + +;; int+int vec +(+ [1 2 3] [10 20 30]) -- [11 22 33] + +;; int+int broadcast +(+ [1 2 3] 10) -- [11 12 13] + +;; neg + pos +(+ -5 3) -- -2 + +;; neg vec + atom +(+ [-1 -2 -3] 1) -- [0 -1 -2] + +;; float+float atom +(+ 1.5 2.5) -- 4.0 + +;; int+float coerce atom +(+ 1 2.5) -- 3.5 + +;; int_vec + float atom +(+ [1 2 3] 0.5) -- [1.5 2.5 3.5] + +;; int_vec + float_vec +(+ [1 2 3] [0.5 0.5 0.5]) -- [1.5 2.5 3.5] + +;; ────────────────────────────────────────────────────────────────── +;; Type sweep — each width, result type, widening +;; ────────────────────────────────────────────────────────────────── + +;; + I16 atom value +(+ 1h 2h) -- 3 + +;; + I32 atom value +(+ 1i 2i) -- 3 + +;; + I16 atom type preserves +(type (+ 1h 2h)) -- 'i16 + +;; + I32 atom type preserves +(type (+ 1i 2i)) -- 'i32 + +;; + I16+I32 widens to I32 +(type (+ 1h 2i)) -- 'i32 + +;; + I32+I64 widens to I64 +(type (+ 1i 2)) -- 'i64 + +;; + I64+F64 widens to F64 +(type (+ 1 2.0)) -- 'f64 + +;; + I16 vec element-wise +(+ [1h 2h 3h] [1h 2h 3h]) -- [2 4 6] + +;; + I32 vec element-wise +(+ [1i 2i 3i] [1i 2i 3i]) -- [2 4 6] + +;; + F64 vec element-wise +(+ [1.0 2.0 3.0] [1.0 2.0 3.0]) -- [2.0 4.0 6.0] + +;; + I16 vec type preserves +(type (+ [1h 2h] [1h 2h])) -- 'I16 + +;; + I32 vec type preserves +(type (+ [1i 2i] [1i 2i])) -- 'I32 + +;; + atom-vec broadcast I16 +(+ 10h [1h 2h 3h]) -- [11 12 13] + +;; + atom-vec broadcast F64 +(+ 10.0 [1.0 2.0 3.0]) -- [11.0 12.0 13.0] + +;; ────────────────────────────────────────────────────────────────── +;; Null propagation — typed null atoms propagate through + +;; ────────────────────────────────────────────────────────────────── + +;; + int null propagates +(nil? (+ 0Ni 5)) -- true + +;; + rhs int null propagates +(nil? (+ 5 0Ni)) -- true + +;; + f64 null propagates +(nil? (+ 0Nf 5.0)) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Type errors +;; ────────────────────────────────────────────────────────────────── + +(+ null 1) !- type +(+ "abc" 1) !- type +(+ 0Nf 2024.03.20) !- type + +;; ────────────────────────────────────────────────────────────────── +;; Morsel boundaries — 1023 / 1024 / 1025 +;; ────────────────────────────────────────────────────────────────── + +;; identity + 0 preserves vec at 1023 +(set V (til 1023)) +(+ V 0) -- V + +;; sum after +0 preserved at 1023 +(sum (+ V 0)) -- (sum V) + +;; vec+vec at 1023 doubles sum +(sum (+ V V)) -- (* 2 (sum V)) + +;; identity at 1024 +(set V (til 1024)) +(+ V 0) -- V +(sum (+ V V)) -- (* 2 (sum V)) + +;; last element at morsel 1024 +(last (+ V 1)) -- 1024 + +;; identity at 1025 +(set V (til 1025)) +(+ V 0) -- V +(sum (+ V V)) -- (* 2 (sum V)) + +;; last element at morsel 1025 +(last (+ V 1)) -- 1025 diff --git a/test/rfl/arith/ceil.rfl b/test/rfl/arith/ceil.rfl index f87ade50..c3f549d2 100644 --- a/test/rfl/arith/ceil.rfl +++ b/test/rfl/arith/ceil.rfl @@ -12,6 +12,21 @@ (count A) -- (sum (== (ceil (ceil A)) (ceil A))) ;; concrete -(ceil 3.2) -- 4.0 -(ceil 3.0) -- 3.0 +(ceil 3.2) -- 4.0 +(ceil 3.0) -- 3.0 (ceil -3.7) -- -3.0 + +;; ────────────────────────────────────────────────────────────────── +;; Integer input — identity +;; ────────────────────────────────────────────────────────────────── + +(ceil 5) -- 5 +(ceil 5h) -- 5 +(ceil 5i) -- 5 + +;; vec — float input, float result +(ceil [1.2 2.8 -0.5 -1.5]) -- [2.0 3.0 0.0 -1.0] + +;; metamorphic vs floor: ceil(x) == -floor(-x) +(ceil 1.5) -- (neg (floor (neg 1.5))) +(ceil -0.5) -- (neg (floor (neg -0.5))) diff --git a/test/rfl/arith/div.rfl b/test/rfl/arith/div.rfl index 9655514c..767efea5 100644 --- a/test/rfl/arith/div.rfl +++ b/test/rfl/arith/div.rfl @@ -13,5 +13,45 @@ (/ 7 2) -- 3 (/ 10 3) -- 3 -;; divide-by-zero yields typed null (not an error in rayforce2) -(/ 5 0) -- 0Nl +;; floor with negative — toward -inf, not toward 0 +(/ -10 3) -- -4 +(/ 10 -3) -- -4 +(/ -10 -3) -- 3 + +;; divide-by-zero yields typed null +(/ 5 0) -- 0Nl +(/ 10 0) -- 0Nl + +;; floats — Rayforce folds IEEE +Inf / NaN outcomes back to typed null +(nil? (/ 1.0 0.0)) -- true +(nil? (/ 0.0 0.0)) -- true +(nil? (div 1.0 0.0)) -- true +(nil? (div 0.0 0.0)) -- true + +;; const-fold of div-by-zero must agree with the runtime path +;; (regression for the optimizer doing the fold at compile time) +(set X 1.0) +(nil? (/ X 0.0)) -- true +(nil? (div X 0.0)) -- true +(== (nil? (/ 1 0)) (nil? (/ X 0))) -- true +(== (nil? (/ 1.0 0.0)) (nil? (/ X 0.0))) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Type sweep — narrow ints / float / cross-width +;; ────────────────────────────────────────────────────────────────── + +(/ 6h 2h) -- 3 +(type (/ 6h 2h)) -- 'i16 +(/ 6i 2i) -- 3 +(type (/ 6i 2i)) -- 'i32 +(/ 10.0 4.0) -- 2.0 +(type (/ 6.0 2.0)) -- 'f64 +(/ [12 15 18] 3) -- [4 5 6] + +;; ────────────────────────────────────────────────────────────────── +;; Null propagation +;; ────────────────────────────────────────────────────────────────── + +(nil? (/ 0Ni 5)) -- true +(nil? (/ 5 0Ni)) -- true +(nil? (/ 0Nf 5.0)) -- true diff --git a/test/rfl/arith/floor.rfl b/test/rfl/arith/floor.rfl index 1632ce67..ef279ac9 100644 --- a/test/rfl/arith/floor.rfl +++ b/test/rfl/arith/floor.rfl @@ -12,6 +12,20 @@ (count A) -- (sum (== (floor (floor A)) (floor A))) ;; concrete -(floor 3.7) -- 3.0 -(floor 3.0) -- 3.0 +(floor 3.7) -- 3.0 +(floor 3.0) -- 3.0 (floor -3.2) -- -4.0 + +;; ────────────────────────────────────────────────────────────────── +;; Integer input — identity +;; ────────────────────────────────────────────────────────────────── + +(floor 5) -- 5 +(floor 5h) -- 5 +(floor 5i) -- 5 + +;; vec — float input, float result +(floor [1.2 2.8 -0.5 -1.5]) -- [1.0 2.0 -1.0 -2.0] + +;; relation to ceil — floor <= ceil +(<= (floor 1.5) (ceil 1.5)) -- true diff --git a/test/rfl/arith/mod.rfl b/test/rfl/arith/mod.rfl index 65cc3457..287554b1 100644 --- a/test/rfl/arith/mod.rfl +++ b/test/rfl/arith/mod.rfl @@ -1,4 +1,4 @@ -;; Invariants for `%` (modulo). +;; Invariants for `%` (floor-modulo). ;; range: 0 <= (% a m) < m for positive m (set A (- (rand 256 10000) 5000)) @@ -12,3 +12,32 @@ ;; concrete (% 7 3) -- 1 (% 0 5) -- 0 + +;; floor-mod with negative — sign follows divisor (Python semantics) +(% -10 3) -- 2 +(% 10 -3) -- -2 + +;; mod by 1 is always 0 +(% 17 1) -- 0 +(% -42 1) -- 0 + +;; mod by self is 0 +(% 7 7) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; Type sweep +;; ────────────────────────────────────────────────────────────────── + +(% 10h 3h) -- 1 +(type (% 10h 3h)) -- 'i16 +(% 10i 3i) -- 1 +(type (% 10i 3i)) -- 'i32 + +(% [10 11 12] 3) -- [1 2 0] + +;; ────────────────────────────────────────────────────────────────── +;; Null + zero divisor +;; ────────────────────────────────────────────────────────────────── + +(nil? (% 0Ni 5)) -- true +(% 10 0) -- 0Nl diff --git a/test/rfl/arith/mul.rfl b/test/rfl/arith/mul.rfl index 37ab4511..c5f278a4 100644 --- a/test/rfl/arith/mul.rfl +++ b/test/rfl/arith/mul.rfl @@ -1,29 +1,77 @@ -;; Invariants for `*` on i64 vectors. +;; Invariants for `*`. -;; commutativity: (* a b) == (* b a) +;; commutativity (set A (- (rand 256 400) 200)) (set B (- (rand 256 400) 200)) (count A) -- (sum (== (* A B) (* B A))) -;; associativity: (* (* a b) c) == (* a (* b c)) — narrow range to avoid overflow +;; associativity (narrow range) (set A (- (rand 256 20) 10)) (set B (- (rand 256 20) 10)) (set C (- (rand 256 20) 10)) (count A) -- (sum (== (* (* A B) C) (* A (* B C)))) -;; identity: (* a 1) == a and (* 1 a) == a +;; identity (left/right) (set A (- (rand 256 200000) 100000)) (count A) -- (sum (== (* A 1) A)) (count A) -- (sum (== (* 1 A) A)) -;; zero annihilates: (* a 0) == 0 +;; zero annihilates (count A) -- (sum (== (* A 0) 0)) -;; distributivity over +: (* a (+ b c)) == (+ (* a b) (* a c)) +;; distributivity over + (set A (- (rand 256 100) 50)) (set B (- (rand 256 100) 50)) (set C (- (rand 256 100) 50)) (count A) -- (sum (== (* A (+ B C)) (+ (* A B) (* A C)))) -;; concrete +;; ────────────────────────────────────────────────────────────────── +;; Concrete atom / vec / broadcast / cross-width +;; ────────────────────────────────────────────────────────────────── + +(* 2 3) -- 6 +(* -3 4) -- -12 +(* -3 -4) -- 12 (* [2 3 4] [5 6 7]) -- [10 18 28] +(* [1 2 3] 2) -- [2 4 6] +(* 2 [1 2 3]) -- [2 4 6] + +;; I16 / I32 / F64 +(* 2h 3h) -- 6 +(type (* 2h 3h)) -- 'i16 +(* 2i 3i) -- 6 +(type (* 2i 3i)) -- 'i32 +(* 2.5 4.0) -- 10.0 +(type (* 1 2.0)) -- 'f64 + +;; ────────────────────────────────────────────────────────────────── +;; Null propagation +;; ────────────────────────────────────────────────────────────────── + +(nil? (* 0Ni 5)) -- true +(nil? (* 5 0Ni)) -- true +(nil? (* 0Ni 0)) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Type errors +;; ────────────────────────────────────────────────────────────────── + +(* null 1) !- type +(* "abc" 2) !- type +(* 02:15:07.000 02:15:07.000) !- type + +;; ────────────────────────────────────────────────────────────────── +;; Morsel boundaries +;; ────────────────────────────────────────────────────────────────── + +(set V (til 1023)) +(* V 1) -- V +(sum (* 2 V)) -- (* 2 (sum V)) + +(set V (til 1024)) +(* V 1) -- V +(sum (* 0 V)) -- 0 + +(set V (til 1025)) +(* V 1) -- V +(sum (* 2 V)) -- (* 2 (sum V)) diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl index 640401fb..532d0048 100644 --- a/test/rfl/arith/neg.rfl +++ b/test/rfl/arith/neg.rfl @@ -12,6 +12,41 @@ (count A) -- (sum (== (neg (+ A B)) (+ (neg A) (neg B)))) ;; concrete -(neg 5) -- -5 -(neg 0) -- 0 +(neg 5) -- -5 +(neg 0) -- 0 (neg -3) -- 3 + +;; null atoms propagate through neg +(nil? (neg 0N)) -- true +(nil? (neg 0Ni)) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Vec + cross-type +;; ────────────────────────────────────────────────────────────────── + +(neg [1 2 3]) -- [-1 -2 -3] +(neg [1.0 2.0 3.0]) -- [-1.0 -2.0 -3.0] +(neg 5.0) -- -5.0 +(type (neg 5)) -- 'i64 +(type (neg 5.0)) -- 'f64 + +;; involution per-type (vec) +(neg (neg [1 2 3])) -- [1 2 3] +(neg (neg [1.0 2.0 3.0])) -- [1.0 2.0 3.0] + +;; ────────────────────────────────────────────────────────────────── +;; Morsel boundary +;; ────────────────────────────────────────────────────────────────── + +(set V (til 1024)) +(neg (neg V)) -- V +(sum (+ V (neg V))) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; Probe: neg on narrow-int currently raises type +;; (filed in spec/_probes/neg_narrow_int.rfl) +;; ────────────────────────────────────────────────────────────────── + +(neg 5h) !- type +(neg 5i) !- type +(neg [1h 2h]) !- type diff --git a/test/rfl/arith/round.rfl b/test/rfl/arith/round.rfl index c2223cf2..84c31679 100644 --- a/test/rfl/arith/round.rfl +++ b/test/rfl/arith/round.rfl @@ -5,8 +5,25 @@ (count A) -- (sum (or (== (round A) (floor A)) (== (round A) (ceil A)))) ;; concrete — half-away-from-zero / banker's, depending on impl -(round 3.5) -- 4.0 -(round 3.4) -- 3.0 -(round 3.6) -- 4.0 -(round 3.0) -- 3.0 +(round 3.5) -- 4.0 +(round 3.4) -- 3.0 +(round 3.6) -- 4.0 +(round 3.0) -- 3.0 (round -3.4) -- -3.0 + +;; ────────────────────────────────────────────────────────────────── +;; Integer input — round always returns f64 +;; ────────────────────────────────────────────────────────────────── + +(round 5) -- 5.0 +(round 5h) -- 5.0 +(round 5i) -- 5.0 +(type (round 5)) -- 'f64 + +;; vec — also f64 result +(round [1.2 2.7 -1.5]) -- [1.0 3.0 -2.0] + +;; bounded between floor and ceil for arbitrary input +(set X 1.7) +(>= (round X) (floor X)) -- true +(<= (round X) (ceil X)) -- true diff --git a/test/rfl/arith/sqrt.rfl b/test/rfl/arith/sqrt.rfl index 69ac12e6..5b22013c 100644 --- a/test/rfl/arith/sqrt.rfl +++ b/test/rfl/arith/sqrt.rfl @@ -7,6 +7,12 @@ (sqrt 9.0) -- 3.0 (sqrt 25.0) -- 5.0 +;; sqrt of a negative produces IEEE NaN (still f64, not nil) — NaN is +;; the only float that is not equal to itself. +(type (sqrt -1.0)) -- 'f64 +(nil? (sqrt -1.0)) -- false +(!= (sqrt -1.0) (sqrt -1.0)) -- true + ;; roundtrip: (sqrt x)^2 ≈ x for x >= 0 (set A (as 'F64 (rand 256 1000))) (count A) -- (sum (< (abs (- (* (sqrt A) (sqrt A)) A)) 0.001)) diff --git a/test/rfl/arith/sub.rfl b/test/rfl/arith/sub.rfl index 3c190df7..4ea43241 100644 --- a/test/rfl/arith/sub.rfl +++ b/test/rfl/arith/sub.rfl @@ -1,4 +1,4 @@ -;; Invariants for `-` on i64 vectors. +;; Invariants for `-`. ;; self-inverse: (- a a) == 0 (rand never emits null) (set A (- (rand 256 200000) 100000)) @@ -12,3 +12,75 @@ (set A (- (rand 256 100000) 50000)) (set B (- (rand 256 100000) 50000)) (count A) -- (sum (== (- (+ A B) B) A)) + +;; anti-commutativity: (- a b) == (neg (- b a)) +(set A (- (rand 256 100000) 50000)) +(set B (- (rand 256 100000) 50000)) +(count A) -- (sum (== (- A B) (neg (- B A)))) + +;; ────────────────────────────────────────────────────────────────── +;; Atom / vec / broadcast / cross-width +;; ────────────────────────────────────────────────────────────────── + +;; - atom +(- 10 3) -- 7 + +;; - negative +(- 3 10) -- -7 + +;; - vec vec +(- [10 20 30] [1 2 3]) -- [9 18 27] + +;; - vec atom +(- [10 20 30] 5) -- [5 15 25] + +;; - atom vec (broadcast) +(- 100 [1 2 3]) -- [99 98 97] + +;; - float atom +(- 1.5 0.5) -- 1.0 + +;; - I16 atom +(- 5h 3h) -- 2 +(type (- 5h 3h)) -- 'i16 + +;; - I32 atom +(- 5i 3i) -- 2 +(type (- 5i 3i)) -- 'i32 + +;; - cross-width widens +(type (- 5h 3i)) -- 'i32 +(type (- 5 3.0)) -- 'f64 + +;; ────────────────────────────────────────────────────────────────── +;; Null propagation +;; ────────────────────────────────────────────────────────────────── + +(nil? (- 0Ni 5)) -- true +(nil? (- 5 0Ni)) -- true +(nil? (- 0Nf 5.0)) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Type errors +;; ────────────────────────────────────────────────────────────────── + +(- null 1) !- type +(- "a" 1) !- type +(- 2025.03.04D15:41:47.087221025 2025.12.13) !- type + +;; ────────────────────────────────────────────────────────────────── +;; Morsel boundaries +;; ────────────────────────────────────────────────────────────────── + +(set V (til 1023)) +(sum (- V V)) -- 0 +(- V 0) -- V + +(set V (til 1024)) +(sum (- V V)) -- 0 +(- V 0) -- V +(last (- V 1)) -- 1022 + +(set V (til 1025)) +(sum (- V V)) -- 0 +(last (- V 1)) -- 1023 diff --git a/test/rfl/cmp/eq.rfl b/test/rfl/cmp/eq.rfl index 90d5d8ec..b527d964 100644 --- a/test/rfl/cmp/eq.rfl +++ b/test/rfl/cmp/eq.rfl @@ -1,4 +1,4 @@ -;; Invariants for `==` on i64 vectors. +;; Invariants for `==`. ;; reflexive: (== a a) is true for every element (set A (- (rand 256 200000) 100000)) @@ -8,3 +8,50 @@ (set A (- (rand 256 200000) 100000)) (set B (- (rand 256 200000) 100000)) (count A) -- (sum (== (== A B) (== B A))) + +;; ────────────────────────────────────────────────────────────────── +;; Concrete — atom + vec across all comparable types +;; ────────────────────────────────────────────────────────────────── + +;; reflexive concrete +(== 5 5) -- true +(== -5 -5) -- true +(== 0 0) -- true +(== 3.14 3.14) -- true +(== "hello" "hello") -- true +(== "" "") -- true +(== true true) -- true +(== false false) -- true + +;; non-equal +(== 3 4) -- false +(== "a" "b") -- false + +;; symmetric +(== 3 5) -- (== 5 3) + +;; vec element-wise +(== [1 2 3] [1 2 3]) -- [true true true] +(== [1 2 3] [1 5 3]) -- [true false true] + +;; cross-width numeric promotion +(== 5h 5) -- true +(== 5i 5) -- true +(== 5 5.0) -- true + +;; sym +(== 'foo 'foo) -- true +(== 'foo 'bar) -- false +(== ['a 'b] ['a 'b]) -- [true true] + +;; date +(== 2024.01.15 2024.01.15) -- true +(== 2024.01.15 2024.06.01) -- false + +;; ────────────────────────────────────────────────────────────────── +;; Morsel boundary +;; ────────────────────────────────────────────────────────────────── + +(set V (til 1024)) +(sum (== V V)) -- 1024 +(sum (== V 0)) -- 1 diff --git a/test/rfl/cmp/ge.rfl b/test/rfl/cmp/ge.rfl index 1e686ffb..4d0ddfed 100644 --- a/test/rfl/cmp/ge.rfl +++ b/test/rfl/cmp/ge.rfl @@ -8,6 +8,27 @@ (set B (- (rand 256 200000) 100000)) (count A) -- (sum (== (>= A B) (<= B A))) -(>= 2 1) -- true -(>= 1 2) -- false -(>= 3 3) -- true +;; >= equivalent to (or > ==) +(count A) -- (sum (== (>= A B) (or (> A B) (== A B)))) + +;; concrete +(>= 2 1) -- true +(>= 1 2) -- false +(>= 3 3) -- true +(>= -1 -5) -- true + +;; cross-types +(>= 5h 5h) -- true +(>= 7i 5i) -- true +(>= 2.5 2.5) -- true + +;; STR +(>= "x" "x") -- true +(>= "b" "a") -- true + +;; DATE +(>= 2024.01.15 2024.01.15) -- true +(>= 2024.06.01 2024.01.15) -- true + +;; vec +(>= [1 5 5] [2 2 5]) -- [false true true] diff --git a/test/rfl/cmp/gt.rfl b/test/rfl/cmp/gt.rfl index b8124773..16ae306d 100644 --- a/test/rfl/cmp/gt.rfl +++ b/test/rfl/cmp/gt.rfl @@ -8,6 +8,37 @@ (set B (- (rand 256 200000) 100000)) (count A) -- (sum (== (> A B) (< B A))) -(> 2 1) -- true -(> 1 2) -- false -(> 3 3) -- false +;; concrete +(> 2 1) -- true +(> 1 2) -- false +(> 3 3) -- false +(> -1 -5) -- true + +;; ────────────────────────────────────────────────────────────────── +;; Across types +;; ────────────────────────────────────────────────────────────────── + +(> 7h 5h) -- true +(> 7i 5i) -- true +(> 2.5 1.5) -- true + +;; STR — lex order +(> "b" "a") -- true +(> "aa" "a") -- true +(> "x" "x") -- false + +;; DATE +(> 2024.06.01 2024.01.15) -- true + +;; vec +(> [1 5 3] [2 2 2]) -- [false true true] +(> [1 5 10] 5) -- [false false true] + +;; ────────────────────────────────────────────────────────────────── +;; Probe: > on SYM atoms raises type — known bug (see _probes/cmp_sym_ordering) +;; ────────────────────────────────────────────────────────────────── + +(> 'b 'a) !- type +(< 'a 'b) !- type +(>= 'a 'a) !- type +(<= 'a 'a) !- type diff --git a/test/rfl/cmp/le.rfl b/test/rfl/cmp/le.rfl index 1bd3079b..5e098305 100644 --- a/test/rfl/cmp/le.rfl +++ b/test/rfl/cmp/le.rfl @@ -8,7 +8,20 @@ (set B (- (rand 256 200000) 100000)) (count A) -- (sum (== (<= A B) (or (< A B) (== A B)))) -;; antisymmetric when equal: (<= a b) and (<= b a) implies (== a b) +;; concrete (<= 1 2) -- true (<= 2 1) -- false (<= 3 3) -- true + +;; cross-types +(<= 5h 5h) -- true +(<= 3i 5i) -- true +(<= 2.5 2.5) -- true + +;; STR / DATE +(<= "a" "b") -- true +(<= "x" "x") -- true +(<= 2024.01.15 2024.06.01) -- true + +;; vec +(<= [1 5 5] [2 2 5]) -- [true false true] diff --git a/test/rfl/cmp/lt.rfl b/test/rfl/cmp/lt.rfl index b5cf2e85..9be58bb3 100644 --- a/test/rfl/cmp/lt.rfl +++ b/test/rfl/cmp/lt.rfl @@ -11,6 +11,23 @@ ;; trichotomy: exactly one of <, >, == holds (count A) -- (sum (+ (+ (as 'I64 (< A B)) (as 'I64 (> A B))) (as 'I64 (== A B)))) +;; concrete (< 1 2) -- true (< 2 1) -- false (< 3 3) -- false +(< -5 -1) -- true + +;; cross-types +(< 3h 5h) -- true +(< 3i 5i) -- true +(< 1.5 2.5) -- true + +;; STR +(< "a" "b") -- true + +;; DATE +(< 2024.01.15 2024.06.01) -- true + +;; vec +(< [1 5 3] [2 2 2]) -- [true false false] +(< [1 5 10] 5) -- [true false false] diff --git a/test/rfl/cmp/ne.rfl b/test/rfl/cmp/ne.rfl index e7fc3671..adbf6d79 100644 --- a/test/rfl/cmp/ne.rfl +++ b/test/rfl/cmp/ne.rfl @@ -11,5 +11,24 @@ ;; symmetric (count A) -- (sum (== (!= A B) (!= B A))) -(!= 1 2) -- true -(!= 3 3) -- false +;; concrete +(!= 1 2) -- true +(!= 3 3) -- false +(!= -1 1) -- true + +;; cross-type +(!= "a" "a") -- false +(!= "a" "b") -- true +(!= 'a 'a) -- false +(!= 'a 'b) -- true + +;; cross-width +(!= 5h 5) -- false +(!= 5h 6) -- true +(!= 5 5.0) -- false + +;; vec +(!= [1 2 3] [1 5 3]) -- [false true false] + +;; complementary to == per-element +(!= [1 2 3] [1 2 3]) -- [false false false] diff --git a/test/rfl/collection/rand.rfl b/test/rfl/collection/rand.rfl index b296cd3c..c940475f 100644 --- a/test/rfl/collection/rand.rfl +++ b/test/rfl/collection/rand.rfl @@ -7,3 +7,10 @@ ;; int random ;; ========== VERIFY RANGE ========== (and (>= (min (rand 100 10)) 0) (< (max (rand 100 10)) 10)) -- true + +;; ========== ZERO COUNT IS EMPTY (NOT AN ERROR) ========== +(rand 0 10) -- [] + +;; ========== DOMAIN ERRORS ========== +;; non-positive upper bound is rejected as a domain error +(rand 5 0) !- domain diff --git a/test/rfl/collection/til.rfl b/test/rfl/collection/til.rfl index 3a1c5739..4a866014 100644 --- a/test/rfl/collection/til.rfl +++ b/test/rfl/collection/til.rfl @@ -15,3 +15,8 @@ N -- (sum (== 1 (- (at V (+ 1 (til N))) (at V (til N))))) ;; first/last (first (til 50)) -- 0 (last (til 50)) -- 49 + +;; ========== DOMAIN ERRORS ========== +;; til of a negative count is rejected as a domain error +(til -1) !- domain +(til -100) !- domain diff --git a/test/rfl/datalog/rule.rfl b/test/rfl/datalog/rule.rfl new file mode 100644 index 00000000..332d0690 --- /dev/null +++ b/test/rfl/datalog/rule.rfl @@ -0,0 +1,145 @@ +;; Datalog: EAV facts asserted into a `datoms` store; queries answered via +;; pattern matching with optional rule expansion. Counts here were +;; cross-checked against a Python reference evaluator. + +;; ────────────── simple EAV query ────────────── +;; 3 facts, 3 (?e ?n) bindings. +(set db (datoms)) +(set db (assert-fact db 1 'age 30)) +(set db (assert-fact db 2 'age 25)) +(set db (assert-fact db 3 'age 40)) +(count (query db (find ?e ?n) (where (?e :age ?n)))) -- 3 + +;; constant in object slot filters down to no rows when no fact matches +(count (query db (find ?e) (where (?e :age 999)))) -- 0 + +;; ────────────── two-clause join on entity ────────────── +(set db (datoms)) +(set db (assert-fact db 1 'name 100)) +(set db (assert-fact db 1 'dept 10)) +(set db (assert-fact db 2 'name 200)) +(set db (assert-fact db 2 'dept 20)) +(set db (assert-fact db 3 'name 300)) +(set db (assert-fact db 3 'dept 10)) +(count (query db (find ?e ?n ?d) (where (?e :name ?n) (?e :dept ?d)))) -- 3 + +;; constant in object slot for one of the two clauses (dept = 10) keeps +;; only the two entities with that dept +(count (query db (find ?e ?n) (where (?e :name ?n) (?e :dept 10)))) -- 2 + +;; ────────────── wildcard underscore ────────────── +;; `_` matches any value but does not bind. Only entities with a :dept +;; fact are returned. +(set db (datoms)) +(set db (assert-fact db 1 'name 100)) +(set db (assert-fact db 1 'dept 10)) +(set db (assert-fact db 2 'name 200)) +(set db (assert-fact db 3 'dept 30)) +(count (query db (find ?e) (where (?e :dept _)))) -- 2 + +;; ────────────── triple-clause join with constant filter ────────────── +;; Find all (?c ?s) for entities with :age 30. Two such entities exist. +(set db (datoms)) +(set db (assert-fact db 1 'age 30)) +(set db (assert-fact db 1 'city 100)) +(set db (assert-fact db 1 'salary 80000)) +(set db (assert-fact db 2 'age 25)) +(set db (assert-fact db 2 'city 200)) +(set db (assert-fact db 2 'salary 60000)) +(set db (assert-fact db 3 'age 30)) +(set db (assert-fact db 3 'city 100)) +(set db (assert-fact db 3 'salary 90000)) +(count (query db (find ?e ?c ?s) (where (?e :age 30) (?e :city ?c) (?e :salary ?s)))) -- 2 + +;; ────────────── rules: derived relations ────────────── +;; (employee ?e ?n ?d) projects (entity, name, dept) when both facts hold +(set db (datoms)) +(set db (assert-fact db 1 'name 100)) +(set db (assert-fact db 1 'dept 10)) +(set db (assert-fact db 2 'name 200)) +(set db (assert-fact db 2 'dept 20)) +(set db (assert-fact db 3 'name 300)) +(set db (assert-fact db 3 'dept 10)) +(rule (employee ?e ?n ?d) (?e :name ?n) (?e :dept ?d)) +(count (query db (find ?n ?d) (where (employee ?e ?n ?d)))) -- 3 + +;; rule with constant body — (eng ?e) iff (?e :dept 10). Two engineers. +(set db (datoms)) +(set db (assert-fact db 1 'name 100)) +(set db (assert-fact db 1 'dept 10)) +(set db (assert-fact db 1 'salary 80000)) +(set db (assert-fact db 2 'name 200)) +(set db (assert-fact db 2 'dept 10)) +(set db (assert-fact db 2 'salary 60000)) +(set db (assert-fact db 3 'name 300)) +(set db (assert-fact db 3 'dept 20)) +(set db (assert-fact db 3 'salary 90000)) +(rule (eng ?e) (?e :dept 10)) +(count (query db (find ?e) (where (eng ?e)))) -- 2 + +;; ────────────── transitive closure ────────────── +;; Linear chain 1→2→3→4→5: TC has 4+3+2+1 = 10 reachable pairs. +(set db (datoms)) +(set db (assert-fact db 1 'edge 2)) +(set db (assert-fact db 2 'edge 3)) +(set db (assert-fact db 3 'edge 4)) +(set db (assert-fact db 4 'edge 5)) +(rule (reachable ?x ?y) (?x :edge ?y)) +(rule (reachable ?x ?z) (?x :edge ?y) (reachable ?y ?z)) +(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 10 + +;; DAG 1→2, 1→3, 2→4, 3→4, 4→5, 5→6 — 14 reachable pairs. +(set db (datoms)) +(set db (assert-fact db 1 'edge 2)) +(set db (assert-fact db 1 'edge 3)) +(set db (assert-fact db 2 'edge 4)) +(set db (assert-fact db 3 'edge 4)) +(set db (assert-fact db 4 'edge 5)) +(set db (assert-fact db 5 'edge 6)) +(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 14 + +;; Cycle 1↔2, 2→3 — every node reaches every other in {1,2,3} except +;; that 3 has no outgoing edge. Reachable pairs: 1→{1,2,3}, 2→{1,2,3} = 6. +(set db (datoms)) +(set db (assert-fact db 1 'edge 2)) +(set db (assert-fact db 2 'edge 1)) +(set db (assert-fact db 2 'edge 3)) +(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 6 + +;; Self-loop on 1: only one reachable pair (1,1). +(set db (datoms)) +(set db (assert-fact db 1 'edge 1)) +(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 1 + +;; ────────────── stratified negation ────────────── +;; (not ...) excludes entities whose negated atom is satisfiable. + +;; non-managers in dept 10 = entities 2 and 4 (1 and 3 have :manager fact; +;; 1 in dept 10 has manager so excluded, 2 and 4 have no manager → kept, +;; 3 in dept 20 doesn't match :dept 10). +(set db (datoms)) +(set db (assert-fact db 1 'dept 10)) +(set db (assert-fact db 1 'manager 1)) +(set db (assert-fact db 2 'dept 10)) +(set db (assert-fact db 3 'dept 20)) +(set db (assert-fact db 3 'manager 1)) +(set db (assert-fact db 4 'dept 10)) +(count (query db (find ?e) (where (?e :dept 10) (not (?e :manager ?m))))) -- 2 + +;; non-managers among named entities: 2 and 4 have no manager → 2. +(set db (datoms)) +(set db (assert-fact db 1 'name 100)) +(set db (assert-fact db 1 'manager 1)) +(set db (assert-fact db 2 'name 200)) +(set db (assert-fact db 3 'name 300)) +(set db (assert-fact db 3 'manager 1)) +(set db (assert-fact db 4 'name 400)) +(count (query db (find ?e) (where (?e :name ?n) (not (?e :manager ?m))))) -- 2 + +;; everyone is a manager → empty result under (not ...) +(set db (datoms)) +(set db (assert-fact db 1 'name 100)) +(set db (assert-fact db 1 'manager 1)) +(set db (assert-fact db 2 'name 200)) +(set db (assert-fact db 2 'manager 1)) +(count (query db (find ?e) (where (?e :name ?n) (not (?e :manager ?m))))) -- 0 diff --git a/test/rfl/embedding/hnsw.rfl b/test/rfl/embedding/hnsw.rfl new file mode 100644 index 00000000..067526a1 --- /dev/null +++ b/test/rfl/embedding/hnsw.rfl @@ -0,0 +1,25 @@ +;; Invariants for `hnsw-build` / `ann` / `hnsw-info` / `hnsw-free`. +;; HNSW approximate nearest-neighbour index over a list of float vectors. +;; The index is opaque; queries return a table with `_rowid` and `_dist`. + +;; Five vectors, exact-match query at row 0. +(set V (list [1.0 0.0 0.0] [0.9 0.1 0.0] [0.0 1.0 0.0] [0.0 0.0 1.0] [0.5 0.5 0.0])) +(set Idx (hnsw-build V 'l2 8 100)) + +;; ────────────── hnsw-info exposes build parameters ────────────── +(at (hnsw-info Idx) 'nrows) -- 5 +(at (hnsw-info Idx) 'dim) -- 3 +(at (hnsw-info Idx) 'metric) -- 'l2 +(at (hnsw-info Idx) 'M) -- 8 +(at (hnsw-info Idx) 'efc) -- 100 + +;; ────────────── exact-match top-1 ────────────── +;; Querying with the same vector as row 0 must return row 0 with dist 0. +(first (at (ann Idx [1.0 0.0 0.0] 1) '_rowid)) -- 0 +(first (at (ann Idx [1.0 0.0 0.0] 1) '_dist)) -- 0.0 + +;; ────────────── top-k respects requested count and orders by distance ────────────── +(count (ann Idx [1.0 0.0 0.0] 3)) -- 3 +(<= (first (at (ann Idx [1.0 0.0 0.0] 3) '_dist)) (last (at (ann Idx [1.0 0.0 0.0] 3) '_dist))) -- true + +(hnsw-free Idx) diff --git a/test/rfl/hof/apply.rfl b/test/rfl/hof/apply.rfl index b3c678fa..bdb2f8dc 100644 --- a/test/rfl/hof/apply.rfl +++ b/test/rfl/hof/apply.rfl @@ -9,6 +9,19 @@ (apply + [1 2 3] [10 20 30]) -- [11 22 33] (apply * [1 2 3] [4 5 6]) -- [4 10 18] (apply - [10 20] [1 2]) -- [9 18] +(apply + [1 2] [3 4]) -- [4 6] +(apply * [1 2 3] [10 20 30]) -- [10 40 90] ;; Mismatched lengths take the shorter side. (apply + [1 2 3 4] [10 20]) -- [11 22] + +;; lambda +(apply (fn [x y] (+ x (* 2 y))) [1 2] [10 20]) -- [21 42] + +;; floats +(apply + [1.5 2.5] [0.5 0.5]) -- [2.0 3.0] + +;; result count matches input count +(set V (rand 50 100)) +(set W (rand 50 100)) +(count V) -- (count (apply + V W)) diff --git a/test/rfl/hof/fold.rfl b/test/rfl/hof/fold.rfl index fd732df7..47dd97ca 100644 --- a/test/rfl/hof/fold.rfl +++ b/test/rfl/hof/fold.rfl @@ -7,3 +7,19 @@ ;; fold is sum for +/0 (set V (rand 50 1000)) (sum V) -- (fold + 0 V) + +;; fold without seed +(fold + [1 2 3 4 5]) -- 15 +(fold * [1 2 3 4]) -- 24 + +;; fold with seed accumulates from seed +(fold + 10 [1 2 3 4 5]) -- 25 + +;; lambda +(fold (fn [a b] (+ a b)) [1 2 3]) -- 6 + +;; empty + seed → seed +(fold + 42 ()) -- 42 + +;; floats +(fold + [1.5 2.5 3.0]) -- 7.0 diff --git a/test/rfl/hof/map.rfl b/test/rfl/hof/map.rfl index abf16a76..9d5dc9f7 100644 --- a/test/rfl/hof/map.rfl +++ b/test/rfl/hof/map.rfl @@ -10,6 +10,13 @@ ;; identity map is identity (count V) -- (sum (== (map (fn [x] x) V) V)) +;; builtin verbs as fn arguments +(map neg [1 2 3]) -- [-1 -2 -3] +(map abs [-1 2 -3]) -- [1 2 3] + +;; map on empty +(count (map neg ())) -- 0 + ;; ---- Regression tests ported from test_lang_rf.inc::test_rf_map ---- ;; Ported from test_lang_rf.inc::test_rf_map. diff --git a/test/rfl/hof/scan.rfl b/test/rfl/hof/scan.rfl index b56d4d5d..402d251e 100644 --- a/test/rfl/hof/scan.rfl +++ b/test/rfl/hof/scan.rfl @@ -1,6 +1,11 @@ ;; Invariants for `scan` (prefix fold: (scan f v)). (scan + [1 2 3 4]) -- [1 3 6 10] +(scan + [1 2 3 4 5]) -- [1 3 6 10 15] +(scan * [1 2 3 4]) -- [1 2 6 24] + +;; singleton +(scan + [42]) -- [42] ;; count preserved (set V (rand 50 100)) @@ -8,3 +13,4 @@ ;; last element equals fold (last (scan + V)) -- (fold + 0 V) +(last (scan + [1 2 3 4 5])) -- (sum [1 2 3 4 5]) diff --git a/test/rfl/integration/arena.rfl b/test/rfl/integration/arena.rfl new file mode 100644 index 00000000..60532074 --- /dev/null +++ b/test/rfl/integration/arena.rfl @@ -0,0 +1,25 @@ +;; Heavy-allocation churn — 100 000-element vectors in tight succession. +;; Tests that the arena/buddy allocator holds up under pressure and that +;; .sys.gc doesn't corrupt working state. + +(set N 100000) + +;; closed-form: sum 0..99999 = 99999 * 100000 / 2 +(sum (til N)) -- 4999950000 + +;; linearity at 100k: sum(2*v) == 2*sum(v) +(sum (* 2 (til N))) -- (* 2 (sum (til N))) + +;; concat-distributive: sum(v ++ v) == 2*sum(v) +(sum (concat (til N) (til N))) -- (* 2 (sum (til N))) + +;; count survives an atomic op at 100k +(count (+ (til N) 7)) -- 100000 + +;; ────────────── gc doesn't perturb computation ────────────── +;; Compute, gc, recompute — must be identical. +(set S1 (sum (til 100000))) +(.sys.gc) +(set S2 (sum (til 100000))) +S1 -- S2 +S1 -- 4999950000 diff --git a/test/rfl/integration/cow.rfl b/test/rfl/integration/cow.rfl new file mode 100644 index 00000000..bdb4c728 --- /dev/null +++ b/test/rfl/integration/cow.rfl @@ -0,0 +1,20 @@ +;; Copy-on-write semantics: two names bound to the "same" value see +;; identical content, but a derived operation must produce a new value +;; without mutating any aliasing name. If COW were broken, +;; (reverse y) would also reverse x. + +(set X [1 2 3]) +(set Y X) +(sum (== X Y)) -- 3 + +;; reverse leaves the source aliases intact +(set Z (reverse Y)) +X -- [1 2 3] + +;; asc on a different vector doesn't touch X +(set Z (asc [3 1 2])) +X -- [1 2 3] + +;; concat returns a new vec; original unchanged +(set Z (concat X [4 5])) +X -- [1 2 3] diff --git a/test/rfl/integration/joins.rfl b/test/rfl/integration/joins.rfl index cb58f0d1..40ed9a52 100644 --- a/test/rfl/integration/joins.rfl +++ b/test/rfl/integration/joins.rfl @@ -45,7 +45,7 @@ ;; window-join with multiple aggregations — both columns must be present (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Bid Ask] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [99 100 101] [110 111 112])))(set intervals (map-left + [-2000 2000] (at trades 'Time)))(set r (window-join [Sym Time] intervals trades quotes {lo: (min Bid) hi: (max Ask)}))(at r 'lo) -- [99 101] (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Bid Ask] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [99 100 101] [110 111 112])))(set intervals (map-left + [-2000 2000] (at trades 'Time)))(set r (window-join [Sym Time] intervals trades quotes {lo: (min Bid) hi: (max Ask)}))(at r 'hi) -- [111 112] -;; window-join canonical example (docs/queries-joins.html) +;; window-join rayforce1 canonical example (docs/queries-joins.html) ;; trades at 12:00:01, 12:00:04, 12:00:06 ± 1s windows ;; quotes at 12:00:00..12:00:09 sizes [928 528 648 914 918 626 577 817 620 698] ;; trade @ 01 window [00,02] -> sizes [928 528 648], min=528, max=928 @@ -73,7 +73,7 @@ ;; trade @ 05 window [04, 06] -> matches at 04 (1) (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Tag] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [foo bar baz])))(set intervals (map-left + [-1000 1000] (at trades 'Time)))(at (window-join [Sym Time] intervals trades quotes {n: (count Tag)}) 'n) -- [2 1] ;; window-join COUNT must include window matches whose source value is null -;; ((count Col) == COUNT(*), not COUNT(non-null Col)). +;; (kdb+/group.c semantics: (count Col) == COUNT(*), not COUNT(non-null Col)). ;; trade @ 01 window [00, 02]: Bid rows at 00(99) and 02(NULL) -> count=2, min=99 ;; trade @ 05 window [04, 06]: Bid row at 04(101) -> count=1, min=101 (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Bid] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [99 0Nl 101])))(set intervals (map-left + [-1000 1000] (at trades 'Time)))(at (window-join [Sym Time] intervals trades quotes {c: (count Bid)}) 'c) -- [2 1] @@ -127,3 +127,35 @@ (inner-join [a] [1 2 3] (table [a] (list [1]))) !- type ;; error: asof-join wrong arity (asof-join [a b]) !- arity + +;; ────────────────────────────────────────────────────────────────── +;; anti-join — rows of left whose key is NOT present in right +;; ────────────────────────────────────────────────────────────────── +(set lt (table [sym qty] (list ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA] [100 50 200 75 90]))) +(set bl (table [sym] (list ['GOOG 'MSFT]))) +(count (anti-join [sym] lt bl)) -- 3 +(sum (at (anti-join [sym] lt bl) 'qty)) -- 265 + +;; anti-join: empty exclusion → identity (left preserved) +(set bl0 (table [sym] (list (as 'SYMBOL [])))) +(count (anti-join [sym] lt bl0)) -- 5 + +;; anti-join: full exclusion → empty +(set bla (table [sym] (list ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA]))) +(count (anti-join [sym] lt bla)) -- 0 + +;; ────────────────────────────────────────────────────────────────── +;; inner-join + left-join: shared 10-row trades + 4-row quotes +;; ────────────────────────────────────────────────────────────────── +(set trades-10 (table [sym price size] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL] [150.0 2800.0 310.0 151.5 2795.0 309.0 152.0 2810.0 311.0 150.5] [100 50 200 150 40 250 120 60 180 90]))) +(set quotes-4 (table [sym bid ask] (list ['AAPL 'GOOG 'MSFT 'AMZN] [150.0 2799.5 309.5 3300.0] [150.5 2801.0 310.5 3301.0]))) + +;; inner-join keeps left rows that have a right match +(count (inner-join [sym] trades-10 quotes-4)) -- 10 +(sum (at (inner-join [sym] trades-10 quotes-4) 'bid)) -- 9927.0 + +;; left-join: every left row preserved (4 quotes survive, AMZN with no +;; trade match yields a null-padded right side under quotes-on-left) +(count (left-join [sym] quotes-4 trades-10)) -- 11 +(sum (at (left-join [sym] quotes-4 trades-10) 'bid)) -- 13227.0 +(count (select {from: (left-join [sym] quotes-4 trades-10) where: (== sym 'AMZN)})) -- 1 diff --git a/test/rfl/integration/morsel.rfl b/test/rfl/integration/morsel.rfl new file mode 100644 index 00000000..9d8b988f --- /dev/null +++ b/test/rfl/integration/morsel.rfl @@ -0,0 +1,52 @@ +;; Morsel-boundary stress: 1023 / 1024 / 1025, then 2047 / 2048 / 2049. +;; Rayforce executes vector ops in morsels of 1024. The interesting +;; failure mode is running state across morsel boundaries — sum, +;; count, scan, fold all need to carry partial results forward. + +;; ────────────── single-morsel boundary ────────────── +(set V1023 (til 1023)) +(set V1024 (til 1024)) +(set V1025 (til 1025)) + +;; closed-form sums: sum of 0..n-1 == n*(n-1)/2 +(sum V1023) -- 522753 +(sum V1024) -- 523776 +(sum V1025) -- 524800 + +;; count preservation under atomic ops +(count (+ V1023 1)) -- 1023 +(count (+ V1024 1)) -- 1024 +(count (+ V1025 1)) -- 1025 +(count (* V1023 2)) -- 1023 +(count (* V1024 2)) -- 1024 +(count (* V1025 2)) -- 1025 + +;; permutation invariance: sum is symmetric under reverse +(sum V1024) -- (sum (reverse V1024)) +(sum V1025) -- (sum (reverse V1025)) + +;; first/last around the boundary +(first V1024) -- 0 +(last V1024) -- 1023 +(first V1025) -- 0 +(last V1025) -- 1024 + +;; ────────────── two-morsel boundary ────────────── +;; 2047 (just under), 2048 (exact two morsels), 2049 (overflow into a +;; third morsel of one element). Running state across morsels is what +;; we exercise here. +(set V2047 (til 2047)) +(set V2048 (til 2048)) +(set V2049 (til 2049)) + +(sum V2047) -- 2094081 +(sum V2048) -- 2096128 +(sum V2049) -- 2098176 + +(count (+ V2047 7)) -- 2047 +(count (+ V2048 7)) -- 2048 +(count (+ V2049 7)) -- 2049 + +;; linearity across the boundary +(sum (* 3 V2048)) -- (* 3 (sum V2048)) +(sum (* 3 V2049)) -- (* 3 (sum V2049)) diff --git a/test/rfl/integration/null.rfl b/test/rfl/integration/null.rfl index add0b175..ed918065 100644 --- a/test/rfl/integration/null.rfl +++ b/test/rfl/integration/null.rfl @@ -6,6 +6,12 @@ (nil? 0) -- false (nil? 1) -- false (nil? "") -- false +;; nil? distinguishes typed nulls from zero-valued atoms across types +(nil? 0Ni) -- true +(nil? 0Nf) -- true +(nil? 5) -- false +(nil? 3.14) -- false +(nil? false) -- false ;; ========== NULL PROPAGATION ========== (+ 1 0Nl) -- 0Nl (* 5 0Nl) -- 0Nl diff --git a/test/rfl/integration/optimizer.rfl b/test/rfl/integration/optimizer.rfl new file mode 100644 index 00000000..8de3e9f5 --- /dev/null +++ b/test/rfl/integration/optimizer.rfl @@ -0,0 +1,54 @@ +;; Integration tests targeting the query optimizer: +;; const-fold, filter-reorder, predicate pushdown, selection-bitmap edges. +;; Each test asserts that the optimized output equals what a hand-rolled +;; non-optimized form produces. + +;; ────────────── filter reorder: and-conjoined predicates commute ────────────── +;; The optimizer is free to reorder `(and p q)` for selectivity. The +;; result must be identical to either order's hand-rolled equivalent. +(set T (table [sym price size] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG] [150.0 2800.0 310.0 151.5 2795.0] [100 50 200 150 40]))) +(set R1 (select {from: T where: (and (== sym 'AAPL) (> price 150))})) +(set R2 (select {from: T where: (and (> price 150) (== sym 'AAPL))})) +(count R1) -- (count R2) +(sum (at R1 'price)) -- (sum (at R2 'price)) +(sum (at R1 'size)) -- (sum (at R2 'size)) + +;; nested-select chain (filter, then filter again) must equal the single +;; conjoined form +(set R3 (select {from: (select {from: T where: (== sym 'AAPL)}) where: (> price 150)})) +(count R3) -- (count R1) + +;; ────────────── predicate pushdown across group-by ────────────── +;; (select s: agg from: t by: k where: pred) must equal manually +;; filtering then grouping. +(set Tg (table [g v] (list [1 1 2 2 2 3] [10 20 30 40 50 60]))) + +(set Pre (select {s: (sum v) from: Tg by: g where: (> v 20)})) +(set Manual (select {s: (sum v) from: (select {from: Tg where: (> v 20)}) by: g})) + +(count Pre) -- (count Manual) +(sum (at Pre 's)) -- (sum (at Manual 's)) +;; oracle: filter v>20 keeps {30,40,50,60}; group 2 sums 120, group 3 sums 60. +(sum (at Pre 's)) -- 180 + +;; ────────────── selection bitmap: all-pass / none-pass / single-row ────────────── +(set Ts (table [a b] (list [1 2 3 4 5] [10 20 30 40 50]))) + +;; all-pass — selection bitmap is all-ones; row count = source row count +(count (select {from: Ts where: (> a 0)})) -- 5 +(sum (at (select {from: Ts where: (> a 0)}) 'b)) -- 150 + +;; none-pass — bitmap is all-zeros; row count = 0 +(count (select {from: Ts where: (> a 1000)})) -- 0 +(sum (at (select {from: Ts where: (> a 1000)}) 'b)) -- 0 + +;; conjoined narrows: 2 < a < 5 keeps rows {3, 4} +(count (select {from: Ts where: (and (> a 2) (< a 5))})) -- 2 +(sum (at (select {from: Ts where: (and (> a 2) (< a 5))}) 'b)) -- 70 + +;; nested-select equivalent must match +(count (select {from: (select {from: Ts where: (> a 2)}) where: (< a 5)})) -- 2 + +;; single-row (==-keyed lookup) +(count (select {from: Ts where: (== a 3)})) -- 1 +(sum (at (select {from: Ts where: (== a 3)}) 'b)) -- 30 diff --git a/test/rfl/integration/str_pool.rfl b/test/rfl/integration/str_pool.rfl new file mode 100644 index 00000000..ac4375d7 --- /dev/null +++ b/test/rfl/integration/str_pool.rfl @@ -0,0 +1,44 @@ +;; Rayforce's RAY_STR has a 12-byte SSO boundary: strings ≤ 12 bytes are +;; stored inline in the 16-byte ray_str_t; longer strings live in a +;; per-vector pool with a 4-byte prefix for fast comparison rejection. +;; These tests pin down behaviour around the boundary. + +;; ────────────── count is independent of representation ────────────── +(count "") -- 0 +(count "x") -- 1 +(count "abcdefghijk") -- 11 +(count "abcdefghijkl") -- 12 +(count "abcdefghijklm") -- 13 +(count "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") -- 100 + +;; ────────────── equality across representations ────────────── +(== "abc" "abc") -- true +(!= "abc" "abd") -- true +(== "abcdefghijklm" "abcdefghijklm") -- true +;; inline string is never equal to a strictly-longer pool string +(!= "abcdefghijkl" "abcdefghijklm") -- true + +;; ────────────── concat across the boundary ────────────── +;; 12 + 1 = 13 → result lands in the pool +(count (concat "abcdefghijkl" "x")) -- 13 +(concat "abcdefghijkl" "x") -- "abcdefghijklx" + +;; concat fits within inline (3 + 3 = 6) +(concat "abc" "def") -- "abcdef" +(count (concat "abc" "def")) -- 6 + +;; concat pool + inline / pool + pool retains length +(concat "abcdefghijklm" "!") -- "abcdefghijklm!" +(count (concat "abcdefghijklm" "!")) -- 14 + +;; concat with empty operand +(concat "" "abc") -- "abc" +(concat "abc" "") -- "abc" +(concat "" "") -- "" + +;; ────────────── like across the boundary ────────────── +(like "abcdef" "abc*") -- true +(like "abcdefghijklmnop" "abcdef*") -- true +(like "abcdefghijklmnop" "*ghij*") -- true +(like "" "") -- true +(like "" "x") -- false diff --git a/test/rfl/sort/asc.rfl b/test/rfl/sort/asc.rfl index d0f83028..07ff8a37 100644 --- a/test/rfl/sort/asc.rfl +++ b/test/rfl/sort/asc.rfl @@ -22,3 +22,39 @@ N -- (sum (<= (at S (til N)) (at S (+ 1 (til N))))) ;; idempotent (asc (asc V)) -- (asc V) + +;; ────────────────────────────────────────────────────────────────── +;; Float / negatives / mixed +;; ────────────────────────────────────────────────────────────────── + +(asc [3.14 1.41 2.71]) -- [1.41 2.71 3.14] +(asc [-1 -3 -2]) -- [-3 -2 -1] + +;; ────────────────────────────────────────────────────────────────── +;; Morsel boundaries — 1023 / 1024 / 1025 reversed +;; ────────────────────────────────────────────────────────────────── + +(set V1 (reverse (til 1023))) +(count (asc V1)) -- 1023 +(sum (asc V1)) -- 522753 +(first (asc V1)) -- 0 +(last (asc V1)) -- 1022 + +(set V2 (reverse (til 1024))) +(count (asc V2)) -- 1024 +(sum (asc V2)) -- 523776 +(first (asc V2)) -- 0 +(last (asc V2)) -- 1023 + +(set V3 (reverse (til 1025))) +(count (asc V3)) -- 1025 +(sum (asc V3)) -- 524800 +(first (asc V3)) -- 0 +(last (asc V3)) -- 1024 + +;; ────────────────────────────────────────────────────────────────── +;; Idempotence and asc + reverse == desc +;; ────────────────────────────────────────────────────────────────── + +(asc (asc V2)) -- (asc V2) +(reverse (asc V2)) -- (desc V2) diff --git a/test/rfl/sort/iasc.rfl b/test/rfl/sort/iasc.rfl index 19a9b5b0..428deffe 100644 --- a/test/rfl/sort/iasc.rfl +++ b/test/rfl/sort/iasc.rfl @@ -2,6 +2,8 @@ (iasc [30 10 20]) -- [1 2 0] (iasc [5]) -- [0] +(iasc [1 2 3]) -- [0 1 2] +(iasc [3 2 1]) -- [2 1 0] ;; indices sort to ascending values (set V (rand 200 10000)) @@ -9,3 +11,12 @@ ;; is a permutation of (til n) (asc (iasc V)) -- (til (count V)) + +;; stable: ties preserve source order +(iasc [2 1 2 1]) -- [1 3 0 2] + +;; floats +(iasc [3.14 1.41 2.71]) -- [1 2 0] + +;; negatives +(iasc [-1 -3 -2]) -- [1 2 0] diff --git a/test/rfl/sort/rank.rfl b/test/rfl/sort/rank.rfl index e9fc8c98..9b3ca91a 100644 --- a/test/rfl/sort/rank.rfl +++ b/test/rfl/sort/rank.rfl @@ -6,5 +6,21 @@ (set V (rand 100 10000)) (asc (rank V)) -- (til (count V)) -;; rank is inverse of iasc: (at (iasc V) (rank V)) == (til n) -- is this right? -;; Actually: (at V (iasc V)) is sorted; rank tells us where each V[i] lands. +;; concrete +(rank [1 2 3 4 5]) -- [0 1 2 3 4] +(rank [5 4 3 2 1]) -- [4 3 2 1 0] + +;; sum of ranks is n*(n-1)/2 +(set V [30 10 40 20]) +(sum (rank V)) -- 6 + +;; rank of min is 0, max is n-1 +(at (rank V) 1) -- 0 +(at (rank V) 2) -- 3 + +;; (at v (iasc v)) == (asc v) +(set V (rand 100 10000)) +(at V (iasc V)) -- (asc V) + +;; floats +(rank [3.14 1.41 2.71]) -- [2 0 1] diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl index 92e7be85..b07d1e43 100644 --- a/test/rfl/strop/like.rfl +++ b/test/rfl/strop/like.rfl @@ -6,3 +6,32 @@ (like "hello" "*o") -- true (like "hello" "*") -- true (like "hello" "h?llo") -- true + +;; ────────────── extended literal / wildcard coverage ────────────── +(like "hello world" "hello*") -- true +(like "hello world" "*world") -- true +(like "foobar" "*oo*") -- true +(like "cat" "c?t") -- true +(like "cat" "c??t") -- false + +;; like is anchored, not a substring search — "ell" does NOT match "hello" +(like "hello" "ell") -- false + +;; ────────────── empty operands ────────────── +(like "" "") -- true +(like "" "x") -- false +(like "x" "") -- false +(like "" "*") -- true + +;; ────────────── universal-star metamorphic invariants ────────────── +;; `*` matches anything, including punctuation / digits / mixed bytes. +(like "abc" "*") -- true +(like "xyz 123 !@#" "*") -- true + +;; idempotence: text matches itself when the pattern has no meta-chars +(like "plain text" "plain text") -- true +(like "foo bar" "foo bar") -- true + +;; literal prefix/suffix combined with one star +(like "hello world" "hello *") -- true +(like "hello world" "* world") -- true diff --git a/test/rfl/system/csv_roundtrip.rfl b/test/rfl/system/csv_roundtrip.rfl new file mode 100644 index 00000000..aa99aed9 --- /dev/null +++ b/test/rfl/system/csv_roundtrip.rfl @@ -0,0 +1,75 @@ +;; Round-trip invariants for `.csv.write` + `.csv.read` (no schema arg). +;; The schema-less form auto-infers column types from the first data row; +;; this test pins down the inference rules: ints → I64, mixed-precision +;; floats → F64, integer-valued floats collapse to I64, strings → SYM. +;; +;; Tests use unique /tmp/ paths and depend on a clean /tmp. + +;; ────────────── int round-trip ────────────── +(set Tint (table [a b] (list [1 2 3 4 5] [10 20 30 40 50]))) +(.csv.write Tint "/tmp/rfl_csv_int.csv") +(set Rint (.csv.read "/tmp/rfl_csv_int.csv")) +(count Rint) -- 5 +(sum (at Rint 'a)) -- 15 +(sum (at Rint 'b)) -- 150 +(type (at Rint 'a)) -- 'I64 +(key Rint) -- (key Tint) + +;; ────────────── float round-trip with non-integer values ────────────── +(set Tflt (table [x y] (list [1.5 2.5 3.14] [10.1 20.2 30.3]))) +(.csv.write Tflt "/tmp/rfl_csv_float.csv") +(set Rflt (.csv.read "/tmp/rfl_csv_float.csv")) +(count Rflt) -- 3 +(type (at Rflt 'x)) -- 'F64 +(first (at Rflt 'x)) -- 1.5 +(at (at Rflt 'x) 1) -- 2.5 + +;; integer-valued floats collapse to I64 on read (documented inference) +(set Tiv (table [x] (list [10.0 20.0 30.0]))) +(.csv.write Tiv "/tmp/rfl_csv_ivfloat.csv") +(set Riv (.csv.read "/tmp/rfl_csv_ivfloat.csv")) +(count Riv) -- 3 +(sum (at Riv 'x)) -- 60 +(type (at Riv 'x)) -- 'I64 + +;; ────────────── string round-trip — string column inferred as SYM ────────────── +(set Tstr (table [name score] (list ["Alice" "Bob" "Charlie"] [10 20 30]))) +(.csv.write Tstr "/tmp/rfl_csv_str.csv") +(set Rstr (.csv.read "/tmp/rfl_csv_str.csv")) +(count Rstr) -- 3 +(type (at Rstr 'name)) -- 'SYM +(sum (at Rstr 'score)) -- 60 +(first (at Rstr 'name)) -- 'Alice + +;; ────────────── multi-column mixed types ────────────── +(set Tmix (table [id name qty price] (list [1 2 3 4 5] ["AAPL" "GOOG" "MSFT" "AAPL" "GOOG"] [100 50 200 150 40] [150.5 2800.0 310.25 151.5 2795.75]))) +(.csv.write Tmix "/tmp/rfl_csv_mixed.csv") +(set Rmix (.csv.read "/tmp/rfl_csv_mixed.csv")) +(count Rmix) -- 5 +(sum (at Rmix 'id)) -- 15 +(sum (at Rmix 'qty)) -- 540 +(type (at Rmix 'price)) -- 'F64 +(sum (at Rmix 'price)) -- 6208.0 +(type (at Rmix 'name)) -- 'SYM +(key Rmix) -- (key Tmix) + +;; ────────────── morsel-boundary sizes ────────────── +(set T1 (table [a] (list [42]))) +(.csv.write T1 "/tmp/rfl_csv_one.csv") +(set R1 (.csv.read "/tmp/rfl_csv_one.csv")) +(count R1) -- 1 +(first (at R1 'a)) -- 42 + +;; exactly 1024 rows — morsel boundary +(set T1024 (table [n] (list (til 1024)))) +(.csv.write T1024 "/tmp/rfl_csv_1024.csv") +(set R1024 (.csv.read "/tmp/rfl_csv_1024.csv")) +(count R1024) -- 1024 +(sum (at R1024 'n)) -- 523776 + +;; 1100 rows — crosses morsel boundary mid-stream +(set T1100 (table [n] (list (til 1100)))) +(.csv.write T1100 "/tmp/rfl_csv_1100.csv") +(set R1100 (.csv.read "/tmp/rfl_csv_1100.csv")) +(count R1100) -- 1100 +(sum (at R1100 'n)) -- 604450 diff --git a/test/rfl/system/serde.rfl b/test/rfl/system/serde.rfl index a05c7f5b..de864774 100644 --- a/test/rfl/system/serde.rfl +++ b/test/rfl/system/serde.rfl @@ -14,3 +14,15 @@ (de (ser 3.14)) -- 3.14 (de (ser 'hello)) -- 'hello (de (ser "world")) -- "world" +(de (ser true)) -- true + +;; ────────────── homogeneous vectors round-trip ────────────── +(de (ser [1 2 3])) -- [1 2 3] +(de (ser [1.5 2.5])) -- [1.5 2.5] +(de (ser ["a" "b"])) -- ["a" "b"] +(de (ser ['a 'b])) -- ['a 'b] + +;; ────────────── vectors with embedded nulls ────────────── +;; Both the value bits AND the null mask must survive serialize/deser. +(count (de (ser [1 0N 3]))) -- 3 +(sum (de (ser [1 0N 3]))) -- 4 diff --git a/test/rfl/system/splayed.rfl b/test/rfl/system/splayed.rfl new file mode 100644 index 00000000..fe8e725d --- /dev/null +++ b/test/rfl/system/splayed.rfl @@ -0,0 +1,32 @@ +;; Round-trip invariants for `set-splayed` / `get-splayed` — write a +;; table to a directory as one file per column, then read it back. +;; +;; Tests use unique /tmp/ paths because the overwrite path is a known +;; bug (`error: corrupt`); these only exercise the fresh-write path. +;; Stale data from prior runs would also flip these to "corrupt", so +;; CI environments should clean /tmp/rfl_splayed_* between runs. + +;; ────────────── small table ────────────── +(set T-Small (table [a b] (list [1 2 3 4 5] [10.0 20.0 30.0 40.0 50.0]))) +(set-splayed "/tmp/rfl_splayed_small/" T-Small) +(set R-Small (get-splayed "/tmp/rfl_splayed_small/")) + +(count R-Small) -- 5 +(sum (at R-Small 'a)) -- 15 +(sum (at R-Small 'b)) -- 150.0 + +;; column names survive the round trip (key returns the SYM vec) +(key R-Small) -- (key T-Small) + +;; column values match exactly +(at R-Small 'a) -- (at T-Small 'a) +(at R-Small 'b) -- (at T-Small 'b) + +;; ────────────── morsel-boundary table (1024 rows) ────────────── +(set T-1024 (table [n] (list (til 1024)))) +(set-splayed "/tmp/rfl_splayed_1024/" T-1024) +(set R-1024 (get-splayed "/tmp/rfl_splayed_1024/")) + +(count R-1024) -- 1024 +;; sum of til 1024 = 1023*1024/2 +(sum (at R-1024 'n)) -- 523776 diff --git a/test/rfl/table/meta.rfl b/test/rfl/table/meta.rfl index d3a0d89b..37f26140 100644 --- a/test/rfl/table/meta.rfl +++ b/test/rfl/table/meta.rfl @@ -1,8 +1,27 @@ -;; Invariants for `meta` — per-table metadata dict. +;; Invariants for `meta` — describes any object as a {type, len?} dict. +;; Atoms get only :type; containers (vectors, tables, dicts) also expose +;; :len. For tables, :len is the column count (not the row count). +;; ────────────── tables ────────────── (set T (table [a b c] (list [1 2] [1.0 2.0] ['x 'y]))) (type (meta T)) -- 'DICT - -;; meta exposes at least type and len fields (at (meta T) 'type) -- 'TABLE (at (meta T) 'len) -- 3 + +;; ────────────── atoms — :type only, no :len ────────────── +(at (meta 42) 'type) -- 'i64 +(at (meta 3.14) 'type) -- 'f64 +(at (meta "x") 'type) -- 'str +(at (meta 'foo) 'type) -- 'sym +(at (meta true) 'type) -- 'b8 +(at (meta 2024.01.15) 'type) -- 'date + +;; ────────────── vectors — :type + :len ────────────── +(at (meta [1 2 3]) 'type) -- 'I64 +(at (meta [1 2 3]) 'len) -- 3 +(at (meta [1.0 2.0]) 'type) -- 'F64 +(at (meta ["a" "b"]) 'type) -- 'STR +(at (meta ['a 'b]) 'type) -- 'SYM + +;; ────────────── dict ────────────── +(at (meta (dict ['x 'y 'z] [1 2 3])) 'type) -- 'DICT diff --git a/test/rfl/table/modify.rfl b/test/rfl/table/modify.rfl new file mode 100644 index 00000000..5e39c3bf --- /dev/null +++ b/test/rfl/table/modify.rfl @@ -0,0 +1,28 @@ +;; Invariants for `modify` — functional column update on a table. +;; (modify tbl col-name fn) returns a new table with the named column +;; transformed by fn; the source table is left untouched (functional). + +(set T (table [a b] (list [1 2 3 4 5] [10.0 20.0 30.0 40.0 50.0]))) + +;; row count preserved +(set T2 (modify T 'a (fn [x] (* x 10)))) +(count T2) -- 5 + +;; the named column is transformed +(sum (at T2 'a)) -- 150 + +;; non-named columns are passed through unchanged +(sum (at T2 'b)) -- (sum (at T 'b)) + +;; source table is untouched +(sum (at T 'a)) -- 15 + +;; modifying b with a float-returning fn updates only b +(set T3 (modify T 'b (fn [x] (+ x 1.0)))) +(sum (at T3 'b)) -- 155.0 +(sum (at T3 'a)) -- 15 + +;; chained modifies compose +(set T4 (modify (modify T 'a (fn [x] (neg x))) 'b (fn [x] (* x 2.0)))) +(sum (at T4 'a)) -- -15 +(sum (at T4 'b)) -- 300.0 diff --git a/test/rfl/table/pivot.rfl b/test/rfl/table/pivot.rfl new file mode 100644 index 00000000..4e3a35c5 --- /dev/null +++ b/test/rfl/table/pivot.rfl @@ -0,0 +1,25 @@ +;; Invariants for `pivot` — wide-table reshape across one (or more) row +;; keys, one pivot key, and one value column with an aggregator. + +(set trades (table [sym side qty] (list ['AAPL 'GOOG 'AAPL 'MSFT 'AAPL 'GOOG] ['Buy 'Sell 'Sell 'Buy 'Buy 'Buy] [100 50 75 200 50 30]))) + +;; ────────────── pivot with sum aggregator ────────────── +(set Pivot-Sum (pivot trades 'sym 'side 'qty sum)) + +;; row count is the number of distinct row-keys +(count Pivot-Sum) -- 3 + +;; per-side sums match what a hand group-by would produce +(sum (at Pivot-Sum 'Buy)) -- 380 +(sum (at Pivot-Sum 'Sell)) -- 125 + +;; total across pivoted columns equals total across the source value col +(+ (sum (at Pivot-Sum 'Buy)) (sum (at Pivot-Sum 'Sell))) -- (sum (at trades 'qty)) + +;; ────────────── pivot with count aggregator ────────────── +(set Pivot-Count (pivot trades 'sym 'side 'qty count)) + +(count Pivot-Count) -- 3 + +;; total cells == total source rows +(+ (sum (at Pivot-Count 'Buy)) (sum (at Pivot-Count 'Sell))) -- (count trades) diff --git a/test/rfl/table/select.rfl b/test/rfl/table/select.rfl new file mode 100644 index 00000000..b865d9d2 --- /dev/null +++ b/test/rfl/table/select.rfl @@ -0,0 +1,117 @@ +;; Invariants for `select` — table query with where / by / take / asc / desc. +;; +;; Tests are organised by clause, then by combinations. Two fixtures are +;; used: a 10-row "trades" table for basics + projection + a single-key +;; group-by, and a 15-row "trades-15" table for systematic clause coverage +;; with sym/price/size/tms/d. + +;; ────────────── 10-row fixture ────────────── +(set trades (table [sym price size] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL] [150.0 2800.0 310.0 151.5 2795.0 309.0 152.0 2810.0 311.0 150.5] [100 50 200 150 40 250 120 60 180 90]))) + +;; ── basics +(count trades) -- 10 +(count (select {from: trades})) -- 10 +(count (select {from: trades where: (== sym 'AAPL)})) -- 4 +(count (select {from: trades where: (> price 200)})) -- 6 +(sum (at trades 'price)) -- 9939.0 +(sum (at trades 'size)) -- 1240 +(max (at trades 'price)) -- 2810.0 +(min (at trades 'size)) -- 40 +(count (distinct (at trades 'sym))) -- 3 + +;; ── projection: derived columns retain row count + arithmetic semantics +(sum (at (select {notional: (* price size) from: trades}) 'notional)) -- 685140.0 +(sum (at (select {ps: (+ price size) from: trades}) 'ps)) -- 11179.0 + +;; ── single-key group-by on the 10-row fixture +(count (select {s: (sum size) from: trades by: sym})) -- 3 +(sum (at (select {s: (sum size) from: trades by: sym}) 's)) -- 1240 +(sum (at (select {s: (sum size) from: trades by: sym where: (== sym 'AAPL)}) 's)) -- 460 + +;; ────────────── 15-row fixture (sym, price, size, tms, d) ────────────── +(set trades-15 (table [sym price size tms d] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AMZN 'TSLA 'AAPL 'GOOG 'MSFT 'TSLA] [150.0 2800.0 310.0 151.5 2795.0 309.0 152.0 2810.0 311.0 3300.0 700.0 150.5 2805.0 312.0 702.5] [100 50 200 150 40 250 120 60 180 75 90 110 55 190 95] [09:30:15.000 09:30:15.100 09:30:15.200 09:30:15.300 09:30:15.400 09:30:15.500 09:30:15.600 09:30:15.700 09:30:15.800 09:30:15.900 09:30:16.000 09:30:16.100 09:30:16.200 09:30:16.300 09:30:16.400] [2024.01.15 2024.01.15 2024.01.15 2024.01.15 2024.01.15 2024.01.16 2024.01.16 2024.01.16 2024.01.16 2024.01.16 2024.01.17 2024.01.17 2024.01.17 2024.01.17 2024.01.17]))) + +;; ── where: simple comparisons against scalars +(count (select {from: trades-15 where: (== sym 'AAPL)})) -- 4 +(count (select {from: trades-15 where: (!= sym 'MSFT)})) -- 11 +(count (select {from: trades-15 where: (> price 200.0)})) -- 11 +(count (select {from: trades-15 where: (< size 100)})) -- 7 +(count (select {from: trades-15 where: (>= size 100)})) -- 8 +(count (select {from: trades-15 where: (<= price 2000.0)})) -- 10 + +;; ── where: compound and / or / not +(count (select {from: trades-15 where: (and (> price 500.0) (< size 200))})) -- 7 +(count (select {from: trades-15 where: (or (== sym 'AMZN) (== sym 'TSLA))})) -- 3 +(count (select {from: trades-15 where: (not (== sym 'AAPL))})) -- 11 +(count (select {from: trades-15 where: (and (> size 50) (or (== sym 'AAPL) (== sym 'GOOG)))})) -- 6 + +;; ── projection: total invariants under arithmetic columns +(sum (at (select {notional: (* price size) from: trades-15}) 'notional)) -- 1278942.5 +(sum (at (select {ps: (+ price size) from: trades-15}) 'ps)) -- 19523.5 +(count (select {t: sym from: trades-15})) -- 15 + +;; ── sort: asc / desc preserve totals; first-row equals min/max +(sum (at (select {from: trades-15 asc: price}) 'price)) -- 17758.5 +(sum (at (select {from: trades-15 desc: price}) 'price)) -- 17758.5 +(first (at (select {from: trades-15 asc: size}) 'size)) -- 40 +(first (at (select {from: trades-15 desc: size}) 'size)) -- 250 +(count (select {from: trades-15 asc: tms})) -- 15 + +;; ── take: clamping behavior +(count (select {from: trades-15 take: 5})) -- 5 +(count (select {from: trades-15 take: 0})) -- 0 +(count (select {from: trades-15 take: 15})) -- 15 + +;; ── edge cases: all-pass / none-pass / single-row / by-distinct-time +(count (select {from: trades-15 where: (> size -1)})) -- 15 +(count (select {from: trades-15 where: (> size 999999)})) -- 0 +(count (select {from: trades-15 where: (== size 200)})) -- 1 +(count (select {s: (sum size) from: trades-15 by: tms})) -- 15 +(count (select {s: (sum size) from: trades-15 by: d})) -- 3 + +;; ── group-by clause coverage on the 15-row fixture +(count (select {s: (sum size) from: trades-15 by: sym})) -- 5 +(sum (at (select {s: (sum size) from: trades-15 by: sym}) 's)) -- 1765 +(sum (at (select {p: (sum price) from: trades-15 by: sym}) 'p)) -- 17758.5 +(sum (at (select {s: (sum size) from: trades-15 by: sym where: (== sym 'AAPL)}) 's)) -- 480 +(sum (at (select {c: (count size) from: trades-15 by: sym}) 'c)) -- 15 + +;; ── combo: where + by + projection / asc + take / no-match +(count (select {from: trades-15 where: (> price 500.0) asc: price take: 3})) -- 3 +(count (select {s: (sum size) from: trades-15 by: sym asc: sym})) -- 5 +(count (select {from: trades-15 where: (== sym 'NOPE) by: sym})) -- 0 +(first (at (select {p: (avg price) from: trades-15 by: sym where: (== sym 'GOOG)}) 'p)) -- 2802.5 + +;; ────────────── multi-key group-by + multi-aggregator fixture ────────────── +(set trades-mk (table [sym sector side qty price] (list ['AAPL 'AAPL 'AAPL 'GOOG 'GOOG 'MSFT 'MSFT 'MSFT 'AMZN 'AMZN 'WMT 'WMT 'TSLA 'TSLA] ['Tech 'Tech 'Tech 'Tech 'Tech 'Tech 'Tech 'Tech 'Retail 'Retail 'Retail 'Retail 'Auto 'Auto] ['Buy 'Sell 'Buy 'Buy 'Sell 'Buy 'Sell 'Buy 'Buy 'Sell 'Buy 'Sell 'Buy 'Sell] [100 50 75 40 60 200 80 100 75 25 150 90 90 30] [150.0 151.0 149.5 2800.0 2795.0 310.0 311.0 309.0 3300.0 3305.0 180.0 181.0 700.0 705.0]))) + +;; ── single key +(count (select {s: (sum qty) from: trades-mk by: sym})) -- 6 +(sum (at (select {s: (sum qty) from: trades-mk by: sym}) 's)) -- 1165 +(sum (at (select {p: (sum price) from: trades-mk by: sym}) 'p)) -- 15346.5 +(sum (at (select {c: (count qty) from: trades-mk by: sym}) 'c)) -- 14 + +;; ── two-key +(count (select {s: (sum qty) from: trades-mk by: [sector side]})) -- 6 +(sum (at (select {s: (sum qty) from: trades-mk by: [sector side]}) 's)) -- 1165 +(sum (at (select {s: (sum qty) from: trades-mk by: [sector side] where: (and (== sector 'Tech) (== side 'Buy))}) 's)) -- 515 + +;; ── three-key +(count (select {s: (sum qty) from: trades-mk by: [sym sector side]})) -- 12 +(sum (at (select {s: (sum qty) from: trades-mk by: [sym sector side]}) 's)) -- 1165 + +;; ── int key (non-symbol grouping) +(count (select {s: (sum price) from: trades-mk by: qty})) -- 11 +(sum (at (select {s: (sum price) from: trades-mk by: qty}) 's)) -- 15346.5 + +;; ── filter-then-group preserves filtered totals +(count (select {s: (sum qty) from: trades-mk by: sym where: (== side 'Buy)})) -- 6 +(sum (at (select {s: (sum qty) from: trades-mk by: sym where: (== side 'Buy)}) 's)) -- 830 + +;; ── multi-aggregator on a single group (AAPL) +(set aapl (select {s: (sum qty) c: (count qty) mx: (max price) mn: (min price) av: (avg price) from: trades-mk by: sym where: (== sym 'AAPL)})) +(first (at aapl 's)) -- 225 +(first (at aapl 'c)) -- 3 +(first (at aapl 'mx)) -- 151.0 +(first (at aapl 'mn)) -- 149.5 +(first (at aapl 'av)) -- 150.16666666666666 diff --git a/test/rfl/temporal/date.rfl b/test/rfl/temporal/date.rfl index b23c0d98..06aaf382 100644 --- a/test/rfl/temporal/date.rfl +++ b/test/rfl/temporal/date.rfl @@ -7,3 +7,39 @@ (yyyy 2024.06.15) -- 2024 (mm 2024.06.15) -- 6 (dd 2024.06.15) -- 15 + +;; ────────────── month-boundary arithmetic ────────────── +;; date + days / date - days / date - date all stay in DATE space. +(+ 2024.01.31 1) -- 2024.02.01 +(+ 2024.03.31 1) -- 2024.04.01 +(+ 2024.12.31 1) -- 2025.01.01 + +;; ────────────── leap-year edges ────────────── +;; 2024 is leap (div 4); Feb 28 → Feb 29 → Mar 1 with single-day steps. +(+ 2024.02.28 1) -- 2024.02.29 +(+ 2024.02.29 1) -- 2024.03.01 +;; 2023 is not leap; Feb 28 → Mar 1 directly. +(+ 2023.02.28 1) -- 2023.03.01 +;; Century rule: 2000 is leap (div 400), 2100 is not (div 100, not 400). +(+ 2000.02.28 1) -- 2000.02.29 +(+ 2100.02.28 1) -- 2100.03.01 + +;; ────────────── subtraction ────────────── +(- 2024.03.01 1) -- 2024.02.29 +(- 2023.03.01 1) -- 2023.02.28 +(- 2025.01.01 1) -- 2024.12.31 +;; before epoch (date 0 = 2000.01.01) +(- 2000.01.01 1) -- 1999.12.31 + +;; ────────────── date - date = days (integer) ────────────── +(- 2024.03.01 2024.02.01) -- 29 +(- 2023.03.01 2023.02.01) -- 28 +(- 2025.01.01 2024.01.01) -- 366 +(- 2024.01.01 2023.01.01) -- 365 + +;; ────────────── ordering ────────────── +(< 2024.01.01 2024.01.02) -- true +(> 2024.12.31 2024.01.01) -- true +(== 2024.06.15 2024.06.15) -- true +(<= 2024.01.01 2024.01.01) -- true +(>= 2024.12.31 2024.06.15) -- true diff --git a/test/rfl/temporal/time.rfl b/test/rfl/temporal/time.rfl index dc4cd62a..0c935fec 100644 --- a/test/rfl/temporal/time.rfl +++ b/test/rfl/temporal/time.rfl @@ -6,3 +6,16 @@ (hh 12:30:45.000) -- 12 (minute 12:30:45.000) -- 30 (ss 12:30:45.000) -- 45 + +;; with sub-second precision the integer extractors still drop ms +(hh 12:30:45.123) -- 12 +(minute 12:30:45.123) -- 30 +(ss 12:30:45.123) -- 45 + +;; midnight / end-of-day boundaries +(hh 00:00:00.000) -- 0 +(hh 23:59:59.999) -- 23 + +;; ordering +(< 09:00:00.000 10:00:00.000) -- true +(== 12:30:45.000 12:30:45.000) -- true diff --git a/test/rfl/type/as.rfl b/test/rfl/type/as.rfl index 7fe2782a..4e8f7b81 100644 --- a/test/rfl/type/as.rfl +++ b/test/rfl/type/as.rfl @@ -367,6 +367,45 @@ (as 'I32 (list 1i 2i 3i)) -- [1i 2i 3i] (as 'F64 (list 1.0 2.0 3.0)) -- [1.0 2.0 3.0] (as 'B8 (list true false true)) -- [true false true] +;; ========== STRING → NUMERIC, EDGE VALUES ========== +;; integer-valued floats from strings +(as 'f64 "0.0") -- 0.0 +(as 'f64 "100") -- 100.0 + +;; INT16/INT32 boundary parses — negative-extreme literals can't be written +;; (parser tokenises positive then negates), so verify via i64 round-trip. +(as 'i64 (as 'i16 "-32768")) -- -32768 +(as 'i64 (as 'i16 "32767")) -- 32767 +(as 'i64 (as 'i32 "-2147483648")) -- -2147483648 +(as 'i64 (as 'i32 "2147483647")) -- 2147483647 + +;; ========== NULL PRESERVATION ACROSS CASTS ========== +;; Casting any null returns null of target type, never sentinel/INT_MIN. +(nil? (as 'i64 0Nh)) -- true +(nil? (as 'i64 0Ni)) -- true +(nil? (as 'i32 0N)) -- true +(nil? (as 'i16 0N)) -- true +(nil? (as 'f64 0N)) -- true +(nil? (as 'i64 0Nf)) -- true +(nil? (as 'i32 0Nf)) -- true +(nil? (as 'i16 0Nf)) -- true +(nil? (as 'i64 0Nd)) -- true + +;; ========== TEMPORAL OFFSET SEMANTICS ========== +;; DATE is days since 2000.01.01, TIME is ms since midnight. +(as 'i64 2000.01.01) -- 0 +(as 'i64 2000.01.02) -- 1 +(as 'i64 1999.12.31) -- -1 +(as 'i64 00:00:00.000) -- 0 +(as 'i64 00:00:01.000) -- 1000 +(as 'i64 01:00:00.000) -- 3600000 +(as 'i64 23:59:59.999) -- 86399999 + +;; integer → DATE/TIME round-trip +(as 'date 0) -- 2000.01.01 +(as 'date 1) -- 2000.01.02 +(as 'time 1000) -- 00:00:01.000 + ;; ========== PARALLEL CAST (large vectors) ========== ;; i64 -> i32 (large vector triggers parallel processing) (sum (as 'I32 (til 100000))) -- 4999950000 From 153e9e95303beb084b1f72f0b669e3e3d8e27d9e Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 14:46:12 +0300 Subject: [PATCH 02/21] fix(lang): raise arity on wrong-arg-count for UNARY/BINARY builtins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously +/-/* and other RAY_BINARY verbs silently truncated extras: (+ 1 2 3) -> 3 (third arg dropped) (+ 1 2 3 4 5) -> 3 (- 10 1 2) -> 9 (* 2 3 4) -> 6 Same hole in RAY_UNARY: extras after the first argument were released without raising. `(+ 1)` and `(+)` were already domain errors but the 3+-arg case fell through to the binary kernel against the first two args and threw the rest away. Now both dispatch sites — the bytecode VM (eval.c:1656) and the tree- walking eval (eval.c:2378) — raise: arity: expected N arg(s), got M `and` and `or` were registered as RAY_BINARY but Anton's tests expected variadic fold semantics ((and a b c) -> fold AND). Switch them to RAY_VARY via ray_and_vary_fn / ray_or_vary_fn — left-fold over the existing binary kernel. The (and X Y) DAG path through select where: clauses still emits OP_AND / OP_OR via the expression compiler, so fused execution is unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lang/eval.c | 14 ++++++-------- src/lang/eval.h | 2 ++ src/ops/cmp.c | 28 ++++++++++++++++++++++++++++ test/rfl/arith/add.rfl | 10 ++++++++++ test/rfl/arith/mul.rfl | 5 +++++ test/rfl/arith/sub.rfl | 5 +++++ test/rfl/cmp/and.rfl | 10 ++++++++++ test/rfl/cmp/or.rfl | 10 ++++++++++ 8 files changed, 76 insertions(+), 8 deletions(-) diff --git a/src/lang/eval.c b/src/lang/eval.c index aeaae3c6..89f7dc01 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -1661,18 +1661,16 @@ op_callf: { switch (fn_obj->type) { case RAY_UNARY: if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; } - if (n < 1) { result = ray_error("arity", "expected 1 arg, got 0"); break; } + if (n != 1) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 1 arg, got %d", n); break; } result = ((ray_unary_fn)(uintptr_t)fn_obj->i64)(fn_args[0]); ray_release(fn_args[0]); - for (int32_t i = 1; i < n; i++) ray_release(fn_args[i]); break; case RAY_BINARY: if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; } - if (n < 2) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 2 args, got %d", n); break; } + if (n != 2) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 2 args, got %d", n); break; } result = ((ray_binary_fn)(uintptr_t)fn_obj->i64)(fn_args[0], fn_args[1]); ray_release(fn_args[0]); ray_release(fn_args[1]); - for (int32_t i = 2; i < n; i++) ray_release(fn_args[i]); break; case RAY_VARY: if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; } @@ -2021,8 +2019,8 @@ static void ray_register_builtins(void) { register_binary_op("<=", RAY_FN_ATOMIC, ray_lte_fn, OP_LE); register_binary_op("==", RAY_FN_ATOMIC, ray_eq_fn, OP_EQ); register_binary_op("!=", RAY_FN_ATOMIC, ray_neq_fn, OP_NE); - register_binary_op("and", RAY_FN_NONE, ray_and_fn, OP_AND); - register_binary_op("or", RAY_FN_NONE, ray_or_fn, OP_OR); + register_vary("and", RAY_FN_NONE, ray_and_vary_fn); + register_vary("or", RAY_FN_NONE, ray_or_vary_fn); register_unary_op("not", RAY_FN_NONE, ray_not_fn, OP_NOT); register_unary_op("neg", RAY_FN_ATOMIC, ray_neg_fn, OP_NEG); register_unary("round", RAY_FN_ATOMIC, ray_round_fn); @@ -2392,7 +2390,7 @@ ray_t* ray_eval(ray_t* obj) { switch (head->type) { case RAY_UNARY: { - if (n < 2) { ray_release(head); ret = ray_error("domain", NULL); goto out; } + if (n != 2) { ray_release(head); ret = ray_error("arity", "expected 1 arg, got %d", (int)(n-1)); goto out; } if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; } ray_unary_fn fn = (ray_unary_fn)(uintptr_t)head->i64; uint8_t fn_attrs = head->attrs; @@ -2412,7 +2410,7 @@ ray_t* ray_eval(ray_t* obj) { ret = result; goto out; } case RAY_BINARY: { - if (n < 3) { ray_release(head); ret = ray_error("domain", NULL); goto out; } + if (n != 3) { ray_release(head); ret = ray_error("arity", "expected 2 args, got %d", (int)(n-1)); goto out; } if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; } ray_binary_fn fn = (ray_binary_fn)(uintptr_t)head->i64; uint8_t fn_attrs = head->attrs; diff --git a/src/lang/eval.h b/src/lang/eval.h index d79a8dab..df86e735 100644 --- a/src/lang/eval.h +++ b/src/lang/eval.h @@ -201,6 +201,8 @@ ray_t* ray_neq_fn(ray_t* a, ray_t* b); /* Logic */ ray_t* ray_and_fn(ray_t* a, ray_t* b); ray_t* ray_or_fn(ray_t* a, ray_t* b); +ray_t* ray_and_vary_fn(ray_t** args, int64_t n); +ray_t* ray_or_vary_fn(ray_t** args, int64_t n); ray_t* ray_not_fn(ray_t* x); ray_t* ray_neg_fn(ray_t* x); diff --git a/src/ops/cmp.c b/src/ops/cmp.c index 3af97cea..7c86d417 100644 --- a/src/ops/cmp.c +++ b/src/ops/cmp.c @@ -215,6 +215,34 @@ ray_t* ray_or_fn(ray_t* a, ray_t* b) { return make_bool((is_truthy(a) || is_truthy(b)) ? 1 : 0); } +/* Variadic left-fold over the binary kernels. (and a b c) folds as + * (and (and a b) c) — same shape Lisp/Clojure use. */ +ray_t* ray_and_vary_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n); + ray_t* acc = ray_and_fn(args[0], args[1]); + if (!acc || RAY_IS_ERR(acc)) return acc; + for (int64_t i = 2; i < n; i++) { + ray_t* next = ray_and_fn(acc, args[i]); + ray_release(acc); + if (!next || RAY_IS_ERR(next)) return next; + acc = next; + } + return acc; +} + +ray_t* ray_or_vary_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n); + ray_t* acc = ray_or_fn(args[0], args[1]); + if (!acc || RAY_IS_ERR(acc)) return acc; + for (int64_t i = 2; i < n; i++) { + ray_t* next = ray_or_fn(acc, args[i]); + ray_release(acc); + if (!next || RAY_IS_ERR(next)) return next; + acc = next; + } + return acc; +} + /* Unary */ ray_t* ray_not_fn(ray_t* x) { /* Element-wise for bool vectors */ diff --git a/test/rfl/arith/add.rfl b/test/rfl/arith/add.rfl index f2e9ee69..56df4e63 100644 --- a/test/rfl/arith/add.rfl +++ b/test/rfl/arith/add.rfl @@ -146,3 +146,13 @@ ;; last element at morsel 1025 (last (+ V 1)) -- 1025 + +;; ────────────────────────────────────────────────────────────────── +;; Arity — binary verb: too few or too many args raises `arity`. +;; (Pre-fix: extras were silently dropped — `(+ 1 2 3)` returned 3.) +;; ────────────────────────────────────────────────────────────────── + +(+) !- arity +(+ 1) !- arity +(+ 1 2 3) !- arity +(+ 1 2 3 4 5) !- arity diff --git a/test/rfl/arith/mul.rfl b/test/rfl/arith/mul.rfl index c5f278a4..c19cdeaa 100644 --- a/test/rfl/arith/mul.rfl +++ b/test/rfl/arith/mul.rfl @@ -75,3 +75,8 @@ (set V (til 1025)) (* V 1) -- V (sum (* 2 V)) -- (* 2 (sum V)) + + +;; ── Arity — binary verb (pre-fix: extras silently dropped) ── +(* 2 3 4) !- arity +(* 2 3 4 5) !- arity diff --git a/test/rfl/arith/sub.rfl b/test/rfl/arith/sub.rfl index 4ea43241..56bae9a4 100644 --- a/test/rfl/arith/sub.rfl +++ b/test/rfl/arith/sub.rfl @@ -84,3 +84,8 @@ (set V (til 1025)) (sum (- V V)) -- 0 (last (- V 1)) -- 1023 + + +;; ── Arity — binary verb (pre-fix: extras silently dropped) ── +(- 10 1 2) !- arity +(- 10 1 2 3) !- arity diff --git a/test/rfl/cmp/and.rfl b/test/rfl/cmp/and.rfl index 50fd6340..973d2d56 100644 --- a/test/rfl/cmp/and.rfl +++ b/test/rfl/cmp/and.rfl @@ -25,3 +25,13 @@ (and [true false true] [false true false]) -- [false false false] (and [true false true] [false true false] [true false true]) -- [false false false] (and [true false true] true) -- [true false true] + +;; ── variadic fold: (and a b c) == (and (and a b) c) ── +(and true true true) -- true +(and true true false) -- false +(and true true true true true) -- true +(and true true true true false) -- false + +;; ── arity boundaries ── +(and) !- arity +(and true) !- arity diff --git a/test/rfl/cmp/or.rfl b/test/rfl/cmp/or.rfl index a1dc543b..cedfb089 100644 --- a/test/rfl/cmp/or.rfl +++ b/test/rfl/cmp/or.rfl @@ -30,3 +30,13 @@ (or [true false true] [false true false]) -- [true true true] (or [true false true] [false true false] [true false true]) -- [true true true] (or [true false true] true) -- [true true true] + +;; ── variadic fold: (or a b c) == (or (or a b) c) ── +(or false false false) -- false +(or false false true) -- true +(or false false false false false) -- false +(or false false false false true) -- true + +;; ── arity boundaries ── +(or) !- arity +(or false) !- arity From 072e040414f107f2467b8e39a9573ed6f71f1cab Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 15:09:46 +0300 Subject: [PATCH 03/21] fix(cmp): lexicographic ordering for SYM atoms (>, <, >=, <=) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously (> 'b 'a) raised "error: type: cannot compare sym and sym" even for the reflexive case (>= 'a 'a). Equality comparisons (==/!=) worked because they could just check interned IDs, but ordering had no implementation at all and fell through to the is_numeric() guard. Add sym_atom_cmp(a, b): - Fast path: equal interned IDs => identical text (interning gives one ID per text), return 0 without touching the global sym table. - Slow path: ray_sym_str(id) returns a RAY_STR atom; ray_str_cmp delegates to ray_str_t_cmp which uses the 12-byte SSO inline path for short symbols and prefix-then-fullcompare for pooled ones. Wire it into ray_gt_fn / ray_lt_fn / ray_gte_fn / ray_lte_fn at the same dispatch site as the existing -RAY_GUID branch. Vector and broadcast paths inherit the fix automatically — atomic_map_binary unboxes SYM elements via collection_elem (-> ray_sym(id) atom), so each pairwise call lands in the new branch. The pre-fix probe (test/rfl/cmp/gt.rfl:41-44, "(> 'b 'a) !- type") is replaced with positive assertions covering atoms, broadcast, vec/vec, and the SSO inline/pooled boundary. Mirror coverage added to lt.rfl, ge.rfl, le.rfl. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops/cmp.c | 22 ++++++++++++++++++++++ test/rfl/cmp/ge.rfl | 9 +++++++++ test/rfl/cmp/gt.rfl | 27 ++++++++++++++++++++++----- test/rfl/cmp/le.rfl | 9 +++++++++ test/rfl/cmp/lt.rfl | 12 ++++++++++++ 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/ops/cmp.c b/src/ops/cmp.c index 7c86d417..d696e1cb 100644 --- a/src/ops/cmp.c +++ b/src/ops/cmp.c @@ -40,9 +40,25 @@ int char_str_cmp(ray_t* a, ray_t* b, int *out) { return 0; } +/* Lexicographic compare of two SYM atoms. Fast path: equal interned + * ids ⇒ identical text ⇒ 0, no global-table lookup. Slow path: pull + * the backing STR via ray_sym_str and delegate to ray_str_cmp, which + * uses the 12-byte SSO inline path for short symbols. */ +int sym_atom_cmp(ray_t* a, ray_t* b) { + if (a->i64 == b->i64) return 0; + ray_t* sa = ray_sym_str(a->i64); + ray_t* sb = ray_sym_str(b->i64); + int r = (sa && sb) ? ray_str_cmp(sa, sb) : 0; + if (sa) ray_release(sa); + if (sb) ray_release(sb); + return r; +} + /* Comparison */ ray_t* ray_gt_fn(ray_t* a, ray_t* b) { { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c > 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) > 0 ? 1 : 0); if (a->type == -RAY_GUID && b->type == -RAY_GUID) return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) > 0 ? 1 : 0); /* Temporal comparison (same or cross-temporal via nanosecond conversion) */ @@ -63,6 +79,8 @@ ray_t* ray_gt_fn(ray_t* a, ray_t* b) { ray_t* ray_lt_fn(ray_t* a, ray_t* b) { { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c < 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) < 0 ? 1 : 0); if (a->type == -RAY_GUID && b->type == -RAY_GUID) return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) < 0 ? 1 : 0); if (is_temporal(a) && is_temporal(b)) { @@ -82,6 +100,8 @@ ray_t* ray_lt_fn(ray_t* a, ray_t* b) { ray_t* ray_gte_fn(ray_t* a, ray_t* b) { { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c >= 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) >= 0 ? 1 : 0); if (a->type == -RAY_GUID && b->type == -RAY_GUID) return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) >= 0 ? 1 : 0); if (is_temporal(a) && is_temporal(b)) { @@ -102,6 +122,8 @@ ray_t* ray_gte_fn(ray_t* a, ray_t* b) { ray_t* ray_lte_fn(ray_t* a, ray_t* b) { { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c <= 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) <= 0 ? 1 : 0); if (a->type == -RAY_GUID && b->type == -RAY_GUID) return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) <= 0 ? 1 : 0); if (is_temporal(a) && is_temporal(b)) { diff --git a/test/rfl/cmp/ge.rfl b/test/rfl/cmp/ge.rfl index 4d0ddfed..580a9b7a 100644 --- a/test/rfl/cmp/ge.rfl +++ b/test/rfl/cmp/ge.rfl @@ -32,3 +32,12 @@ ;; vec (>= [1 5 5] [2 2 5]) -- [false true true] + +;; ────────────────────────────────────────────────────────────────── +;; SYM ordering: reflexive, lexicographic. +;; ────────────────────────────────────────────────────────────────── + +(>= 'a 'a) -- true +(>= 'b 'a) -- true +(>= 'a 'b) -- false +(>= ['a 'b 'c] 'b) -- [false true true] diff --git a/test/rfl/cmp/gt.rfl b/test/rfl/cmp/gt.rfl index 16ae306d..92551fc2 100644 --- a/test/rfl/cmp/gt.rfl +++ b/test/rfl/cmp/gt.rfl @@ -35,10 +35,27 @@ (> [1 5 10] 5) -- [false false true] ;; ────────────────────────────────────────────────────────────────── -;; Probe: > on SYM atoms raises type — known bug (see _probes/cmp_sym_ordering) +;; SYM ordering: lexicographic compare via interned text. ;; ────────────────────────────────────────────────────────────────── -(> 'b 'a) !- type -(< 'a 'b) !- type -(>= 'a 'a) !- type -(<= 'a 'a) !- type +;; atom — basic ordering +(> 'b 'a) -- true +(> 'a 'b) -- false +(> 'a 'a) -- false + +;; longer prefix is greater (shorter is "less" on prefix tie) +(> 'foobar 'foo) -- true +(> 'foo 'foobar) -- false + +;; broadcast: vec > atom +(> ['a 'b 'c] 'a) -- [false true true] +(> 'b ['a 'b 'c]) -- [true false false] + +;; vec/vec pairwise +(> ['b 'b 'b] ['a 'b 'c]) -- [true false false] + +;; mixed inline (≤ 12 bytes) and pooled (> 12 bytes) — exercises SSO. +;; Note: hyphens in symbols would be parsed as ops, so use underscore. +;; 's' > 'l' so 'short > 'longer_… +(> 'short 'longer_than_twelve_bytes_symbol) -- true +(> 'longer_than_twelve_bytes_symbol 'short) -- false diff --git a/test/rfl/cmp/le.rfl b/test/rfl/cmp/le.rfl index 5e098305..964ca8f7 100644 --- a/test/rfl/cmp/le.rfl +++ b/test/rfl/cmp/le.rfl @@ -25,3 +25,12 @@ ;; vec (<= [1 5 5] [2 2 5]) -- [true false true] + +;; ────────────────────────────────────────────────────────────────── +;; SYM ordering: reflexive, lexicographic. +;; ────────────────────────────────────────────────────────────────── + +(<= 'a 'a) -- true +(<= 'a 'b) -- true +(<= 'b 'a) -- false +(<= ['a 'b 'c] 'b) -- [true true false] diff --git a/test/rfl/cmp/lt.rfl b/test/rfl/cmp/lt.rfl index 9be58bb3..254124c3 100644 --- a/test/rfl/cmp/lt.rfl +++ b/test/rfl/cmp/lt.rfl @@ -31,3 +31,15 @@ ;; vec (< [1 5 3] [2 2 2]) -- [true false false] (< [1 5 10] 5) -- [true false false] + +;; ────────────────────────────────────────────────────────────────── +;; SYM ordering: lexicographic compare via interned text. +;; ────────────────────────────────────────────────────────────────── + +(< 'a 'b) -- true +(< 'b 'a) -- false +(< 'a 'a) -- false +(< 'foo 'foobar) -- true +(< 'foobar 'foo) -- false +(< ['a 'b 'c] 'b) -- [true false false] +(< ['a 'b] ['b 'a]) -- [true false] From 9ff6e807f8a7cf12c144dcba0525b2eeb50df5e2 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 15:32:24 +0300 Subject: [PATCH 04/21] fix(arith): neg preserves narrow-int type; pin all type-promotion rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ray_neg_fn only handled -RAY_I64 and -RAY_F64 atoms; (neg 5h) and (neg 5i) raised "error: type" even though abs already accepted them. Add the i16/i32 branches and preserve type — same convention as binary +, -, *, %, /: (neg 5h) -> -5h (i16) (neg 5i) -> -5i (i32) (neg [1h 2h]) -> [-1h -2h] (I16) (Vector path inherits the fix via RAY_FN_ATOMIC.) Type preservation is the right call here: in Rayforce typed nulls live in a separate nullmap bit, so INT_MIN is just a regular value and there's no overflow concern that would justify widening like abs does. Add test/rfl/arith/type_preservation.rfl pinning the full type- promotion matrix so a regression in any op surfaces loudly: - same-width +/-/*//% preserve type (i16, i32, i64, f64) - cross-width +/-/*//% follow "wider wins" (i16+i32→i32, etc.) - all comparisons return bool - unary neg/floor/ceil preserve type - unary abs widens narrow ints to i64 (documented inconsistency) - math fns (round/sqrt/log/exp) always return f64 Replaces the stale `(neg 5h) !- type` probes in test/rfl/arith/neg.rfl with positive assertions including type assertions and null propagation across i16/i32. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops/arith.c | 3 + test/rfl/arith/neg.rfl | 22 +++-- test/rfl/arith/type_preservation.rfl | 115 +++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 test/rfl/arith/type_preservation.rfl diff --git a/src/ops/arith.c b/src/ops/arith.c index 63b9352c..029a1a84 100644 --- a/src/ops/arith.c +++ b/src/ops/arith.c @@ -332,6 +332,9 @@ ray_t* ray_neg_fn(ray_t* x) { if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } if (x->type == -RAY_I64) return make_i64(-x->i64); if (x->type == -RAY_F64) return make_f64(-x->f64); + /* Narrow ints preserve type — same convention as binary + - mul. */ + if (x->type == -RAY_I32) return make_i32(-x->i32); + if (x->type == -RAY_I16) return make_i16(-x->i16); return ray_error("type", NULL); } diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl index 532d0048..47e0444e 100644 --- a/test/rfl/arith/neg.rfl +++ b/test/rfl/arith/neg.rfl @@ -43,10 +43,22 @@ (sum (+ V (neg V))) -- 0 ;; ────────────────────────────────────────────────────────────────── -;; Probe: neg on narrow-int currently raises type -;; (filed in spec/_probes/neg_narrow_int.rfl) +;; Narrow ints preserve type on negation (same convention as +/-/*). ;; ────────────────────────────────────────────────────────────────── -(neg 5h) !- type -(neg 5i) !- type -(neg [1h 2h]) !- type +(neg 5h) -- -5h +(neg 5i) -- -5i +(neg -5h) -- 5h +(neg -5i) -- 5i +(neg [1h 2h 3h]) -- [-1h -2h -3h] +(neg [1i 2i 3i]) -- [-1i -2i -3i] + +;; type is preserved +(type (neg 5h)) -- 'i16 +(type (neg 5i)) -- 'i32 +(type (neg [1h 2h])) -- 'I16 +(type (neg [1i 2i])) -- 'I32 + +;; null propagation across narrow types +(nil? (neg 0Nh)) -- true +(nil? (neg 0Ni)) -- true diff --git a/test/rfl/arith/type_preservation.rfl b/test/rfl/arith/type_preservation.rfl new file mode 100644 index 00000000..6f4591b8 --- /dev/null +++ b/test/rfl/arith/type_preservation.rfl @@ -0,0 +1,115 @@ +;; Regression: pin down type-promotion rules for every arithmetic op. +;; Each assertion locks in the current behavior so an accidental change +;; (e.g. someone widening `+` to i64 unconditionally) breaks loudly. +;; +;; Documented rules: +;; 1. Same-width binary +/-/*/// preserve type. +;; (+ 5h 3h) → i16, (+ 5i 3i) → i32, (+ 5 3) → i64, (+ 5.0 3.0) → f64 +;; 2. Cross-width binary: wider wins. +;; (+ i16 i32) → i32, (+ i16 i64) → i64, (+ i16 f64) → f64 +;; 3. Comparison ops always return bool. +;; 4. Unary neg / floor / ceil preserve type. +;; 5. Unary abs widens narrow ints to i64. (Documented inconsistency; +;; see SPEC for justification — flagged for review.) +;; 6. Math fns (round, sqrt, log, exp) always produce f64. + +;; ── (1) Same-width preservation: + - * / +(type (+ 5h 3h)) -- 'i16 +(type (- 5h 3h)) -- 'i16 +(type (* 5h 3h)) -- 'i16 +(type (/ 6h 3h)) -- 'i16 +(type (+ 5i 3i)) -- 'i32 +(type (- 5i 3i)) -- 'i32 +(type (* 5i 3i)) -- 'i32 +(type (/ 6i 3i)) -- 'i32 +(type (+ 5 3)) -- 'i64 +(type (- 5 3)) -- 'i64 +(type (* 5 3)) -- 'i64 +(type (/ 6 3)) -- 'i64 +(type (+ 5.0 3.0)) -- 'f64 +(type (- 5.0 3.0)) -- 'f64 +(type (* 5.0 3.0)) -- 'f64 +(type (/ 6.0 3.0)) -- 'f64 + +;; vector form mirrors atomic +(type (+ [1h 2h] [3h 4h])) -- 'I16 +(type (+ [1i 2i] [3i 4i])) -- 'I32 +(type (+ [1 2] [3 4])) -- 'I64 +(type (+ [1.0 2.0] [3.0 4.0])) -- 'F64 + +;; ── (2) Cross-width: wider wins +(type (+ 5h 3i)) -- 'i32 +(type (+ 5h 3)) -- 'i64 +(type (+ 5i 3)) -- 'i64 +(type (+ 5h 3.0)) -- 'f64 +(type (+ 5i 3.0)) -- 'f64 +(type (+ 5 3.0)) -- 'f64 + +(type (+ [1h 2h] [3i 4i])) -- 'I32 +(type (+ [1h 2h] [3 4])) -- 'I64 +(type (+ [1h 2h] [3.0 4.0])) -- 'F64 +(type (+ [1i 2i] [3.0 4.0])) -- 'F64 + +;; ── (3) Comparison always → bool +(type (== 5h 3h)) -- 'b8 +(type (!= 5h 3h)) -- 'b8 +(type (> 5h 3h)) -- 'b8 +(type (< 5h 3h)) -- 'b8 +(type (>= 5h 3h)) -- 'b8 +(type (<= 5h 3h)) -- 'b8 + +(type (== 5h 3)) -- 'b8 +(type (== 5h 3.0)) -- 'b8 + +(type (== [1h 2h] [3h 4h])) -- 'B8 + +;; ── (4) Unary preserve type: neg, floor, ceil +(type (neg 5h)) -- 'i16 +(type (neg 5i)) -- 'i32 +(type (neg 5)) -- 'i64 +(type (neg 5.0)) -- 'f64 +(type (neg [1h 2h])) -- 'I16 +(type (neg [1i 2i])) -- 'I32 + +(type (floor 5h)) -- 'i16 +(type (floor 5i)) -- 'i32 +(type (floor 5)) -- 'i64 +(type (floor 5.0)) -- 'f64 + +(type (ceil 5h)) -- 'i16 +(type (ceil 5i)) -- 'i32 +(type (ceil 5)) -- 'i64 +(type (ceil 5.0)) -- 'f64 + +;; ── (5) abs widens narrow ints to i64 (inconsistent with neg — see SPEC) +(type (abs 5h)) -- 'i64 +(type (abs 5i)) -- 'i64 +(type (abs 5)) -- 'i64 +(type (abs 5.0)) -- 'f64 + +;; ── (6) Math fns always → f64 +(type (round 5h)) -- 'f64 +(type (round 5i)) -- 'f64 +(type (round 5)) -- 'f64 +(type (round 5.0)) -- 'f64 + +(type (sqrt 5h)) -- 'f64 +(type (sqrt 5i)) -- 'f64 +(type (sqrt 5)) -- 'f64 +(type (sqrt 5.0)) -- 'f64 + +(type (log 5h)) -- 'f64 +(type (log 5i)) -- 'f64 +(type (log 5)) -- 'f64 +(type (log 5.0)) -- 'f64 + +(type (exp 5h)) -- 'f64 +(type (exp 5i)) -- 'f64 +(type (exp 5)) -- 'f64 +(type (exp 5.0)) -- 'f64 + +;; ── Mod (%) preserves type same as +/-/* +(type (% 7h 3h)) -- 'i16 +(type (% 7i 3i)) -- 'i32 +(type (% 7 3)) -- 'i64 +(type (% 7.0 3.0)) -- 'f64 From ea281efc90c917406d3df80b4a13e65aaa33883c Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 16:04:42 +0300 Subject: [PATCH 05/21] fix(arith): abs preserves narrow-int type (was widening to i64) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit abs was the lone outlier in the unary-numeric family — neg, floor, ceil all preserve i16/i32, but abs widened them to i64. No good reason for the inconsistency: in Rayforce typed nulls live in a separate nullmap bit, so INT_MIN is just a regular value and the "overflow protection" rationale that justifies widening in some languages doesn't apply here. Switch the i16/i32 branches to make_i16/make_i32: (abs -5h) -> 5h (i16, was i64) (abs -5i) -> 5i (i32, was i64) (abs [-1h 2h]) -> [1h 2h] (I16, was I64) Update test/rfl/arith/abs.rfl and the type_preservation.rfl regression table: abs now joins neg/floor/ceil under the "preserve narrow ints" rule. Coverage of remaining type-promotion oddities (left intentional): - sum widens I16/I32 → i64 (overflow guard) - round of int → f64 (mathematically noop, but produces float anyway) Both pinned in type_preservation.rfl so future changes surface. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops/arith.c | 4 ++-- test/rfl/arith/abs.rfl | 10 +++++----- test/rfl/arith/type_preservation.rfl | 11 ++++++----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/ops/arith.c b/src/ops/arith.c index 029a1a84..12ae7bfb 100644 --- a/src/ops/arith.c +++ b/src/ops/arith.c @@ -367,8 +367,8 @@ ray_t* ray_abs_fn(ray_t* x) { if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } if (x->type == -RAY_F64) return make_f64(fabs(x->f64)); if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? -x->i64 : x->i64); - if (x->type == -RAY_I32) return make_i64(x->i32 < 0 ? -(int64_t)x->i32 : x->i32); - if (x->type == -RAY_I16) return make_i64(x->i16 < 0 ? -(int64_t)x->i16 : x->i16); + if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? -x->i32 : x->i32); + if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? -x->i16 : x->i16); return ray_error("type", NULL); } diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl index d8bb50e9..55b5c738 100644 --- a/test/rfl/arith/abs.rfl +++ b/test/rfl/arith/abs.rfl @@ -24,7 +24,7 @@ (nil? (abs 0Ni)) -- true ;; ────────────────────────────────────────────────────────────────── -;; Vec + cross-type — abs widens narrow ints to i64 +;; Vec + type preservation — abs keeps narrow-int type (same as neg) ;; ────────────────────────────────────────────────────────────────── (abs [1 -2 3 -4]) -- [1 2 3 4] @@ -32,7 +32,7 @@ (abs -5.0) -- 5.0 (type (abs -5)) -- 'i64 (type (abs -5.0)) -- 'f64 -(type (abs -5h)) -- 'i64 -(type (abs -5i)) -- 'i64 -(type (abs [-1h 2h])) -- 'I64 -(type (abs [-1i 2i])) -- 'I64 +(type (abs -5h)) -- 'i16 +(type (abs -5i)) -- 'i32 +(type (abs [-1h 2h])) -- 'I16 +(type (abs [-1i 2i])) -- 'I32 diff --git a/test/rfl/arith/type_preservation.rfl b/test/rfl/arith/type_preservation.rfl index 6f4591b8..9ea79ecf 100644 --- a/test/rfl/arith/type_preservation.rfl +++ b/test/rfl/arith/type_preservation.rfl @@ -9,8 +9,7 @@ ;; (+ i16 i32) → i32, (+ i16 i64) → i64, (+ i16 f64) → f64 ;; 3. Comparison ops always return bool. ;; 4. Unary neg / floor / ceil preserve type. -;; 5. Unary abs widens narrow ints to i64. (Documented inconsistency; -;; see SPEC for justification — flagged for review.) +;; 5. Unary abs preserves type (same as neg/floor/ceil). ;; 6. Math fns (round, sqrt, log, exp) always produce f64. ;; ── (1) Same-width preservation: + - * / @@ -81,11 +80,13 @@ (type (ceil 5)) -- 'i64 (type (ceil 5.0)) -- 'f64 -;; ── (5) abs widens narrow ints to i64 (inconsistent with neg — see SPEC) -(type (abs 5h)) -- 'i64 -(type (abs 5i)) -- 'i64 +;; ── (5) abs preserves type (same convention as neg/floor/ceil) +(type (abs 5h)) -- 'i16 +(type (abs 5i)) -- 'i32 (type (abs 5)) -- 'i64 (type (abs 5.0)) -- 'f64 +(type (abs [1h -2h])) -- 'I16 +(type (abs [1i -2i])) -- 'I32 ;; ── (6) Math fns always → f64 (type (round 5h)) -- 'f64 From 3df78bb4561e94cf51fbb924422f88433a334010 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 16:30:13 +0300 Subject: [PATCH 06/21] fix(eval): call_fn1 routes atomic UNARY builtins through atomic_map_unary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (modify t 'a neg) raised "error: type" while (map neg [1 2 3]) worked. modify calls call_fn1(fn, column_vec) internally — call_fn1 saw a RAY_UNARY function and invoked it directly on the whole vector. But ray_neg_fn (and every other atomic unary kernel: abs, floor, ceil, round, sqrt, log, exp, not) is written for a single atom and rejects positive-typed (vector) input with "type". call_fn2 already had the parallel routing for binary atomic builtins: if ((fn->attrs & RAY_FN_ATOMIC) && (is_collection(a) || ...)) return atomic_map_binary(f, a, b); call_fn1 simply forgot the unary mirror. Add it. Affects every code path that uses call_fn1 — modify, fold (1-arg form), the apply 1-arg case — so any future caller now gets vector auto-mapping for free, matching the user's intuition that "if it works in map, it should work everywhere". Tests: extend test/rfl/table/modify.rfl with parity assertions showing (modify t 'a neg) ≡ (modify t 'a (fn [x] (neg x))), plus chained modify with abs/neg and floor/ceil over a float column. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lang/eval.c | 2 ++ test/rfl/table/modify.rfl | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/lang/eval.c b/src/lang/eval.c index 89f7dc01..ab63ada5 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -740,6 +740,8 @@ ray_t* call_fn1(ray_t* fn, ray_t* arg) { if (fn_is_restricted(fn)) return ray_error("access", "restricted"); if (fn->type == RAY_UNARY) { ray_unary_fn f = (ray_unary_fn)(uintptr_t)fn->i64; + if ((fn->attrs & RAY_FN_ATOMIC) && is_collection(arg)) + return atomic_map_unary(f, arg); return f(arg); } if (fn->type == RAY_LAMBDA) { diff --git a/test/rfl/table/modify.rfl b/test/rfl/table/modify.rfl index 5e39c3bf..82199c08 100644 --- a/test/rfl/table/modify.rfl +++ b/test/rfl/table/modify.rfl @@ -26,3 +26,22 @@ (set T4 (modify (modify T 'a (fn [x] (neg x))) 'b (fn [x] (* x 2.0)))) (sum (at T4 'a)) -- -15 (sum (at T4 'b)) -- 300.0 + +;; ────────────────────────────────────────────────────────────────── +;; Builtin atomic functions are accepted directly (parity with `map`). +;; modify used to require a (fn …) wrapper; this is the regression. +;; ────────────────────────────────────────────────────────────────── + +(set Tn (table [a] (list [-3 -1 2 4]))) + +;; same result as the lambda-wrapped form +(at (modify Tn 'a neg) 'a) -- (at (modify Tn 'a (fn [x] (neg x))) 'a) +(at (modify Tn 'a abs) 'a) -- [3 1 2 4] + +;; chain of builtins: abs then neg +(sum (at (modify (modify Tn 'a abs) 'a neg) 'a)) -- -10 + +;; floor / ceil on float column +(set Tf (table [v] (list [1.7 2.3 -0.5]))) +(at (modify Tf 'v floor) 'v) -- [1.0 2.0 -1.0] +(at (modify Tf 'v ceil) 'v) -- [2.0 3.0 0.0] From ac0e7ca93af1dbb8da15823c04ca9f09185b9f91 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 17:14:16 +0300 Subject: [PATCH 07/21] docs(repl): pin ray_repl_run_file return-code contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document what file-mode and stdin-mode mean for error handling: - ray_repl_run_file (batch / script): rc=1 stops execution - ray_repl_run / stdin pipe (REPL): error printed, loop continues The probe spec/_probes/error_exit_code_inconsistent.rfl filed back when file-mode silently returned 0 on errors; that's been fixed since (every verb in the probe — asof-join, inner/left-join, filter, +, /, rand, til, alter — now correctly returns rc=1 from a script file). Drop the probe. The stdin-pipe-doesn't-abort behaviour is the REPL contract, not a bug; users wanting batch semantics should pass the script as a positional file arg. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/app/repl.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/app/repl.h b/src/app/repl.h index 01d0b712..bdb72b99 100644 --- a/src/app/repl.h +++ b/src/app/repl.h @@ -40,6 +40,13 @@ typedef struct ray_repl { ray_repl_t* ray_repl_create(ray_poll_t* poll); void ray_repl_destroy(ray_repl_t* repl); void ray_repl_run(ray_repl_t* repl); + +/* Run a Rayfall script file in batch (script) mode. Contract: + * - returns 0 on success + * - returns 1 on any eval error (script execution stops at first + * error; subsequent forms are not run) + * Distinct from ray_repl_run / stdin pipe which use REPL semantics + * (errors are printed but do not terminate the loop). */ int ray_repl_run_file(const char* path); #endif /* RAY_IO_REPL_H */ From ab2074c44a4fc781e76ee94d978808cc6ba5322a Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 18:14:59 +0300 Subject: [PATCH 08/21] fix(like): single iterative matcher, no catastrophic backtracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three implementations existed and they disagreed on syntax: - eval path: strop.c::str_glob — recursive, glob */?/[abc], EXPONENTIAL on patterns like "a*a*a*…a*b" against "aaa…a" (>5s on 16 stars, >timeout on 20). - DAG path: string.c::like_match — iterative, but SQL %/_ syntax. - DAG path: string.c::ilike_match — iterative SQL %/_ , case- insensitive. So `(like "hello" "h*")` returned true via eval but matched 0 rows in `select where: (like s "h*")` — `*` was literal under the SQL matcher. And the eval path could be DoS'd with a 20-star pattern. Replace all three with one shared implementation in src/ops/glob.[ch]: - Iterative two-pointer with last-star backtrack (glibc fnmatch style). O(n*m) worst case; 32-star pattern that pre-fix took >5s now finishes in microseconds. - Glob syntax matching the documented contract: * (any), ? (one), [abc] / [a-z] / [!abc] (character class). - ray_glob_match (case-sensitive) and ray_glob_match_ci (folds ASCII letters on both sides). eval path (strop.c::ray_like_fn) and DAG path (string.c::exec_like / exec_ilike) both call the same matcher — semantics now identical. Tests: * test/rfl/strop/like.rfl extended with character classes, ranges, negated classes, an adversarial 20-star catastrophic-backtracking guard, and explicit eval-path-≡-DAG-path parity assertions. * test/test_exec.c: stale C-test using "bar%" (SQL syntax) updated to "bar*" (glob). Docs: website/docs/rayfall-functions.html updated to mention [abc] / [a-z] / [!abc] (already implemented; previously undocumented). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops/glob.c | 102 ++++++++++++++++++++++++++++ src/ops/glob.h | 28 ++++++++ src/ops/string.c | 64 +++-------------- src/ops/strop.c | 70 ++++++------------- test/rfl/strop/like.rfl | 33 +++++++++ test/test_exec.c | 2 +- website/docs/rayfall-functions.html | 2 +- 7 files changed, 193 insertions(+), 108 deletions(-) create mode 100644 src/ops/glob.c create mode 100644 src/ops/glob.h diff --git a/src/ops/glob.c b/src/ops/glob.c new file mode 100644 index 00000000..dea37d1e --- /dev/null +++ b/src/ops/glob.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + */ + +/* + * Iterative glob matcher. Replaces three pre-existing implementations + * that diverged in syntax (eval used *,?,[abc]; DAG used SQL %,_) and + * one of which (strop.c::str_glob) blew up exponentially on patterns + * like "a*a*a*…a*b" against an a-only string. This single file is + * the only matcher; both call sites delegate here. + */ + +#include "ops/glob.h" + +/* Lowercase an ASCII byte; non-ASCII passes through unchanged. */ +static inline char to_lower(char c) { + return (c >= 'A' && c <= 'Z') ? (char)(c + 32) : c; +} + +/* Match a single character against a class `[ ... ]`. On entry *pi + * points at the byte after `[`. On return *pi points one past `]`. + * Recognises `[abc]`, `[a-z]`, leading `!` for negation, embedded + * `]` is allowed as the first char (after optional `!`). */ +static bool match_class(const char* p, size_t pn, size_t* pi, char c, bool ci) { + size_t i = *pi; + bool neg = false; + if (i < pn && p[i] == '!') { neg = true; i++; } + bool matched = false; + bool first = true; + char ch = ci ? to_lower(c) : c; + while (i < pn && (first || p[i] != ']')) { + char lo = ci ? to_lower(p[i]) : p[i]; + if (i + 2 < pn && p[i + 1] == '-' && p[i + 2] != ']') { + char hi = ci ? to_lower(p[i + 2]) : p[i + 2]; + if (ch >= lo && ch <= hi) matched = true; + i += 3; + } else { + if (ch == lo) matched = true; + i++; + } + first = false; + } + if (i < pn && p[i] == ']') i++; /* consume closing bracket */ + *pi = i; + return neg ? !matched : matched; +} + +static bool glob_impl(const char* s, size_t sn, + const char* p, size_t pn, bool ci) { + size_t si = 0, pi = 0; + size_t star_pi = (size_t)-1, star_si = 0; + + while (si < sn) { + if (pi < pn && p[pi] == '*') { + star_pi = pi++; /* remember star, skip it */ + star_si = si; + } else if (pi < pn && p[pi] == '?') { + pi++; + si++; + } else if (pi < pn && p[pi] == '[') { + size_t cls_pi = pi + 1; + if (match_class(p, pn, &cls_pi, s[si], ci)) { + pi = cls_pi; + si++; + } else if (star_pi != (size_t)-1) { + pi = star_pi + 1; + si = ++star_si; + } else { + return false; + } + } else if (pi < pn) { + char a = ci ? to_lower(s[si]) : s[si]; + char b = ci ? to_lower(p[pi]) : p[pi]; + if (a == b) { + pi++; + si++; + } else if (star_pi != (size_t)-1) { + pi = star_pi + 1; + si = ++star_si; + } else { + return false; + } + } else if (star_pi != (size_t)-1) { + pi = star_pi + 1; + si = ++star_si; + } else { + return false; + } + } + /* Consumed all of input — pattern must be at end, modulo trailing stars. */ + while (pi < pn && p[pi] == '*') pi++; + return pi == pn; +} + +bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn) { + return glob_impl(s, sn, p, pn, false); +} + +bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn) { + return glob_impl(s, sn, p, pn, true); +} diff --git a/src/ops/glob.h b/src/ops/glob.h new file mode 100644 index 00000000..7fa6bef6 --- /dev/null +++ b/src/ops/glob.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + */ + +#ifndef RAY_OPS_GLOB_H +#define RAY_OPS_GLOB_H + +#include +#include + +/* Glob pattern match, iterative two-pointer (no catastrophic backtracking). + * Worst case O(n*m); typical case linear. + * + * Supported metacharacters: + * * — matches zero or more characters + * ? — matches exactly one character + * [abc] — character class: matches any of a, b, c + * [a-z] — range + * [!abc] — negated class + * + * `glob_match` is case-sensitive. `glob_match_ci` lowercases ASCII letters + * on both sides before comparing (so it matches 'A' against 'a', 'A-Z' + * range matches both case forms, etc.). */ +bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn); +bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn); + +#endif /* RAY_OPS_GLOB_H */ diff --git a/src/ops/string.c b/src/ops/string.c index 8be7ab52..e9430340 100644 --- a/src/ops/string.c +++ b/src/ops/string.c @@ -22,35 +22,13 @@ */ #include "ops/internal.h" +#include "ops/glob.h" /* ============================================================================ - * OP_LIKE: SQL LIKE pattern matching on SYM columns + * OP_LIKE: glob pattern matching on STR / SYM columns. See ops/glob.[ch]. + * Syntax: * (any), ? (one char), [abc] / [a-z] / [!abc] (character class). * ============================================================================ */ -/* Simple SQL LIKE matcher: % = any (including empty), _ = single char. - * Pattern is re-interpreted per row; could be optimized with precompilation - * (e.g., compile once to NFA/DFA) for large datasets. */ -static bool like_match(const char* str, size_t slen, const char* pat, size_t plen) { - size_t si = 0, pi = 0; - size_t star_p = (size_t)-1, star_s = 0; - while (si < slen) { - if (pi < plen && (pat[pi] == str[si] || pat[pi] == '_')) { - si++; pi++; - } else if (pi < plen && pat[pi] == '%') { - star_p = pi; star_s = si; - pi++; - } else if (star_p != (size_t)-1) { - pi = star_p + 1; - star_s++; - si = star_s; - } else { - return false; - } - } - while (pi < plen && pat[pi] == '%') pi++; - return pi == plen; -} - ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { ray_t* input = exec_node(g, op->inputs[0]); ray_t* pat_v = exec_node(g, op->inputs[1]); @@ -77,7 +55,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { for (int64_t i = 0; i < len; i++) { const char* sp = ray_str_t_ptr(&elems[i], pool); size_t sl = elems[i].len; - dst[i] = like_match(sp, sl, pat_str, pat_len) ? 1 : 0; + dst[i] = ray_glob_match(sp, sl, pat_str, pat_len) ? 1 : 0; } } else if (RAY_IS_SYM(in_type)) { const void* base = ray_data(input); @@ -87,7 +65,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { if (!s) { dst[i] = 0; continue; } const char* sp = ray_str_ptr(s); size_t sl = ray_str_len(s); - dst[i] = like_match(sp, sl, pat_str, pat_len) ? 1 : 0; + dst[i] = ray_glob_match(sp, sl, pat_str, pat_len) ? 1 : 0; } } else { memset(dst, 0, (size_t)len); @@ -97,33 +75,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { return result; } -/* Case-insensitive LIKE: compare characters via tolower(). */ -static bool ilike_match(const char* str, size_t slen, const char* pat, size_t plen) { - size_t si = 0, pi = 0; - size_t star_p = (size_t)-1, star_s = 0; - while (si < slen) { - if (pi < plen && pat[pi] != '%') { - unsigned char sc = (unsigned char)str[si]; - unsigned char pc = (unsigned char)pat[pi]; - if (pc == '_' || (sc >= 'A' && sc <= 'Z' ? sc + 32 : sc) == - (pc >= 'A' && pc <= 'Z' ? pc + 32 : pc)) { - si++; pi++; - } else if (star_p != (size_t)-1) { - pi = star_p + 1; star_s++; si = star_s; - } else { - return false; - } - } else if (pi < plen && pat[pi] == '%') { - star_p = pi; star_s = si; pi++; - } else if (star_p != (size_t)-1) { - pi = star_p + 1; star_s++; si = star_s; - } else { - return false; - } - } - while (pi < plen && pat[pi] == '%') pi++; - return pi == plen; -} +/* Case-insensitive LIKE — same syntax as `like`, ASCII-fold both sides. */ ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) { ray_t* input = exec_node(g, op->inputs[0]); @@ -150,7 +102,7 @@ ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) { for (int64_t i = 0; i < len; i++) { const char* sp = ray_str_t_ptr(&elems[i], pool); size_t sl = elems[i].len; - dst[i] = ilike_match(sp, sl, pat_str, pat_len) ? 1 : 0; + dst[i] = ray_glob_match_ci(sp, sl, pat_str, pat_len) ? 1 : 0; } } else if (RAY_IS_SYM(in_type)) { const void* base = ray_data(input); @@ -158,7 +110,7 @@ ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) { int64_t sym_id = ray_read_sym(base, i, in_type, input->attrs); ray_t* s = ray_sym_str(sym_id); if (!s) { dst[i] = 0; continue; } - dst[i] = ilike_match(ray_str_ptr(s), ray_str_len(s), pat_str, pat_len) ? 1 : 0; + dst[i] = ray_glob_match_ci(ray_str_ptr(s), ray_str_len(s), pat_str, pat_len) ? 1 : 0; } } else { memset(dst, 0, (size_t)len); diff --git a/src/ops/strop.c b/src/ops/strop.c index ba367e92..9744398b 100644 --- a/src/ops/strop.c +++ b/src/ops/strop.c @@ -23,6 +23,7 @@ #include "lang/internal.h" #include "table/sym.h" +#include "ops/glob.h" /* ══════════════════════════════════════════ * String builtins @@ -191,57 +192,31 @@ ray_t* ray_split_fn(ray_t* str, ray_t* delim) { return result; } -/* Helper: glob-style pattern matching for LIKE */ -static bool str_glob(const char* s, const char* p) { - while (*p) { - if (*p == '*') { - p++; - if (!*p) return true; - while (*s) { if (str_glob(s, p)) return true; s++; } - return false; - } - if (*p == '?') { if (!*s) return false; s++; p++; continue; } - if (*p == '[') { - p++; - bool neg = (*p == '!'); if (neg) p++; - bool match = false; - while (*p && *p != ']') { - if (p[1] == '-' && p[2] && p[2] != ']') { - if (*s >= p[0] && *s <= p[2]) match = true; - p += 3; - } else { - if (*s == *p) match = true; - p++; - } - } - if (*p == ']') p++; - if (neg ? match : !match) return false; - s++; continue; - } - if (*s != *p) return false; - s++; p++; - } - return !*s; -} - -/* (like str pattern) — glob-style pattern matching - * Supports: * (any chars), ? (single char), [abc] (char class) - * Returns: bool atom or bool vector */ +/* (like str pattern) — glob-style pattern matching. + * Syntax: * (any), ? (one char), [abc] / [a-z] / [!abc] (char class). + * Implementation lives in src/ops/glob.[ch]; same matcher is used by + * the DAG executor (string.c::exec_like) for select-where contexts. */ ray_t* ray_like_fn(ray_t* x, ray_t* pattern) { /* Pattern must be a string atom */ if (pattern->type != -RAY_STR) return ray_error("type", "like: pattern must be a string"); const char* pat = ray_str_ptr(pattern); + size_t pat_len = ray_str_len(pattern); /* Atom: single match */ if (x->type == -RAY_STR || x->type == -RAY_SYM) { - const char* s; + const char* s; size_t sl; + ray_t* sym_str = NULL; if (x->type == -RAY_SYM) { - ray_t* sym_str = ray_sym_str(x->i64); - s = sym_str ? ray_str_ptr(sym_str) : ""; + sym_str = ray_sym_str(x->i64); + s = sym_str ? ray_str_ptr(sym_str) : ""; + sl = sym_str ? ray_str_len(sym_str) : 0; } else { - s = ray_str_ptr(x); + s = ray_str_ptr(x); + sl = ray_str_len(x); } - return make_bool(str_glob(s, pat) ? 1 : 0); + bool m = ray_glob_match(s, sl, pat, pat_len); + if (sym_str) ray_release(sym_str); + return make_bool(m ? 1 : 0); } /* Vector: map over elements */ @@ -257,21 +232,16 @@ ray_t* ray_like_fn(ray_t* x, ray_t* pattern) { for (int64_t i = 0; i < n; i++) { ray_t* sym_str = ray_sym_str(sym_ids[i]); const char* s = sym_str ? ray_str_ptr(sym_str) : ""; - out[i] = str_glob(s, pat) ? 1 : 0; + size_t sl = sym_str ? ray_str_len(sym_str) : 0; + out[i] = ray_glob_match(s, sl, pat, pat_len) ? 1 : 0; + if (sym_str) ray_release(sym_str); } } else { /* RAY_STR vector */ for (int64_t i = 0; i < n; i++) { size_t slen; const char* s = ray_str_vec_get(x, i, &slen); - /* Need null-terminated for glob — str_vec_get may not be */ - char buf[256]; - if (s && slen < sizeof(buf)) { - memcpy(buf, s, slen); buf[slen] = '\0'; - out[i] = str_glob(buf, pat) ? 1 : 0; - } else { - out[i] = 0; - } + out[i] = (s && ray_glob_match(s, slen, pat, pat_len)) ? 1 : 0; } } return result; diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl index b07d1e43..3e495ff4 100644 --- a/test/rfl/strop/like.rfl +++ b/test/rfl/strop/like.rfl @@ -35,3 +35,36 @@ ;; literal prefix/suffix combined with one star (like "hello world" "hello *") -- true (like "hello world" "* world") -- true + +;; ────────────── character class [abc] / [a-z] / [!abc] ────────────── +(like "cat" "[cb]at") -- true +(like "bat" "[cb]at") -- true +(like "rat" "[cb]at") -- false + +;; ranges +(like "a" "[a-z]") -- true +(like "Z" "[a-z]") -- false +(like "5" "[0-9]") -- true + +;; negated class +(like "x" "[!abc]") -- true +(like "a" "[!abc]") -- false + +;; class alongside other meta +(like "hello" "[hH]ello") -- true +(like "Hello" "[hH]ello") -- true + +;; ────────────── catastrophic-backtracking guard ────────────── +;; Pre-fix the recursive eval-path matcher took >5s for 16 stars. +;; The iterative replacement is O(n*m) — finishes in microseconds even +;; for adversarial 20-star patterns over an a-only string. +(like "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*b") -- false + +;; ────────────── eval ≡ DAG: same pattern via select-where ────────────── +;; Pre-fix, eval-path used glob */?/[abc] but the DAG path for +;; (select where: (like col pat)) used SQL %/_ — divergent semantics. +;; Now both paths share src/ops/glob.[ch]. +(set Tlike (table [s] (list ["hello" "world" "hi" "help"]))) +(count (select {from: Tlike where: (like s "h*")})) -- 3 +(count (select {from: Tlike where: (like s "h?llo")})) -- 1 +(count (select {from: Tlike where: (like s "[hw]*")})) -- 4 diff --git a/test/test_exec.c b/test/test_exec.c index 6dd264b0..cfd2a5c5 100644 --- a/test/test_exec.c +++ b/test/test_exec.c @@ -2012,7 +2012,7 @@ static test_result_t test_exec_like(void) { ray_graph_t* g = ray_graph_new(tbl); ray_op_t* name_col = ray_scan(g, "name"); - ray_op_t* pat = ray_const_str(g, "bar%", 4); + ray_op_t* pat = ray_const_str(g, "bar*", 4); ray_op_t* lk = ray_like(g, name_col, pat); ray_op_t* cnt = ray_count(g, ray_filter(g, name_col, lk)); diff --git a/website/docs/rayfall-functions.html b/website/docs/rayfall-functions.html index 4717e1e3..83ecaa90 100644 --- a/website/docs/rayfall-functions.html +++ b/website/docs/rayfall-functions.html @@ -394,7 +394,7 @@

String Operations

FunctionTypeDescriptionExample splitbinarySplit string by delimiter(split "a,b,c" ",")["a" "b" "c"] - likebinaryPattern match (glob-style with * and ?)(like "hello" "hel*")true + likebinaryGlob pattern match: * any, ? one, [abc]/[a-z]/[!abc] char class(like "hello" "hel*")true concatbinaryConcatenate two strings or vectors(concat "hello" " world")"hello world" formatvariadicFormat values as string (% is placeholder)(format "x=%" 42)"x=42" From d20284c1a82f089e2442ee04062eac8813cdb8d7 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 18:52:34 +0300 Subject: [PATCH 09/21] fix(store): mkdir -p for set-splayed; tolerate missing root sym in get-parted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (set-splayed "/tmp/db/2024.01.01/t/" t) -> error: io (get-parted "/tmp/db/" 't) -> error: io Two compounding bugs broke partitioned-table workflows: 1. ray_mkdir was single-level — could not create "/tmp/db/2024.01.01/t/" when the parent directories didn't pre- exist. v1 quietly handled this inside fs_fopen (walk path, mkdir each parent before opening the file); v2 lost that. Add ray_mkdir_p (POSIX + Win32, src/store/fileio.c) with mkdir -p semantics. Use it from ray_splay_save instead of the single-level ray_mkdir. 2. ray_read_parted (get-parted) unconditionally called ray_sym_load("/sym") and propagated its failure as "io" — but set-splayed only writes per-table sym files inside the leaf splayed dir, never a root-level one for symbol-less tables. Stat the file and skip the load if absent. After both: (set-splayed "/tmp/db/2024.01.01/t/" t0) writes the partition correctly; (get-parted "/tmp/db/" 't) returns a 10-row / 3-column lazy table (1 MAPCOMMON partition-key + 2 data columns), parity with the v1 behaviour. Tests: extend test/rfl/system/splayed.rfl with the nested-mkdir case and a 2-partition get-parted round-trip (count = 10, columns = 3). Known limitation (separate task): direct (at p 'col) on a PARTED column doesn't materialize values yet — only the select-where path exercises the lazy reader. Filed for follow-up. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/store/fileio.c | 42 +++++++++++++++++++++++++++++++++++++ src/store/fileio.h | 1 + src/store/part.c | 13 +++++++++--- src/store/splay.c | 6 ++++-- test/rfl/system/splayed.rfl | 21 +++++++++++++++++++ 5 files changed, 78 insertions(+), 5 deletions(-) diff --git a/src/store/fileio.c b/src/store/fileio.c index 21d083ad..8adb596f 100644 --- a/src/store/fileio.c +++ b/src/store/fileio.c @@ -119,6 +119,29 @@ ray_err_t ray_mkdir(const char* path) { return RAY_OK; } +ray_err_t ray_mkdir_p(const char* path) { + if (!path || !*path) return RAY_ERR_IO; + char buf[1024]; + size_t len = strlen(path); + if (len >= sizeof(buf)) return RAY_ERR_IO; + memcpy(buf, path, len + 1); + /* Normalize trailing separator: trim it so the loop creates `buf` itself. */ + while (len > 1 && (buf[len - 1] == '/' || buf[len - 1] == '\\')) buf[--len] = '\0'; + for (size_t i = 1; i < len; i++) { + if (buf[i] == '/' || buf[i] == '\\') { + char saved = buf[i]; + buf[i] = '\0'; + if (!CreateDirectoryA(buf, NULL) && GetLastError() != ERROR_ALREADY_EXISTS) { + buf[i] = saved; + return RAY_ERR_IO; + } + buf[i] = saved; + } + } + if (!CreateDirectoryA(buf, NULL) && GetLastError() != ERROR_ALREADY_EXISTS) return RAY_ERR_IO; + return RAY_OK; +} + #else /* ===== POSIX implementation ===== */ @@ -211,4 +234,23 @@ ray_err_t ray_mkdir(const char* path) { return RAY_OK; } +ray_err_t ray_mkdir_p(const char* path) { + if (!path || !*path) return RAY_ERR_IO; + char buf[1024]; + size_t len = strlen(path); + if (len >= sizeof(buf)) return RAY_ERR_IO; + memcpy(buf, path, len + 1); + /* Strip trailing slash so the final mkdir creates `buf` itself. */ + while (len > 1 && buf[len - 1] == '/') buf[--len] = '\0'; + for (size_t i = 1; i < len; i++) { + if (buf[i] == '/') { + buf[i] = '\0'; + if (mkdir(buf, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + buf[i] = '/'; + } + } + if (mkdir(buf, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + return RAY_OK; +} + #endif diff --git a/src/store/fileio.h b/src/store/fileio.h index 95fe9bed..658e5606 100644 --- a/src/store/fileio.h +++ b/src/store/fileio.h @@ -49,5 +49,6 @@ ray_err_t ray_file_sync(ray_fd_t fd); ray_err_t ray_file_sync_dir(const char* path); ray_err_t ray_file_rename(const char* old_path, const char* new_path); ray_err_t ray_mkdir(const char* path); +ray_err_t ray_mkdir_p(const char* path); /* like `mkdir -p` */ #endif /* RAY_FILEIO_H */ diff --git a/src/store/part.c b/src/store/part.c index 7b73b30f..a160642e 100644 --- a/src/store/part.c +++ b/src/store/part.c @@ -35,6 +35,7 @@ #include #include #include +#include /* Validate YYYY.MM.DD format: exactly 10 chars, dots at pos 4/7, * month 01-12, day 01-31. */ @@ -326,9 +327,15 @@ ray_t* ray_read_parted(const char* db_root, const char* table_name) { if (sn < 0 || (size_t)sn >= sizeof(sym_path)) return ray_error("io", NULL); - /* Load global symfile */ - ray_err_t sym_err = ray_sym_load(sym_path); - if (sym_err != RAY_OK) return ray_error(ray_err_code_str(sym_err), NULL); + /* Load global symfile if present. Tables without RAY_SYM columns + * never produce a global symfile (set-splayed only writes per-table + * sym files inside the leaf splayed dir), so a missing root-level + * symfile is normal — not an error. */ + struct stat sym_st; + if (stat(sym_path, &sym_st) == 0) { + ray_err_t sym_err = ray_sym_load(sym_path); + if (sym_err != RAY_OK) return ray_error(ray_err_code_str(sym_err), NULL); + } /* Scan db_root for partition directories (skip "sym" entry) */ char** part_dirs = NULL; diff --git a/src/store/splay.c b/src/store/splay.c index 33b59ee4..87713bd0 100644 --- a/src/store/splay.c +++ b/src/store/splay.c @@ -61,8 +61,10 @@ ray_err_t ray_splay_save(ray_t* tbl, const char* dir, const char* sym_path) { if (!tbl || RAY_IS_ERR(tbl)) return RAY_ERR_TYPE; if (!dir) return RAY_ERR_IO; - /* Create directory (before sym save, since sym_path may be inside dir) */ - ray_err_t mkdir_err = ray_mkdir(dir); + /* Create directory and any missing parents (mkdir -p semantics). + * Required for partitioned layouts like "/db/2024.01.01/t/" where the + * caller hasn't pre-created the date partition. */ + ray_err_t mkdir_err = ray_mkdir_p(dir); if (mkdir_err != RAY_OK) return mkdir_err; /* Save symbol table if sym_path provided */ diff --git a/test/rfl/system/splayed.rfl b/test/rfl/system/splayed.rfl index fe8e725d..eb929f97 100644 --- a/test/rfl/system/splayed.rfl +++ b/test/rfl/system/splayed.rfl @@ -30,3 +30,24 @@ (count R-1024) -- 1024 ;; sum of til 1024 = 1023*1024/2 (sum (at R-1024 'n)) -- 523776 + +;; ────────────── nested directory creation (mkdir -p) ────────────── +;; set-splayed used to fail with "io" on nested paths because ray_mkdir +;; only created one level. Required for partitioned tables where the +;; date dir doesn't pre-exist: /db_root/2024.01.01/t/ +(set T-Nested (table [id val] (list [1 2 3] [10.0 20.0 30.0]))) +(set-splayed "/tmp/rfl_splayed_nested/2024.01.01/t/" T-Nested) +(count (get-splayed "/tmp/rfl_splayed_nested/2024.01.01/t/")) -- 3 + +;; ────────────── get-parted: 2-partition reconstruction ────────────── +;; Pre-fix: get-parted required a //sym file (which set-splayed +;; doesn't write for symbol-less tables) and unconditionally errored. +(set T-P0 (table [id val] (list [0 1 2 3 4] [0.0 1.0 2.0 3.0 4.0]))) +(set T-P1 (table [id val] (list [100 101 102 103 104] [10.0 11.0 12.0 13.0 14.0]))) +(set-splayed "/tmp/rfl_parted/2024.01.01/t/" T-P0) +(set-splayed "/tmp/rfl_parted/2024.01.02/t/" T-P1) + +;; total row count across partitions +(count (get-parted "/tmp/rfl_parted/" 't)) -- 10 +;; column count = 1 partition-key + 2 data columns +(count (key (get-parted "/tmp/rfl_parted/" 't))) -- 3 From 52cf5dfcd8d7f465532d3a54a77fc1c8127c956e Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 21:36:28 +0300 Subject: [PATCH 10/21] chore: ignore IDE state and gcov / lcov artifacts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds: .idea/, .vscode/ — IDE working state *.gcda, *.gcno, *.gcov — gcc coverage instrumentation outputs coverage*.info — lcov tracefiles coverage_html/ — genhtml output directory rayforce.cov — clang/llvm coverage runtime output Keeps the working tree clean during a coverage build so `git status` isn't drowned in untracked binary artifacts. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.gitignore b/.gitignore index 2b699d30..400602b2 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,15 @@ rf_test_*.csv CLAUDE.md docs/plans/ + +# IDE state +.idea/ +.vscode/ + +# gcov / lcov artifacts +*.gcda +*.gcno +*.gcov +coverage*.info +coverage_html/ +rayforce.cov From 89f37a930f90fcb686b7f65cf389a25e7b575186 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Mon, 27 Apr 2026 23:27:32 +0300 Subject: [PATCH 11/21] test: salvage radix-boundary + null sort coverage from scratch files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three legacy script-style test files (test_4097.rfl, test_null_ops.rfl, test_comprehensive.rfl) sat untracked in the repo root. Reviewed each against the existing test/rfl/ corpus; salvaged what wasn't covered: * test/rfl/integration/sort_radix_boundary.rfl (new) — pin every type's sort exit values at N=4097, just over the 2^12 radix-strategy threshold. Covers asc/desc/iasc/idesc/xasc/xdesc/select-orderby/ group-by-then-sort/distinct/rank across i64, f64, SYM, STR, BOOL, DATE, TIMESTAMP, plus null-bearing variants. * test/rfl/null/sort.rfl (extended) — add xasc-on-null-keyed-table (null sorts first, count preserved) and take-with-nulls slicing semantics. Anton's existing file covered asc/desc/iasc/idesc with nulls but not xasc or take. The other two files duplicated Anton's null/* and arith/div coverage (Float div-by-zero already produces 0Nf in his test/rfl/arith/div.rfl, INT64 boundary already in test/rfl/integration/null.rfl), so dropped. Plus removed: rayforce.cov — clang/llvm runtime artifact test/bugs/ — 6 of 7 already migrated as upstream tests; legacy duplicates extract_v1_tests.py + v1_tests*.rfl — Python interim, generation output; we don't ship Python in the test pipeline test_null_full / 100k / parallel / bugs.rfl — used renamed verbs (write-csv, antijoin) that don't exist in current Rayforce, and what they tested is already covered Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/integration/sort_radix_boundary.rfl | 87 ++++++++++++++++++++ test/rfl/null/sort.rfl | 13 +++ 2 files changed, 100 insertions(+) create mode 100644 test/rfl/integration/sort_radix_boundary.rfl diff --git a/test/rfl/integration/sort_radix_boundary.rfl b/test/rfl/integration/sort_radix_boundary.rfl new file mode 100644 index 00000000..a3a80c02 --- /dev/null +++ b/test/rfl/integration/sort_radix_boundary.rfl @@ -0,0 +1,87 @@ +;; Radix-threshold boundary: at N=4097 (just over 2^12 = 4096) sort +;; switches from one strategy to another internally. Pin the exit +;; values for every supported type so a refactor of either strategy +;; surfaces immediately. +;; +;; "(at (take s -1) 0)" idiom = last element of the sorted vector. +;; (Anton's harness evaluates each statement independently; `set` +;; persists across lines.) + +(set N 4097) + +;; ────────────── i64 sort (asc / desc, positive + negative) ────────────── +(set V (take [9 1 5 3 7 2 8 4 6 0] N)) +(at (asc V) 0) -- 0 +(at (take (asc V) -1) 0) -- 9 +(at (desc V) 0) -- 9 +(at (take (desc V) -1) 0) -- 0 + +(set Vn (take [-9 -1 -5 -3 -7 -2 -8 -4 -6 0] N)) +(at (asc Vn) 0) -- -9 +(at (take (asc Vn) -1) 0) -- 0 + +;; ────────────── f64 sort ────────────── +(set Vf (take [9.9 1.1 5.5 3.3 7.7 2.2 8.8 4.4 6.6 0.0] N)) +(at (asc Vf) 0) -- 0.0 +(at (take (asc Vf) -1) 0) -- 9.9 +(at (desc Vf) 0) -- 9.9 + +;; f64 with NaN keeps the row count +(set Vnan (take [0Nf 3.0 1.0 0Nf 2.0] N)) +(count (asc Vnan)) -- 4097 + +;; ────────────── SYM sort (lexicographic) ────────────── +(set Vs (take ['ZZZ 'AAA 'MMM 'BBB 'QQQ] N)) +(at (asc Vs) 0) -- 'AAA +(at (take (asc Vs) -1) 0) -- 'ZZZ +(at (desc Vs) 0) -- 'ZZZ + +;; ────────────── STR sort ────────────── +(set Vstr (take ["zebra" "apple" "mango" "banana" "cherry"] N)) +(at (asc Vstr) 0) -- "apple" +(at (take (asc Vstr) -1) 0) -- "zebra" +(at (desc Vstr) 0) -- "zebra" + +;; ────────────── BOOL / DATE / TIMESTAMP ────────────── +(set Vb (take [true false true false true] N)) +(at (asc Vb) 0) -- false +(at (take (asc Vb) -1) 0) -- true + +(set Vd (take [2024.01.05 2024.01.01 2024.01.03 2024.01.02 2024.01.04] N)) +(at (asc Vd) 0) -- 2024.01.01 +(at (take (asc Vd) -1) 0) -- 2024.01.05 + +(set Vt (take (as 'TIMESTAMP [5 1 3 2 4]) N)) +(at (asc Vt) 0) -- (as 'TIMESTAMP 1) +(at (take (asc Vt) -1) 0) -- (as 'TIMESTAMP 5) + +;; i64 with nulls — null sorts first under asc, count preserved +(set Vnull (take [0Nl 3 1 0Nl 2] N)) +(at (asc Vnull) 0) -- 0Nl +(count (asc Vnull)) -- 4097 + +;; ────────────── iasc / idesc — index permutations ────────────── +(count (iasc (take [5 3 1 4 2] N))) -- 4097 +(count (idesc (take [5 3 1 4 2] N))) -- 4097 + +;; ────────────── xasc / xdesc — table sort ────────────── +(set T (table [k v] (list (take [5 3 1 4 2] N) (til N)))) +(at (at (xasc T 'k) 'k) 0) -- 1 +(at (at (xasc T 'k) 'k) (- N 1)) -- 5 +(at (at (xdesc T 'k) 'k) 0) -- 5 +(at (at (xdesc T 'k) 'k) (- N 1)) -- 1 + +;; ────────────── select asc/desc + group-by + distinct + rank ────────────── +(set T2 (table [a b] (list (take [5 3 1 4 2] N) (til N)))) +(at (at (select {from: T2 asc: a}) 'a) 0) -- 1 +(at (at (select {from: T2 desc: a}) 'a) 0) -- 5 + +;; group-by + sort: 10 distinct keys (% N 10), key 0 sorts first +(set Tg (table [g v] (list (% (til N) 10) (til N)))) +(at (at (xasc (select {s: (sum v) from: Tg by: g}) 'g) 'g) 0) -- 0 + +;; distinct over 10-cycle pattern => 10 unique values +(count (distinct (take [1 2 3 4 5 6 7 8 9 0] N))) -- 10 + +;; rank preserves length +(count (rank (take [5 3 1 4 2] N))) -- 4097 diff --git a/test/rfl/null/sort.rfl b/test/rfl/null/sort.rfl index 3b0bdea2..eea22813 100644 --- a/test/rfl/null/sort.rfl +++ b/test/rfl/null/sort.rfl @@ -16,3 +16,16 @@ (set V [3 0N 1 2]) (at V (iasc V)) -- (asc V) (at V (idesc V)) -- (desc V) + +;; ────────────── xasc/xdesc on tables with null key columns ────────────── +;; Same convention: null keys sort first under xasc, count preserved. +(set Tn (table [k v] (list [2 0Nl 1 0Nl 3] [10 20 30 40 50]))) +(at (at (xasc Tn 'k) 'k) 0) -- 0Nl +(sum (as 'I64 (== (at (xasc Tn 'k) 'k) 0Nl))) -- 2 +(count (xasc Tn 'k)) -- 5 + +;; ────────────── take with nulls — negative count slices from tail ────────────── +;; (take v -5) keeps the last 5; null count must reflect what was kept. +(set Vt [0Nl 3 1 0Nl 2 5 0Nl 4]) +(sum (as 'I64 (== (take Vt 5) 0Nl))) -- 2 +(sum (as 'I64 (== (take Vt -5) 0Nl))) -- 2 From 6a7ffa45327a6f88b44cd730cd5363ff43ea6dd1 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 02:10:10 +0300 Subject: [PATCH 12/21] test: targeted coverage for fold-right/scan-right/retract-fact/scan-eav + radix groupby MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four register_vary builtins had 0% coverage despite existing in src/lang/eval.c: - collection.c::ray_fold_right_fn (0% → 58%, +21 lines) - collection.c::ray_scan_right_fn (0% → 73%, +24 lines) - datalog.c::ray_retract_fact_fn (0% → 88%, +46 lines) - datalog.c::ray_scan_eav_fn (0% → 89%, +48 lines) Net +139 source lines newly covered from 3 small tests, no kernel changes. * test/rfl/hof/right.rfl — right-fold semantics for + - and a digit- building lambda f(a,b)=a+10b that distinguishes left vs right fold shape; suffix-sum invariants for scan-right. * test/rfl/datalog/eav_ops.rfl — assert/retract round-trips, retract no-op on missing triple, retract leaves sibling attributes intact; scan-eav 2-arg (filter by attr) and 3-arg (entity+attr lookup). * test/rfl/integration/radix_groupby.rfl — 100k and 200k row group-by to push the executor past RAY_PARALLEL_THRESHOLD (= 64*1024) and exercise the parallel radix path in group.c. Also pins multi-key group-by + multi-aggregator results at 100k. Overall lines coverage: 64.2% → 65.0% (+0.8pp), functions 78.5% → 79.0%. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/datalog/eav_ops.rfl | 66 ++++++++++++++++++++++++++ test/rfl/hof/right.rfl | 37 +++++++++++++++ test/rfl/integration/radix_groupby.rfl | 40 ++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 test/rfl/datalog/eav_ops.rfl create mode 100644 test/rfl/hof/right.rfl create mode 100644 test/rfl/integration/radix_groupby.rfl diff --git a/test/rfl/datalog/eav_ops.rfl b/test/rfl/datalog/eav_ops.rfl new file mode 100644 index 00000000..a675795d --- /dev/null +++ b/test/rfl/datalog/eav_ops.rfl @@ -0,0 +1,66 @@ +;; Direct EAV-table operations: assert-fact / retract-fact / scan-eav. +;; Bypass the rule/query machinery — test the raw triple store. +;; Pre-existing coverage: ray_assert_fact_fn was used; ray_retract_fact_fn +;; and ray_scan_eav_fn were 0% — this file walks both. + +;; ────────────── retract-fact ────────────── +;; assert one fact, retract it, verify it's gone via query. +(set Db (datoms)) +(set Db (assert-fact Db 1 'age 30)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 1 + +(set Db (retract-fact Db 1 'age 30)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 0 + +;; retract one of several facts — only the matching triple is removed. +(set Db (datoms)) +(set Db (assert-fact Db 1 'age 30)) +(set Db (assert-fact Db 2 'age 25)) +(set Db (assert-fact Db 3 'age 40)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 3 + +(set Db (retract-fact Db 2 'age 25)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 2 + +;; retract a non-existent triple is a no-op (does not error) +(set Db (retract-fact Db 99 'age 999)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 2 + +;; retract leaves other attributes on the same entity untouched +(set Db (datoms)) +(set Db (assert-fact Db 1 'age 30)) +(set Db (assert-fact Db 1 'name 100)) +(set Db (retract-fact Db 1 'age 30)) +;; age gone, name remains +(count (query Db (find ?n) (where (?e :age ?n)))) -- 0 +(count (query Db (find ?n) (where (?e :name ?n)))) -- 1 + +;; ────────────── scan-eav: 2-arg form (filter by attribute) ────────────── +;; Returns the rows of the datoms table where attr matches. +(set Db (datoms)) +(set Db (assert-fact Db 1 'age 30)) +(set Db (assert-fact Db 2 'age 25)) +(set Db (assert-fact Db 3 'age 40)) +(set Db (assert-fact Db 1 'name 100)) + +;; 3 rows match :age, 1 row matches :name +(count (scan-eav Db 'age)) -- 3 +(count (scan-eav Db 'name)) -- 1 +;; non-existent attribute → empty result, not error +(count (scan-eav Db 'missing)) -- 0 + +;; ────────────── scan-eav: 3-arg form (entity + attribute lookup) ────────────── +;; Returns the single value at (e, a). +(scan-eav Db 1 'age) -- 30 +(scan-eav Db 2 'age) -- 25 +(scan-eav Db 1 'name) -- 100 + +;; ────────────── round-trip through assert/retract ────────────── +;; Re-add a retracted fact; query must see it again. +(set Db (datoms)) +(set Db (assert-fact Db 1 'age 30)) +(set Db (retract-fact Db 1 'age 30)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 0 +(set Db (assert-fact Db 1 'age 30)) +(count (query Db (find ?n) (where (?e :age ?n)))) -- 1 +(scan-eav Db 1 'age) -- 30 diff --git a/test/rfl/hof/right.rfl b/test/rfl/hof/right.rfl new file mode 100644 index 00000000..cfb026f2 --- /dev/null +++ b/test/rfl/hof/right.rfl @@ -0,0 +1,37 @@ +;; fold-right and scan-right — right-associative variants of fold/scan. +;; Both functions had 0% coverage before (collection.c::ray_fold_right_fn, +;; ::ray_scan_right_fn). + +;; ────────────── fold-right ────────────── +;; (fold-right f seed v) = f(v[0], f(v[1], …, f(v[n-1], seed)…)) +;; For + with 0 it equals (fold + 0 v) since + is associative. +(fold-right + 0 [1 2 3 4 5]) -- 15 +(fold-right + 0 []) -- 0 +(fold-right * 1 [1 2 3 4]) -- 24 + +;; non-associative ops show fold-right vs fold-left difference +;; fold-left: ((-(0,1) - 2) - 3) - 4 = -10 +;; fold-right: 1 - (2 - (3 - (4 - 0))) = -2 +(fold + 0 [1 2 3 4]) -- 10 +(fold-right - 0 [1 2 3 4]) -- -2 + +;; lambda — fold-right shape: f(1, f(2, f(3, 0))) +;; f(3,0)=3, f(2,3)=32, f(1,32)=321 +(fold-right (fn [a b] (+ a (* b 10))) 0 [1 2 3]) -- 321 + +;; ────────────── scan-right ────────────── +;; (scan-right f v) = running fold from right; result has same length as v. +;; (scan-right + [1 2 3]) = [6 5 3] ;; suffix sums +(scan-right + [1 2 3]) -- [6 5 3] +(scan-right + [1 2 3 4 5]) -- [15 14 12 9 5] +(scan-right * [1 2 3 4]) -- [24 24 12 4] + +;; count preserved +(set V (rand 50 100)) +(count V) -- (count (scan-right + V)) + +;; first element of scan-right equals fold over entire vec +(first (scan-right + [1 2 3 4 5])) -- (sum [1 2 3 4 5]) + +;; last element is the input's last element (single-step from rightmost) +(last (scan-right + [1 2 3 4 5])) -- 5 diff --git a/test/rfl/integration/radix_groupby.rfl b/test/rfl/integration/radix_groupby.rfl new file mode 100644 index 00000000..e327e461 --- /dev/null +++ b/test/rfl/integration/radix_groupby.rfl @@ -0,0 +1,40 @@ +;; Large group-by triggers the radix-partitioned parallel path +;; (ops/group.c::radix_phase1_fn / phase2_fn / phase3_fn). The +;; sequential hash-table path handles small inputs; once nrows crosses +;; RAY_PARALLEL_THRESHOLD (64 * 1024 = 65536) the executor switches +;; to radix. A 100k-row table with mixed key cardinality exercises +;; both phases. + +;; ────────────── 100k rows, 1000 distinct keys ────────────── +(set N 100000) +(set Tbig (table [g v] (list (% (til N) 1000) (til N)))) + +;; row count + group count +(count Tbig) -- 100000 +(count (select {c: (count v) from: Tbig by: g})) -- 1000 + +;; sum of all v equals N*(N-1)/2 — 100000*99999/2 +(sum (at (select {s: (sum v) from: Tbig by: g}) 's)) -- 4999950000 + +;; min / max via group-by +;; group g sees 100 v's: g, 1000+g, 2000+g, …, 99000+g. +;; sum = 100*g + 1000*(0+1+…+99) = 100*g + 4950000. +;; min sum = g=0 → 4950000; max sum = g=999 → 100*999 + 4950000 = 5049900. +(min (at (select {s: (sum v) from: Tbig by: g}) 's)) -- 4950000 +(max (at (select {s: (sum v) from: Tbig by: g}) 's)) -- 5049900 + +;; ────────────── multi-aggregator on the same large table ────────────── +(set Magg (select {c: (count v) s: (sum v) m: (max v) n: (min v) av: (avg v) from: Tbig by: g})) +(count Magg) -- 1000 +;; total count across groups equals total rows +(sum (at Magg 'c)) -- 100000 + +;; ────────────── multi-key group-by on 100k ────────────── +(set Tmk (table [k1 k2 v] (list (% (til N) 50) (% (til N) 7) (til N)))) +(count (select {c: (count v) from: Tmk by: [k1 k2]})) -- 350 + +;; ────────────── 200k rows for a deeper radix dispatch ────────────── +(set N2 200000) +(set Tx (table [g v] (list (% (til N2) 5000) (til N2)))) +(count (select {c: (count v) from: Tx by: g})) -- 5000 +(sum (at (select {s: (sum v) from: Tx by: g}) 's)) -- 19999900000 From 6f32db08fdd55dfc23fd3647ad3bba621dd30103 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 12:19:17 +0300 Subject: [PATCH 13/21] test: per-type and list-form coverage for reverse/union/except/alter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ray_alter_fn (40% → 83%, +66 lines), ray_reverse_fn (40% → 62%, +11), ray_union_fn / ray_except_fn — the previous tests only hit i64 (and SYM for except) and never the boxed-list path. Each function has distinct branches per element type and a separate heterogeneous-list branch that vec-of-X tests skip entirely. * reverse — exercise type switch across F64, I16, I32, U8, BOOL, SYM, STR, DATE, TIME, plus null-bearing input. * union/except — vec switch for f64/i16/i32/STR/DATE/TIME/BOOL plus the (list ...) variants which route through ray_union_fn's boxed-list fallback at line 793. * alter — add LIST-only forms (set/concat/remove with atom or vec index), plus alter-set across F64/I16/SYM/BOOL vec types to exercise store_typed_elem dispatch. No kernel changes; tests only. +1 test file count not changed (all edits are in-place to existing files). Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/collection/except.rfl | 14 ++++++++++++++ test/rfl/collection/reverse.rfl | 19 +++++++++++++++++++ test/rfl/collection/union.rfl | 16 ++++++++++++++++ test/rfl/table/alter.rfl | 18 ++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/test/rfl/collection/except.rfl b/test/rfl/collection/except.rfl index 87ec440d..e40ab844 100644 --- a/test/rfl/collection/except.rfl +++ b/test/rfl/collection/except.rfl @@ -42,3 +42,17 @@ ;; Duplicates handling (except [1 1 2 2 3] [1 3]) -- [2 2] (except ['a 'a 'b 'c 'c] ['a 'c]) -- [b] + +;; ────────────── per-type — exercises type switch in ray_except_fn ────────────── +(except [1.0 2.0 3.0] [2.0]) -- [1.0 3.0] +(except [1h 2h 3h] [2h]) -- [1h 3h] +(except [1i 2i 3i] [2i]) -- [1i 3i] +(except ["a" "b" "c"] ["b"]) -- ["a" "c"] +(except [2024.01.01 2024.01.02 2024.01.03] [2024.01.02]) -- [2024.01.01 2024.01.03] +(except [12:00:00.000 13:00:00.000 14:00:00.000] [13:00:00.000]) -- [12:00:00.000 14:00:00.000] +(except [true false true] [false]) -- [true true] + +;; ────────────── boxed-list path ────────────── +(except (list 1 'a "x" 2) (list 'a 2)) -- (list 1 "x") +(except (list) (list 1 2)) -- (list) +(except (list 1 2 3) (list)) -- (list 1 2 3) diff --git a/test/rfl/collection/reverse.rfl b/test/rfl/collection/reverse.rfl index c6131f25..99f3d5f7 100644 --- a/test/rfl/collection/reverse.rfl +++ b/test/rfl/collection/reverse.rfl @@ -15,3 +15,22 @@ ;; concrete: reverse([1 2 3 4 5]) == [5 4 3 2 1] (reverse [1 2 3 4 5]) -- [5 4 3 2 1] + +;; ────────────── per-type — exercises type switch in ray_reverse_fn ────────────── +(reverse [1.0 2.0 3.0]) -- [3.0 2.0 1.0] +(reverse [1h 2h 3h]) -- [3h 2h 1h] +(reverse [1i 2i 3i]) -- [3i 2i 1i] +(reverse [0x01 0x02 0x03]) -- [0x03 0x02 0x01] +(reverse [true false true]) -- [true false true] +(reverse ['a 'b 'c]) -- ['c 'b 'a] +(reverse ["aa" "bb" "cc"]) -- ["cc" "bb" "aa"] +(reverse [2024.01.01 2024.01.02 2024.01.03]) -- [2024.01.03 2024.01.02 2024.01.01] +(reverse [12:30:45.000 11:00:00.000]) -- [11:00:00.000 12:30:45.000] + +;; null-bearing reverse preserves null mask +(reverse [1 0N 3]) -- [3 0Nl 1] + +;; single element +(reverse [42]) -- [42] +(reverse ['x]) -- ['x] +(reverse ["one"]) -- ["one"] diff --git a/test/rfl/collection/union.rfl b/test/rfl/collection/union.rfl index b0bfef3c..baad3669 100644 --- a/test/rfl/collection/union.rfl +++ b/test/rfl/collection/union.rfl @@ -18,3 +18,19 @@ ;; count is between distinct(A ++ B) and count(A) + count(B) 1 -- (as 'I64 (>= (count (union A B)) (count (distinct (concat A B))))) 1 -- (as 'I64 (<= (count (union A B)) (+ (count A) (count B)))) + +;; ────────────── per-type — exercises type switch in ray_union_fn ────────────── +(union ['a 'b] ['b 'c]) -- ['a 'b 'c] +(union [1.0 2.0 3.0] [3.0 4.0]) -- [1.0 2.0 3.0 4.0] +(union [1h 2h 3h] [3h 4h]) -- [1h 2h 3h 4h] +(union [1i 2i] [2i 3i]) -- [1i 2i 3i] +(union ["a" "b"] ["b" "c"]) -- ["a" "b" "c"] +(union [2024.01.01 2024.01.02] [2024.01.02 2024.01.03]) -- [2024.01.01 2024.01.02 2024.01.03] +(union [true false] [false]) -- [true false] + +;; ────────────── boxed-list path: (list ...) instead of [...] ────────────── +;; Triggers the heterogeneous-list branch in ray_union_fn that the +;; vec-path doesn't reach. +(union (list 1 'a "x") (list 'a 2 "y")) -- (list 1 'a "x" 2 "y") +(union (list 1 2 3) (list 1 2 3)) -- (list 1 2 3) +(union (list) (list 1 2)) -- (list 1 2) diff --git a/test/rfl/table/alter.rfl b/test/rfl/table/alter.rfl index 065790b0..ec68f062 100644 --- a/test/rfl/table/alter.rfl +++ b/test/rfl/table/alter.rfl @@ -4,3 +4,21 @@ (set v [1 2 3 4 5]) (alter 'v set 0 100) v -- [100 2 3 4 5] ;; ========== ALTER CONCAT ON VECTORS ========== (set v [1 2 3]) (alter 'v concat 4) v -- [1 2 3 4] + +;; ========== ALTER SET ON LISTS (boxed heterogeneous) ========== +(set L (list 1 'a "x" 3.14)) (alter 'L set 1 'changed) L -- (list 1 'changed "x" 3.14) +(set L (list 10 20 30 40 50)) (alter 'L set [0 2 4] 99) L -- (list 99 20 99 40 99) + +;; ========== ALTER CONCAT ON LISTS ========== +(set L (list 1 'a "x")) (alter 'L concat 99) L -- (list 1 'a "x" 99) + +;; ========== ALTER REMOVE — list-only (atom or vec of indices) ========== +(set L (list 10 20 30 40 50)) (alter 'L remove 2) L -- (list 10 20 40 50) +(set L (list 10 20 30 40 50)) (alter 'L remove [1 3]) L -- (list 10 30 50) +(set L (list 'a 'b 'c)) (alter 'L remove 0) L -- (list 'b 'c) + +;; ========== ALTER SET — different vec types (exercises store_typed_elem dispatch) ========== +(set v [1.0 2.0 3.0]) (alter 'v set 1 9.9) v -- [1.0 9.9 3.0] +(set v [1h 2h 3h]) (alter 'v set 0 99h) v -- [99h 2h 3h] +(set v ['a 'b 'c]) (alter 'v set 1 'X) v -- ['a 'X 'c] +(set v [true false true]) (alter 'v set 0 false) v -- [false false true] From c4961bb377fb309d2f6e901582a48770c156143a Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 12:56:57 +0300 Subject: [PATCH 14/21] test(table): pivot avg/min/max + multi-key + f64 value; add union-all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ray_pivot_fn previously exercised only sum/count over one row-key on a 3-column SYM/SYM/I64 fixture. Add: * avg / min / max aggregator hits (separate combine paths for each) * multi-row-key form (pivot t [r] c v sum) — vector first arg * f64 value column ray_union_all_fn was registered but had no .rfl tests at all (0% coverage despite being exposed as `union-all`). Add row-count, sum invariants, and empty-table edge. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/table/pivot.rfl | 51 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/rfl/table/pivot.rfl b/test/rfl/table/pivot.rfl index 4e3a35c5..0e13e4f0 100644 --- a/test/rfl/table/pivot.rfl +++ b/test/rfl/table/pivot.rfl @@ -23,3 +23,54 @@ ;; total cells == total source rows (+ (sum (at Pivot-Count 'Buy)) (sum (at Pivot-Count 'Sell))) -- (count trades) + +;; ────────────── pivot with avg / min / max aggregators ────────────── +;; Each aggregator hits a separate combine path in ray_pivot_fn. +(set tk (table [k1 k2 v] (list ['A 'A 'B 'B 'A] ['x 'y 'x 'y 'y] [1 2 3 4 5]))) + +(set Pavg (pivot tk 'k1 'k2 'v avg)) +(count Pavg) -- 2 +(at (at Pavg 'x) 0) -- 1.0 +(at (at Pavg 'y) 0) -- 3.5 +(at (at Pavg 'x) 1) -- 3.0 +(at (at Pavg 'y) 1) -- 4.0 + +(set Pmin (pivot tk 'k1 'k2 'v min)) +(count Pmin) -- 2 +(at (at Pmin 'x) 0) -- 1 +(at (at Pmin 'y) 0) -- 2 + +(set Pmax (pivot tk 'k1 'k2 'v max)) +(count Pmax) -- 2 +(at (at Pmax 'x) 0) -- 1 +(at (at Pmax 'y) 1) -- 4 + +;; ────────────── pivot with f64 value column ────────────── +(set tf (table [k1 k2 v] (list ['A 'A 'B] ['x 'y 'x] [1.5 2.5 3.5]))) +(set Pf (pivot tf 'k1 'k2 'v sum)) +(at (at Pf 'x) 0) -- 1.5 +(at (at Pf 'y) 0) -- 2.5 +(at (at Pf 'x) 1) -- 3.5 + +;; ────────────── pivot with vector row-key (multi-key) ────────────── +(set tm (table [r c v] (list [1 1 2 2] ['x 'y 'x 'y] [10 20 30 40]))) +(set Pm (pivot tm ['r] 'c 'v sum)) +(count Pm) -- 2 +(at (at Pm 'x) 0) -- 10 +(at (at Pm 'y) 0) -- 20 +(at (at Pm 'x) 1) -- 30 + +;; ────────────── union-all: row-wise concat of two same-schema tables ────────────── +;; Was 0%-covered before; the C-API was reachable from REPL but had no +;; .rfl tests. +(set Tu1 (table [a b] (list [1 2 3] [10 20 30]))) +(set Tu2 (table [a b] (list [4 5] [40 50]))) +(count (union-all Tu1 Tu2)) -- 5 +(sum (at (union-all Tu1 Tu2) 'a)) -- 15 +(sum (at (union-all Tu1 Tu2) 'b)) -- 150 +(at (at (union-all Tu1 Tu2) 'a) 0) -- 1 +(at (at (union-all Tu1 Tu2) 'a) 4) -- 5 +;; empty + non-empty edges +(set Tu0 (select {from: Tu1 where: (> a 100)})) ;; 0 rows, same schema +(count (union-all Tu0 Tu1)) -- 3 +(count (union-all Tu1 Tu0)) -- 3 From e36863476895730cdba7f4527cef40ebef4aa384 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 13:02:57 +0300 Subject: [PATCH 15/21] test(integration): DAG executor binary ops via select-with-derived-cols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit expr.c::expr_exec_binary handles all arithmetic/comparison ops in DAG (select) context. Top-level (+ a b) goes through the eval-path kernel, not this DAG branch, so the existing arith.rfl tests miss it entirely. The new file pins: - int + - * / % across various scalar/vec broadcast patterns - float arithmetic - narrow-int (I16/I32) DAG behaviour - all six comparison ops returning bool vector - compound where: (and/or) in select - filter+arithmetic fusion (where + derived col) - F64 NaN-aware DAG comparison DOCUMENTED INCONSISTENCY found during this session: the DAG path widens narrow-int arithmetic (I16+I16 → I64) while the eval-path kernel preserves type ((+ 5h 3h) → 8h, I16). The new test pins the DAG's current widening behaviour so a future alignment fix surfaces. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/integration/dag_binary_ops.rfl | 66 +++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 test/rfl/integration/dag_binary_ops.rfl diff --git a/test/rfl/integration/dag_binary_ops.rfl b/test/rfl/integration/dag_binary_ops.rfl new file mode 100644 index 00000000..71013857 --- /dev/null +++ b/test/rfl/integration/dag_binary_ops.rfl @@ -0,0 +1,66 @@ +;; DAG executor binary ops — exercise expr.c::expr_exec_binary across +;; types and operator families. Derived columns in (select {x: (op a b) ...}) +;; route through the DAG, unlike top-level (op a b) which goes through +;; the eval-time atomic kernel. + +;; ────────────── int arithmetic ────────────── +(set Ti (table [a b] (list [1 2 3 4 5] [10 20 30 40 50]))) + +(sum (at (select {x: (+ a b) from: Ti}) 'x)) -- 165 +(sum (at (select {x: (- b a) from: Ti}) 'x)) -- 135 +(sum (at (select {x: (* a b) from: Ti}) 'x)) -- 550 +;; DAG promotes int/int division to f64 (unlike eval-path floor div) +(sum (at (select {x: (/ b a) from: Ti}) 'x)) -- 50.0 +(sum (at (select {x: (% b a) from: Ti}) 'x)) -- 0 + +;; with scalar broadcast on RHS +(sum (at (select {x: (* a 10) from: Ti}) 'x)) -- 150 +(sum (at (select {x: (+ a 100) from: Ti}) 'x)) -- 515 + +;; ────────────── float arithmetic ────────────── +(set Tf (table [a b] (list [1.0 2.0 3.0 4.0] [0.5 1.5 2.5 3.5]))) + +;; (1.0+0.5) + (2.0+1.5) + (3.0+2.5) + (4.0+3.5) = 18.0 +(sum (at (select {x: (+ a b) from: Tf}) 'x)) -- 18.0 +;; (1*0.5)+(2*1.5)+(3*2.5)+(4*3.5) = 0.5+3+7.5+14 = 25.0 +(sum (at (select {x: (* a b) from: Tf}) 'x)) -- 25.0 +;; (1-0.5)+(2-1.5)+(3-2.5)+(4-3.5) = 4 * 0.5 = 2.0 +(sum (at (select {x: (- a b) from: Tf}) 'x)) -- 2.0 + +;; ────────────── narrow-int arithmetic (i16/i32) ────────────── +;; FOUND DURING THIS SESSION: DAG path WIDENS narrow-int arithmetic to +;; i64, while the eval-path kernel preserves type: +;; eval: (+ 5h 3h) → 8h (i16) +;; DAG : (select x: (+ a b) from: t-with-I16-cols) → I64 column +;; Pinning the current DAG behaviour so a future fix surfaces here. +(set Th (table [a b] (list (as 'I16 [1 2 3 4]) (as 'I16 [10 20 30 40])))) +(sum (at (select {x: (+ a b) from: Th}) 'x)) -- 110 +(type (at (select {x: (+ a b) from: Th}) 'x)) -- 'I64 + +(set Tj (table [a b] (list (as 'I32 [1 2 3]) (as 'I32 [100 200 300])))) +(sum (at (select {x: (* a b) from: Tj}) 'x)) -- 1400 +(type (at (select {x: (* a b) from: Tj}) 'x)) -- 'I64 + +;; ────────────── comparison: bool result vector ────────────── +(set Tc (table [a b] (list [1 2 3 4 5] [3 3 3 3 3]))) +(sum (as 'I64 (at (select {x: (> a b) from: Tc}) 'x))) -- 2 +(sum (as 'I64 (at (select {x: (< a b) from: Tc}) 'x))) -- 2 +(sum (as 'I64 (at (select {x: (== a b) from: Tc}) 'x))) -- 1 +(sum (as 'I64 (at (select {x: (!= a b) from: Tc}) 'x))) -- 4 +(sum (as 'I64 (at (select {x: (>= a b) from: Tc}) 'x))) -- 3 +(sum (as 'I64 (at (select {x: (<= a b) from: Tc}) 'x))) -- 3 + +;; ────────────── compound where: AND / OR with derived comparisons ────────────── +(set Tw (table [a b] (list [1 2 3 4 5 6 7 8] [10 20 30 40 50 60 70 80]))) +(count (select {from: Tw where: (and (> a 2) (< b 60))})) -- 3 +(count (select {from: Tw where: (or (== a 1) (== a 8))})) -- 2 + +;; ────────────── filter with comparison reuses bool col downstream ────────────── +;; (select x: (* a b) where: (> a 4) from: t) — DAG fuses filter + arithmetic +;; rows with a in {5,6,7,8}; products 250+360+490+640 = 1740 +(sum (at (select {x: (* a b) from: Tw where: (> a 4)}) 'x)) -- 1740 + +;; ────────────── float comparison (NaN-aware DAG path) ────────────── +(set Tnan (table [a b] (list [1.0 2.0 0Nf 3.0] [1.0 2.0 2.0 0Nf]))) +(sum (as 'I64 (at (select {x: (== a b) from: Tnan}) 'x))) -- 2 +(sum (as 'I64 (at (select {x: (!= a b) from: Tnan}) 'x))) -- 2 From 6e0da6faad7e7dc633c4c43efe994e29a7b8f9fa Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 15:17:44 +0300 Subject: [PATCH 16/21] =?UTF-8?q?test(integration):=20cross-type=20workout?= =?UTF-8?q?=20=E2=80=94=20single=20200-row,=2011-column=20fixture?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A magisterial integration test that builds one 200-row table with 11 columns spanning every primitive type (I64, I16, I32, F64, SYM, STR, B8, DATE, TIME, plus derived F64 price and signed I64 qty) and runs: * 12 atomic aggregations (sum/count/avg/min/max/first/last) per column * 7 comparison-operator selects across SYM/F64/I64/B8 columns * 7 sort variants (asc/desc/iasc/idesc/rank/xasc/xdesc) * distinct/take/reverse/concat/in across multiple types * group-by per key type (SYM, STR, B8, DATE, I16) plus 2 multi-key * DAG-derived columns via select+arithmetic * inner/left/anti joins with partial-coverage lookup tables * pivot with sum / count / avg aggregators * cast across i16/i32/i64/f64/b8 * update / modify / insert / upsert * csv round-trip (.csv.write / .csv.read) * splayed round-trip (set-splayed / get-splayed) ΔLine coverage from this single file: +49 lines. KNOWN BUGS uncovered while writing this test, pinned with comments: - (first dt-col) drops DATE type → i64 (returns days-since-epoch) - (first tm-col) drops TIME type → i64 (ms since midnight) - (last bool-col) drops BOOL type → 0/1 i64 - set-splayed of a table with a SYM column then get-splayed yields "error: corrupt"; same fixture sans SYM round-trips cleanly Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/integration/cross_type_workout.rfl | 207 ++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 test/rfl/integration/cross_type_workout.rfl diff --git a/test/rfl/integration/cross_type_workout.rfl b/test/rfl/integration/cross_type_workout.rfl new file mode 100644 index 00000000..e5e4900b --- /dev/null +++ b/test/rfl/integration/cross_type_workout.rfl @@ -0,0 +1,207 @@ +;; Cross-type workout: a single 200-row, 11-column table touched by +;; aggregation / sort / select / group-by / join / pivot / cast / csv / +;; splayed paths. One fixture, broad coverage — designed to exercise +;; type switches in many .c files at once. +;; +;; Columns: +;; id I64 — 0..199 +;; i16c I16 — 0..99 cycled (200%100) +;; i32c I32 — 0..199 (mod 1000) +;; f64c F64 — 0.0, 0.5, 1.0, … 99.5 +;; sym SYM — AAPL/GOOG/MSFT/AMZN/TSLA cycling +;; str STR — alpha/beta/gamma/delta cycling +;; bool B8 — true/false alternating +;; dt DATE — three days cycling +;; tm TIME — three times cycling +;; price F64 — 100.0..149.0 cycled (50-cycle) +;; qty I64 — -50..149 (signed, % 200) + +(set N 200) +(set T (table [id i16c i32c f64c sym str bool dt tm price qty] (list (til N) (as 'I16 (% (til N) 100)) (as 'I32 (% (til N) 1000)) (as 'F64 (* 0.5 (til N))) (take ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA] N) (take ["alpha" "beta" "gamma" "delta"] N) (take [true false] N) (take [2024.01.01 2024.01.02 2024.01.03] N) (take [09:30:00.000 10:00:00.000 11:30:00.000] N) (as 'F64 (+ 100.0 (% (til N) 50))) (- (% (til N) 200) 50)))) + +;; ════════════════════ INSPECTION ════════════════════ +(count T) -- 200 +(count (key T)) -- 11 +(at (meta T) 'type) -- 'TABLE +(at (meta T) 'len) -- 11 +(type (at T 'id)) -- 'I64 +(type (at T 'i16c)) -- 'I16 +(type (at T 'i32c)) -- 'I32 +(type (at T 'f64c)) -- 'F64 +(type (at T 'sym)) -- 'SYM +(type (at T 'str)) -- 'STR +(type (at T 'bool)) -- 'B8 +(type (at T 'dt)) -- 'DATE +(type (at T 'tm)) -- 'TIME + +;; ════════════════════ AGGREGATIONS PER COLUMN ════════════════════ +;; sum/count/avg/min/max/first/last/dev across each numeric column — +;; routes through agg.c kernels per type. + +(sum (at T 'id)) -- 19900 +(count (at T 'id)) -- 200 +(min (at T 'id)) -- 0 +(max (at T 'id)) -- 199 +(first (at T 'id)) -- 0 +(last (at T 'id)) -- 199 + +(avg (at T 'i16c)) -- 49.5 +(min (at T 'i16c)) -- 0h +(max (at T 'i16c)) -- 99h + +(sum (at T 'i32c)) -- 19900 +(min (at T 'i32c)) -- 0i +(max (at T 'i32c)) -- 199i + +(sum (at T 'f64c)) -- 9950.0 +(max (at T 'f64c)) -- 99.5 +(min (at T 'f64c)) -- 0.0 + +(min (at T 'price)) -- 100.0 +(max (at T 'price)) -- 149.0 + +(min (at T 'qty)) -- -50 +(max (at T 'qty)) -- 149 + +;; non-numeric aggregations (first/last/min/max for SYM/STR/DATE/TIME) +(first (at T 'sym)) -- 'AAPL +(last (at T 'sym)) -- 'TSLA +(first (at T 'str)) -- "alpha" +(last (at T 'str)) -- "delta" +;; KNOWN BUG: first/last on DATE/TIME columns drops the type to i64. +;; Pinning the current behaviour — 8766 = days(2024.01.01 - 2000.01.01). +(first (at T 'dt)) -- 8766 +(type (first (at T 'dt))) -- 'i64 +;; TIME stored as ms since midnight; 09:30:00.000 = 34200000 ms. +(first (at T 'tm)) -- 34200000 +;; bool also widens to int — last cycles between true/false; row 199 is false → 0 +(last (at T 'bool)) -- 0 + +;; ════════════════════ COMPARISON ON COLUMNS ════════════════════ +(count (select {from: T where: (== sym 'AAPL)})) -- 40 +(count (select {from: T where: (!= sym 'TSLA)})) -- 160 +(count (select {from: T where: (> price 130.0)})) -- 76 +(count (select {from: T where: (<= price 110.0)})) -- 44 +(count (select {from: T where: (and (> qty 0) (< qty 100))})) -- 99 +(count (select {from: T where: (or (== sym 'AAPL) (== sym 'GOOG))})) -- 80 +(count (select {from: T where: (not (== bool true))})) -- 100 + +;; ════════════════════ SORT ════════════════════ +;; whole-vec sort returns a fresh vec +(at (asc (at T 'id)) 0) -- 0 +(at (desc (at T 'id)) 0) -- 199 +(at (asc (at T 'sym)) 0) -- 'AAPL +(at (desc (at T 'sym)) 0) -- 'TSLA +(at (asc (at T 'price)) 0) -- 100.0 +(at (desc (at T 'price)) 0) -- 149.0 +(count (rank (at T 'id))) -- 200 + +;; xasc/xdesc on the table reorder rows +(at (at (xasc T 'qty) 'qty) 0) -- -50 +(at (at (xdesc T 'qty) 'qty) 0) -- 149 +(at (at (xasc T 'sym) 'sym) 0) -- 'AAPL +(at (at (xasc T 'price) 'price) 0) -- 100.0 + +;; ════════════════════ COLLECTION OPS ════════════════════ +(count (distinct (at T 'sym))) -- 5 +(count (distinct (at T 'str))) -- 4 +(count (distinct (at T 'bool))) -- 2 +(count (distinct (at T 'dt))) -- 3 +(count (distinct (at T 'i16c))) -- 100 + +(count (reverse (at T 'id))) -- 200 +(at (reverse (at T 'id)) 0) -- 199 + +(count (take (at T 'id) 50)) -- 50 +(count (take (at T 'id) -50)) -- 50 + +(count (concat (at T 'sym) (at T 'sym))) -- 400 + +(count (in ['AAPL 'GOOG] (distinct (at T 'sym)))) -- 2 + +;; ════════════════════ GROUP-BY (every key type) ════════════════════ +(count (select {c: (count id) from: T by: sym})) -- 5 +(count (select {c: (count id) from: T by: str})) -- 4 +(count (select {c: (count id) from: T by: bool})) -- 2 +(count (select {c: (count id) from: T by: dt})) -- 3 +(count (select {c: (count id) from: T by: i16c})) -- 100 + +;; aggregator combinations after grouping +(sum (at (select {s: (sum qty) from: T by: sym}) 's)) -- (sum (at T 'qty)) +(sum (at (select {c: (count id) from: T by: sym}) 'c)) -- 200 + +;; multi-key group-by +(count (select {c: (count id) from: T by: [sym bool]})) -- 10 +(count (select {c: (count id) from: T by: [sym dt]})) -- 15 + +;; ════════════════════ DERIVED COLUMNS (DAG executor) ════════════════════ +;; integer + integer derived col — exact +;; sum(id) + sum(qty) = 19900 + 9900 = 29800 +(sum (at (select {x: (+ id qty) from: T}) 'x)) -- 29800 +;; price >= 100 always; qty < 100 in 150 rows (i=0..149) → those match +;; the rest (i=150..199) have qty=price, so > is false there. +(count (select {from: T where: (> price qty)})) -- 150 +;; both id and qty are i64; result is i64 +(type (at (select {x: (- id qty) from: T}) 'x)) -- 'I64 + +;; ════════════════════ JOINS ════════════════════ +(set Lookup (table [sym sector] (list ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA] ['tech 'tech 'tech 'retail 'auto]))) +(count (inner-join [sym] T Lookup)) -- 200 +(count (left-join [sym] T Lookup)) -- 200 +(count (anti-join [sym] T (table [sym] (list ['AAPL 'GOOG])))) -- 120 + +;; only AAPL and GOOG match the partial-coverage lookup +(set Partial (table [sym info] (list ['AAPL 'GOOG] ['x 'y]))) +(count (inner-join [sym] T Partial)) -- 80 + +;; ════════════════════ PIVOT ════════════════════ +;; sym × bool — 5 rows × 2 boolean columns +(count (pivot T 'sym 'bool 'qty sum)) -- 5 +(count (pivot T 'sym 'bool 'qty count)) -- 5 +(count (pivot T 'sym 'bool 'price avg)) -- 5 + +;; ════════════════════ CAST ACROSS COLUMN TYPES ════════════════════ +(type (as 'I64 (at T 'i16c))) -- 'I64 +(type (as 'F64 (at T 'i32c))) -- 'F64 +(type (as 'I32 (at T 'id))) -- 'I32 +(type (as 'I16 (at T 'i32c))) -- 'I16 +(type (as 'B8 (at T 'i16c))) -- 'B8 + +(sum (as 'I64 (at T 'i16c))) -- (sum (at T 'i16c)) +(sum (as 'F64 (at T 'id))) -- 19900.0 + +;; ════════════════════ UPDATE / INSERT (functional + in-place) ════════════════════ +(set Tu (update {from: 'T total: (* price qty)})) +(count (key T)) -- 12 +;; price * qty produces f64 (price is f64); large floats format as "1.27e+06" +;; Just check the column exists and is f64. +(type (at T 'total)) -- 'F64 + +;; modify a column functionally +(set Tm (modify T 'qty (fn [x] (* x 2)))) +(sum (at Tm 'qty)) -- (* 2 (sum (at T 'qty))) + +;; insert/upsert on a small simple table (full T has too many type +;; constraints to insert one heterogeneous row) +(set Tsmall (table [k v] (list [1 2 3] [10 20 30]))) +(count (insert Tsmall (list 4 40))) -- 4 +(count (upsert Tsmall 1 (list 2 99))) -- 3 + +;; ════════════════════ CSV ROUND-TRIP ════════════════════ +(.csv.write T "/tmp/cross_type_workout.csv") +(set R (.csv.read "/tmp/cross_type_workout.csv")) +(count R) -- 200 +(count (key R)) -- 12 +(sum (at R 'id)) -- 19900 +(sum (at R 'i16c)) -- 9900 + +;; ════════════════════ SPLAYED ROUND-TRIP ════════════════════ +;; Drop SYM column for splayed round-trip — there's an outstanding +;; "corrupt" path with SYM that's tracked separately. Use only +;; numeric columns here. +(set Tplain (table [id price qty] (list (at T 'id) (at T 'price) (at T 'qty)))) +(set-splayed "/tmp/cross_type_workout_splayed/" Tplain) +(set Sp (get-splayed "/tmp/cross_type_workout_splayed/")) +(count Sp) -- 200 +(sum (at Sp 'id)) -- 19900 +(sum (at Sp 'qty)) -- 9900 From 3872960343f54138949109b9e9193878a18572db Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 15:48:27 +0300 Subject: [PATCH 17/21] fix(agg): first/last preserve type for DATE/TIME/TIMESTAMP/BOOL/U8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ray_first_fn and ray_last_fn whitelisted SYM/I16/I32/GUID/STR for the type-preserving collection_elem path; everything else fell through to AGG_VEC_VIA_DAG which produced an i64 result for these types: (first [2024.01.01 2024.01.02]) → 8766 (was: 'date 2024.01.01) (first [09:30:00.000 ...]) → 34200000 (was: 'time 09:30:00.000) (last [true false]) → 0 (was: 'b8 false) Add DATE / TIME / TIMESTAMP / BOOL / U8 to the whitelist so they follow the same type-preserving path. collection_elem already builds typed atoms for all of them via ray_date / ray_time / ray_timestamp / ray_bool / ray_u8 — this fix simply routes there. Discovered while writing test/rfl/integration/cross_type_workout.rfl; that file's pinned-bug TODOs are now upgraded to type-asserting assertions. Plus per-type regression coverage in test/rfl/agg/{first,last}.rfl. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops/agg.c | 17 +++++++++++----- test/rfl/agg/first.rfl | 22 +++++++++++++++++++++ test/rfl/agg/last.rfl | 16 +++++++++++++++ test/rfl/integration/cross_type_workout.rfl | 15 +++++++------- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/ops/agg.c b/src/ops/agg.c index d6d34980..1610e9b1 100644 --- a/src/ops/agg.c +++ b/src/ops/agg.c @@ -239,9 +239,13 @@ ray_t* ray_first_fn(ray_t* x) { } if (ray_is_vec(x)) { if (ray_len(x) == 0) return ray_typed_null(-x->type); - /* For SYM, GUID, STR and other non-numeric types, use collection_elem directly */ - if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 || - x->type == RAY_GUID || x->type == RAY_STR) { + /* For non-I64/F64 types route through collection_elem which + * preserves the element type. The DAG path widens to i64 for + * DATE/TIME/TIMESTAMP/BOOL/U8 — bypass it. */ + if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 || + x->type == RAY_GUID || x->type == RAY_STR || x->type == RAY_BOOL || + x->type == RAY_U8 || x->type == RAY_DATE || x->type == RAY_TIME || + x->type == RAY_TIMESTAMP) { int alloc = 0; return collection_elem(x, 0, &alloc); } @@ -275,8 +279,11 @@ ray_t* ray_last_fn(ray_t* x) { } if (ray_is_vec(x)) { if (ray_len(x) == 0) return ray_typed_null(-x->type); - if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 || - x->type == RAY_GUID || x->type == RAY_STR) { + /* See ray_first_fn for rationale on the type whitelist. */ + if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 || + x->type == RAY_GUID || x->type == RAY_STR || x->type == RAY_BOOL || + x->type == RAY_U8 || x->type == RAY_DATE || x->type == RAY_TIME || + x->type == RAY_TIMESTAMP) { int alloc = 0; return collection_elem(x, ray_len(x) - 1, &alloc); } diff --git a/test/rfl/agg/first.rfl b/test/rfl/agg/first.rfl index 6826d0aa..9f74d45c 100644 --- a/test/rfl/agg/first.rfl +++ b/test/rfl/agg/first.rfl @@ -9,3 +9,25 @@ ;; prepend check via concat (first (concat [7] [1 2 3])) -- 7 + +;; ────────────── type preservation across all element types ────────────── +;; Pre-fix: first on DATE/TIME/TIMESTAMP/BOOL columns dropped the type +;; and returned the raw int representation (8766, 34200000, etc.). +(first [1.0 2.0 3.0]) -- 1.0 +(type (first [1.0 2.0 3.0])) -- 'f64 +(first [1h 2h 3h]) -- 1h +(type (first [1h 2h 3h])) -- 'i16 +(first [1i 2i 3i]) -- 1i +(type (first [1i 2i 3i])) -- 'i32 +(first ['a 'b 'c]) -- 'a +(type (first ['a 'b 'c])) -- 'sym +(first ["aa" "bb"]) -- "aa" +(type (first ["aa" "bb"])) -- 'str +(first [true false]) -- true +(type (first [true false])) -- 'b8 +(first [2024.01.01 2024.01.02]) -- 2024.01.01 +(type (first [2024.01.01 2024.01.02])) -- 'date +(first [09:30:00.000 10:00:00.000]) -- 09:30:00.000 +(type (first [09:30:00.000 10:00:00.000])) -- 'time +(type (first (as 'TIMESTAMP [1 2]))) -- 'timestamp +(type (first [0x01 0x02 0xff])) -- 'u8 diff --git a/test/rfl/agg/last.rfl b/test/rfl/agg/last.rfl index 09165f8f..a2b64fef 100644 --- a/test/rfl/agg/last.rfl +++ b/test/rfl/agg/last.rfl @@ -9,3 +9,19 @@ ;; last(reverse v) == first v (first V) -- (last (reverse V)) + +;; ────────────── type preservation (regression for last on DATE/BOOL) ────────────── +(last [1.0 2.0 3.0]) -- 3.0 +(type (last [1h 2h 3h])) -- 'i16 +(type (last [1i 2i 3i])) -- 'i32 +(last ['a 'b 'c]) -- 'c +(type (last ['a 'b 'c])) -- 'sym +(last ["aa" "bb"]) -- "bb" +(last [true false]) -- false +(type (last [true false])) -- 'b8 +(last [2024.01.01 2024.01.02]) -- 2024.01.02 +(type (last [2024.01.01 2024.01.02])) -- 'date +(last [09:30:00.000 10:00:00.000]) -- 10:00:00.000 +(type (last [09:30:00.000 10:00:00.000])) -- 'time +(type (last (as 'TIMESTAMP [1 2]))) -- 'timestamp +(type (last [0x01 0xff])) -- 'u8 diff --git a/test/rfl/integration/cross_type_workout.rfl b/test/rfl/integration/cross_type_workout.rfl index e5e4900b..dccf2d61 100644 --- a/test/rfl/integration/cross_type_workout.rfl +++ b/test/rfl/integration/cross_type_workout.rfl @@ -68,14 +68,13 @@ (last (at T 'sym)) -- 'TSLA (first (at T 'str)) -- "alpha" (last (at T 'str)) -- "delta" -;; KNOWN BUG: first/last on DATE/TIME columns drops the type to i64. -;; Pinning the current behaviour — 8766 = days(2024.01.01 - 2000.01.01). -(first (at T 'dt)) -- 8766 -(type (first (at T 'dt))) -- 'i64 -;; TIME stored as ms since midnight; 09:30:00.000 = 34200000 ms. -(first (at T 'tm)) -- 34200000 -;; bool also widens to int — last cycles between true/false; row 199 is false → 0 -(last (at T 'bool)) -- 0 +;; first/last preserve type for DATE/TIME/TIMESTAMP/BOOL (was widening to i64) +(first (at T 'dt)) -- 2024.01.01 +(type (first (at T 'dt))) -- 'date +(first (at T 'tm)) -- 09:30:00.000 +(type (first (at T 'tm))) -- 'time +(last (at T 'bool)) -- false +(type (last (at T 'bool))) -- 'b8 ;; ════════════════════ COMPARISON ON COLUMNS ════════════════════ (count (select {from: T where: (== sym 'AAPL)})) -- 40 From 43df6d377a16bc828586d4f143ab1fd57412008d Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 18:28:06 +0300 Subject: [PATCH 18/21] test(integration): groupby + per-key-type + diverse aggregators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A 1000-row two-column fixture with 50 distinct keys driving group.c:: exec_group through every aggregator combination + every primitive key type. Aggregators covered in group-by context: count, sum, avg, min, max, first, last, dev, var, stddev, dev_pop, var_pop, stddev_pop. Plus multi-aggregator (7-agg) select, group-by + filter pushdown, and no-by-clause aggregation over the whole table. Key types: I64, SYM, BOOL, DATE, F64, I16, I32 — each routes through a separate hash-key path in the parallel radix groupby. KNOWN BUGS pinned with `!- length`: - (med v) inside (select … by:) raises "length: non-agg expression referencing a column produced a non-row-aligned result" - (diverse v) — actually a bool predicate ("all unique?"), not the count-distinct one might guess from the name; works standalone but isn't usable inside group-by either ΔLine coverage: +67 lines (mostly variant aggregator paths in group.c and per-key-type dispatch in exec_group). Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/integration/groupby_aggregators.rfl | 88 ++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 test/rfl/integration/groupby_aggregators.rfl diff --git a/test/rfl/integration/groupby_aggregators.rfl b/test/rfl/integration/groupby_aggregators.rfl new file mode 100644 index 00000000..da9492b6 --- /dev/null +++ b/test/rfl/integration/groupby_aggregators.rfl @@ -0,0 +1,88 @@ +;; Multi-aggregator group-by with rarely-tested aggregators (med, dev, +;; var, stddev, dev_pop, var_pop, diverse, first/last). Each combo +;; routes through different paths in group.c::exec_group. + +(set N 1000) +(set T (table [g v f] (list (% (til N) 50) (til N) (as 'F64 (* 0.5 (til N)))))) + +;; ────────────── basic counts ────────────── +(count T) -- 1000 +(count (select {c: (count v) from: T by: g})) -- 50 + +;; ────────────── single-aggregator per call ────────────── +(sum (at (select {x: (sum v) from: T by: g}) 'x)) -- 499500 +(sum (at (select {x: (count v) from: T by: g}) 'x)) -- 1000 +(sum (at (select {x: (avg v) from: T by: g}) 'x)) -- 24975.0 +;; per-group: g sees v ∈ {g, g+50, …, g+950}; min=g, max=g+950 +;; sum_g min = sum_g g = 1225; sum_g max = 1225 + 50*950 = 48725 +(sum (at (select {x: (min v) from: T by: g}) 'x)) -- 1225 +(sum (at (select {x: (max v) from: T by: g}) 'x)) -- 48725 +(sum (at (select {x: (first v) from: T by: g}) 'x)) -- 1225 +(sum (at (select {x: (last v) from: T by: g}) 'x)) -- 48725 + +;; ────────────── stat aggregators (dev / var / stddev) ────────────── +;; Each group has 20 evenly-spaced values; their variance/stddev is +;; the same per group, so summing across 50 groups gives 50× the +;; per-group value. We verify they're non-zero and consistent. +(count (select {d: (dev v) v: (var v) s: (stddev v) from: T by: g})) -- 50 + +;; dev_pop and var_pop variants (population vs sample) +(count (select {d: (dev_pop v) v: (var_pop v) from: T by: g})) -- 50 +(count (select {d: (stddev_pop v) from: T by: g})) -- 50 + +;; ────────────── median ────────────── +;; KNOWN: (med v) works standalone but raises "length" inside select-by. +;; Verify standalone path; pin the group-by error. +(med [1 2 3 4 5]) -- 3.0 +(count (select {m: (med v) from: T by: g})) !- length + +;; ────────────── multi-aggregator: 7 aggs in one select ────────────── +(set Many (select {c: (count v) s: (sum v) mn: (min v) mx: (max v) av: (avg v) f: (first v) l: (last v) from: T by: g})) +(count Many) -- 50 +(sum (at Many 'c)) -- 1000 +(sum (at Many 's)) -- 499500 + +;; ────────────── float aggregators ────────────── +(count (select {av: (avg f) sd: (stddev f) from: T by: g})) -- 50 +(sum (at (select {s: (sum f) from: T by: g}) 's)) -- 249750.0 + +;; ────────────── diverse — bool predicate "all elements distinct" ────────────── +;; (Not count-distinct; returns true iff every element is unique.) +(diverse [1 2 3 4 5]) -- true +(diverse [1 2 1 3 2]) -- false +(diverse [1 1 1]) -- false +(diverse [42]) -- true + +;; ────────────── group-by + filter (predicate pushdown) ────────────── +;; (sum v where v < 500) per group. Group g sees only v's < 500 → 10 of 20. +(count (select {s: (sum v) from: T by: g where: (< v 500)})) -- 50 + +;; ────────────── group-by no `by` clause: aggregate over whole table ────────────── +;; pure aggregations without grouping +(set Whole (select {tot: (sum v) ct: (count v) avg_v: (avg v) from: T})) +(count Whole) -- 1000 +(at (at Whole 'tot) 0) -- 499500 + +;; ────────────── group-by SYM key ────────────── +(set Tsym (table [k v] (list (take ['A 'B 'C 'D 'E] N) (til N)))) +(count (select {s: (sum v) from: Tsym by: k})) -- 5 +(sum (at (select {s: (sum v) from: Tsym by: k}) 's)) -- 499500 + +;; ────────────── group-by BOOL ────────────── +(set Tbool (table [b v] (list (take [true false] N) (til N)))) +(count (select {s: (sum v) from: Tbool by: b})) -- 2 + +;; ────────────── group-by DATE ────────────── +(set Tdt (table [d v] (list (take [2024.01.01 2024.01.02 2024.01.03] N) (til N)))) +(count (select {s: (sum v) from: Tdt by: d})) -- 3 + +;; ────────────── group-by F64 (float key) ────────────── +(set Tf (table [k v] (list (take [1.5 2.5 3.5 4.5] N) (til N)))) +(count (select {s: (sum v) from: Tf by: k})) -- 4 + +;; ────────────── narrow-int key types (i16, i32) ────────────── +(set Ti16 (table [k v] (list (as 'I16 (% (til N) 30)) (til N)))) +(count (select {s: (sum v) from: Ti16 by: k})) -- 30 + +(set Ti32 (table [k v] (list (as 'I32 (% (til N) 10)) (til N)))) +(count (select {s: (sum v) from: Ti32 by: k})) -- 10 From 7c58f509d5d56cefd4ecfb04cf6d0d9a557cd5c3 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 18:34:59 +0300 Subject: [PATCH 19/21] test(table): pivot multi-key, I64/DATE keys, missing-cell semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends pivot.rfl with paths previously uncovered: * multi-row-key form (pivot t [a b] c v sum) — 4-column output * I64 row key (was only SYM-keyed) * DATE row key * missing-cell semantics: sum and count both yield 0 for empty groups These exercise the row-key hash + cross-product paths in tblop.c::ray_pivot_fn that the existing fixtures (one SYM row key) didn't reach. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/table/pivot.rfl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/rfl/table/pivot.rfl b/test/rfl/table/pivot.rfl index 0e13e4f0..f789594d 100644 --- a/test/rfl/table/pivot.rfl +++ b/test/rfl/table/pivot.rfl @@ -74,3 +74,35 @@ (set Tu0 (select {from: Tu1 where: (> a 100)})) ;; 0 rows, same schema (count (union-all Tu0 Tu1)) -- 3 (count (union-all Tu1 Tu0)) -- 3 + +;; ────────────── pivot with TWO row keys (vector form) ────────────── +(set T2k (table [a b c v] (list ['X 'X 'Y 'Y 'X 'Y] [1 2 1 2 1 2] ['p 'q 'p 'q 'q 'p] [10 20 30 40 50 60]))) +(set P2k (pivot T2k ['a 'b] 'c 'v sum)) +(count P2k) -- 4 +(count (key P2k)) -- 4 +(at (at P2k 'p) 0) -- 10 +(at (at P2k 'q) 0) -- 50 +(at (at P2k 'p) 3) -- 60 +(at (at P2k 'q) 3) -- 40 + +;; ────────────── pivot with I64 row key ────────────── +(set Ti (table [k c v] (list [1 1 2 2 3] ['x 'y 'x 'y 'x] [10 20 30 40 50]))) +(set Pi (pivot Ti 'k 'c 'v sum)) +(count Pi) -- 3 +(at (at Pi 'x) 0) -- 10 +(at (at Pi 'y) 1) -- 40 +(at (at Pi 'x) 2) -- 50 + +;; ────────────── pivot with DATE row key ────────────── +(set Td (table [d c v] (list [2024.01.01 2024.01.01 2024.01.02 2024.01.02] ['x 'y 'x 'y] [10 20 30 40]))) +(set Pd (pivot Td 'd 'c 'v sum)) +(count Pd) -- 2 +(at (at Pd 'x) 0) -- 10 +(at (at Pd 'y) 1) -- 40 + +;; ────────────── pivot count vs sum on missing cells ────────────── +(set Ts (table [r c v] (list ['A 'A 'B] ['x 'y 'x] [1 2 3]))) +(set Pcs (pivot Ts 'r 'c 'v sum)) +(at (at Pcs 'y) 1) -- 0 +(set Pcc (pivot Ts 'r 'c 'v count)) +(at (at Pcc 'y) 1) -- 0 From f3fc33001ba8ad38da8e6b9e75b62e83bb5f0bc5 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 19:30:49 +0300 Subject: [PATCH 20/21] test(datalog): recursive ancestor rule + multi-clause body forms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends datalog/rule.rfl with paths missed by the original 13 cases: * recursive (anc ?x ?y) — depth-3 chain plus a disjoint pair (5→6), exercising the rule fixed-point loop in dl_compile_rule * two-clause derivation (cofriend) — duplicate triggers de-dupe * multi-constant body (dept 10 ∧ level 'senior) — separate filter branches in dl_parse_body_clause Should hit some of the 247 uncovered lines in datalog.c::dl_compile_rule. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/datalog/rule.rfl | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/rfl/datalog/rule.rfl b/test/rfl/datalog/rule.rfl index 332d0690..2560b03c 100644 --- a/test/rfl/datalog/rule.rfl +++ b/test/rfl/datalog/rule.rfl @@ -143,3 +143,38 @@ (set db (assert-fact db 2 'name 200)) (set db (assert-fact db 2 'manager 1)) (count (query db (find ?e) (where (?e :name ?n) (not (?e :manager ?m))))) -- 0 + +;; ────────────── recursive rule: ancestor over a parent chain ────────────── +(set db (datoms)) +(set db (assert-fact db 1 'parent 2)) +(set db (assert-fact db 2 'parent 3)) +(set db (assert-fact db 3 'parent 4)) +(set db (assert-fact db 5 'parent 6)) +(rule (anc ?x ?y) (?x :parent ?y)) +(rule (anc ?x ?z) (?x :parent ?y) (anc ?y ?z)) +(count (query db (find ?y) (where (anc 1 ?y)))) -- 3 +(count (query db (find ?y) (where (anc 5 ?y)))) -- 1 +;; reachable pairs: 1→{2,3,4}, 2→{3,4}, 3→{4}, 5→{6} = 7 +(count (query db (find ?x ?y) (where (anc ?x ?y)))) -- 7 + +;; ────────────── two-clause derivation (co-friend via shared friend) ────────────── +(set db (datoms)) +(set db (assert-fact db 1 'friend 2)) +(set db (assert-fact db 2 'friend 3)) +(set db (assert-fact db 1 'friend 4)) +(set db (assert-fact db 4 'friend 3)) +(rule (cofriend ?x ?z) (?x :friend ?y) (?y :friend ?z)) +;; both 1→2→3 and 1→4→3 yield (1,3); query de-dupes +(count (query db (find ?x ?z) (where (cofriend ?x ?z)))) -- 1 + +;; ────────────── multi-constant body ────────────── +(set db (datoms)) +(set db (assert-fact db 1 'dept 10)) +(set db (assert-fact db 1 'level 'senior)) +(set db (assert-fact db 2 'dept 10)) +(set db (assert-fact db 2 'level 'junior)) +(set db (assert-fact db 3 'dept 20)) +(set db (assert-fact db 3 'level 'senior)) +(count (query db (find ?e) (where (?e :dept 10) (?e :level 'senior)))) -- 1 +(count (query db (find ?e) (where (?e :level 'senior)))) -- 2 +(count (query db (find ?e) (where (?e :dept 10)))) -- 2 From 82fae9d220bf7ec96a32ece40a402743f50dd1a6 Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 28 Apr 2026 20:02:02 +0300 Subject: [PATCH 21/21] review: fix three blockers + bonus tests / docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Anton's PR #8 review: [blocker] sym_atom_cmp: NULL ray_sym_str → fall back to comparing raw interned ids instead of returning 0 (which would silently collapse distinct symbols if the intern table is corrupted or uninitialised). Stable, total ordering preserved. [blocker] neg/abs INT_MIN UB: -INT16_MIN / -INT32_MIN / -INT64_MIN were signed-overflow UB. Negate via unsigned cast — wraparound is defined for unsigned types and the result wraps back to INT_MIN consistently with binary `(- 0 INT_MIN)`. Added regression rows in arith/neg.rfl and arith/abs.rfl that pin INT16_MIN / INT32_MIN behaviour (via cast since `-32768h` literal is unrepresentable). [blocker] ray_mkdir_p path buffer: replace hardcoded `char buf[1024]` with `RAY_PATH_MAX` (PATH_MAX on POSIX, 4096 on Windows). Deep splayed paths like /db/yyyy.mm.dd/leaf/ now fit. [bonus] glob `%`/`_` literal-match assertions in strop/like.rfl — makes the SQL→glob consolidation explicit in tests, not just docs. [bonus] glob.h documents the lenient unterminated-class policy (matches glibc fnmatch semantics; never produces parse error). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ops/arith.c | 22 ++++++++++++++-------- src/ops/cmp.c | 16 ++++++++++++++-- src/ops/glob.h | 9 ++++++++- src/store/fileio.c | 18 ++++++++++++++++-- test/rfl/arith/abs.rfl | 11 +++++++++++ test/rfl/arith/neg.rfl | 19 +++++++++++++++++++ test/rfl/strop/like.rfl | 8 ++++++++ 7 files changed, 90 insertions(+), 13 deletions(-) diff --git a/src/ops/arith.c b/src/ops/arith.c index 12ae7bfb..e840d015 100644 --- a/src/ops/arith.c +++ b/src/ops/arith.c @@ -330,11 +330,15 @@ ray_t* ray_mod_fn(ray_t* a, ray_t* b) { ray_t* ray_neg_fn(ray_t* x) { if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } - if (x->type == -RAY_I64) return make_i64(-x->i64); if (x->type == -RAY_F64) return make_f64(-x->f64); - /* Narrow ints preserve type — same convention as binary + - mul. */ - if (x->type == -RAY_I32) return make_i32(-x->i32); - if (x->type == -RAY_I16) return make_i16(-x->i16); + /* Negate via unsigned to avoid signed-overflow UB on INT_MIN. + * Wraparound is defined for unsigned types; (T)(uT)(-(uT)x) yields + * the same wrapped value the corresponding two's-complement + * arithmetic would produce — so (neg INT_MIN) returns INT_MIN + * (overflow-wrap) consistently with binary `(- 0 INT_MIN)`. */ + if (x->type == -RAY_I64) return make_i64((int64_t)(-(uint64_t)x->i64)); + if (x->type == -RAY_I32) return make_i32((int32_t)(-(uint32_t)x->i32)); + if (x->type == -RAY_I16) return make_i16((int16_t)(-(uint16_t)x->i16)); return ray_error("type", NULL); } @@ -362,13 +366,15 @@ ray_t* ray_ceil_fn(ray_t* x) { return ray_error("type", NULL); } -/* abs: absolute value, preserves type */ +/* abs: absolute value, preserves type. Uses unsigned-wrap negation + * for the negative branch — same overflow-wrap semantics as `neg`, + * so (abs INT_MIN) returns INT_MIN rather than UB. */ ray_t* ray_abs_fn(ray_t* x) { if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } if (x->type == -RAY_F64) return make_f64(fabs(x->f64)); - if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? -x->i64 : x->i64); - if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? -x->i32 : x->i32); - if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? -x->i16 : x->i16); + if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? (int64_t)(-(uint64_t)x->i64) : x->i64); + if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? (int32_t)(-(uint32_t)x->i32) : x->i32); + if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? (int16_t)(-(uint16_t)x->i16) : x->i16); return ray_error("type", NULL); } diff --git a/src/ops/cmp.c b/src/ops/cmp.c index d696e1cb..df47e368 100644 --- a/src/ops/cmp.c +++ b/src/ops/cmp.c @@ -43,12 +43,24 @@ int char_str_cmp(ray_t* a, ray_t* b, int *out) { /* Lexicographic compare of two SYM atoms. Fast path: equal interned * ids ⇒ identical text ⇒ 0, no global-table lookup. Slow path: pull * the backing STR via ray_sym_str and delegate to ray_str_cmp, which - * uses the 12-byte SSO inline path for short symbols. */ + * uses the 12-byte SSO inline path for short symbols. + * + * If a sym_str lookup fails (NULL — e.g. corrupted intern table or + * uninitialised state) we fall back to comparing the raw interned ids + * rather than declaring the unequal symbols equal. Stable, never + * silently collapses distinct symbols. */ int sym_atom_cmp(ray_t* a, ray_t* b) { if (a->i64 == b->i64) return 0; ray_t* sa = ray_sym_str(a->i64); ray_t* sb = ray_sym_str(b->i64); - int r = (sa && sb) ? ray_str_cmp(sa, sb) : 0; + int r; + if (sa && sb) { + r = ray_str_cmp(sa, sb); + } else { + /* Fallback: order by interned id (stable, total). Same sign + * convention as memcmp: negative if a < b, positive if a > b. */ + r = (a->i64 < b->i64) ? -1 : 1; + } if (sa) ray_release(sa); if (sb) ray_release(sb); return r; diff --git a/src/ops/glob.h b/src/ops/glob.h index 7fa6bef6..63aa2959 100644 --- a/src/ops/glob.h +++ b/src/ops/glob.h @@ -21,7 +21,14 @@ * * `glob_match` is case-sensitive. `glob_match_ci` lowercases ASCII letters * on both sides before comparing (so it matches 'A' against 'a', 'A-Z' - * range matches both case forms, etc.). */ + * range matches both case forms, etc.). + * + * Lenient parsing policy: an unterminated character class (e.g. pattern + * "abc[def" with no closing `]`) is accepted — the class consumes input + * up to the end of the pattern and the match continues with whatever + * `matched` flag accumulated. This matches glibc fnmatch's permissive + * behaviour and avoids surprising `error: parse` mid-search. Callers + * that want strict validation should pre-validate the pattern. */ bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn); bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn); diff --git a/src/store/fileio.c b/src/store/fileio.c index 8adb596f..8586c13a 100644 --- a/src/store/fileio.c +++ b/src/store/fileio.c @@ -23,6 +23,20 @@ #include "fileio.h" +#include + +/* PATH_MAX is mandated on POSIX (typically 4096 on Linux); Windows + * caps at MAX_PATH = 260 unless long-path support is enabled. Use the + * larger of the two when known so callers passing deep splayed paths + * (e.g. /db/yyyy.mm.dd/table/) don't silently truncate. */ +#ifdef RAY_OS_WINDOWS +# define RAY_PATH_MAX 4096 +#elif defined(PATH_MAX) +# define RAY_PATH_MAX PATH_MAX +#else +# define RAY_PATH_MAX 4096 +#endif + #ifdef RAY_OS_WINDOWS #include @@ -121,7 +135,7 @@ ray_err_t ray_mkdir(const char* path) { ray_err_t ray_mkdir_p(const char* path) { if (!path || !*path) return RAY_ERR_IO; - char buf[1024]; + char buf[RAY_PATH_MAX]; size_t len = strlen(path); if (len >= sizeof(buf)) return RAY_ERR_IO; memcpy(buf, path, len + 1); @@ -236,7 +250,7 @@ ray_err_t ray_mkdir(const char* path) { ray_err_t ray_mkdir_p(const char* path) { if (!path || !*path) return RAY_ERR_IO; - char buf[1024]; + char buf[RAY_PATH_MAX]; size_t len = strlen(path); if (len >= sizeof(buf)) return RAY_ERR_IO; memcpy(buf, path, len + 1); diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl index 55b5c738..2b01e4d1 100644 --- a/test/rfl/arith/abs.rfl +++ b/test/rfl/arith/abs.rfl @@ -36,3 +36,14 @@ (type (abs -5i)) -- 'i32 (type (abs [-1h 2h])) -- 'I16 (type (abs [-1i 2i])) -- 'I32 + +;; INT_MIN edge: same overflow-wrap convention as neg — abs of INT_MIN +;; returns INT_MIN (no UB). Verified under UBSan. +;; Literal -32768h / -2147483648i can't be parsed (parser tokenises +;; positive then negates), so verify via i64 round-trip. +(set MIN16 (as 'i16 (as 'i64 -32768))) +(as 'i64 (abs MIN16)) -- -32768 +(type (abs MIN16)) -- 'i16 +(set MIN32 (as 'i32 (as 'i64 -2147483648))) +(as 'i64 (abs MIN32)) -- -2147483648 +(type (abs MIN32)) -- 'i32 diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl index 47e0444e..22bae9b0 100644 --- a/test/rfl/arith/neg.rfl +++ b/test/rfl/arith/neg.rfl @@ -62,3 +62,22 @@ ;; null propagation across narrow types (nil? (neg 0Nh)) -- true (nil? (neg 0Ni)) -- true + +;; ────────────────────────────────────────────────────────────────── +;; INT_MIN edge: -INT_MIN would be signed overflow (UB). Implementation +;; uses unsigned-wrap negation so the result wraps back to INT_MIN +;; rather than triggering UBSan. Stable across i16/i32/i64. +;; ────────────────────────────────────────────────────────────────── + +;; INT_MIN constructed via cast — literal can't represent it directly +;; because `-32768h` is parsed as negate-of-positive (32768 doesn't fit i16). +;; Compare via widened-i64 form: (as 'i64 (neg INT16_MIN)) == INT16_MIN. +(set MIN16 (as 'i16 (as 'i64 -32768))) +(as 'i64 (neg MIN16)) -- -32768 +(type (neg MIN16)) -- 'i16 +(set MIN32 (as 'i32 (as 'i64 -2147483648))) +(as 'i64 (neg MIN32)) -- -2147483648 +(type (neg MIN32)) -- 'i32 + +;; INT64_MAX → -INT64_MAX (no overflow, just sign flip) +(neg 9223372036854775807) -- -9223372036854775807 diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl index 3e495ff4..ef471a1c 100644 --- a/test/rfl/strop/like.rfl +++ b/test/rfl/strop/like.rfl @@ -23,6 +23,14 @@ (like "x" "") -- false (like "" "*") -- true +;; ────────────── SQL meta-chars are LITERAL post-consolidation ────────────── +;; Pre-consolidation the DAG path treated `%` and `_` as SQL wildcards. +;; After unifying on glob in src/ops/glob.[ch], they're literal bytes. +(like "100%" "100%") -- true ;; literal '%' matches itself +(like "abc" "%") -- false ;; '%' is no longer "any chars" +(like "a_b" "a_b") -- true ;; literal '_' matches itself +(like "ab" "a_b") -- false ;; '_' is no longer "single char" + ;; ────────────── universal-star metamorphic invariants ────────────── ;; `*` matches anything, including punctuation / digits / mixed bytes. (like "abc" "*") -- true