From 38e7932b6d3106e253f1d0222e8a412aef449593 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 13:34:27 +0300
Subject: [PATCH 01/21] test(rfl): consolidate non-duplicate coverage from spec
 migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds 624 unique test assertions on top of Anton's test/rfl/ corpus,
deduped against his parallel rewrite (commit 536fef4 et al).

Stats:
* 35 modified files — our backup was a strict superset of upstream, so
  each one replaces the upstream file wholesale.  The apparent
  "Anton-only" lines in arith/abs, arith/ceil, arith/floor, arith/neg,
  arith/round, cmp/ge, cmp/gt and cmp/ne were false positives: they
  differ only in whitespace (extra alignment spaces changed the sort).
* 12 new files — coverage Anton didn't write at all:
    test/rfl/datalog/rule.rfl       — full EAV / rule / TC / negation
    test/rfl/embedding/hnsw.rfl     — HNSW index build/query/info
    test/rfl/integration/arena.rfl  — 100k-element churn + .sys.gc
    test/rfl/integration/cow.rfl    — copy-on-write aliasing
    test/rfl/integration/morsel.rfl — 1023/1024/1025 + 2047/2048/2049
    test/rfl/integration/optimizer.rfl — filter reorder, pushdown,
                                         selection-bitmap edges
    test/rfl/integration/str_pool.rfl — 12-byte SSO boundary
    test/rfl/system/csv_roundtrip.rfl — schema-less .csv.read
    test/rfl/system/splayed.rfl     — set/get-splayed round-trip
    test/rfl/table/modify.rfl       — functional column update
    test/rfl/table/pivot.rfl        — wide reshape, sum/count aggr
    test/rfl/table/select.rfl       — 50 select-clause assertions

Coverage delta vs origin/master:
  Tests:     921/922  →  933/934   (+12 unique tests)
  Lines:     63.4%    →  63.9%     (+217 lines)
  Functions: 77.5%    →  78.0%     (+8 functions)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/agg/sum.rfl               |  13 +++
 test/rfl/arith/abs.rfl             |  28 +++++-
 test/rfl/arith/add.rfl             | 130 +++++++++++++++++++++++++-
 test/rfl/arith/ceil.rfl            |  19 +++-
 test/rfl/arith/div.rfl             |  44 ++++++++-
 test/rfl/arith/floor.rfl           |  18 +++-
 test/rfl/arith/mod.rfl             |  31 +++++-
 test/rfl/arith/mul.rfl             |  62 ++++++++++--
 test/rfl/arith/neg.rfl             |  39 +++++++-
 test/rfl/arith/round.rfl           |  25 ++++-
 test/rfl/arith/sqrt.rfl            |   6 ++
 test/rfl/arith/sub.rfl             |  74 ++++++++++++++-
 test/rfl/cmp/eq.rfl                |  49 +++++++++-
 test/rfl/cmp/ge.rfl                |  27 +++++-
 test/rfl/cmp/gt.rfl                |  37 +++++++-
 test/rfl/cmp/le.rfl                |  15 ++-
 test/rfl/cmp/lt.rfl                |  17 ++++
 test/rfl/cmp/ne.rfl                |  23 ++++-
 test/rfl/collection/rand.rfl       |   7 ++
 test/rfl/collection/til.rfl        |   5 +
 test/rfl/datalog/rule.rfl          | 145 +++++++++++++++++++++++++++++
 test/rfl/embedding/hnsw.rfl        |  25 +++++
 test/rfl/hof/apply.rfl             |  13 +++
 test/rfl/hof/fold.rfl              |  16 ++++
 test/rfl/hof/map.rfl               |   7 ++
 test/rfl/hof/scan.rfl              |   6 ++
 test/rfl/integration/arena.rfl     |  25 +++++
 test/rfl/integration/cow.rfl       |  20 ++++
 test/rfl/integration/joins.rfl     |  36 ++++++-
 test/rfl/integration/morsel.rfl    |  52 +++++++++++
 test/rfl/integration/null.rfl      |   6 ++
 test/rfl/integration/optimizer.rfl |  54 +++++++++++
 test/rfl/integration/str_pool.rfl  |  44 +++++++++
 test/rfl/sort/asc.rfl              |  36 +++++++
 test/rfl/sort/iasc.rfl             |  11 +++
 test/rfl/sort/rank.rfl             |  20 +++-
 test/rfl/strop/like.rfl            |  29 ++++++
 test/rfl/system/csv_roundtrip.rfl  |  75 +++++++++++++++
 test/rfl/system/serde.rfl          |  12 +++
 test/rfl/system/splayed.rfl        |  32 +++++++
 test/rfl/table/meta.rfl            |  25 ++++-
 test/rfl/table/modify.rfl          |  28 ++++++
 test/rfl/table/pivot.rfl           |  25 +++++
 test/rfl/table/select.rfl          | 117 +++++++++++++++++++++++
 test/rfl/temporal/date.rfl         |  36 +++++++
 test/rfl/temporal/time.rfl         |  13 +++
 test/rfl/type/as.rfl               |  39 ++++++++
 47 files changed, 1572 insertions(+), 44 deletions(-)
 create mode 100644 test/rfl/datalog/rule.rfl
 create mode 100644 test/rfl/embedding/hnsw.rfl
 create mode 100644 test/rfl/integration/arena.rfl
 create mode 100644 test/rfl/integration/cow.rfl
 create mode 100644 test/rfl/integration/morsel.rfl
 create mode 100644 test/rfl/integration/optimizer.rfl
 create mode 100644 test/rfl/integration/str_pool.rfl
 create mode 100644 test/rfl/system/csv_roundtrip.rfl
 create mode 100644 test/rfl/system/splayed.rfl
 create mode 100644 test/rfl/table/modify.rfl
 create mode 100644 test/rfl/table/pivot.rfl
 create mode 100644 test/rfl/table/select.rfl

diff --git a/test/rfl/agg/sum.rfl b/test/rfl/agg/sum.rfl
index 2925e651..01e4ba7c 100644
--- a/test/rfl/agg/sum.rfl
+++ b/test/rfl/agg/sum.rfl
@@ -11,3 +11,16 @@
 ;; linearity: scales with constant — falsifies sum ≡ 0.
 (set V (- (rand 100 2000) 1000))
 (* 3 (sum V)) -- (sum (* 3 V))
+
+;; ────────────── error: sum is not defined for the null sentinel ──────────────
+;; (sum null) refers to RAY_NULL_OBJ, not a typed-null atom; it isn't
+;; reducible and must surface as a type error.
+(sum null) !- type
+
+;; ────────────── null skipping (null_policy: skip_nulls) ──────────────
+;; Rayforce aggregations skip typed-null elements.  Vector literals
+;; reject mixing nulls and values across types, so use (list ...) which
+;; produces a heterogeneous list that sum handles.
+(sum (list 1 0Ni 3 0Ni 5)) -- 9
+(sum (list 0Ni 0Ni 0Ni))   -- 0
+(sum (list))               -- 0
diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl
index 1497f697..d8bb50e9 100644
--- a/test/rfl/arith/abs.rfl
+++ b/test/rfl/arith/abs.rfl
@@ -1,20 +1,38 @@
 ;; Invariants for `abs` (absolute value).
 
-;; non-negative:  (>= (abs a) 0)
+;; non-negative
 (set A (- (rand 256 200000) 100000))
 (count A) -- (sum (>= (abs A) 0))
 
-;; idempotent:  (abs (abs a)) == (abs a)
+;; idempotent
 (count A) -- (sum (== (abs (abs A)) (abs A)))
 
 ;; |x| == |-x|
 (count A) -- (sum (== (abs A) (abs (neg A))))
 
-;; abs dominates:  (>= (abs a) a)  and  (>= (abs a) (neg a))
+;; abs dominates
 (count A) -- (sum (>= (abs A) A))
 (count A) -- (sum (>= (abs A) (neg A)))
 
 ;; concrete
 (abs -5) -- 5
-(abs 5) -- 5
-(abs 0) -- 0
+(abs 5)  -- 5
+(abs 0)  -- 0
+
+;; null atoms propagate through abs
+(nil? (abs 0N))  -- true
+(nil? (abs 0Ni)) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Vec + cross-type — abs widens narrow ints to i64
+;; ──────────────────────────────────────────────────────────────────
+
+(abs [1 -2 3 -4])     -- [1 2 3 4]
+(abs [-1.5 2.5 -3.5]) -- [1.5 2.5 3.5]
+(abs -5.0)            -- 5.0
+(type (abs -5))       -- 'i64
+(type (abs -5.0))     -- 'f64
+(type (abs -5h))      -- 'i64
+(type (abs -5i))      -- 'i64
+(type (abs [-1h 2h])) -- 'I64
+(type (abs [-1i 2i])) -- 'I64
diff --git a/test/rfl/arith/add.rfl b/test/rfl/arith/add.rfl
index d480e468..f2e9ee69 100644
--- a/test/rfl/arith/add.rfl
+++ b/test/rfl/arith/add.rfl
@@ -1,4 +1,4 @@
-;; Invariants for `+` on i64 vectors.
+;; Invariants for `+`.
 
 ;; commutativity:  (+ a b) == (+ b a)
 (set A (- (rand 256 200000) 100000))
@@ -18,3 +18,131 @@
 ;; identity (left):  (+ 0 a) == a
 (set A (- (rand 256 200000) 100000))
 (count A) -- (sum (== (+ 0 A) A))
+
+;; ──────────────────────────────────────────────────────────────────
+;; Mixed numeric — atom/vec/broadcast/coercion
+;; ──────────────────────────────────────────────────────────────────
+
+;; int+int atom
+(+ 3 4) -- 7
+
+;; int+int vec
+(+ [1 2 3] [10 20 30]) -- [11 22 33]
+
+;; int+int broadcast
+(+ [1 2 3] 10) -- [11 12 13]
+
+;; neg + pos
+(+ -5 3) -- -2
+
+;; neg vec + atom
+(+ [-1 -2 -3] 1) -- [0 -1 -2]
+
+;; float+float atom
+(+ 1.5 2.5) -- 4.0
+
+;; int+float coerce atom
+(+ 1 2.5) -- 3.5
+
+;; int_vec + float atom
+(+ [1 2 3] 0.5) -- [1.5 2.5 3.5]
+
+;; int_vec + float_vec
+(+ [1 2 3] [0.5 0.5 0.5]) -- [1.5 2.5 3.5]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Type sweep — each width, result type, widening
+;; ──────────────────────────────────────────────────────────────────
+
+;; + I16 atom value
+(+ 1h 2h) -- 3
+
+;; + I32 atom value
+(+ 1i 2i) -- 3
+
+;; + I16 atom type preserves
+(type (+ 1h 2h)) -- 'i16
+
+;; + I32 atom type preserves
+(type (+ 1i 2i)) -- 'i32
+
+;; + I16+I32 widens to I32
+(type (+ 1h 2i)) -- 'i32
+
+;; + I32+I64 widens to I64
+(type (+ 1i 2)) -- 'i64
+
+;; + I64+F64 widens to F64
+(type (+ 1 2.0)) -- 'f64
+
+;; + I16 vec element-wise
+(+ [1h 2h 3h] [1h 2h 3h]) -- [2 4 6]
+
+;; + I32 vec element-wise
+(+ [1i 2i 3i] [1i 2i 3i]) -- [2 4 6]
+
+;; + F64 vec element-wise
+(+ [1.0 2.0 3.0] [1.0 2.0 3.0]) -- [2.0 4.0 6.0]
+
+;; + I16 vec type preserves
+(type (+ [1h 2h] [1h 2h])) -- 'I16
+
+;; + I32 vec type preserves
+(type (+ [1i 2i] [1i 2i])) -- 'I32
+
+;; + atom-vec broadcast I16
+(+ 10h [1h 2h 3h]) -- [11 12 13]
+
+;; + atom-vec broadcast F64
+(+ 10.0 [1.0 2.0 3.0]) -- [11.0 12.0 13.0]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Null propagation — typed null atoms propagate through +
+;; ──────────────────────────────────────────────────────────────────
+
+;; + int null propagates
+(nil? (+ 0Ni 5)) -- true
+
+;; + rhs int null propagates
+(nil? (+ 5 0Ni)) -- true
+
+;; + f64 null propagates
+(nil? (+ 0Nf 5.0)) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Type errors
+;; ──────────────────────────────────────────────────────────────────
+
+(+ null 1)          !- type
+(+ "abc" 1)         !- type
+(+ 0Nf 2024.03.20)  !- type
+
+;; ──────────────────────────────────────────────────────────────────
+;; Morsel boundaries — 1023 / 1024 / 1025
+;; ──────────────────────────────────────────────────────────────────
+
+;; identity + 0 preserves vec at 1023
+(set V (til 1023))
+(+ V 0) -- V
+
+;; sum after +0 preserved at 1023
+(sum (+ V 0)) -- (sum V)
+
+;; vec+vec at 1023 doubles sum
+(sum (+ V V)) -- (* 2 (sum V))
+
+;; identity at 1024
+(set V (til 1024))
+(+ V 0) -- V
+(sum (+ V V)) -- (* 2 (sum V))
+
+;; last element at morsel 1024
+(last (+ V 1)) -- 1024
+
+;; identity at 1025
+(set V (til 1025))
+(+ V 0) -- V
+(sum (+ V V)) -- (* 2 (sum V))
+
+;; last element at morsel 1025
+(last (+ V 1)) -- 1025
diff --git a/test/rfl/arith/ceil.rfl b/test/rfl/arith/ceil.rfl
index f87ade50..c3f549d2 100644
--- a/test/rfl/arith/ceil.rfl
+++ b/test/rfl/arith/ceil.rfl
@@ -12,6 +12,21 @@
 (count A) -- (sum (== (ceil (ceil A)) (ceil A)))
 
 ;; concrete
-(ceil 3.2) -- 4.0
-(ceil 3.0) -- 3.0
+(ceil 3.2)  -- 4.0
+(ceil 3.0)  -- 3.0
 (ceil -3.7) -- -3.0
+
+;; ──────────────────────────────────────────────────────────────────
+;; Integer input — identity
+;; ──────────────────────────────────────────────────────────────────
+
+(ceil 5)  -- 5
+(ceil 5h) -- 5
+(ceil 5i) -- 5
+
+;; vec — float input, float result
+(ceil [1.2 2.8 -0.5 -1.5]) -- [2.0 3.0 0.0 -1.0]
+
+;; metamorphic vs floor:  ceil(x) == -floor(-x)
+(ceil 1.5)  -- (neg (floor (neg 1.5)))
+(ceil -0.5) -- (neg (floor (neg -0.5)))
diff --git a/test/rfl/arith/div.rfl b/test/rfl/arith/div.rfl
index 9655514c..767efea5 100644
--- a/test/rfl/arith/div.rfl
+++ b/test/rfl/arith/div.rfl
@@ -13,5 +13,45 @@
 (/ 7 2) -- 3
 (/ 10 3) -- 3
 
-;; divide-by-zero yields typed null (not an error in rayforce2)
-(/ 5 0) -- 0Nl
+;; floor with negative — toward -inf, not toward 0
+(/ -10 3)  -- -4
+(/ 10 -3)  -- -4
+(/ -10 -3) -- 3
+
+;; divide-by-zero yields typed null
+(/ 5 0)  -- 0Nl
+(/ 10 0) -- 0Nl
+
+;; floats — Rayforce folds IEEE +Inf / NaN outcomes back to typed null
+(nil? (/ 1.0 0.0)) -- true
+(nil? (/ 0.0 0.0)) -- true
+(nil? (div 1.0 0.0)) -- true
+(nil? (div 0.0 0.0)) -- true
+
+;; const-fold of div-by-zero must agree with the runtime path
+;; (regression for the optimizer doing the fold at compile time)
+(set X 1.0)
+(nil? (/ X 0.0)) -- true
+(nil? (div X 0.0)) -- true
+(== (nil? (/ 1 0)) (nil? (/ X 0))) -- true
+(== (nil? (/ 1.0 0.0)) (nil? (/ X 0.0))) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Type sweep — narrow ints / float / cross-width
+;; ──────────────────────────────────────────────────────────────────
+
+(/ 6h 2h) -- 3
+(type (/ 6h 2h)) -- 'i16
+(/ 6i 2i) -- 3
+(type (/ 6i 2i)) -- 'i32
+(/ 10.0 4.0) -- 2.0
+(type (/ 6.0 2.0)) -- 'f64
+(/ [12 15 18] 3) -- [4 5 6]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Null propagation
+;; ──────────────────────────────────────────────────────────────────
+
+(nil? (/ 0Ni 5))   -- true
+(nil? (/ 5 0Ni))   -- true
+(nil? (/ 0Nf 5.0)) -- true
diff --git a/test/rfl/arith/floor.rfl b/test/rfl/arith/floor.rfl
index 1632ce67..ef279ac9 100644
--- a/test/rfl/arith/floor.rfl
+++ b/test/rfl/arith/floor.rfl
@@ -12,6 +12,20 @@
 (count A) -- (sum (== (floor (floor A)) (floor A)))
 
 ;; concrete
-(floor 3.7) -- 3.0
-(floor 3.0) -- 3.0
+(floor 3.7)  -- 3.0
+(floor 3.0)  -- 3.0
 (floor -3.2) -- -4.0
+
+;; ──────────────────────────────────────────────────────────────────
+;; Integer input — identity
+;; ──────────────────────────────────────────────────────────────────
+
+(floor 5)  -- 5
+(floor 5h) -- 5
+(floor 5i) -- 5
+
+;; vec — float input, float result
+(floor [1.2 2.8 -0.5 -1.5]) -- [1.0 2.0 -1.0 -2.0]
+
+;; relation to ceil — floor <= ceil
+(<= (floor 1.5) (ceil 1.5)) -- true
diff --git a/test/rfl/arith/mod.rfl b/test/rfl/arith/mod.rfl
index 65cc3457..287554b1 100644
--- a/test/rfl/arith/mod.rfl
+++ b/test/rfl/arith/mod.rfl
@@ -1,4 +1,4 @@
-;; Invariants for `%` (modulo).
+;; Invariants for `%` (floor-modulo).
 
 ;; range:  0 <= (% a m) < m  for positive m
 (set A (- (rand 256 10000) 5000))
@@ -12,3 +12,32 @@
 ;; concrete
 (% 7 3) -- 1
 (% 0 5) -- 0
+
+;; floor-mod with negative — sign follows divisor (Python semantics)
+(% -10 3) -- 2
+(% 10 -3) -- -2
+
+;; mod by 1 is always 0
+(% 17 1)  -- 0
+(% -42 1) -- 0
+
+;; mod by self is 0
+(% 7 7) -- 0
+
+;; ──────────────────────────────────────────────────────────────────
+;; Type sweep
+;; ──────────────────────────────────────────────────────────────────
+
+(% 10h 3h) -- 1
+(type (% 10h 3h)) -- 'i16
+(% 10i 3i) -- 1
+(type (% 10i 3i)) -- 'i32
+
+(% [10 11 12] 3) -- [1 2 0]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Null + zero divisor
+;; ──────────────────────────────────────────────────────────────────
+
+(nil? (% 0Ni 5)) -- true
+(% 10 0) -- 0Nl
diff --git a/test/rfl/arith/mul.rfl b/test/rfl/arith/mul.rfl
index 37ab4511..c5f278a4 100644
--- a/test/rfl/arith/mul.rfl
+++ b/test/rfl/arith/mul.rfl
@@ -1,29 +1,77 @@
-;; Invariants for `*` on i64 vectors.
+;; Invariants for `*`.
 
-;; commutativity:  (* a b) == (* b a)
+;; commutativity
 (set A (- (rand 256 400) 200))
 (set B (- (rand 256 400) 200))
 (count A) -- (sum (== (* A B) (* B A)))
 
-;; associativity:  (* (* a b) c) == (* a (* b c))  — narrow range to avoid overflow
+;; associativity (narrow range)
 (set A (- (rand 256 20) 10))
 (set B (- (rand 256 20) 10))
 (set C (- (rand 256 20) 10))
 (count A) -- (sum (== (* (* A B) C) (* A (* B C))))
 
-;; identity:  (* a 1) == a  and  (* 1 a) == a
+;; identity (left/right)
 (set A (- (rand 256 200000) 100000))
 (count A) -- (sum (== (* A 1) A))
 (count A) -- (sum (== (* 1 A) A))
 
-;; zero annihilates:  (* a 0) == 0
+;; zero annihilates
 (count A) -- (sum (== (* A 0) 0))
 
-;; distributivity over +:  (* a (+ b c)) == (+ (* a b) (* a c))
+;; distributivity over +
 (set A (- (rand 256 100) 50))
 (set B (- (rand 256 100) 50))
 (set C (- (rand 256 100) 50))
 (count A) -- (sum (== (* A (+ B C)) (+ (* A B) (* A C))))
 
-;; concrete
+;; ──────────────────────────────────────────────────────────────────
+;; Concrete atom / vec / broadcast / cross-width
+;; ──────────────────────────────────────────────────────────────────
+
+(* 2 3)             -- 6
+(* -3 4)            -- -12
+(* -3 -4)           -- 12
 (* [2 3 4] [5 6 7]) -- [10 18 28]
+(* [1 2 3] 2)       -- [2 4 6]
+(* 2 [1 2 3])       -- [2 4 6]
+
+;; I16 / I32 / F64
+(* 2h 3h) -- 6
+(type (* 2h 3h)) -- 'i16
+(* 2i 3i) -- 6
+(type (* 2i 3i)) -- 'i32
+(* 2.5 4.0) -- 10.0
+(type (* 1 2.0)) -- 'f64
+
+;; ──────────────────────────────────────────────────────────────────
+;; Null propagation
+;; ──────────────────────────────────────────────────────────────────
+
+(nil? (* 0Ni 5))    -- true
+(nil? (* 5 0Ni))    -- true
+(nil? (* 0Ni 0))    -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Type errors
+;; ──────────────────────────────────────────────────────────────────
+
+(* null 1)                !- type
+(* "abc" 2)               !- type
+(* 02:15:07.000 02:15:07.000) !- type
+
+;; ──────────────────────────────────────────────────────────────────
+;; Morsel boundaries
+;; ──────────────────────────────────────────────────────────────────
+
+(set V (til 1023))
+(* V 1) -- V
+(sum (* 2 V)) -- (* 2 (sum V))
+
+(set V (til 1024))
+(* V 1) -- V
+(sum (* 0 V)) -- 0
+
+(set V (til 1025))
+(* V 1) -- V
+(sum (* 2 V)) -- (* 2 (sum V))
diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl
index 640401fb..532d0048 100644
--- a/test/rfl/arith/neg.rfl
+++ b/test/rfl/arith/neg.rfl
@@ -12,6 +12,41 @@
 (count A) -- (sum (== (neg (+ A B)) (+ (neg A) (neg B))))
 
 ;; concrete
-(neg 5) -- -5
-(neg 0) -- 0
+(neg 5)  -- -5
+(neg 0)  -- 0
 (neg -3) -- 3
+
+;; null atoms propagate through neg
+(nil? (neg 0N))  -- true
+(nil? (neg 0Ni)) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Vec + cross-type
+;; ──────────────────────────────────────────────────────────────────
+
+(neg [1 2 3])       -- [-1 -2 -3]
+(neg [1.0 2.0 3.0]) -- [-1.0 -2.0 -3.0]
+(neg 5.0)           -- -5.0
+(type (neg 5))      -- 'i64
+(type (neg 5.0))    -- 'f64
+
+;; involution per-type (vec)
+(neg (neg [1 2 3]))         -- [1 2 3]
+(neg (neg [1.0 2.0 3.0]))   -- [1.0 2.0 3.0]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Morsel boundary
+;; ──────────────────────────────────────────────────────────────────
+
+(set V (til 1024))
+(neg (neg V)) -- V
+(sum (+ V (neg V))) -- 0
+
+;; ──────────────────────────────────────────────────────────────────
+;; Probe: neg on narrow-int currently raises type
+;; (filed in spec/_probes/neg_narrow_int.rfl)
+;; ──────────────────────────────────────────────────────────────────
+
+(neg 5h)      !- type
+(neg 5i)      !- type
+(neg [1h 2h]) !- type
diff --git a/test/rfl/arith/round.rfl b/test/rfl/arith/round.rfl
index c2223cf2..84c31679 100644
--- a/test/rfl/arith/round.rfl
+++ b/test/rfl/arith/round.rfl
@@ -5,8 +5,25 @@
 (count A) -- (sum (or (== (round A) (floor A)) (== (round A) (ceil A))))
 
 ;; concrete — half-away-from-zero / banker's, depending on impl
-(round 3.5) -- 4.0
-(round 3.4) -- 3.0
-(round 3.6) -- 4.0
-(round 3.0) -- 3.0
+(round 3.5)  -- 4.0
+(round 3.4)  -- 3.0
+(round 3.6)  -- 4.0
+(round 3.0)  -- 3.0
 (round -3.4) -- -3.0
+
+;; ──────────────────────────────────────────────────────────────────
+;; Integer input — round always returns f64
+;; ──────────────────────────────────────────────────────────────────
+
+(round 5)  -- 5.0
+(round 5h) -- 5.0
+(round 5i) -- 5.0
+(type (round 5)) -- 'f64
+
+;; vec — also f64 result
+(round [1.2 2.7 -1.5]) -- [1.0 3.0 -2.0]
+
+;; bounded between floor and ceil for arbitrary input
+(set X 1.7)
+(>= (round X) (floor X)) -- true
+(<= (round X) (ceil X))  -- true
diff --git a/test/rfl/arith/sqrt.rfl b/test/rfl/arith/sqrt.rfl
index 69ac12e6..5b22013c 100644
--- a/test/rfl/arith/sqrt.rfl
+++ b/test/rfl/arith/sqrt.rfl
@@ -7,6 +7,12 @@
 (sqrt 9.0) -- 3.0
 (sqrt 25.0) -- 5.0
 
+;; sqrt of a negative produces IEEE NaN (still f64, not nil) — NaN is
+;; the only float that is not equal to itself.
+(type (sqrt -1.0)) -- 'f64
+(nil? (sqrt -1.0)) -- false
+(!= (sqrt -1.0) (sqrt -1.0)) -- true
+
 ;; roundtrip: (sqrt x)^2 ≈ x  for x >= 0
 (set A (as 'F64 (rand 256 1000)))
 (count A) -- (sum (< (abs (- (* (sqrt A) (sqrt A)) A)) 0.001))
diff --git a/test/rfl/arith/sub.rfl b/test/rfl/arith/sub.rfl
index 3c190df7..4ea43241 100644
--- a/test/rfl/arith/sub.rfl
+++ b/test/rfl/arith/sub.rfl
@@ -1,4 +1,4 @@
-;; Invariants for `-` on i64 vectors.
+;; Invariants for `-`.
 
 ;; self-inverse:  (- a a) == 0   (rand never emits null)
 (set A (- (rand 256 200000) 100000))
@@ -12,3 +12,75 @@
 (set A (- (rand 256 100000) 50000))
 (set B (- (rand 256 100000) 50000))
 (count A) -- (sum (== (- (+ A B) B) A))
+
+;; anti-commutativity:  (- a b) == (neg (- b a))
+(set A (- (rand 256 100000) 50000))
+(set B (- (rand 256 100000) 50000))
+(count A) -- (sum (== (- A B) (neg (- B A))))
+
+;; ──────────────────────────────────────────────────────────────────
+;; Atom / vec / broadcast / cross-width
+;; ──────────────────────────────────────────────────────────────────
+
+;; - atom
+(- 10 3) -- 7
+
+;; - negative
+(- 3 10) -- -7
+
+;; - vec vec
+(- [10 20 30] [1 2 3]) -- [9 18 27]
+
+;; - vec atom
+(- [10 20 30] 5) -- [5 15 25]
+
+;; - atom vec (broadcast)
+(- 100 [1 2 3]) -- [99 98 97]
+
+;; - float atom
+(- 1.5 0.5) -- 1.0
+
+;; - I16 atom
+(- 5h 3h) -- 2
+(type (- 5h 3h)) -- 'i16
+
+;; - I32 atom
+(- 5i 3i) -- 2
+(type (- 5i 3i)) -- 'i32
+
+;; - cross-width widens
+(type (- 5h 3i)) -- 'i32
+(type (- 5 3.0)) -- 'f64
+
+;; ──────────────────────────────────────────────────────────────────
+;; Null propagation
+;; ──────────────────────────────────────────────────────────────────
+
+(nil? (- 0Ni 5)) -- true
+(nil? (- 5 0Ni)) -- true
+(nil? (- 0Nf 5.0)) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Type errors
+;; ──────────────────────────────────────────────────────────────────
+
+(- null 1)                                      !- type
+(- "a" 1)                                       !- type
+(- 2025.03.04D15:41:47.087221025 2025.12.13)    !- type
+
+;; ──────────────────────────────────────────────────────────────────
+;; Morsel boundaries
+;; ──────────────────────────────────────────────────────────────────
+
+(set V (til 1023))
+(sum (- V V)) -- 0
+(- V 0) -- V
+
+(set V (til 1024))
+(sum (- V V)) -- 0
+(- V 0) -- V
+(last (- V 1)) -- 1022
+
+(set V (til 1025))
+(sum (- V V)) -- 0
+(last (- V 1)) -- 1023
diff --git a/test/rfl/cmp/eq.rfl b/test/rfl/cmp/eq.rfl
index 90d5d8ec..b527d964 100644
--- a/test/rfl/cmp/eq.rfl
+++ b/test/rfl/cmp/eq.rfl
@@ -1,4 +1,4 @@
-;; Invariants for `==` on i64 vectors.
+;; Invariants for `==`.
 
 ;; reflexive:  (== a a) is true for every element
 (set A (- (rand 256 200000) 100000))
@@ -8,3 +8,50 @@
 (set A (- (rand 256 200000) 100000))
 (set B (- (rand 256 200000) 100000))
 (count A) -- (sum (== (== A B) (== B A)))
+
+;; ──────────────────────────────────────────────────────────────────
+;; Concrete — atom + vec across all comparable types
+;; ──────────────────────────────────────────────────────────────────
+
+;; reflexive concrete
+(== 5 5)         -- true
+(== -5 -5)       -- true
+(== 0 0)         -- true
+(== 3.14 3.14)   -- true
+(== "hello" "hello") -- true
+(== "" "")       -- true
+(== true true)   -- true
+(== false false) -- true
+
+;; non-equal
+(== 3 4) -- false
+(== "a" "b") -- false
+
+;; symmetric
+(== 3 5) -- (== 5 3)
+
+;; vec element-wise
+(== [1 2 3] [1 2 3]) -- [true true true]
+(== [1 2 3] [1 5 3]) -- [true false true]
+
+;; cross-width numeric promotion
+(== 5h 5)   -- true
+(== 5i 5)   -- true
+(== 5 5.0)  -- true
+
+;; sym
+(== 'foo 'foo) -- true
+(== 'foo 'bar) -- false
+(== ['a 'b] ['a 'b]) -- [true true]
+
+;; date
+(== 2024.01.15 2024.01.15) -- true
+(== 2024.01.15 2024.06.01) -- false
+
+;; ──────────────────────────────────────────────────────────────────
+;; Morsel boundary
+;; ──────────────────────────────────────────────────────────────────
+
+(set V (til 1024))
+(sum (== V V)) -- 1024
+(sum (== V 0)) -- 1
diff --git a/test/rfl/cmp/ge.rfl b/test/rfl/cmp/ge.rfl
index 1e686ffb..4d0ddfed 100644
--- a/test/rfl/cmp/ge.rfl
+++ b/test/rfl/cmp/ge.rfl
@@ -8,6 +8,27 @@
 (set B (- (rand 256 200000) 100000))
 (count A) -- (sum (== (>= A B) (<= B A)))
 
-(>= 2 1) -- true
-(>= 1 2) -- false
-(>= 3 3) -- true
+;; >= equivalent to (or > ==)
+(count A) -- (sum (== (>= A B) (or (> A B) (== A B))))
+
+;; concrete
+(>= 2 1)  -- true
+(>= 1 2)  -- false
+(>= 3 3)  -- true
+(>= -1 -5) -- true
+
+;; I16 / I32 / F64 (same type on both sides)
+(>= 5h 5h) -- true
+(>= 7i 5i) -- true
+(>= 2.5 2.5) -- true
+
+;; STR
+(>= "x" "x") -- true
+(>= "b" "a") -- true
+
+;; DATE
+(>= 2024.01.15 2024.01.15) -- true
+(>= 2024.06.01 2024.01.15) -- true
+
+;; vec
+(>= [1 5 5] [2 2 5]) -- [false true true]
diff --git a/test/rfl/cmp/gt.rfl b/test/rfl/cmp/gt.rfl
index b8124773..16ae306d 100644
--- a/test/rfl/cmp/gt.rfl
+++ b/test/rfl/cmp/gt.rfl
@@ -8,6 +8,37 @@
 (set B (- (rand 256 200000) 100000))
 (count A) -- (sum (== (> A B) (< B A)))
 
-(> 2 1) -- true
-(> 1 2) -- false
-(> 3 3) -- false
+;; concrete
+(> 2 1)  -- true
+(> 1 2)  -- false
+(> 3 3)  -- false
+(> -1 -5) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; Across types
+;; ──────────────────────────────────────────────────────────────────
+
+(> 7h 5h) -- true
+(> 7i 5i) -- true
+(> 2.5 1.5) -- true
+
+;; STR — lex order
+(> "b" "a")  -- true
+(> "aa" "a") -- true
+(> "x" "x")  -- false
+
+;; DATE
+(> 2024.06.01 2024.01.15) -- true
+
+;; vec
+(> [1 5 3] [2 2 2]) -- [false true true]
+(> [1 5 10] 5)      -- [false false true]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Probe: ordering ops (> < >= <=) on SYM atoms raise type — known bug (see _probes/cmp_sym_ordering)
+;; ──────────────────────────────────────────────────────────────────
+
+(> 'b 'a)   !- type
+(< 'a 'b)   !- type
+(>= 'a 'a)  !- type
+(<= 'a 'a)  !- type
diff --git a/test/rfl/cmp/le.rfl b/test/rfl/cmp/le.rfl
index 1bd3079b..5e098305 100644
--- a/test/rfl/cmp/le.rfl
+++ b/test/rfl/cmp/le.rfl
@@ -8,7 +8,20 @@
 (set B (- (rand 256 200000) 100000))
 (count A) -- (sum (== (<= A B) (or (< A B) (== A B))))
 
-;; antisymmetric when equal: (<= a b) and (<= b a) implies (== a b)
+;; concrete
 (<= 1 2) -- true
 (<= 2 1) -- false
 (<= 3 3) -- true
+
+;; I16 / I32 / F64 (same type on both sides)
+(<= 5h 5h)   -- true
+(<= 3i 5i)   -- true
+(<= 2.5 2.5) -- true
+
+;; STR / DATE
+(<= "a" "b") -- true
+(<= "x" "x") -- true
+(<= 2024.01.15 2024.06.01) -- true
+
+;; vec
+(<= [1 5 5] [2 2 5]) -- [true false true]
diff --git a/test/rfl/cmp/lt.rfl b/test/rfl/cmp/lt.rfl
index b5cf2e85..9be58bb3 100644
--- a/test/rfl/cmp/lt.rfl
+++ b/test/rfl/cmp/lt.rfl
@@ -11,6 +11,23 @@
 ;; trichotomy: exactly one of <, >, == holds
 (count A) -- (sum (+ (+ (as 'I64 (< A B)) (as 'I64 (> A B))) (as 'I64 (== A B))))
 
+;; concrete
 (< 1 2) -- true
 (< 2 1) -- false
 (< 3 3) -- false
+(< -5 -1) -- true
+
+;; I16 / I32 / F64 (same type on both sides)
+(< 3h 5h) -- true
+(< 3i 5i) -- true
+(< 1.5 2.5) -- true
+
+;; STR
+(< "a" "b") -- true
+
+;; DATE
+(< 2024.01.15 2024.06.01) -- true
+
+;; vec
+(< [1 5 3] [2 2 2]) -- [true false false]
+(< [1 5 10] 5)      -- [true false false]
diff --git a/test/rfl/cmp/ne.rfl b/test/rfl/cmp/ne.rfl
index e7fc3671..adbf6d79 100644
--- a/test/rfl/cmp/ne.rfl
+++ b/test/rfl/cmp/ne.rfl
@@ -11,5 +11,24 @@
 ;; symmetric
 (count A) -- (sum (== (!= A B) (!= B A)))
 
-(!= 1 2) -- true
-(!= 3 3) -- false
+;; concrete
+(!= 1 2)  -- true
+(!= 3 3)  -- false
+(!= -1 1) -- true
+
+;; STR / SYM (same type on both sides)
+(!= "a" "a") -- false
+(!= "a" "b") -- true
+(!= 'a 'a) -- false
+(!= 'a 'b) -- true
+
+;; cross-width
+(!= 5h 5)  -- false
+(!= 5h 6)  -- true
+(!= 5 5.0) -- false
+
+;; vec
+(!= [1 2 3] [1 5 3]) -- [false true false]
+
+;; complementary to == per-element
+(!= [1 2 3] [1 2 3]) -- [false false false]
diff --git a/test/rfl/collection/rand.rfl b/test/rfl/collection/rand.rfl
index b296cd3c..c940475f 100644
--- a/test/rfl/collection/rand.rfl
+++ b/test/rfl/collection/rand.rfl
@@ -7,3 +7,10 @@
 ;; int random
 ;; ========== VERIFY RANGE ==========
 (and (>= (min (rand 100 10)) 0) (< (max (rand 100 10)) 10)) -- true
+
+;; ========== ZERO COUNT IS EMPTY (NOT AN ERROR) ==========
+(rand 0 10) -- []
+
+;; ========== DOMAIN ERRORS ==========
+;; non-positive upper bound is rejected as a domain error
+(rand 5 0) !- domain
diff --git a/test/rfl/collection/til.rfl b/test/rfl/collection/til.rfl
index 3a1c5739..4a866014 100644
--- a/test/rfl/collection/til.rfl
+++ b/test/rfl/collection/til.rfl
@@ -15,3 +15,8 @@ N -- (sum (== 1 (- (at V (+ 1 (til N))) (at V (til N)))))
 ;; first/last
 (first (til 50)) -- 0
 (last (til 50)) -- 49
+
+;; ========== DOMAIN ERRORS ==========
+;; til of a negative count is rejected as a domain error
+(til -1)   !- domain
+(til -100) !- domain
diff --git a/test/rfl/datalog/rule.rfl b/test/rfl/datalog/rule.rfl
new file mode 100644
index 00000000..332d0690
--- /dev/null
+++ b/test/rfl/datalog/rule.rfl
@@ -0,0 +1,145 @@
+;; Datalog: EAV facts asserted into a `datoms` store; queries answered via
+;; pattern matching with optional rule expansion.  Counts here were
+;; cross-checked against a Python reference evaluator.
+
+;; ────────────── simple EAV query ──────────────
+;; 3 facts, 3 (?e ?n) bindings.
+(set db (datoms))
+(set db (assert-fact db 1 'age 30))
+(set db (assert-fact db 2 'age 25))
+(set db (assert-fact db 3 'age 40))
+(count (query db (find ?e ?n) (where (?e :age ?n)))) -- 3
+
+;; constant in object slot filters down to no rows when no fact matches
+(count (query db (find ?e) (where (?e :age 999)))) -- 0
+
+;; ────────────── two-clause join on entity ──────────────
+(set db (datoms))
+(set db (assert-fact db 1 'name 100))
+(set db (assert-fact db 1 'dept 10))
+(set db (assert-fact db 2 'name 200))
+(set db (assert-fact db 2 'dept 20))
+(set db (assert-fact db 3 'name 300))
+(set db (assert-fact db 3 'dept 10))
+(count (query db (find ?e ?n ?d) (where (?e :name ?n) (?e :dept ?d)))) -- 3
+
+;; constant in object slot for one of the two clauses (dept = 10) keeps
+;; only the two entities with that dept
+(count (query db (find ?e ?n) (where (?e :name ?n) (?e :dept 10)))) -- 2
+
+;; ────────────── wildcard underscore ──────────────
+;; `_` matches any value but does not bind.  Only entities with a :dept
+;; fact are returned.
+(set db (datoms))
+(set db (assert-fact db 1 'name 100))
+(set db (assert-fact db 1 'dept 10))
+(set db (assert-fact db 2 'name 200))
+(set db (assert-fact db 3 'dept 30))
+(count (query db (find ?e) (where (?e :dept _)))) -- 2
+
+;; ────────────── triple-clause join with constant filter ──────────────
+;; Find all (?e ?c ?s) for entities with :age 30.  Two such entities exist.
+(set db (datoms))
+(set db (assert-fact db 1 'age 30))
+(set db (assert-fact db 1 'city 100))
+(set db (assert-fact db 1 'salary 80000))
+(set db (assert-fact db 2 'age 25))
+(set db (assert-fact db 2 'city 200))
+(set db (assert-fact db 2 'salary 60000))
+(set db (assert-fact db 3 'age 30))
+(set db (assert-fact db 3 'city 100))
+(set db (assert-fact db 3 'salary 90000))
+(count (query db (find ?e ?c ?s) (where (?e :age 30) (?e :city ?c) (?e :salary ?s)))) -- 2
+
+;; ────────────── rules: derived relations ──────────────
+;; (employee ?e ?n ?d) projects (entity, name, dept) when both facts hold
+(set db (datoms))
+(set db (assert-fact db 1 'name 100))
+(set db (assert-fact db 1 'dept 10))
+(set db (assert-fact db 2 'name 200))
+(set db (assert-fact db 2 'dept 20))
+(set db (assert-fact db 3 'name 300))
+(set db (assert-fact db 3 'dept 10))
+(rule (employee ?e ?n ?d) (?e :name ?n) (?e :dept ?d))
+(count (query db (find ?n ?d) (where (employee ?e ?n ?d)))) -- 3
+
+;; rule with constant body — (eng ?e) iff (?e :dept 10).  Two engineers.
+(set db (datoms))
+(set db (assert-fact db 1 'name 100))
+(set db (assert-fact db 1 'dept 10))
+(set db (assert-fact db 1 'salary 80000))
+(set db (assert-fact db 2 'name 200))
+(set db (assert-fact db 2 'dept 10))
+(set db (assert-fact db 2 'salary 60000))
+(set db (assert-fact db 3 'name 300))
+(set db (assert-fact db 3 'dept 20))
+(set db (assert-fact db 3 'salary 90000))
+(rule (eng ?e) (?e :dept 10))
+(count (query db (find ?e) (where (eng ?e)))) -- 2
+
+;; ────────────── transitive closure ──────────────
+;; Linear chain 1→2→3→4→5: TC has 4+3+2+1 = 10 reachable pairs.
+(set db (datoms))
+(set db (assert-fact db 1 'edge 2))
+(set db (assert-fact db 2 'edge 3))
+(set db (assert-fact db 3 'edge 4))
+(set db (assert-fact db 4 'edge 5))
+(rule (reachable ?x ?y) (?x :edge ?y))
+(rule (reachable ?x ?z) (?x :edge ?y) (reachable ?y ?z))
+(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 10
+
+;; DAG 1→2, 1→3, 2→4, 3→4, 4→5, 5→6 — 14 reachable pairs.
+(set db (datoms))
+(set db (assert-fact db 1 'edge 2))
+(set db (assert-fact db 1 'edge 3))
+(set db (assert-fact db 2 'edge 4))
+(set db (assert-fact db 3 'edge 4))
+(set db (assert-fact db 4 'edge 5))
+(set db (assert-fact db 5 'edge 6))
+(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 14
+
+;; Cycle 1↔2, 2→3 — every node reaches every other in {1,2,3} except
+;; that 3 has no outgoing edge.  Reachable pairs: 1→{1,2,3}, 2→{1,2,3} = 6.
+(set db (datoms))
+(set db (assert-fact db 1 'edge 2))
+(set db (assert-fact db 2 'edge 1))
+(set db (assert-fact db 2 'edge 3))
+(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 6
+
+;; Self-loop on 1: only one reachable pair (1,1).
+(set db (datoms))
+(set db (assert-fact db 1 'edge 1))
+(count (query db (find ?x ?y) (where (reachable ?x ?y)))) -- 1
+
+;; ────────────── stratified negation ──────────────
+;; (not ...) excludes entities whose negated atom is satisfiable.
+
+;; non-managers in dept 10 = entities 2 and 4.  Entity 1 is in dept 10 but
+;; has a :manager fact, so it is excluded; entity 3 has a :manager fact and
+;; is in dept 20, so it never matches (?e :dept 10) in the first place.
+(set db (datoms))
+(set db (assert-fact db 1 'dept 10))
+(set db (assert-fact db 1 'manager 1))
+(set db (assert-fact db 2 'dept 10))
+(set db (assert-fact db 3 'dept 20))
+(set db (assert-fact db 3 'manager 1))
+(set db (assert-fact db 4 'dept 10))
+(count (query db (find ?e) (where (?e :dept 10) (not (?e :manager ?m))))) -- 2
+
+;; non-managers among named entities: 2 and 4 have no manager → 2.
+(set db (datoms))
+(set db (assert-fact db 1 'name 100))
+(set db (assert-fact db 1 'manager 1))
+(set db (assert-fact db 2 'name 200))
+(set db (assert-fact db 3 'name 300))
+(set db (assert-fact db 3 'manager 1))
+(set db (assert-fact db 4 'name 400))
+(count (query db (find ?e) (where (?e :name ?n) (not (?e :manager ?m))))) -- 2
+
+;; everyone is a manager → empty result under (not ...)
+(set db (datoms))
+(set db (assert-fact db 1 'name 100))
+(set db (assert-fact db 1 'manager 1))
+(set db (assert-fact db 2 'name 200))
+(set db (assert-fact db 2 'manager 1))
+(count (query db (find ?e) (where (?e :name ?n) (not (?e :manager ?m))))) -- 0
diff --git a/test/rfl/embedding/hnsw.rfl b/test/rfl/embedding/hnsw.rfl
new file mode 100644
index 00000000..067526a1
--- /dev/null
+++ b/test/rfl/embedding/hnsw.rfl
@@ -0,0 +1,25 @@
+;; Invariants for `hnsw-build` / `ann` / `hnsw-info` / `hnsw-free`.
+;; HNSW approximate nearest-neighbour index over a list of float vectors.
+;; The index is opaque; queries return a table with `_rowid` and `_dist`.
+
+;; Five vectors, exact-match query at row 0.
+(set V (list [1.0 0.0 0.0] [0.9 0.1 0.0] [0.0 1.0 0.0] [0.0 0.0 1.0] [0.5 0.5 0.0]))
+(set Idx (hnsw-build V 'l2 8 100))
+
+;; ────────────── hnsw-info exposes build parameters ──────────────
+(at (hnsw-info Idx) 'nrows)  -- 5
+(at (hnsw-info Idx) 'dim)    -- 3
+(at (hnsw-info Idx) 'metric) -- 'l2
+(at (hnsw-info Idx) 'M)      -- 8
+(at (hnsw-info Idx) 'efc)    -- 100
+
+;; ────────────── exact-match top-1 ──────────────
+;; Querying with the same vector as row 0 must return row 0 with dist 0.
+(first (at (ann Idx [1.0 0.0 0.0] 1) '_rowid)) -- 0
+(first (at (ann Idx [1.0 0.0 0.0] 1) '_dist))  -- 0.0
+
+;; ────────────── top-k respects requested count and orders by distance ──────────────
+(count (ann Idx [1.0 0.0 0.0] 3)) -- 3
+(<= (first (at (ann Idx [1.0 0.0 0.0] 3) '_dist)) (last (at (ann Idx [1.0 0.0 0.0] 3) '_dist))) -- true
+
+(hnsw-free Idx)
diff --git a/test/rfl/hof/apply.rfl b/test/rfl/hof/apply.rfl
index b3c678fa..bdb2f8dc 100644
--- a/test/rfl/hof/apply.rfl
+++ b/test/rfl/hof/apply.rfl
@@ -9,6 +9,19 @@
 (apply + [1 2 3] [10 20 30]) -- [11 22 33]
 (apply * [1 2 3] [4 5 6]) -- [4 10 18]
 (apply - [10 20] [1 2]) -- [9 18]
+(apply + [1 2] [3 4]) -- [4 6]
+(apply * [1 2 3] [10 20 30]) -- [10 40 90]
 
 ;; Mismatched lengths take the shorter side.
 (apply + [1 2 3 4] [10 20]) -- [11 22]
+
+;; lambda
+(apply (fn [x y] (+ x (* 2 y))) [1 2] [10 20]) -- [21 42]
+
+;; floats
+(apply + [1.5 2.5] [0.5 0.5]) -- [2.0 3.0]
+
+;; result count matches input count
+(set V (rand 50 100))
+(set W (rand 50 100))
+(count V) -- (count (apply + V W))
diff --git a/test/rfl/hof/fold.rfl b/test/rfl/hof/fold.rfl
index fd732df7..47dd97ca 100644
--- a/test/rfl/hof/fold.rfl
+++ b/test/rfl/hof/fold.rfl
@@ -7,3 +7,19 @@
 ;; fold is sum for +/0
 (set V (rand 50 1000))
 (sum V) -- (fold + 0 V)
+
+;; fold without seed
+(fold + [1 2 3 4 5]) -- 15
+(fold * [1 2 3 4]) -- 24
+
+;; fold with seed accumulates from seed
+(fold + 10 [1 2 3 4 5]) -- 25
+
+;; lambda
+(fold (fn [a b] (+ a b)) [1 2 3]) -- 6
+
+;; empty + seed → seed
+(fold + 42 ()) -- 42
+
+;; floats
+(fold + [1.5 2.5 3.0]) -- 7.0
diff --git a/test/rfl/hof/map.rfl b/test/rfl/hof/map.rfl
index abf16a76..9d5dc9f7 100644
--- a/test/rfl/hof/map.rfl
+++ b/test/rfl/hof/map.rfl
@@ -10,6 +10,13 @@
 ;; identity map is identity
 (count V) -- (sum (== (map (fn [x] x) V) V))
 
+;; builtin verbs as fn arguments
+(map neg [1 2 3]) -- [-1 -2 -3]
+(map abs [-1 2 -3]) -- [1 2 3]
+
+;; map on empty
+(count (map neg ())) -- 0
+
 ;; ---- Regression tests ported from test_lang_rf.inc::test_rf_map ----
 ;; Ported from test_lang_rf.inc::test_rf_map.
 
diff --git a/test/rfl/hof/scan.rfl b/test/rfl/hof/scan.rfl
index b56d4d5d..402d251e 100644
--- a/test/rfl/hof/scan.rfl
+++ b/test/rfl/hof/scan.rfl
@@ -1,6 +1,11 @@
 ;; Invariants for `scan` (prefix fold: (scan f v)).
 
 (scan + [1 2 3 4]) -- [1 3 6 10]
+(scan + [1 2 3 4 5]) -- [1 3 6 10 15]
+(scan * [1 2 3 4]) -- [1 2 6 24]
+
+;; singleton
+(scan + [42]) -- [42]
 
 ;; count preserved
 (set V (rand 50 100))
@@ -8,3 +13,4 @@
 
 ;; last element equals fold
 (last (scan + V)) -- (fold + 0 V)
+(last (scan + [1 2 3 4 5])) -- (sum [1 2 3 4 5])
diff --git a/test/rfl/integration/arena.rfl b/test/rfl/integration/arena.rfl
new file mode 100644
index 00000000..60532074
--- /dev/null
+++ b/test/rfl/integration/arena.rfl
@@ -0,0 +1,25 @@
+;; Heavy-allocation churn — 100 000-element vectors in tight succession.
+;; Tests that the arena/buddy allocator holds up under pressure and that
+;; .sys.gc doesn't corrupt working state.
+
+(set N 100000)
+
+;; closed-form: sum 0..99999 = 99999 * 100000 / 2
+(sum (til N)) -- 4999950000
+
+;; linearity at 100k: sum(2*v) == 2*sum(v)
+(sum (* 2 (til N))) -- (* 2 (sum (til N)))
+
+;; concat-distributive: sum(v ++ v) == 2*sum(v)
+(sum (concat (til N) (til N))) -- (* 2 (sum (til N)))
+
+;; count survives an atomic op at 100k
+(count (+ (til N) 7)) -- 100000
+
+;; ────────────── gc doesn't perturb computation ──────────────
+;; Compute, gc, recompute — must be identical.
+(set S1 (sum (til 100000)))
+(.sys.gc)
+(set S2 (sum (til 100000)))
+S1 -- S2
+S1 -- 4999950000
diff --git a/test/rfl/integration/cow.rfl b/test/rfl/integration/cow.rfl
new file mode 100644
index 00000000..bdb4c728
--- /dev/null
+++ b/test/rfl/integration/cow.rfl
@@ -0,0 +1,20 @@
+;; Copy-on-write semantics: two names bound to the "same" value see
+;; identical content, but a derived operation must produce a new value
+;; without mutating any aliasing name.  If COW were broken,
+;; (reverse Y) would also reverse X.
+
+(set X [1 2 3])
+(set Y X)
+(sum (== X Y)) -- 3
+
+;; reverse leaves the source aliases intact
+(set Z (reverse Y))
+X -- [1 2 3]
+
+;; asc on a different vector doesn't touch X
+(set Z (asc [3 1 2]))
+X -- [1 2 3]
+
+;; concat returns a new vec; original unchanged
+(set Z (concat X [4 5]))
+X -- [1 2 3]
diff --git a/test/rfl/integration/joins.rfl b/test/rfl/integration/joins.rfl
index cb58f0d1..40ed9a52 100644
--- a/test/rfl/integration/joins.rfl
+++ b/test/rfl/integration/joins.rfl
@@ -45,7 +45,7 @@
 ;; window-join with multiple aggregations — both columns must be present
 (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Bid Ask] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [99 100 101] [110 111 112])))(set intervals (map-left + [-2000 2000] (at trades 'Time)))(set r (window-join [Sym Time] intervals trades quotes {lo: (min Bid) hi: (max Ask)}))(at r 'lo) -- [99 101]
 (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Bid Ask] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [99 100 101] [110 111 112])))(set intervals (map-left + [-2000 2000] (at trades 'Time)))(set r (window-join [Sym Time] intervals trades quotes {lo: (min Bid) hi: (max Ask)}))(at r 'hi) -- [111 112]
-;; window-join canonical example (docs/queries-joins.html)
+;; window-join rayforce1 canonical example (docs/queries-joins.html)
 ;;   trades at 12:00:01, 12:00:04, 12:00:06 ± 1s windows
 ;;   quotes at 12:00:00..12:00:09 sizes [928 528 648 914 918 626 577 817 620 698]
 ;;     trade @ 01 window [00,02] -> sizes [928 528 648], min=528, max=928
@@ -73,7 +73,7 @@
 ;;   trade @ 05 window [04, 06] -> matches at 04 (1)
 (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Tag] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [foo bar baz])))(set intervals (map-left + [-1000 1000] (at trades 'Time)))(at (window-join [Sym Time] intervals trades quotes {n: (count Tag)}) 'n) -- [2 1]
 ;; window-join COUNT must include window matches whose source value is null
-;; ((count Col) == COUNT(*), not COUNT(non-null Col)).
+;; (kdb+/group.c semantics: (count Col) == COUNT(*), not COUNT(non-null Col)).
 ;;   trade @ 01 window [00, 02]: Bid rows at 00(99) and 02(NULL) -> count=2, min=99
 ;;   trade @ 05 window [04, 06]: Bid row at 04(101)              -> count=1, min=101
 (set trades (table [Sym Time Price] (list [a a] [10:00:01.000 10:00:05.000] [100 200])))(set quotes (table [Sym Time Bid] (list [a a a] [10:00:00.000 10:00:02.000 10:00:04.000] [99 0Nl 101])))(set intervals (map-left + [-1000 1000] (at trades 'Time)))(at (window-join [Sym Time] intervals trades quotes {c: (count Bid)}) 'c) -- [2 1]
@@ -127,3 +127,35 @@
 (inner-join [a] [1 2 3] (table [a] (list [1]))) !- type
 ;; error: asof-join wrong arity
 (asof-join [a b]) !- arity
+
+;; ──────────────────────────────────────────────────────────────────
+;; anti-join — rows of left whose key is NOT present in right
+;; ──────────────────────────────────────────────────────────────────
+(set lt (table [sym qty] (list ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA] [100 50 200 75 90])))
+(set bl (table [sym] (list ['GOOG 'MSFT])))
+(count (anti-join [sym] lt bl)) -- 3
+(sum (at (anti-join [sym] lt bl) 'qty)) -- 265
+
+;; anti-join: empty exclusion → identity (left preserved)
+(set bl0 (table [sym] (list (as 'SYMBOL []))))
+(count (anti-join [sym] lt bl0)) -- 5
+
+;; anti-join: full exclusion → empty
+(set bla (table [sym] (list ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA])))
+(count (anti-join [sym] lt bla)) -- 0
+
+;; ──────────────────────────────────────────────────────────────────
+;; inner-join + left-join: shared 10-row trades + 4-row quotes
+;; ──────────────────────────────────────────────────────────────────
+(set trades-10 (table [sym price size] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL] [150.0 2800.0 310.0 151.5 2795.0 309.0 152.0 2810.0 311.0 150.5] [100 50 200 150 40 250 120 60 180 90])))
+(set quotes-4 (table [sym bid ask] (list ['AAPL 'GOOG 'MSFT 'AMZN] [150.0 2799.5 309.5 3300.0] [150.5 2801.0 310.5 3301.0])))
+
+;; inner-join keeps left rows that have a right match
+(count (inner-join [sym] trades-10 quotes-4)) -- 10
+(sum (at (inner-join [sym] trades-10 quotes-4) 'bid)) -- 9927.0
+
+;; left-join: every left row preserved.  With quotes on the left, the three
+;; matched syms fan out to 10 rows and AMZN (no trade) adds 1 null-padded row.
+(count (left-join [sym] quotes-4 trades-10)) -- 11
+(sum (at (left-join [sym] quotes-4 trades-10) 'bid)) -- 13227.0
+(count (select {from: (left-join [sym] quotes-4 trades-10) where: (== sym 'AMZN)})) -- 1
diff --git a/test/rfl/integration/morsel.rfl b/test/rfl/integration/morsel.rfl
new file mode 100644
index 00000000..9d8b988f
--- /dev/null
+++ b/test/rfl/integration/morsel.rfl
@@ -0,0 +1,52 @@
+;; Morsel-boundary stress: 1023 / 1024 / 1025, then 2047 / 2048 / 2049.
+;; Rayforce executes vector ops in morsels of 1024.  The interesting
+;; failure mode is running state across morsel boundaries — sum,
+;; count, scan, fold all need to carry partial results forward.
+
+;; ────────────── single-morsel boundary ──────────────
+(set V1023 (til 1023))
+(set V1024 (til 1024))
+(set V1025 (til 1025))
+
+;; closed-form sums: sum of 0..n-1 == n*(n-1)/2
+(sum V1023) -- 522753
+(sum V1024) -- 523776
+(sum V1025) -- 524800
+
+;; count preservation under atomic ops
+(count (+ V1023 1)) -- 1023
+(count (+ V1024 1)) -- 1024
+(count (+ V1025 1)) -- 1025
+(count (* V1023 2)) -- 1023
+(count (* V1024 2)) -- 1024
+(count (* V1025 2)) -- 1025
+
+;; permutation invariance: sum is symmetric under reverse
+(sum V1024) -- (sum (reverse V1024))
+(sum V1025) -- (sum (reverse V1025))
+
+;; first/last around the boundary
+(first V1024) -- 0
+(last V1024) -- 1023
+(first V1025) -- 0
+(last V1025) -- 1024
+
+;; ────────────── two-morsel boundary ──────────────
+;; 2047 (just under), 2048 (exact two morsels), 2049 (overflow into a
+;; third morsel of one element).  Running state across morsels is what
+;; we exercise here.
+(set V2047 (til 2047))
+(set V2048 (til 2048))
+(set V2049 (til 2049))
+
+(sum V2047) -- 2094081
+(sum V2048) -- 2096128
+(sum V2049) -- 2098176
+
+(count (+ V2047 7)) -- 2047
+(count (+ V2048 7)) -- 2048
+(count (+ V2049 7)) -- 2049
+
+;; linearity across the boundary
+(sum (* 3 V2048)) -- (* 3 (sum V2048))
+(sum (* 3 V2049)) -- (* 3 (sum V2049))
diff --git a/test/rfl/integration/null.rfl b/test/rfl/integration/null.rfl
index add0b175..ed918065 100644
--- a/test/rfl/integration/null.rfl
+++ b/test/rfl/integration/null.rfl
@@ -6,6 +6,12 @@
 (nil? 0) -- false
 (nil? 1) -- false
 (nil? "") -- false
+;; nil? is true for typed nulls and false for ordinary atoms of any type
+(nil? 0Ni)   -- true
+(nil? 0Nf)   -- true
+(nil? 5)     -- false
+(nil? 3.14)  -- false
+(nil? false) -- false
 ;; ========== NULL PROPAGATION ==========
 (+ 1 0Nl) -- 0Nl
 (* 5 0Nl) -- 0Nl
diff --git a/test/rfl/integration/optimizer.rfl b/test/rfl/integration/optimizer.rfl
new file mode 100644
index 00000000..8de3e9f5
--- /dev/null
+++ b/test/rfl/integration/optimizer.rfl
@@ -0,0 +1,54 @@
+;; Integration tests targeting the query optimizer:
+;; const-fold, filter-reorder, predicate pushdown, selection-bitmap edges.
+;; Each test asserts that the optimized output equals what a hand-rolled
+;; non-optimized form produces.
+
+;; ────────────── filter reorder: and-conjoined predicates commute ──────────────
+;; The optimizer is free to reorder `(and p q)` for selectivity.  The
+;; result must be identical to either order's hand-rolled equivalent.
+(set T (table [sym price size] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG] [150.0 2800.0 310.0 151.5 2795.0] [100 50 200 150 40])))
+(set R1 (select {from: T where: (and (== sym 'AAPL) (> price 150))}))
+(set R2 (select {from: T where: (and (> price 150) (== sym 'AAPL))}))
+(count R1) -- (count R2)
+(sum (at R1 'price)) -- (sum (at R2 'price))
+(sum (at R1 'size))  -- (sum (at R2 'size))
+
+;; nested-select chain (filter, then filter again) must equal the single
+;; conjoined form
+(set R3 (select {from: (select {from: T where: (== sym 'AAPL)}) where: (> price 150)}))
+(count R3) -- (count R1)
+
+;; ────────────── predicate pushdown across group-by ──────────────
+;; (select s: agg from: t by: k where: pred) must equal manually
+;; filtering then grouping.
+(set Tg (table [g v] (list [1 1 2 2 2 3] [10 20 30 40 50 60])))
+
+(set Pre (select {s: (sum v) from: Tg by: g where: (> v 20)}))
+(set Manual (select {s: (sum v) from: (select {from: Tg where: (> v 20)}) by: g}))
+
+(count Pre) -- (count Manual)
+(sum (at Pre 's)) -- (sum (at Manual 's))
+;; oracle: filter v>20 keeps {30,40,50,60}; group 2 sums 120, group 3 sums 60.
+(sum (at Pre 's)) -- 180
+
+;; ────────────── selection bitmap: all-pass / none-pass / single-row ──────────────
+(set Ts (table [a b] (list [1 2 3 4 5] [10 20 30 40 50])))
+
+;; all-pass — selection bitmap is all-ones; row count = source row count
+(count (select {from: Ts where: (> a 0)})) -- 5
+(sum (at (select {from: Ts where: (> a 0)}) 'b)) -- 150
+
+;; none-pass — bitmap is all-zeros; row count = 0
+(count (select {from: Ts where: (> a 1000)})) -- 0
+(sum (at (select {from: Ts where: (> a 1000)}) 'b)) -- 0
+
+;; conjoined narrows: 2 < a < 5 keeps rows {3, 4}
+(count (select {from: Ts where: (and (> a 2) (< a 5))})) -- 2
+(sum (at (select {from: Ts where: (and (> a 2) (< a 5))}) 'b)) -- 70
+
+;; nested-select equivalent must match
+(count (select {from: (select {from: Ts where: (> a 2)}) where: (< a 5)})) -- 2
+
+;; single-row (==-keyed lookup)
+(count (select {from: Ts where: (== a 3)})) -- 1
+(sum (at (select {from: Ts where: (== a 3)}) 'b)) -- 30
diff --git a/test/rfl/integration/str_pool.rfl b/test/rfl/integration/str_pool.rfl
new file mode 100644
index 00000000..ac4375d7
--- /dev/null
+++ b/test/rfl/integration/str_pool.rfl
@@ -0,0 +1,44 @@
+;; Rayforce's RAY_STR has a 12-byte SSO boundary: strings ≤ 12 bytes are
+;; stored inline in the 16-byte ray_str_t; longer strings live in a
+;; per-vector pool with a 4-byte prefix for fast comparison rejection.
+;; These tests pin down behaviour around the boundary.
+
+;; ────────────── count is independent of representation ──────────────
+(count "")             -- 0
+(count "x")            -- 1
+(count "abcdefghijk")  -- 11
+(count "abcdefghijkl") -- 12
+(count "abcdefghijklm") -- 13
+(count "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") -- 100
+
+;; ────────────── equality across representations ──────────────
+(== "abc" "abc") -- true
+(!= "abc" "abd") -- true
+(== "abcdefghijklm" "abcdefghijklm") -- true
+;; inline string is never equal to a strictly-longer pool string
+(!= "abcdefghijkl" "abcdefghijklm") -- true
+
+;; ────────────── concat across the boundary ──────────────
+;; 12 + 1 = 13 → result lands in the pool
+(count (concat "abcdefghijkl" "x")) -- 13
+(concat "abcdefghijkl" "x") -- "abcdefghijklx"
+
+;; concat fits within inline (3 + 3 = 6)
+(concat "abc" "def") -- "abcdef"
+(count (concat "abc" "def")) -- 6
+
+;; concat pool + inline retains content and length
+(concat "abcdefghijklm" "!") -- "abcdefghijklm!"
+(count (concat "abcdefghijklm" "!")) -- 14
+
+;; concat with empty operand
+(concat "" "abc") -- "abc"
+(concat "abc" "") -- "abc"
+(concat "" "") -- ""
+
+;; ────────────── like across the boundary ──────────────
+(like "abcdef" "abc*")              -- true
+(like "abcdefghijklmnop" "abcdef*") -- true
+(like "abcdefghijklmnop" "*ghij*")  -- true
+(like "" "")                        -- true
+(like "" "x")                       -- false
diff --git a/test/rfl/sort/asc.rfl b/test/rfl/sort/asc.rfl
index d0f83028..07ff8a37 100644
--- a/test/rfl/sort/asc.rfl
+++ b/test/rfl/sort/asc.rfl
@@ -22,3 +22,39 @@ N -- (sum (<= (at S (til N)) (at S (+ 1 (til N)))))
 
 ;; idempotent
 (asc (asc V)) -- (asc V)
+
+;; ──────────────────────────────────────────────────────────────────
+;; Floats / negatives
+;; ──────────────────────────────────────────────────────────────────
+
+(asc [3.14 1.41 2.71]) -- [1.41 2.71 3.14]
+(asc [-1 -3 -2]) -- [-3 -2 -1]
+
+;; ──────────────────────────────────────────────────────────────────
+;; Morsel boundaries — 1023 / 1024 / 1025 reversed
+;; ──────────────────────────────────────────────────────────────────
+
+(set V1 (reverse (til 1023)))
+(count (asc V1)) -- 1023
+(sum (asc V1))   -- 522753
+(first (asc V1)) -- 0
+(last (asc V1))  -- 1022
+
+(set V2 (reverse (til 1024)))
+(count (asc V2)) -- 1024
+(sum (asc V2))   -- 523776
+(first (asc V2)) -- 0
+(last (asc V2))  -- 1023
+
+(set V3 (reverse (til 1025)))
+(count (asc V3)) -- 1025
+(sum (asc V3))   -- 524800
+(first (asc V3)) -- 0
+(last (asc V3))  -- 1024
+
+;; ──────────────────────────────────────────────────────────────────
+;; Idempotence and asc + reverse == desc
+;; ──────────────────────────────────────────────────────────────────
+
+(asc (asc V2)) -- (asc V2)
+(reverse (asc V2)) -- (desc V2)
diff --git a/test/rfl/sort/iasc.rfl b/test/rfl/sort/iasc.rfl
index 19a9b5b0..428deffe 100644
--- a/test/rfl/sort/iasc.rfl
+++ b/test/rfl/sort/iasc.rfl
@@ -2,6 +2,8 @@
 
 (iasc [30 10 20]) -- [1 2 0]
 (iasc [5]) -- [0]
+(iasc [1 2 3]) -- [0 1 2]
+(iasc [3 2 1]) -- [2 1 0]
 
 ;; indices sort to ascending values
 (set V (rand 200 10000))
@@ -9,3 +11,12 @@
 
 ;; is a permutation of (til n)
 (asc (iasc V)) -- (til (count V))
+
+;; stable: ties preserve source order
+(iasc [2 1 2 1]) -- [1 3 0 2]
+
+;; floats
+(iasc [3.14 1.41 2.71]) -- [1 2 0]
+
+;; negatives
+(iasc [-1 -3 -2]) -- [1 2 0]
diff --git a/test/rfl/sort/rank.rfl b/test/rfl/sort/rank.rfl
index e9fc8c98..9b3ca91a 100644
--- a/test/rfl/sort/rank.rfl
+++ b/test/rfl/sort/rank.rfl
@@ -6,5 +6,21 @@
 (set V (rand 100 10000))
 (asc (rank V)) -- (til (count V))
 
-;; rank is inverse of iasc: (at (iasc V) (rank V)) == (til n)  -- is this right?
-;; Actually: (at V (iasc V)) is sorted; rank tells us where each V[i] lands.
+;; concrete
+(rank [1 2 3 4 5]) -- [0 1 2 3 4]
+(rank [5 4 3 2 1]) -- [4 3 2 1 0]
+
+;; sum of ranks is n*(n-1)/2
+(set V [30 10 40 20])
+(sum (rank V)) -- 6
+
+;; rank of min is 0, max is n-1
+(at (rank V) 1) -- 0
+(at (rank V) 2) -- 3
+
+;; (at v (iasc v)) == (asc v)
+(set V (rand 100 10000))
+(at V (iasc V)) -- (asc V)
+
+;; floats
+(rank [3.14 1.41 2.71]) -- [2 0 1]
diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl
index 92e7be85..b07d1e43 100644
--- a/test/rfl/strop/like.rfl
+++ b/test/rfl/strop/like.rfl
@@ -6,3 +6,32 @@
 (like "hello" "*o") -- true
 (like "hello" "*") -- true
 (like "hello" "h?llo") -- true
+
+;; ────────────── extended literal / wildcard coverage ──────────────
+(like "hello world" "hello*") -- true
+(like "hello world" "*world") -- true
+(like "foobar" "*oo*") -- true
+(like "cat" "c?t") -- true
+(like "cat" "c??t") -- false
+
+;; like is anchored, not a substring search — "ell" does NOT match "hello"
+(like "hello" "ell") -- false
+
+;; ────────────── empty operands ──────────────
+(like "" "") -- true
+(like "" "x") -- false
+(like "x" "") -- false
+(like "" "*") -- true
+
+;; ────────────── universal-star metamorphic invariants ──────────────
+;; `*` matches anything, including punctuation / digits / mixed bytes.
+(like "abc" "*") -- true
+(like "xyz 123 !@#" "*") -- true
+
+;; reflexivity: text matches itself when the pattern has no meta-chars
+(like "plain text" "plain text") -- true
+(like "foo bar" "foo bar") -- true
+
+;; literal prefix/suffix combined with one star
+(like "hello world" "hello *") -- true
+(like "hello world" "* world") -- true
diff --git a/test/rfl/system/csv_roundtrip.rfl b/test/rfl/system/csv_roundtrip.rfl
new file mode 100644
index 00000000..aa99aed9
--- /dev/null
+++ b/test/rfl/system/csv_roundtrip.rfl
@@ -0,0 +1,75 @@
+;; Round-trip invariants for `.csv.write` + `.csv.read` (no schema arg).
+;; The schema-less form auto-infers column types from the first data row;
+;; this test pins down the inference rules: ints → I64, mixed-precision
+;; floats → F64, integer-valued floats collapse to I64, strings → SYM.
+;;
+;; Tests use unique /tmp/ paths and depend on a clean /tmp.
+
+;; ────────────── int round-trip ──────────────
+(set Tint (table [a b] (list [1 2 3 4 5] [10 20 30 40 50])))
+(.csv.write Tint "/tmp/rfl_csv_int.csv")
+(set Rint (.csv.read "/tmp/rfl_csv_int.csv"))
+(count Rint) -- 5
+(sum (at Rint 'a)) -- 15
+(sum (at Rint 'b)) -- 150
+(type (at Rint 'a)) -- 'I64
+(key Rint) -- (key Tint)
+
+;; ────────────── float round-trip with non-integer values ──────────────
+(set Tflt (table [x y] (list [1.5 2.5 3.14] [10.1 20.2 30.3])))
+(.csv.write Tflt "/tmp/rfl_csv_float.csv")
+(set Rflt (.csv.read "/tmp/rfl_csv_float.csv"))
+(count Rflt) -- 3
+(type (at Rflt 'x)) -- 'F64
+(first (at Rflt 'x)) -- 1.5
+(at (at Rflt 'x) 1) -- 2.5
+
+;; integer-valued floats collapse to I64 on read (documented inference)
+(set Tiv (table [x] (list [10.0 20.0 30.0])))
+(.csv.write Tiv "/tmp/rfl_csv_ivfloat.csv")
+(set Riv (.csv.read "/tmp/rfl_csv_ivfloat.csv"))
+(count Riv) -- 3
+(sum (at Riv 'x)) -- 60
+(type (at Riv 'x)) -- 'I64
+
+;; ────────────── string round-trip — string column inferred as SYM ──────────────
+(set Tstr (table [name score] (list ["Alice" "Bob" "Charlie"] [10 20 30])))
+(.csv.write Tstr "/tmp/rfl_csv_str.csv")
+(set Rstr (.csv.read "/tmp/rfl_csv_str.csv"))
+(count Rstr) -- 3
+(type (at Rstr 'name)) -- 'SYM
+(sum (at Rstr 'score)) -- 60
+(first (at Rstr 'name)) -- 'Alice
+
+;; ────────────── multi-column mixed types ──────────────
+(set Tmix (table [id name qty price] (list [1 2 3 4 5] ["AAPL" "GOOG" "MSFT" "AAPL" "GOOG"] [100 50 200 150 40] [150.5 2800.0 310.25 151.5 2795.75])))
+(.csv.write Tmix "/tmp/rfl_csv_mixed.csv")
+(set Rmix (.csv.read "/tmp/rfl_csv_mixed.csv"))
+(count Rmix) -- 5
+(sum (at Rmix 'id)) -- 15
+(sum (at Rmix 'qty)) -- 540
+(type (at Rmix 'price)) -- 'F64
+(sum (at Rmix 'price)) -- 6208.0
+(type (at Rmix 'name)) -- 'SYM
+(key Rmix) -- (key Tmix)
+
+;; ────────────── size edge cases: single row, morsel boundary ──────────────
+(set T1 (table [a] (list [42])))
+(.csv.write T1 "/tmp/rfl_csv_one.csv")
+(set R1 (.csv.read "/tmp/rfl_csv_one.csv"))
+(count R1) -- 1
+(first (at R1 'a)) -- 42
+
+;; exactly 1024 rows — morsel boundary
+(set T1024 (table [n] (list (til 1024))))
+(.csv.write T1024 "/tmp/rfl_csv_1024.csv")
+(set R1024 (.csv.read "/tmp/rfl_csv_1024.csv"))
+(count R1024) -- 1024
+(sum (at R1024 'n)) -- 523776
+
+;; 1100 rows — crosses morsel boundary mid-stream
+(set T1100 (table [n] (list (til 1100))))
+(.csv.write T1100 "/tmp/rfl_csv_1100.csv")
+(set R1100 (.csv.read "/tmp/rfl_csv_1100.csv"))
+(count R1100) -- 1100
+(sum (at R1100 'n)) -- 604450
diff --git a/test/rfl/system/serde.rfl b/test/rfl/system/serde.rfl
index a05c7f5b..de864774 100644
--- a/test/rfl/system/serde.rfl
+++ b/test/rfl/system/serde.rfl
@@ -14,3 +14,15 @@
 (de (ser 3.14)) -- 3.14
 (de (ser 'hello)) -- 'hello
 (de (ser "world")) -- "world"
+(de (ser true)) -- true
+
+;; ────────────── homogeneous vectors round-trip ──────────────
+(de (ser [1 2 3]))     -- [1 2 3]
+(de (ser [1.5 2.5]))   -- [1.5 2.5]
+(de (ser ["a" "b"]))   -- ["a" "b"]
+(de (ser ['a 'b]))     -- ['a 'b]
+
+;; ────────────── vectors with embedded nulls (typed I64 null, 0Nl) ──────────────
+;; Both the value bits AND the null mask must survive serialize/deser.
+(count (de (ser [1 0Nl 3]))) -- 3
+(sum (de (ser [1 0Nl 3])))   -- 4
diff --git a/test/rfl/system/splayed.rfl b/test/rfl/system/splayed.rfl
new file mode 100644
index 00000000..fe8e725d
--- /dev/null
+++ b/test/rfl/system/splayed.rfl
@@ -0,0 +1,32 @@
+;; Round-trip invariants for `set-splayed` / `get-splayed` — write a
+;; table to a directory as one file per column, then read it back.
+;;
+;; Tests use unique /tmp/ paths because the overwrite path is a known
+;; bug (`error: corrupt`); these only exercise the fresh-write path.
+;; Stale data from prior runs would also flip these to "corrupt", so
+;; CI environments should clean /tmp/rfl_splayed_* between runs.
+
+;; ────────────── small table ──────────────
+(set T-Small (table [a b] (list [1 2 3 4 5] [10.0 20.0 30.0 40.0 50.0])))
+(set-splayed "/tmp/rfl_splayed_small/" T-Small)
+(set R-Small (get-splayed "/tmp/rfl_splayed_small/"))
+
+(count R-Small) -- 5
+(sum (at R-Small 'a)) -- 15
+(sum (at R-Small 'b)) -- 150.0
+
+;; column names survive the round trip (key returns the SYM vec)
+(key R-Small) -- (key T-Small)
+
+;; column values match exactly
+(at R-Small 'a) -- (at T-Small 'a)
+(at R-Small 'b) -- (at T-Small 'b)
+
+;; ────────────── morsel-boundary table (1024 rows) ──────────────
+(set T-1024 (table [n] (list (til 1024))))
+(set-splayed "/tmp/rfl_splayed_1024/" T-1024)
+(set R-1024 (get-splayed "/tmp/rfl_splayed_1024/"))
+
+(count R-1024) -- 1024
+;; sum of til 1024 = 1023*1024/2
+(sum (at R-1024 'n)) -- 523776
diff --git a/test/rfl/table/meta.rfl b/test/rfl/table/meta.rfl
index d3a0d89b..37f26140 100644
--- a/test/rfl/table/meta.rfl
+++ b/test/rfl/table/meta.rfl
@@ -1,8 +1,27 @@
-;; Invariants for `meta` — per-table metadata dict.
+;; Invariants for `meta` — describes any object as a {type, len?} dict.
+;; Atoms get only :type; containers (vectors, tables, dicts) also expose
+;; :len.  For tables, :len is the column count (not the row count).
 
+;; ────────────── tables ──────────────
 (set T (table [a b c] (list [1 2] [1.0 2.0] ['x 'y])))
 (type (meta T)) -- 'DICT
-
-;; meta exposes at least type and len fields
 (at (meta T) 'type) -- 'TABLE
 (at (meta T) 'len) -- 3
+
+;; ────────────── atoms — :type only, no :len ──────────────
+(at (meta 42) 'type)         -- 'i64
+(at (meta 3.14) 'type)       -- 'f64
+(at (meta "x") 'type)        -- 'str
+(at (meta 'foo) 'type)       -- 'sym
+(at (meta true) 'type)       -- 'b8
+(at (meta 2024.01.15) 'type) -- 'date
+
+;; ────────────── vectors — :type + :len ──────────────
+(at (meta [1 2 3]) 'type)    -- 'I64
+(at (meta [1 2 3]) 'len)     -- 3
+(at (meta [1.0 2.0]) 'type)  -- 'F64
+(at (meta ["a" "b"]) 'type)  -- 'STR
+(at (meta ['a 'b]) 'type)    -- 'SYM
+
+;; ────────────── dict ──────────────
+(at (meta (dict ['x 'y 'z] [1 2 3])) 'type) -- 'DICT
diff --git a/test/rfl/table/modify.rfl b/test/rfl/table/modify.rfl
new file mode 100644
index 00000000..5e39c3bf
--- /dev/null
+++ b/test/rfl/table/modify.rfl
@@ -0,0 +1,28 @@
+;; Invariants for `modify` — functional column update on a table.
+;; (modify tbl col-name fn) returns a new table with the named column
+;; transformed by fn; the source table is left untouched (functional).
+
+(set T (table [a b] (list [1 2 3 4 5] [10.0 20.0 30.0 40.0 50.0])))
+
+;; row count preserved
+(set T2 (modify T 'a (fn [x] (* x 10))))
+(count T2) -- 5
+
+;; the named column is transformed
+(sum (at T2 'a)) -- 150
+
+;; non-named columns are passed through unchanged
+(sum (at T2 'b)) -- (sum (at T 'b))
+
+;; source table is untouched
+(sum (at T 'a)) -- 15
+
+;; modifying b with a float-returning fn updates only b
+(set T3 (modify T 'b (fn [x] (+ x 1.0))))
+(sum (at T3 'b)) -- 155.0
+(sum (at T3 'a)) -- 15
+
+;; chained modifies compose
+(set T4 (modify (modify T 'a (fn [x] (neg x))) 'b (fn [x] (* x 2.0))))
+(sum (at T4 'a)) -- -15
+(sum (at T4 'b)) -- 300.0
diff --git a/test/rfl/table/pivot.rfl b/test/rfl/table/pivot.rfl
new file mode 100644
index 00000000..4e3a35c5
--- /dev/null
+++ b/test/rfl/table/pivot.rfl
@@ -0,0 +1,25 @@
+;; Invariants for `pivot` — wide-table reshape across one (or more) row
+;; keys, one pivot key, and one value column with an aggregator.
+
+(set trades (table [sym side qty] (list ['AAPL 'GOOG 'AAPL 'MSFT 'AAPL 'GOOG] ['Buy 'Sell 'Sell 'Buy 'Buy 'Buy] [100 50 75 200 50 30])))
+
+;; ────────────── pivot with sum aggregator ──────────────
+(set Pivot-Sum (pivot trades 'sym 'side 'qty sum))
+
+;; row count is the number of distinct row-keys
+(count Pivot-Sum) -- 3
+
+;; per-side sums match what a hand group-by would produce
+(sum (at Pivot-Sum 'Buy))  -- 380
+(sum (at Pivot-Sum 'Sell)) -- 125
+
+;; total across pivoted columns equals total across the source value col
+(+ (sum (at Pivot-Sum 'Buy)) (sum (at Pivot-Sum 'Sell))) -- (sum (at trades 'qty))
+
+;; ────────────── pivot with count aggregator ──────────────
+(set Pivot-Count (pivot trades 'sym 'side 'qty count))
+
+(count Pivot-Count) -- 3
+
+;; total cells == total source rows
+(+ (sum (at Pivot-Count 'Buy)) (sum (at Pivot-Count 'Sell))) -- (count trades)
diff --git a/test/rfl/table/select.rfl b/test/rfl/table/select.rfl
new file mode 100644
index 00000000..b865d9d2
--- /dev/null
+++ b/test/rfl/table/select.rfl
@@ -0,0 +1,117 @@
+;; Invariants for `select` — table query with where / by / take / asc / desc.
+;;
+;; Tests are organised by clause, then by combinations.  Three fixtures
+;; are used: a 10-row "trades" table for basics + projection + single-key
+;; group-by, a 15-row "trades-15" table (sym/price/size/tms/d) for clause
+;; coverage, and a 14-row "trades-mk" table for multi-key group-by.
+
+;; ────────────── 10-row fixture ──────────────
+(set trades (table [sym price size] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL] [150.0 2800.0 310.0 151.5 2795.0 309.0 152.0 2810.0 311.0 150.5] [100 50 200 150 40 250 120 60 180 90])))
+
+;; ── basics
+(count trades) -- 10
+(count (select {from: trades})) -- 10
+(count (select {from: trades where: (== sym 'AAPL)})) -- 4
+(count (select {from: trades where: (> price 200)})) -- 6
+(sum (at trades 'price)) -- 9939.0
+(sum (at trades 'size)) -- 1240
+(max (at trades 'price)) -- 2810.0
+(min (at trades 'size)) -- 40
+(count (distinct (at trades 'sym))) -- 3
+
+;; ── projection: derived columns retain row count + arithmetic semantics
+(sum (at (select {notional: (* price size) from: trades}) 'notional)) -- 685140.0
+(sum (at (select {ps: (+ price size) from: trades}) 'ps)) -- 11179.0
+
+;; ── single-key group-by on the 10-row fixture
+(count (select {s: (sum size) from: trades by: sym})) -- 3
+(sum (at (select {s: (sum size) from: trades by: sym}) 's)) -- 1240
+(sum (at (select {s: (sum size) from: trades by: sym where: (== sym 'AAPL)}) 's)) -- 460
+
+;; ────────────── 15-row fixture (sym, price, size, tms, d) ──────────────
+(set trades-15 (table [sym price size tms d] (list ['AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AAPL 'GOOG 'MSFT 'AMZN 'TSLA 'AAPL 'GOOG 'MSFT 'TSLA] [150.0 2800.0 310.0 151.5 2795.0 309.0 152.0 2810.0 311.0 3300.0 700.0 150.5 2805.0 312.0 702.5] [100 50 200 150 40 250 120 60 180 75 90 110 55 190 95] [09:30:15.000 09:30:15.100 09:30:15.200 09:30:15.300 09:30:15.400 09:30:15.500 09:30:15.600 09:30:15.700 09:30:15.800 09:30:15.900 09:30:16.000 09:30:16.100 09:30:16.200 09:30:16.300 09:30:16.400] [2024.01.15 2024.01.15 2024.01.15 2024.01.15 2024.01.15 2024.01.16 2024.01.16 2024.01.16 2024.01.16 2024.01.16 2024.01.17 2024.01.17 2024.01.17 2024.01.17 2024.01.17])))
+
+;; ── where: simple comparisons against scalars
+(count (select {from: trades-15 where: (== sym 'AAPL)})) -- 4
+(count (select {from: trades-15 where: (!= sym 'MSFT)})) -- 11
+(count (select {from: trades-15 where: (> price 200.0)})) -- 11
+(count (select {from: trades-15 where: (< size 100)})) -- 7
+(count (select {from: trades-15 where: (>= size 100)})) -- 8
+(count (select {from: trades-15 where: (<= price 2000.0)})) -- 10
+
+;; ── where: compound and / or / not
+(count (select {from: trades-15 where: (and (> price 500.0) (< size 200))})) -- 7
+(count (select {from: trades-15 where: (or (== sym 'AMZN) (== sym 'TSLA))})) -- 3
+(count (select {from: trades-15 where: (not (== sym 'AAPL))})) -- 11
+(count (select {from: trades-15 where: (and (> size 50) (or (== sym 'AAPL) (== sym 'GOOG)))})) -- 6
+
+;; ── projection: total invariants under arithmetic columns
+(sum (at (select {notional: (* price size) from: trades-15}) 'notional)) -- 1278942.5
+(sum (at (select {ps: (+ price size) from: trades-15}) 'ps)) -- 19523.5
+(count (select {t: sym from: trades-15})) -- 15
+
+;; ── sort: asc / desc preserve totals; first-row equals min/max
+(sum (at (select {from: trades-15 asc: price}) 'price)) -- 17758.5
+(sum (at (select {from: trades-15 desc: price}) 'price)) -- 17758.5
+(first (at (select {from: trades-15 asc: size}) 'size)) -- 40
+(first (at (select {from: trades-15 desc: size}) 'size)) -- 250
+(count (select {from: trades-15 asc: tms})) -- 15
+
+;; ── take: clamping behavior
+(count (select {from: trades-15 take: 5})) -- 5
+(count (select {from: trades-15 take: 0})) -- 0
+(count (select {from: trades-15 take: 15})) -- 15
+
+;; ── edge cases: all-pass / none-pass / single-row / by-distinct-time
+(count (select {from: trades-15 where: (> size -1)})) -- 15
+(count (select {from: trades-15 where: (> size 999999)})) -- 0
+(count (select {from: trades-15 where: (== size 200)})) -- 1
+(count (select {s: (sum size) from: trades-15 by: tms})) -- 15
+(count (select {s: (sum size) from: trades-15 by: d})) -- 3
+
+;; ── group-by clause coverage on the 15-row fixture
+(count (select {s: (sum size) from: trades-15 by: sym})) -- 5
+(sum (at (select {s: (sum size) from: trades-15 by: sym}) 's)) -- 1765
+(sum (at (select {p: (sum price) from: trades-15 by: sym}) 'p)) -- 17758.5
+(sum (at (select {s: (sum size) from: trades-15 by: sym where: (== sym 'AAPL)}) 's)) -- 480
+(sum (at (select {c: (count size) from: trades-15 by: sym}) 'c)) -- 15
+
+;; ── combo: where + by + projection / asc + take / no-match
+(count (select {from: trades-15 where: (> price 500.0) asc: price take: 3})) -- 3
+(count (select {s: (sum size) from: trades-15 by: sym asc: sym})) -- 5
+(count (select {from: trades-15 where: (== sym 'NOPE) by: sym})) -- 0
+(first (at (select {p: (avg price) from: trades-15 by: sym where: (== sym 'GOOG)}) 'p)) -- 2802.5
+
+;; ────────────── multi-key group-by + multi-aggregator fixture ──────────────
+(set trades-mk (table [sym sector side qty price] (list ['AAPL 'AAPL 'AAPL 'GOOG 'GOOG 'MSFT 'MSFT 'MSFT 'AMZN 'AMZN 'WMT 'WMT 'TSLA 'TSLA] ['Tech 'Tech 'Tech 'Tech 'Tech 'Tech 'Tech 'Tech 'Retail 'Retail 'Retail 'Retail 'Auto 'Auto] ['Buy 'Sell 'Buy 'Buy 'Sell 'Buy 'Sell 'Buy 'Buy 'Sell 'Buy 'Sell 'Buy 'Sell] [100 50 75 40 60 200 80 100 75 25 150 90 90 30] [150.0 151.0 149.5 2800.0 2795.0 310.0 311.0 309.0 3300.0 3305.0 180.0 181.0 700.0 705.0])))
+
+;; ── single key
+(count (select {s: (sum qty) from: trades-mk by: sym})) -- 6
+(sum (at (select {s: (sum qty) from: trades-mk by: sym}) 's)) -- 1165
+(sum (at (select {p: (sum price) from: trades-mk by: sym}) 'p)) -- 15346.5
+(sum (at (select {c: (count qty) from: trades-mk by: sym}) 'c)) -- 14
+
+;; ── two-key
+(count (select {s: (sum qty) from: trades-mk by: [sector side]})) -- 6
+(sum (at (select {s: (sum qty) from: trades-mk by: [sector side]}) 's)) -- 1165
+(sum (at (select {s: (sum qty) from: trades-mk by: [sector side] where: (and (== sector 'Tech) (== side 'Buy))}) 's)) -- 515
+
+;; ── three-key
+(count (select {s: (sum qty) from: trades-mk by: [sym sector side]})) -- 12
+(sum (at (select {s: (sum qty) from: trades-mk by: [sym sector side]}) 's)) -- 1165
+
+;; ── int key (non-symbol grouping)
+(count (select {s: (sum price) from: trades-mk by: qty})) -- 11
+(sum (at (select {s: (sum price) from: trades-mk by: qty}) 's)) -- 15346.5
+
+;; ── filter-then-group preserves filtered totals
+(count (select {s: (sum qty) from: trades-mk by: sym where: (== side 'Buy)})) -- 6
+(sum (at (select {s: (sum qty) from: trades-mk by: sym where: (== side 'Buy)}) 's)) -- 830
+
+;; ── multi-aggregator on a single group (AAPL)
+(set aapl (select {s: (sum qty) c: (count qty) mx: (max price) mn: (min price) av: (avg price) from: trades-mk by: sym where: (== sym 'AAPL)}))
+(first (at aapl 's))  -- 225
+(first (at aapl 'c))  -- 3
+(first (at aapl 'mx)) -- 151.0
+(first (at aapl 'mn)) -- 149.5
+(first (at aapl 'av)) -- 150.16666666666666
diff --git a/test/rfl/temporal/date.rfl b/test/rfl/temporal/date.rfl
index b23c0d98..06aaf382 100644
--- a/test/rfl/temporal/date.rfl
+++ b/test/rfl/temporal/date.rfl
@@ -7,3 +7,39 @@
 (yyyy 2024.06.15) -- 2024
 (mm 2024.06.15) -- 6
 (dd 2024.06.15) -- 15
+
+;; ────────────── month-boundary arithmetic ──────────────
+;; date + days and date - days stay in DATE space; date - date yields an integer day count.
+(+ 2024.01.31 1) -- 2024.02.01
+(+ 2024.03.31 1) -- 2024.04.01
+(+ 2024.12.31 1) -- 2025.01.01
+
+;; ────────────── leap-year edges ──────────────
+;; 2024 is leap (div 4); Feb 28 → Feb 29 → Mar 1 with single-day steps.
+(+ 2024.02.28 1) -- 2024.02.29
+(+ 2024.02.29 1) -- 2024.03.01
+;; 2023 is not leap; Feb 28 → Mar 1 directly.
+(+ 2023.02.28 1) -- 2023.03.01
+;; Century rule: 2000 is leap (div 400), 2100 is not (div 100, not 400).
+(+ 2000.02.28 1) -- 2000.02.29
+(+ 2100.02.28 1) -- 2100.03.01
+
+;; ────────────── subtraction ──────────────
+(- 2024.03.01 1) -- 2024.02.29
+(- 2023.03.01 1) -- 2023.02.28
+(- 2025.01.01 1) -- 2024.12.31
+;; before epoch (date 0 = 2000.01.01)
+(- 2000.01.01 1) -- 1999.12.31
+
+;; ────────────── date - date = days (integer) ──────────────
+(- 2024.03.01 2024.02.01) -- 29
+(- 2023.03.01 2023.02.01) -- 28
+(- 2025.01.01 2024.01.01) -- 366
+(- 2024.01.01 2023.01.01) -- 365
+
+;; ────────────── ordering ──────────────
+(< 2024.01.01 2024.01.02) -- true
+(> 2024.12.31 2024.01.01) -- true
+(== 2024.06.15 2024.06.15) -- true
+(<= 2024.01.01 2024.01.01) -- true
+(>= 2024.12.31 2024.06.15) -- true
diff --git a/test/rfl/temporal/time.rfl b/test/rfl/temporal/time.rfl
index dc4cd62a..0c935fec 100644
--- a/test/rfl/temporal/time.rfl
+++ b/test/rfl/temporal/time.rfl
@@ -6,3 +6,16 @@
 (hh 12:30:45.000) -- 12
 (minute 12:30:45.000) -- 30
 (ss 12:30:45.000) -- 45
+
+;; with sub-second precision the integer extractors still drop ms
+(hh 12:30:45.123)     -- 12
+(minute 12:30:45.123) -- 30
+(ss 12:30:45.123)     -- 45
+
+;; midnight / end-of-day boundaries
+(hh 00:00:00.000) -- 0
+(hh 23:59:59.999) -- 23
+
+;; ordering
+(< 09:00:00.000 10:00:00.000) -- true
+(== 12:30:45.000 12:30:45.000) -- true
diff --git a/test/rfl/type/as.rfl b/test/rfl/type/as.rfl
index 7fe2782a..4e8f7b81 100644
--- a/test/rfl/type/as.rfl
+++ b/test/rfl/type/as.rfl
@@ -367,6 +367,45 @@
 (as 'I32 (list 1i 2i 3i)) -- [1i 2i 3i]
 (as 'F64 (list 1.0 2.0 3.0)) -- [1.0 2.0 3.0]
 (as 'B8 (list true false true)) -- [true false true]
+;; ========== STRING → NUMERIC, EDGE VALUES ==========
+;; integer-valued floats from strings
+(as 'f64 "0.0") -- 0.0
+(as 'f64 "100") -- 100.0
+
+;; INT16/INT32 boundary parses — negative-extreme literals can't be written
+;; (parser tokenises positive then negates), so verify via i64 round-trip.
+(as 'i64 (as 'i16 "-32768")) -- -32768
+(as 'i64 (as 'i16 "32767")) -- 32767
+(as 'i64 (as 'i32 "-2147483648")) -- -2147483648
+(as 'i64 (as 'i32 "2147483647")) -- 2147483647
+
+;; ========== NULL PRESERVATION ACROSS CASTS ==========
+;; Casting any null returns null of target type, never sentinel/INT_MIN.
+(nil? (as 'i64 0Nh)) -- true
+(nil? (as 'i64 0Ni)) -- true
+(nil? (as 'i32 0N))  -- true
+(nil? (as 'i16 0N))  -- true
+(nil? (as 'f64 0N))  -- true
+(nil? (as 'i64 0Nf)) -- true
+(nil? (as 'i32 0Nf)) -- true
+(nil? (as 'i16 0Nf)) -- true
+(nil? (as 'i64 0Nd)) -- true
+
+;; ========== TEMPORAL OFFSET SEMANTICS ==========
+;; DATE is days since 2000.01.01, TIME is ms since midnight.
+(as 'i64 2000.01.01) -- 0
+(as 'i64 2000.01.02) -- 1
+(as 'i64 1999.12.31) -- -1
+(as 'i64 00:00:00.000) -- 0
+(as 'i64 00:00:01.000) -- 1000
+(as 'i64 01:00:00.000) -- 3600000
+(as 'i64 23:59:59.999) -- 86399999
+
+;; integer → DATE/TIME (inverse direction of the casts above)
+(as 'date 0) -- 2000.01.01
+(as 'date 1) -- 2000.01.02
+(as 'time 1000) -- 00:00:01.000
+
 ;; ========== PARALLEL CAST (large vectors) ==========
 ;; i64 -> i32 (large vector triggers parallel processing)
 (sum (as 'I32 (til 100000))) -- 4999950000

From 153e9e95303beb084b1f72f0b669e3e3d8e27d9e Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 14:46:12 +0300
Subject: [PATCH 02/21] fix(lang): raise arity on wrong-arg-count for
 UNARY/BINARY builtins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously +/-/* and other RAY_BINARY verbs silently truncated extras:

  (+ 1 2 3)     -> 3        (third arg dropped)
  (+ 1 2 3 4 5) -> 3
  (- 10 1 2)    -> 9
  (* 2 3 4)     -> 6

Same hole in RAY_UNARY: extras after the first argument were released
without raising. `(+ 1)` and `(+)` were already domain errors but the
3+-arg case fell through to the binary kernel against the first two
args and threw the rest away.

Now both dispatch sites — the bytecode VM (eval.c:1656) and the tree-
walking eval (eval.c:2378) — raise:
  arity: expected N arg(s), got M

`and` and `or` were registered as RAY_BINARY but Anton's tests expected
variadic fold semantics ((and a b c) -> fold AND).  Switch them to
RAY_VARY via ray_and_vary_fn / ray_or_vary_fn — left-fold over the
existing binary kernel.  The (and X Y) DAG path through select where:
clauses still emits OP_AND / OP_OR via the expression compiler, so
fused execution is unaffected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/lang/eval.c        | 14 ++++++--------
 src/lang/eval.h        |  2 ++
 src/ops/cmp.c          | 28 ++++++++++++++++++++++++++++
 test/rfl/arith/add.rfl | 10 ++++++++++
 test/rfl/arith/mul.rfl |  5 +++++
 test/rfl/arith/sub.rfl |  5 +++++
 test/rfl/cmp/and.rfl   | 10 ++++++++++
 test/rfl/cmp/or.rfl    | 10 ++++++++++
 8 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/src/lang/eval.c b/src/lang/eval.c
index aeaae3c6..89f7dc01 100644
--- a/src/lang/eval.c
+++ b/src/lang/eval.c
@@ -1661,18 +1661,16 @@ op_callf: {
         switch (fn_obj->type) {
         case RAY_UNARY:
             if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; }
-            if (n < 1) { result = ray_error("arity", "expected 1 arg, got 0"); break; }
+            if (n != 1) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 1 arg, got %d", n); break; }
             result = ((ray_unary_fn)(uintptr_t)fn_obj->i64)(fn_args[0]);
             ray_release(fn_args[0]);
-            for (int32_t i = 1; i < n; i++) ray_release(fn_args[i]);
             break;
         case RAY_BINARY:
             if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; }
-            if (n < 2) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 2 args, got %d", n); break; }
+            if (n != 2) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 2 args, got %d", n); break; }
             result = ((ray_binary_fn)(uintptr_t)fn_obj->i64)(fn_args[0], fn_args[1]);
             ray_release(fn_args[0]);
             ray_release(fn_args[1]);
-            for (int32_t i = 2; i < n; i++) ray_release(fn_args[i]);
             break;
         case RAY_VARY:
             if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; }
@@ -2021,8 +2019,8 @@ static void ray_register_builtins(void) {
     register_binary_op("<=",  RAY_FN_ATOMIC, ray_lte_fn,    OP_LE);
     register_binary_op("==",  RAY_FN_ATOMIC, ray_eq_fn,  OP_EQ);
     register_binary_op("!=",  RAY_FN_ATOMIC, ray_neq_fn,    OP_NE);
-    register_binary_op("and", RAY_FN_NONE,   ray_and_fn, OP_AND);
-    register_binary_op("or",  RAY_FN_NONE,   ray_or_fn,  OP_OR);
+    register_vary("and", RAY_FN_NONE, ray_and_vary_fn);
+    register_vary("or",  RAY_FN_NONE, ray_or_vary_fn);
     register_unary_op("not",  RAY_FN_NONE,   ray_not_fn, OP_NOT);
     register_unary_op("neg",  RAY_FN_ATOMIC, ray_neg_fn, OP_NEG);
     register_unary("round",   RAY_FN_ATOMIC, ray_round_fn);
@@ -2392,7 +2390,7 @@ ray_t* ray_eval(ray_t* obj) {
 
     switch (head->type) {
         case RAY_UNARY: {
-            if (n < 2) { ray_release(head); ret = ray_error("domain", NULL); goto out; }
+            if (n != 2) { ray_release(head); ret = ray_error("arity", "expected 1 arg, got %d", (int)(n-1)); goto out; }
             if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; }
             ray_unary_fn fn = (ray_unary_fn)(uintptr_t)head->i64;
             uint8_t fn_attrs = head->attrs;
@@ -2412,7 +2410,7 @@ ray_t* ray_eval(ray_t* obj) {
             ret = result; goto out;
         }
         case RAY_BINARY: {
-            if (n < 3) { ray_release(head); ret = ray_error("domain", NULL); goto out; }
+            if (n != 3) { ray_release(head); ret = ray_error("arity", "expected 2 args, got %d", (int)(n-1)); goto out; }
             if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; }
             ray_binary_fn fn = (ray_binary_fn)(uintptr_t)head->i64;
             uint8_t fn_attrs = head->attrs;
diff --git a/src/lang/eval.h b/src/lang/eval.h
index d79a8dab..df86e735 100644
--- a/src/lang/eval.h
+++ b/src/lang/eval.h
@@ -201,6 +201,8 @@ ray_t* ray_neq_fn(ray_t* a, ray_t* b);
 /* Logic */
 ray_t* ray_and_fn(ray_t* a, ray_t* b);
 ray_t* ray_or_fn(ray_t* a, ray_t* b);
+ray_t* ray_and_vary_fn(ray_t** args, int64_t n);
+ray_t* ray_or_vary_fn(ray_t** args, int64_t n);
 ray_t* ray_not_fn(ray_t* x);
 ray_t* ray_neg_fn(ray_t* x);
 
diff --git a/src/ops/cmp.c b/src/ops/cmp.c
index 3af97cea..7c86d417 100644
--- a/src/ops/cmp.c
+++ b/src/ops/cmp.c
@@ -215,6 +215,34 @@ ray_t* ray_or_fn(ray_t* a, ray_t* b) {
     return make_bool((is_truthy(a) || is_truthy(b)) ? 1 : 0);
 }
 
+/* Variadic and/or as a left fold over the binary kernels: (and a b c)
+ * == (and (and a b) c).  n < 2 raises arity; a NULL/error result ends it. */
+ray_t* ray_and_vary_fn(ray_t** args, int64_t n) {
+    if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n);
+    ray_t* acc = ray_and_fn(args[0], args[1]);
+    if (!acc || RAY_IS_ERR(acc)) return acc;
+    for (int64_t i = 2; i < n; i++) {
+        ray_t* next = ray_and_fn(acc, args[i]);
+        ray_release(acc);
+        if (!next || RAY_IS_ERR(next)) return next;
+        acc = next;
+    }
+    return acc;
+}
+
+ray_t* ray_or_vary_fn(ray_t** args, int64_t n) {
+    if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n);
+    ray_t* acc = ray_or_fn(args[0], args[1]);
+    if (!acc || RAY_IS_ERR(acc)) return acc;
+    for (int64_t i = 2; i < n; i++) {
+        ray_t* next = ray_or_fn(acc, args[i]);
+        ray_release(acc);
+        if (!next || RAY_IS_ERR(next)) return next;
+        acc = next;
+    }
+    return acc;
+}
+
 /* Unary */
 ray_t* ray_not_fn(ray_t* x) {
     /* Element-wise for bool vectors */
diff --git a/test/rfl/arith/add.rfl b/test/rfl/arith/add.rfl
index f2e9ee69..56df4e63 100644
--- a/test/rfl/arith/add.rfl
+++ b/test/rfl/arith/add.rfl
@@ -146,3 +146,13 @@
 
 ;; last element at morsel 1025
 (last (+ V 1)) -- 1025
+
+;; ──────────────────────────────────────────────────────────────────
+;; Arity — binary verb: too few or too many args raises `arity`.
+;; (Pre-fix: extras were silently dropped — `(+ 1 2 3)` returned 3.)
+;; ──────────────────────────────────────────────────────────────────
+
+(+)             !- arity
+(+ 1)           !- arity
+(+ 1 2 3)       !- arity
+(+ 1 2 3 4 5)   !- arity
diff --git a/test/rfl/arith/mul.rfl b/test/rfl/arith/mul.rfl
index c5f278a4..c19cdeaa 100644
--- a/test/rfl/arith/mul.rfl
+++ b/test/rfl/arith/mul.rfl
@@ -75,3 +75,8 @@
 (set V (til 1025))
 (* V 1) -- V
 (sum (* 2 V)) -- (* 2 (sum V))
+
+
+;; ── Arity — binary verb (pre-fix: extras silently dropped) ──
+(* 2 3 4)       !- arity
+(* 2 3 4 5)     !- arity
diff --git a/test/rfl/arith/sub.rfl b/test/rfl/arith/sub.rfl
index 4ea43241..56bae9a4 100644
--- a/test/rfl/arith/sub.rfl
+++ b/test/rfl/arith/sub.rfl
@@ -84,3 +84,8 @@
 (set V (til 1025))
 (sum (- V V)) -- 0
 (last (- V 1)) -- 1023
+
+
+;; ── Arity — binary verb (pre-fix: extras silently dropped) ──
+(- 10 1 2)      !- arity
+(- 10 1 2 3)    !- arity
diff --git a/test/rfl/cmp/and.rfl b/test/rfl/cmp/and.rfl
index 50fd6340..973d2d56 100644
--- a/test/rfl/cmp/and.rfl
+++ b/test/rfl/cmp/and.rfl
@@ -25,3 +25,13 @@
 (and [true false true] [false true false]) -- [false false false]
 (and [true false true] [false true false] [true false true]) -- [false false false]
 (and [true false true] true) -- [true false true]
+
+;; ── variadic fold: (and a b c) == (and (and a b) c) ──
+(and true true true) -- true
+(and true true false) -- false
+(and true true true true true) -- true
+(and true true true true false) -- false
+
+;; ── arity boundaries ──
+(and)        !- arity
+(and true)   !- arity
diff --git a/test/rfl/cmp/or.rfl b/test/rfl/cmp/or.rfl
index a1dc543b..cedfb089 100644
--- a/test/rfl/cmp/or.rfl
+++ b/test/rfl/cmp/or.rfl
@@ -30,3 +30,13 @@
 (or [true false true] [false true false]) -- [true true true]
 (or [true false true] [false true false] [true false true]) -- [true true true]
 (or [true false true] true) -- [true true true]
+
+;; ── variadic fold: (or a b c) == (or (or a b) c) ──
+(or false false false) -- false
+(or false false true)  -- true
+(or false false false false false) -- false
+(or false false false false true)  -- true
+
+;; ── arity boundaries ──
+(or)         !- arity
+(or false)   !- arity

From 072e040414f107f2467b8e39a9573ed6f71f1cab Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 15:09:46 +0300
Subject: [PATCH 03/21] fix(cmp): lexicographic ordering for SYM atoms (>, <,
 >=, <=)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously (> 'b 'a) raised "error: type: cannot compare sym and sym"
even for the reflexive case (>= 'a 'a).  Equality comparisons (==/!=)
worked because they could just check interned IDs, but ordering had no
implementation at all and fell through to the is_numeric() guard.

Add sym_atom_cmp(a, b):

  - Fast path: equal interned IDs => identical text (interning gives
    one ID per text), return 0 without touching the global sym table.
  - Slow path: ray_sym_str(id) returns a RAY_STR atom; ray_str_cmp
    delegates to ray_str_t_cmp which uses the 12-byte SSO inline path
    for short symbols and prefix-then-fullcompare for pooled ones.

Wire it into ray_gt_fn / ray_lt_fn / ray_gte_fn / ray_lte_fn at the
same dispatch site as the existing -RAY_GUID branch.  Vector and
broadcast paths inherit the fix automatically — atomic_map_binary
unboxes SYM elements via collection_elem (-> ray_sym(id) atom), so
each pairwise call lands in the new branch.

The pre-fix probe (test/rfl/cmp/gt.rfl:41-44, "(> 'b 'a) !- type") is
replaced with positive assertions covering atoms, broadcast, vec/vec,
and the SSO inline/pooled boundary.  Mirror coverage added to lt.rfl,
ge.rfl, le.rfl.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ops/cmp.c       | 22 ++++++++++++++++++++++
 test/rfl/cmp/ge.rfl |  9 +++++++++
 test/rfl/cmp/gt.rfl | 27 ++++++++++++++++++++++-----
 test/rfl/cmp/le.rfl |  9 +++++++++
 test/rfl/cmp/lt.rfl | 12 ++++++++++++
 5 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/src/ops/cmp.c b/src/ops/cmp.c
index 7c86d417..d696e1cb 100644
--- a/src/ops/cmp.c
+++ b/src/ops/cmp.c
@@ -40,9 +40,25 @@ int char_str_cmp(ray_t* a, ray_t* b, int *out) {
     return 0;
 }
 
+/* Lexicographic compare of two SYM atoms.  Fast path: equal interned
+ * ids ⇒ identical text ⇒ 0, no global-table lookup.  Slow path: pull
+ * the backing STRs via ray_sym_str and delegate to ray_str_cmp (SSO
+ * inline path for short symbols); a NULL lookup compares as equal (0). */
+int sym_atom_cmp(ray_t* a, ray_t* b) {
+    if (a->i64 == b->i64) return 0;
+    ray_t* sa = ray_sym_str(a->i64);
+    ray_t* sb = ray_sym_str(b->i64);
+    int r = (sa && sb) ? ray_str_cmp(sa, sb) : 0;
+    if (sa) ray_release(sa);
+    if (sb) ray_release(sb);
+    return r;
+}
+
 /* Comparison */
 ray_t* ray_gt_fn(ray_t* a, ray_t* b) {
     { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c > 0 ? 1 : 0); }
+    if (a->type == -RAY_SYM && b->type == -RAY_SYM)
+        return make_bool(sym_atom_cmp(a, b) > 0 ? 1 : 0);
     if (a->type == -RAY_GUID && b->type == -RAY_GUID)
         return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) > 0 ? 1 : 0);
     /* Temporal comparison (same or cross-temporal via nanosecond conversion) */
@@ -63,6 +79,8 @@ ray_t* ray_gt_fn(ray_t* a, ray_t* b) {
 
 ray_t* ray_lt_fn(ray_t* a, ray_t* b) {
     { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c < 0 ? 1 : 0); }
+    if (a->type == -RAY_SYM && b->type == -RAY_SYM)
+        return make_bool(sym_atom_cmp(a, b) < 0 ? 1 : 0);
     if (a->type == -RAY_GUID && b->type == -RAY_GUID)
         return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) < 0 ? 1 : 0);
     if (is_temporal(a) && is_temporal(b)) {
@@ -82,6 +100,8 @@ ray_t* ray_lt_fn(ray_t* a, ray_t* b) {
 
 ray_t* ray_gte_fn(ray_t* a, ray_t* b) {
     { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c >= 0 ? 1 : 0); }
+    if (a->type == -RAY_SYM && b->type == -RAY_SYM)
+        return make_bool(sym_atom_cmp(a, b) >= 0 ? 1 : 0);
     if (a->type == -RAY_GUID && b->type == -RAY_GUID)
         return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) >= 0 ? 1 : 0);
     if (is_temporal(a) && is_temporal(b)) {
@@ -102,6 +122,8 @@ ray_t* ray_gte_fn(ray_t* a, ray_t* b) {
 
 ray_t* ray_lte_fn(ray_t* a, ray_t* b) {
     { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c <= 0 ? 1 : 0); }
+    if (a->type == -RAY_SYM && b->type == -RAY_SYM)
+        return make_bool(sym_atom_cmp(a, b) <= 0 ? 1 : 0);
     if (a->type == -RAY_GUID && b->type == -RAY_GUID)
         return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) <= 0 ? 1 : 0);
     if (is_temporal(a) && is_temporal(b)) {
diff --git a/test/rfl/cmp/ge.rfl b/test/rfl/cmp/ge.rfl
index 4d0ddfed..580a9b7a 100644
--- a/test/rfl/cmp/ge.rfl
+++ b/test/rfl/cmp/ge.rfl
@@ -32,3 +32,12 @@
 
 ;; vec
 (>= [1 5 5] [2 2 5]) -- [false true true]
+
+;; ──────────────────────────────────────────────────────────────────
+;; SYM ordering: reflexive, lexicographic.
+;; ──────────────────────────────────────────────────────────────────
+
+(>= 'a 'a) -- true
+(>= 'b 'a) -- true
+(>= 'a 'b) -- false
+(>= ['a 'b 'c] 'b) -- [false true true]
diff --git a/test/rfl/cmp/gt.rfl b/test/rfl/cmp/gt.rfl
index 16ae306d..92551fc2 100644
--- a/test/rfl/cmp/gt.rfl
+++ b/test/rfl/cmp/gt.rfl
@@ -35,10 +35,27 @@
 (> [1 5 10] 5)      -- [false false true]
 
 ;; ──────────────────────────────────────────────────────────────────
-;; Probe: > on SYM atoms raises type — known bug (see _probes/cmp_sym_ordering)
+;; SYM ordering: lexicographic compare via interned text.
 ;; ──────────────────────────────────────────────────────────────────
 
-(> 'b 'a)   !- type
-(< 'a 'b)   !- type
-(>= 'a 'a)  !- type
-(<= 'a 'a)  !- type
+;; atom — basic ordering
+(> 'b 'a) -- true
+(> 'a 'b) -- false
+(> 'a 'a) -- false
+
+;; on a shared prefix the longer symbol is greater (the shorter one sorts first)
+(> 'foobar 'foo) -- true
+(> 'foo 'foobar) -- false
+
+;; broadcast: vec > atom
+(> ['a 'b 'c] 'a) -- [false true true]
+(> 'b ['a 'b 'c]) -- [true false false]
+
+;; vec/vec pairwise
+(> ['b 'b 'b] ['a 'b 'c]) -- [true false false]
+
+;; mixed inline (≤ 12 bytes) and pooled (> 12 bytes) — exercises SSO.
+;; Note: hyphens in symbols would be parsed as ops, so use underscore.
+;; 's' > 'l' so 'short > 'longer_…
+(> 'short 'longer_than_twelve_bytes_symbol) -- true
+(> 'longer_than_twelve_bytes_symbol 'short) -- false
diff --git a/test/rfl/cmp/le.rfl b/test/rfl/cmp/le.rfl
index 5e098305..964ca8f7 100644
--- a/test/rfl/cmp/le.rfl
+++ b/test/rfl/cmp/le.rfl
@@ -25,3 +25,12 @@
 
 ;; vec
 (<= [1 5 5] [2 2 5]) -- [true false true]
+
+;; ──────────────────────────────────────────────────────────────────
+;; SYM ordering: reflexive, lexicographic.
+;; ──────────────────────────────────────────────────────────────────
+
+(<= 'a 'a) -- true
+(<= 'a 'b) -- true
+(<= 'b 'a) -- false
+(<= ['a 'b 'c] 'b) -- [true true false]
diff --git a/test/rfl/cmp/lt.rfl b/test/rfl/cmp/lt.rfl
index 9be58bb3..254124c3 100644
--- a/test/rfl/cmp/lt.rfl
+++ b/test/rfl/cmp/lt.rfl
@@ -31,3 +31,15 @@
 ;; vec
 (< [1 5 3] [2 2 2]) -- [true false false]
 (< [1 5 10] 5)      -- [true false false]
+
+;; ──────────────────────────────────────────────────────────────────
+;; SYM ordering: lexicographic compare via interned text.
+;; ──────────────────────────────────────────────────────────────────
+
+(< 'a 'b) -- true
+(< 'b 'a) -- false
+(< 'a 'a) -- false
+(< 'foo 'foobar) -- true
+(< 'foobar 'foo) -- false
+(< ['a 'b 'c] 'b) -- [true false false]
+(< ['a 'b] ['b 'a]) -- [true false]

From 9ff6e807f8a7cf12c144dcba0525b2eeb50df5e2 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 15:32:24 +0300
Subject: [PATCH 04/21] fix(arith): neg preserves narrow-int type; pin all
 type-promotion rules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ray_neg_fn only handled -RAY_I64 and -RAY_F64 atoms; (neg 5h) and
(neg 5i) raised "error: type" even though abs already accepted them.

Add the i16/i32 branches and preserve type — same convention as
binary +, -, *, %, /:

    (neg 5h)       -> -5h     (i16)
    (neg 5i)       -> -5i     (i32)
    (neg [1h 2h])  -> [-1h -2h]   (I16)

(Vector path inherits the fix via RAY_FN_ATOMIC.)  Type preservation
is the right call here: in Rayforce typed nulls live in a separate
nullmap bit, so INT_MIN is just a regular value and there's no
overflow concern that would justify widening like abs does.

Add test/rfl/arith/type_preservation.rfl pinning the full type-
promotion matrix so a regression in any op surfaces loudly:

  - same-width +/-/*//%  preserve type (i16, i32, i64, f64)
  - cross-width +/-/*//%  follow "wider wins" (i16+i32→i32, etc.)
  - all comparisons return bool
  - unary neg/floor/ceil  preserve type
  - unary abs widens narrow ints to i64 (documented inconsistency)
  - math fns (round/sqrt/log/exp)  always return f64

Replaces the stale `(neg 5h) !- type` probes in test/rfl/arith/neg.rfl
with positive assertions including type assertions and null
propagation across i16/i32.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ops/arith.c                      |   3 +
 test/rfl/arith/neg.rfl               |  22 +++--
 test/rfl/arith/type_preservation.rfl | 115 +++++++++++++++++++++++++++
 3 files changed, 135 insertions(+), 5 deletions(-)
 create mode 100644 test/rfl/arith/type_preservation.rfl

diff --git a/src/ops/arith.c b/src/ops/arith.c
index 63b9352c..029a1a84 100644
--- a/src/ops/arith.c
+++ b/src/ops/arith.c
@@ -332,6 +332,9 @@ ray_t* ray_neg_fn(ray_t* x) {
     if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; }
     if (x->type == -RAY_I64) return make_i64(-x->i64);
     if (x->type == -RAY_F64) return make_f64(-x->f64);
+    /* Narrow ints preserve type — same convention as binary + - mul.  i32 negates via unsigned arithmetic so INT32_MIN wraps instead of hitting signed-overflow UB. */
+    if (x->type == -RAY_I32) return make_i32((int32_t)(0u - (uint32_t)x->i32));
+    if (x->type == -RAY_I16) return make_i16(-x->i16);
     return ray_error("type", NULL);
 }
 
diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl
index 532d0048..47e0444e 100644
--- a/test/rfl/arith/neg.rfl
+++ b/test/rfl/arith/neg.rfl
@@ -43,10 +43,22 @@
 (sum (+ V (neg V))) -- 0
 
 ;; ──────────────────────────────────────────────────────────────────
-;; Probe: neg on narrow-int currently raises type
-;; (filed in spec/_probes/neg_narrow_int.rfl)
+;; Narrow ints preserve type on negation (same convention as +/-/*).
 ;; ──────────────────────────────────────────────────────────────────
 
-(neg 5h)      !- type
-(neg 5i)      !- type
-(neg [1h 2h]) !- type
+(neg 5h) -- -5h
+(neg 5i) -- -5i
+(neg -5h) -- 5h
+(neg -5i) -- 5i
+(neg [1h 2h 3h]) -- [-1h -2h -3h]
+(neg [1i 2i 3i]) -- [-1i -2i -3i]
+
+;; type is preserved
+(type (neg 5h))         -- 'i16
+(type (neg 5i))         -- 'i32
+(type (neg [1h 2h]))    -- 'I16
+(type (neg [1i 2i]))    -- 'I32
+
+;; null propagation across narrow types
+(nil? (neg 0Nh)) -- true
+(nil? (neg 0Ni)) -- true
diff --git a/test/rfl/arith/type_preservation.rfl b/test/rfl/arith/type_preservation.rfl
new file mode 100644
index 00000000..6f4591b8
--- /dev/null
+++ b/test/rfl/arith/type_preservation.rfl
@@ -0,0 +1,115 @@
+;; Regression: pin down type-promotion rules for every arithmetic op.
+;; Each assertion locks in the current behavior so an accidental change
+;; (e.g. someone widening `+` to i64 unconditionally) breaks loudly.
+;;
+;; Documented rules:
+;;   1. Same-width binary + - * / % preserve type.
+;;        (+ 5h 3h) → i16,  (+ 5i 3i) → i32,  (+ 5 3) → i64,  (+ 5.0 3.0) → f64
+;;   2. Cross-width binary: wider wins.
+;;        (+ i16 i32) → i32,  (+ i16 i64) → i64,  (+ i16 f64) → f64
+;;   3. Comparison ops always return bool.
+;;   4. Unary neg / floor / ceil preserve type.
+;;   5. Unary abs widens narrow ints to i64. (Documented inconsistency;
+;;      see SPEC for justification — flagged for review.)
+;;   6. Math fns (round, sqrt, log, exp) always produce f64.
+
+;; ── (1) Same-width preservation: + - * /
+(type (+ 5h 3h)) -- 'i16
+(type (- 5h 3h)) -- 'i16
+(type (* 5h 3h)) -- 'i16
+(type (/ 6h 3h)) -- 'i16
+(type (+ 5i 3i)) -- 'i32
+(type (- 5i 3i)) -- 'i32
+(type (* 5i 3i)) -- 'i32
+(type (/ 6i 3i)) -- 'i32
+(type (+ 5 3))   -- 'i64
+(type (- 5 3))   -- 'i64
+(type (* 5 3))   -- 'i64
+(type (/ 6 3))   -- 'i64
+(type (+ 5.0 3.0)) -- 'f64
+(type (- 5.0 3.0)) -- 'f64
+(type (* 5.0 3.0)) -- 'f64
+(type (/ 6.0 3.0)) -- 'f64
+
+;; vector form mirrors atomic
+(type (+ [1h 2h] [3h 4h])) -- 'I16
+(type (+ [1i 2i] [3i 4i])) -- 'I32
+(type (+ [1 2] [3 4]))     -- 'I64
+(type (+ [1.0 2.0] [3.0 4.0])) -- 'F64
+
+;; ── (2) Cross-width: wider wins
+(type (+ 5h 3i))   -- 'i32
+(type (+ 5h 3))    -- 'i64
+(type (+ 5i 3))    -- 'i64
+(type (+ 5h 3.0))  -- 'f64
+(type (+ 5i 3.0))  -- 'f64
+(type (+ 5 3.0))   -- 'f64
+
+(type (+ [1h 2h] [3i 4i])) -- 'I32
+(type (+ [1h 2h] [3 4]))   -- 'I64
+(type (+ [1h 2h] [3.0 4.0])) -- 'F64
+(type (+ [1i 2i] [3.0 4.0])) -- 'F64
+
+;; ── (3) Comparison always → bool
+(type (== 5h 3h)) -- 'b8
+(type (!= 5h 3h)) -- 'b8
+(type (> 5h 3h))  -- 'b8
+(type (< 5h 3h))  -- 'b8
+(type (>= 5h 3h)) -- 'b8
+(type (<= 5h 3h)) -- 'b8
+
+(type (== 5h 3))   -- 'b8
+(type (== 5h 3.0)) -- 'b8
+
+(type (== [1h 2h] [3h 4h])) -- 'B8
+
+;; ── (4) Unary preserve type: neg, floor, ceil
+(type (neg 5h))   -- 'i16
+(type (neg 5i))   -- 'i32
+(type (neg 5))    -- 'i64
+(type (neg 5.0))  -- 'f64
+(type (neg [1h 2h])) -- 'I16
+(type (neg [1i 2i])) -- 'I32
+
+(type (floor 5h))  -- 'i16
+(type (floor 5i))  -- 'i32
+(type (floor 5))   -- 'i64
+(type (floor 5.0)) -- 'f64
+
+(type (ceil 5h))  -- 'i16
+(type (ceil 5i))  -- 'i32
+(type (ceil 5))   -- 'i64
+(type (ceil 5.0)) -- 'f64
+
+;; ── (5) abs widens narrow ints to i64 (inconsistent with neg — see SPEC)
+(type (abs 5h))   -- 'i64
+(type (abs 5i))   -- 'i64
+(type (abs 5))    -- 'i64
+(type (abs 5.0))  -- 'f64
+
+;; ── (6) Math fns always → f64
+(type (round 5h)) -- 'f64
+(type (round 5i)) -- 'f64
+(type (round 5))  -- 'f64
+(type (round 5.0)) -- 'f64
+
+(type (sqrt 5h)) -- 'f64
+(type (sqrt 5i)) -- 'f64
+(type (sqrt 5))  -- 'f64
+(type (sqrt 5.0)) -- 'f64
+
+(type (log 5h)) -- 'f64
+(type (log 5i)) -- 'f64
+(type (log 5))  -- 'f64
+(type (log 5.0)) -- 'f64
+
+(type (exp 5h)) -- 'f64
+(type (exp 5i)) -- 'f64
+(type (exp 5))  -- 'f64
+(type (exp 5.0)) -- 'f64
+
+;; ── Mod (%) preserves type same as +/-/*
+(type (% 7h 3h)) -- 'i16
+(type (% 7i 3i)) -- 'i32
+(type (% 7 3))   -- 'i64
+(type (% 7.0 3.0)) -- 'f64

From ea281efc90c917406d3df80b4a13e65aaa33883c Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 16:04:42 +0300
Subject: [PATCH 05/21] fix(arith): abs preserves narrow-int type (was widening
 to i64)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

abs was the lone outlier in the unary-numeric family — neg, floor,
ceil all preserve i16/i32, but abs widened them to i64.  No good
reason for the inconsistency: in Rayforce typed nulls live in a
separate nullmap bit, so INT_MIN is just a regular value and the
"overflow protection" rationale that justifies widening in some
languages doesn't apply here.

Switch the i16/i32 branches to make_i16/make_i32:

    (abs -5h)     -> 5h    (i16, was i64)
    (abs -5i)     -> 5i    (i32, was i64)
    (abs [-1h 2h]) -> [1h 2h] (I16, was I64)

Update test/rfl/arith/abs.rfl and the type_preservation.rfl regression
table: abs now joins neg/floor/ceil under the "preserve narrow ints"
rule.

Remaining type-promotion oddities (left intentional):
  - sum widens I16/I32 → i64 (overflow guard)
  - round of int → f64 (mathematically a no-op, but produces a float anyway)
The round case is pinned in type_preservation.rfl so future changes
surface; that file does not yet carry a sum assertion.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ops/arith.c                      |  4 ++--
 test/rfl/arith/abs.rfl               | 10 +++++-----
 test/rfl/arith/type_preservation.rfl | 11 ++++++-----
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/ops/arith.c b/src/ops/arith.c
index 029a1a84..12ae7bfb 100644
--- a/src/ops/arith.c
+++ b/src/ops/arith.c
@@ -367,8 +367,8 @@ ray_t* ray_abs_fn(ray_t* x) {
     if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; }
     if (x->type == -RAY_F64) return make_f64(fabs(x->f64));
     if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? -x->i64 : x->i64);
-    if (x->type == -RAY_I32) return make_i64(x->i32 < 0 ? -(int64_t)x->i32 : x->i32);
-    if (x->type == -RAY_I16) return make_i64(x->i16 < 0 ? -(int64_t)x->i16 : x->i16);
+    if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? (int32_t)(0u - (uint32_t)x->i32) : x->i32);  /* unsigned negate: INT32_MIN wraps, no signed-overflow UB */
+    if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? -x->i16 : x->i16);
     return ray_error("type", NULL);
 }
 
diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl
index d8bb50e9..55b5c738 100644
--- a/test/rfl/arith/abs.rfl
+++ b/test/rfl/arith/abs.rfl
@@ -24,7 +24,7 @@
 (nil? (abs 0Ni)) -- true
 
 ;; ──────────────────────────────────────────────────────────────────
-;; Vec + cross-type — abs widens narrow ints to i64
+;; Vec + type preservation — abs keeps narrow-int type (same as neg)
 ;; ──────────────────────────────────────────────────────────────────
 
 (abs [1 -2 3 -4])     -- [1 2 3 4]
@@ -32,7 +32,7 @@
 (abs -5.0)            -- 5.0
 (type (abs -5))       -- 'i64
 (type (abs -5.0))     -- 'f64
-(type (abs -5h))      -- 'i64
-(type (abs -5i))      -- 'i64
-(type (abs [-1h 2h])) -- 'I64
-(type (abs [-1i 2i])) -- 'I64
+(type (abs -5h))      -- 'i16
+(type (abs -5i))      -- 'i32
+(type (abs [-1h 2h])) -- 'I16
+(type (abs [-1i 2i])) -- 'I32
diff --git a/test/rfl/arith/type_preservation.rfl b/test/rfl/arith/type_preservation.rfl
index 6f4591b8..9ea79ecf 100644
--- a/test/rfl/arith/type_preservation.rfl
+++ b/test/rfl/arith/type_preservation.rfl
@@ -9,8 +9,7 @@
 ;;        (+ i16 i32) → i32,  (+ i16 i64) → i64,  (+ i16 f64) → f64
 ;;   3. Comparison ops always return bool.
 ;;   4. Unary neg / floor / ceil preserve type.
-;;   5. Unary abs widens narrow ints to i64. (Documented inconsistency;
-;;      see SPEC for justification — flagged for review.)
+;;   5. Unary abs preserves type (same as neg/floor/ceil).
 ;;   6. Math fns (round, sqrt, log, exp) always produce f64.
 
 ;; ── (1) Same-width preservation: + - * /
@@ -81,11 +80,13 @@
 (type (ceil 5))   -- 'i64
 (type (ceil 5.0)) -- 'f64
 
-;; ── (5) abs widens narrow ints to i64 (inconsistent with neg — see SPEC)
-(type (abs 5h))   -- 'i64
-(type (abs 5i))   -- 'i64
+;; ── (5) abs preserves type (same convention as neg/floor/ceil)
+(type (abs 5h))   -- 'i16
+(type (abs 5i))   -- 'i32
 (type (abs 5))    -- 'i64
 (type (abs 5.0))  -- 'f64
+(type (abs [1h -2h])) -- 'I16
+(type (abs [1i -2i])) -- 'I32
 
 ;; ── (6) Math fns always → f64
 (type (round 5h)) -- 'f64

From 3df78bb4561e94cf51fbb924422f88433a334010 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 16:30:13 +0300
Subject: [PATCH 06/21] fix(eval): call_fn1 routes atomic UNARY builtins
 through atomic_map_unary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(modify t 'a neg) raised "error: type" while (map neg [1 2 3]) worked.
modify calls call_fn1(fn, column_vec) internally — call_fn1 saw a
RAY_UNARY function and invoked it directly on the whole vector.  But
ray_neg_fn (and every other atomic unary kernel: abs, floor, ceil,
round, sqrt, log, exp, not) is written for a single atom and rejects
positive-typed (vector) input with "type".

call_fn2 already had the parallel routing for binary atomic builtins:

    if ((fn->attrs & RAY_FN_ATOMIC) && (is_collection(a) || ...))
        return atomic_map_binary(f, a, b);

call_fn1 simply forgot the unary mirror.  Add it.

Affects every code path that uses call_fn1 — modify, fold (1-arg form),
the apply 1-arg case — so any future caller now gets vector
auto-mapping for free, matching the user's intuition that "if it
works in map, it should work everywhere".

Tests: extend test/rfl/table/modify.rfl with parity assertions
showing (modify t 'a neg) ≡ (modify t 'a (fn [x] (neg x))), plus
chained modify with abs/neg and floor/ceil over a float column.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/lang/eval.c           |  2 ++
 test/rfl/table/modify.rfl | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/src/lang/eval.c b/src/lang/eval.c
index 89f7dc01..ab63ada5 100644
--- a/src/lang/eval.c
+++ b/src/lang/eval.c
@@ -740,6 +740,8 @@ ray_t* call_fn1(ray_t* fn, ray_t* arg) {
     if (fn_is_restricted(fn)) return ray_error("access", "restricted");
     if (fn->type == RAY_UNARY) {
         ray_unary_fn f = (ray_unary_fn)(uintptr_t)fn->i64;
+        if ((fn->attrs & RAY_FN_ATOMIC) && is_collection(arg))
+            return atomic_map_unary(f, arg);
         return f(arg);
     }
     if (fn->type == RAY_LAMBDA) {
diff --git a/test/rfl/table/modify.rfl b/test/rfl/table/modify.rfl
index 5e39c3bf..82199c08 100644
--- a/test/rfl/table/modify.rfl
+++ b/test/rfl/table/modify.rfl
@@ -26,3 +26,22 @@
 (set T4 (modify (modify T 'a (fn [x] (neg x))) 'b (fn [x] (* x 2.0))))
 (sum (at T4 'a)) -- -15
 (sum (at T4 'b)) -- 300.0
+
+;; ──────────────────────────────────────────────────────────────────
+;; Builtin atomic functions are accepted directly (parity with `map`).
+;; modify used to require a (fn …) wrapper; these are the regression tests.
+;; ──────────────────────────────────────────────────────────────────
+
+(set Tn (table [a] (list [-3 -1 2 4])))
+
+;; same result as the lambda-wrapped form
+(at (modify Tn 'a neg) 'a) -- (at (modify Tn 'a (fn [x] (neg x))) 'a)
+(at (modify Tn 'a abs) 'a) -- [3 1 2 4]
+
+;; chain of builtins: abs then neg
+(sum (at (modify (modify Tn 'a abs) 'a neg) 'a)) -- -10
+
+;; floor / ceil on float column
+(set Tf (table [v] (list [1.7 2.3 -0.5])))
+(at (modify Tf 'v floor) 'v) -- [1.0 2.0 -1.0]
+(at (modify Tf 'v ceil) 'v)  -- [2.0 3.0 0.0]

From ac0e7ca93af1dbb8da15823c04ca9f09185b9f91 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 17:14:16 +0300
Subject: [PATCH 07/21] docs(repl): pin ray_repl_run_file return-code contract
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document what file-mode and stdin-mode mean for error handling:

  - ray_repl_run_file (batch / script):  rc=1 stops execution
  - ray_repl_run / stdin pipe (REPL):    error printed, loop continues

The probe spec/_probes/error_exit_code_inconsistent.rfl filed back
when file-mode silently returned 0 on errors; that's been fixed
since (every verb in the probe — asof-join, inner/left-join, filter,
+, /, rand, til, alter — now correctly returns rc=1 from a script
file).  Drop the probe.

The stdin-pipe-doesn't-abort behaviour is the REPL contract, not a
bug; users wanting batch semantics should pass the script as a
positional file arg.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/app/repl.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/app/repl.h b/src/app/repl.h
index 01d0b712..bdb72b99 100644
--- a/src/app/repl.h
+++ b/src/app/repl.h
@@ -40,6 +40,13 @@ typedef struct ray_repl {
 ray_repl_t* ray_repl_create(ray_poll_t* poll);
 void       ray_repl_destroy(ray_repl_t* repl);
 void       ray_repl_run(ray_repl_t* repl);
+
+/* Run a Rayfall script file in batch (script) mode.  Contract:
+ *   - returns 0 on success
+ *   - returns 1 on any eval error (script execution stops at first
+ *     error; subsequent forms are not run)
+ * Distinct from ray_repl_run / stdin pipe which use REPL semantics
+ * (errors are printed but do not terminate the loop). */
 int        ray_repl_run_file(const char* path);
 
 #endif /* RAY_IO_REPL_H */

From ab2074c44a4fc781e76ee94d978808cc6ba5322a Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 18:14:59 +0300
Subject: [PATCH 08/21] fix(like): single iterative matcher, no catastrophic
 backtracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three implementations existed and they disagreed on syntax:

  - eval path: strop.c::str_glob — recursive, glob */?/[abc],
    EXPONENTIAL on patterns like "a*a*a*…a*b" against "aaa…a"
    (>5s on 16 stars, >timeout on 20).
  - DAG path: string.c::like_match — iterative, but SQL %/_ syntax.
  - DAG path: string.c::ilike_match — iterative SQL %/_ , case-
    insensitive.

So `(like "hello" "h*")` returned true via eval but matched 0 rows
in `select where: (like s "h*")` — `*` was literal under the SQL
matcher.  And the eval path could be DoS'd with a 20-star pattern.

Replace all three with one shared implementation in src/ops/glob.[ch]:

  - Iterative two-pointer with last-star backtrack (glibc fnmatch
    style).  O(n*m) worst case; 32-star pattern that pre-fix took
    >5s now finishes in microseconds.
  - Glob syntax matching the documented contract: * (any), ? (one),
    [abc] / [a-z] / [!abc] (character class).
  - ray_glob_match (case-sensitive) and ray_glob_match_ci (folds
    ASCII letters on both sides).

eval path (strop.c::ray_like_fn) and DAG path (string.c::exec_like /
exec_ilike) both call the same matcher — semantics now identical.

Tests:
  * test/rfl/strop/like.rfl extended with character classes, ranges,
    negated classes, an adversarial 20-star catastrophic-backtracking
    guard, and explicit eval-path-≡-DAG-path parity assertions.
  * test/test_exec.c: stale C-test using "bar%" (SQL syntax) updated
    to "bar*" (glob).

Docs: website/docs/rayfall-functions.html updated to mention [abc] /
[a-z] / [!abc] (already implemented; previously undocumented).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ops/glob.c                      | 102 ++++++++++++++++++++++++++++
 src/ops/glob.h                      |  28 ++++++++
 src/ops/string.c                    |  64 +++--------------
 src/ops/strop.c                     |  70 ++++++-------------
 test/rfl/strop/like.rfl             |  33 +++++++++
 test/test_exec.c                    |   2 +-
 website/docs/rayfall-functions.html |   2 +-
 7 files changed, 193 insertions(+), 108 deletions(-)
 create mode 100644 src/ops/glob.c
 create mode 100644 src/ops/glob.h

diff --git a/src/ops/glob.c b/src/ops/glob.c
new file mode 100644
index 00000000..dea37d1e
--- /dev/null
+++ b/src/ops/glob.c
@@ -0,0 +1,102 @@
+/*
+ *   Copyright (c) 2025-2026 Anton Kundenko <singaraiona@gmail.com>
+ *   All rights reserved.
+ */
+
+/*
+ * Iterative glob matcher.  Replaces three pre-existing implementations
+ * that diverged in syntax (eval used *,?,[abc]; DAG used SQL %,_) and
+ * one of which (strop.c::str_glob) blew up exponentially on patterns
+ * like "a*a*a*…a*b" against an a-only string.  This single file is
+ * the only matcher; both call sites delegate here.
+ */
+
+#include "ops/glob.h"
+
+/* ASCII-only lowercase: 'A'..'Z' map to 'a'..'z'; every other byte is returned as-is. */
+static inline char to_lower(char c) {
+    return (c < 'A' || c > 'Z') ? c : (char)(c + ('a' - 'A'));
+}
+
+/* Match one input byte `c` against a bracket class.  On entry *pi indexes
+ * the byte after `[`; on return it is one past the closing `]` (or pn if
+ * the class is unterminated).  Handles `[abc]`, `[a-z]`, a leading `!` for
+ * negation, and a literal `]` as the first member.  Bytes are compared as
+ * unsigned so ranges behave the same whether plain char is signed or not. */
+static bool match_class(const char* p, size_t pn, size_t* pi, char c, bool ci) {
+    size_t i = *pi;
+    bool neg = false;
+    if (i < pn && p[i] == '!') { neg = true; i++; }
+    bool matched = false, first = true;
+    unsigned char ch = (unsigned char)(ci ? to_lower(c) : c);
+    while (i < pn && (first || p[i] != ']')) {
+        unsigned char lo = (unsigned char)(ci ? to_lower(p[i]) : p[i]);
+        if (i + 2 < pn && p[i + 1] == '-' && p[i + 2] != ']') {
+            unsigned char hi = (unsigned char)(ci ? to_lower(p[i + 2]) : p[i + 2]);
+            if (ch >= lo && ch <= hi) matched = true;
+            i += 3;
+        } else {
+            if (ch == lo) matched = true;
+            i++;
+        }
+        first = false;
+    }
+    if (i < pn && p[i] == ']') i++;  /* consume closing bracket */
+    *pi = i;
+    return neg ? !matched : matched;
+}
+
+static bool glob_impl(const char* s, size_t sn,      /* input bytes and their count */
+                     const char* p, size_t pn, bool ci) {  /* pattern bytes and count; ci folds ASCII case */
+    size_t si = 0, pi = 0;                      /* cursors into s and p */
+    size_t star_pi = (size_t)-1, star_si = 0;   /* most recent '*' in p ((size_t)-1 = none) and the input index it restarts from */
+    /* Single pass, no recursion: on a mismatch, rewind to just after the last '*' and let it absorb one more input byte. */
+    while (si < sn) {
+        if (pi < pn && p[pi] == '*') {
+            star_pi = pi++;        /* remember star, skip it */
+            star_si = si;          /* star starts out matching zero bytes */
+        } else if (pi < pn && p[pi] == '?') {   /* '?' consumes exactly one input byte */
+            pi++;
+            si++;
+        } else if (pi < pn && p[pi] == '[') {
+            size_t cls_pi = pi + 1;             /* match_class expects the index just past '[' */
+            if (match_class(p, pn, &cls_pi, s[si], ci)) {
+                pi = cls_pi;                    /* continue after the class */
+                si++;
+            } else if (star_pi != (size_t)-1) { /* class mismatch: backtrack to the last star */
+                pi = star_pi + 1;
+                si = ++star_si;
+            } else {
+                return false;
+            }
+        } else if (pi < pn) {                   /* literal pattern byte */
+            char a = ci ? to_lower(s[si]) : s[si];
+            char b = ci ? to_lower(p[pi]) : p[pi];
+            if (a == b) {
+                pi++;
+                si++;
+            } else if (star_pi != (size_t)-1) { /* literal mismatch: backtrack to the last star */
+                pi = star_pi + 1;
+                si = ++star_si;
+            } else {
+                return false;
+            }
+        } else if (star_pi != (size_t)-1) {     /* pattern exhausted but input remains: widen the last star */
+            pi = star_pi + 1;
+            si = ++star_si;
+        } else {
+            return false;                       /* pattern exhausted, input remains, no star to widen */
+        }
+    }
+    /* Consumed all of input — pattern must be at end, modulo trailing stars. */
+    while (pi < pn && p[pi] == '*') pi++;
+    return pi == pn;
+}
+
+bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn) {
+    return glob_impl(s, sn, p, pn, false);  /* ci = false: no case folding */
+}
+
+bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn) {
+    return glob_impl(s, sn, p, pn, true);   /* ci = true: ASCII letters folded on both sides */
+}
diff --git a/src/ops/glob.h b/src/ops/glob.h
new file mode 100644
index 00000000..7fa6bef6
--- /dev/null
+++ b/src/ops/glob.h
@@ -0,0 +1,28 @@
+/*
+ *   Copyright (c) 2025-2026 Anton Kundenko <singaraiona@gmail.com>
+ *   All rights reserved.
+ */
+
+#ifndef RAY_OPS_GLOB_H
+#define RAY_OPS_GLOB_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* Glob pattern match, iterative two-pointer (no catastrophic backtracking).
+ * Worst case O(n*m); typical case linear.
+ *
+ * Supported metacharacters:
+ *   *        — matches zero or more characters
+ *   ?        — matches exactly one character
+ *   [abc]    — character class: matches any of a, b, c
+ *   [a-z]    — range
+ *   [!abc]   — negated class
+ *
+ * `ray_glob_match` is case-sensitive.  `ray_glob_match_ci` lowercases ASCII
+ * letters on both sides before comparing (so it matches 'A' against 'a', an
+ * 'A-Z' range matches both case forms, etc.). */
+bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn);
+bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn);
+
+#endif /* RAY_OPS_GLOB_H */
diff --git a/src/ops/string.c b/src/ops/string.c
index 8be7ab52..e9430340 100644
--- a/src/ops/string.c
+++ b/src/ops/string.c
@@ -22,35 +22,13 @@
  */
 
 #include "ops/internal.h"
+#include "ops/glob.h"
 
 /* ============================================================================
- * OP_LIKE: SQL LIKE pattern matching on SYM columns
+ * OP_LIKE: glob pattern matching on STR / SYM columns.  See ops/glob.[ch].
+ * Syntax: * (any), ? (one char), [abc] / [a-z] / [!abc] (character class).
  * ============================================================================ */
 
-/* Simple SQL LIKE matcher: % = any (including empty), _ = single char.
- * Pattern is re-interpreted per row; could be optimized with precompilation
- * (e.g., compile once to NFA/DFA) for large datasets. */
-static bool like_match(const char* str, size_t slen, const char* pat, size_t plen) {
-    size_t si = 0, pi = 0;
-    size_t star_p = (size_t)-1, star_s = 0;
-    while (si < slen) {
-        if (pi < plen && (pat[pi] == str[si] || pat[pi] == '_')) {
-            si++; pi++;
-        } else if (pi < plen && pat[pi] == '%') {
-            star_p = pi; star_s = si;
-            pi++;
-        } else if (star_p != (size_t)-1) {
-            pi = star_p + 1;
-            star_s++;
-            si = star_s;
-        } else {
-            return false;
-        }
-    }
-    while (pi < plen && pat[pi] == '%') pi++;
-    return pi == plen;
-}
-
 ray_t* exec_like(ray_graph_t* g, ray_op_t* op) {
     ray_t* input = exec_node(g, op->inputs[0]);
     ray_t* pat_v = exec_node(g, op->inputs[1]);
@@ -77,7 +55,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) {
         for (int64_t i = 0; i < len; i++) {
             const char* sp = ray_str_t_ptr(&elems[i], pool);
             size_t sl = elems[i].len;
-            dst[i] = like_match(sp, sl, pat_str, pat_len) ? 1 : 0;
+            dst[i] = ray_glob_match(sp, sl, pat_str, pat_len) ? 1 : 0;
         }
     } else if (RAY_IS_SYM(in_type)) {
         const void* base = ray_data(input);
@@ -87,7 +65,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) {
             if (!s) { dst[i] = 0; continue; }
             const char* sp = ray_str_ptr(s);
             size_t sl = ray_str_len(s);
-            dst[i] = like_match(sp, sl, pat_str, pat_len) ? 1 : 0;
+            dst[i] = ray_glob_match(sp, sl, pat_str, pat_len) ? 1 : 0;
         }
     } else {
         memset(dst, 0, (size_t)len);
@@ -97,33 +75,7 @@ ray_t* exec_like(ray_graph_t* g, ray_op_t* op) {
     return result;
 }
 
-/* Case-insensitive LIKE: compare characters via tolower(). */
-static bool ilike_match(const char* str, size_t slen, const char* pat, size_t plen) {
-    size_t si = 0, pi = 0;
-    size_t star_p = (size_t)-1, star_s = 0;
-    while (si < slen) {
-        if (pi < plen && pat[pi] != '%') {
-            unsigned char sc = (unsigned char)str[si];
-            unsigned char pc = (unsigned char)pat[pi];
-            if (pc == '_' || (sc >= 'A' && sc <= 'Z' ? sc + 32 : sc) ==
-                             (pc >= 'A' && pc <= 'Z' ? pc + 32 : pc)) {
-                si++; pi++;
-            } else if (star_p != (size_t)-1) {
-                pi = star_p + 1; star_s++; si = star_s;
-            } else {
-                return false;
-            }
-        } else if (pi < plen && pat[pi] == '%') {
-            star_p = pi; star_s = si; pi++;
-        } else if (star_p != (size_t)-1) {
-            pi = star_p + 1; star_s++; si = star_s;
-        } else {
-            return false;
-        }
-    }
-    while (pi < plen && pat[pi] == '%') pi++;
-    return pi == plen;
-}
+/* Case-insensitive LIKE — same syntax as `like`, ASCII-fold both sides. */
 
 ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) {
     ray_t* input = exec_node(g, op->inputs[0]);
@@ -150,7 +102,7 @@ ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) {
         for (int64_t i = 0; i < len; i++) {
             const char* sp = ray_str_t_ptr(&elems[i], pool);
             size_t sl = elems[i].len;
-            dst[i] = ilike_match(sp, sl, pat_str, pat_len) ? 1 : 0;
+            dst[i] = ray_glob_match_ci(sp, sl, pat_str, pat_len) ? 1 : 0;
         }
     } else if (RAY_IS_SYM(in_type)) {
         const void* base = ray_data(input);
@@ -158,7 +110,7 @@ ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) {
             int64_t sym_id = ray_read_sym(base, i, in_type, input->attrs);
             ray_t* s = ray_sym_str(sym_id);
             if (!s) { dst[i] = 0; continue; }
-            dst[i] = ilike_match(ray_str_ptr(s), ray_str_len(s), pat_str, pat_len) ? 1 : 0;
+            dst[i] = ray_glob_match_ci(ray_str_ptr(s), ray_str_len(s), pat_str, pat_len) ? 1 : 0;
         }
     } else {
         memset(dst, 0, (size_t)len);
diff --git a/src/ops/strop.c b/src/ops/strop.c
index ba367e92..9744398b 100644
--- a/src/ops/strop.c
+++ b/src/ops/strop.c
@@ -23,6 +23,7 @@
 
 #include "lang/internal.h"
 #include "table/sym.h"
+#include "ops/glob.h"
 
 /* ══════════════════════════════════════════
  * String builtins
@@ -191,57 +192,31 @@ ray_t* ray_split_fn(ray_t* str, ray_t* delim) {
     return result;
 }
 
-/* Helper: glob-style pattern matching for LIKE */
-static bool str_glob(const char* s, const char* p) {
-    while (*p) {
-        if (*p == '*') {
-            p++;
-            if (!*p) return true;
-            while (*s) { if (str_glob(s, p)) return true; s++; }
-            return false;
-        }
-        if (*p == '?') { if (!*s) return false; s++; p++; continue; }
-        if (*p == '[') {
-            p++;
-            bool neg = (*p == '!'); if (neg) p++;
-            bool match = false;
-            while (*p && *p != ']') {
-                if (p[1] == '-' && p[2] && p[2] != ']') {
-                    if (*s >= p[0] && *s <= p[2]) match = true;
-                    p += 3;
-                } else {
-                    if (*s == *p) match = true;
-                    p++;
-                }
-            }
-            if (*p == ']') p++;
-            if (neg ? match : !match) return false;
-            s++; continue;
-        }
-        if (*s != *p) return false;
-        s++; p++;
-    }
-    return !*s;
-}
-
-/* (like str pattern) — glob-style pattern matching
- * Supports: * (any chars), ? (single char), [abc] (char class)
- * Returns: bool atom or bool vector */
+/* (like str pattern) — glob-style pattern matching.
+ * Syntax: * (any), ? (one char), [abc] / [a-z] / [!abc] (char class).
+ * Implementation lives in src/ops/glob.[ch]; same matcher is used by
+ * the DAG executor (string.c::exec_like) for select-where contexts. */
 ray_t* ray_like_fn(ray_t* x, ray_t* pattern) {
     /* Pattern must be a string atom */
     if (pattern->type != -RAY_STR) return ray_error("type", "like: pattern must be a string");
     const char* pat = ray_str_ptr(pattern);
+    size_t pat_len = ray_str_len(pattern);
 
     /* Atom: single match */
     if (x->type == -RAY_STR || x->type == -RAY_SYM) {
-        const char* s;
+        const char* s; size_t sl;
+        ray_t* sym_str = NULL;
         if (x->type == -RAY_SYM) {
-            ray_t* sym_str = ray_sym_str(x->i64);
-            s = sym_str ? ray_str_ptr(sym_str) : "";
+            sym_str = ray_sym_str(x->i64);
+            s  = sym_str ? ray_str_ptr(sym_str) : "";
+            sl = sym_str ? ray_str_len(sym_str) : 0;
         } else {
-            s = ray_str_ptr(x);
+            s  = ray_str_ptr(x);
+            sl = ray_str_len(x);
         }
-        return make_bool(str_glob(s, pat) ? 1 : 0);
+        bool m = ray_glob_match(s, sl, pat, pat_len);
+        if (sym_str) ray_release(sym_str);
+        return make_bool(m ? 1 : 0);
     }
 
     /* Vector: map over elements */
@@ -257,21 +232,16 @@ ray_t* ray_like_fn(ray_t* x, ray_t* pattern) {
             for (int64_t i = 0; i < n; i++) {
                 ray_t* sym_str = ray_sym_str(sym_ids[i]);
                 const char* s = sym_str ? ray_str_ptr(sym_str) : "";
-                out[i] = str_glob(s, pat) ? 1 : 0;
+                size_t sl = sym_str ? ray_str_len(sym_str) : 0;
+                out[i] = ray_glob_match(s, sl, pat, pat_len) ? 1 : 0;
+                if (sym_str) ray_release(sym_str);
             }
         } else {
             /* RAY_STR vector */
             for (int64_t i = 0; i < n; i++) {
                 size_t slen;
                 const char* s = ray_str_vec_get(x, i, &slen);
-                /* Need null-terminated for glob — str_vec_get may not be */
-                char buf[256];
-                if (s && slen < sizeof(buf)) {
-                    memcpy(buf, s, slen); buf[slen] = '\0';
-                    out[i] = str_glob(buf, pat) ? 1 : 0;
-                } else {
-                    out[i] = 0;
-                }
+                out[i] = (s && ray_glob_match(s, slen, pat, pat_len)) ? 1 : 0;
             }
         }
         return result;
diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl
index b07d1e43..3e495ff4 100644
--- a/test/rfl/strop/like.rfl
+++ b/test/rfl/strop/like.rfl
@@ -35,3 +35,36 @@
 ;; literal prefix/suffix combined with one star
 (like "hello world" "hello *") -- true
 (like "hello world" "* world") -- true
+
+;; ────────────── character class [abc] / [a-z] / [!abc] ──────────────
+(like "cat" "[cb]at") -- true
+(like "bat" "[cb]at") -- true
+(like "rat" "[cb]at") -- false
+
+;; ranges
+(like "a" "[a-z]") -- true
+(like "Z" "[a-z]") -- false
+(like "5" "[0-9]") -- true
+
+;; negated class
+(like "x" "[!abc]") -- true
+(like "a" "[!abc]") -- false
+
+;; class alongside other meta
+(like "hello" "[hH]ello") -- true
+(like "Hello" "[hH]ello") -- true
+
+;; ────────────── catastrophic-backtracking guard ──────────────
+;; Pre-fix the recursive eval-path matcher took >5s for 16 stars.
+;; The iterative replacement is O(n*m) — finishes in microseconds even
+;; for adversarial 20-star patterns over an a-only string.
+(like "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*b") -- false
+
+;; ────────────── eval ≡ DAG: same pattern via select-where ──────────────
+;; Pre-fix, eval-path used glob */?/[abc] but the DAG path for
+;; (select where: (like col pat)) used SQL %/_ — divergent semantics.
+;; Now both paths share src/ops/glob.[ch].
+(set Tlike (table [s] (list ["hello" "world" "hi" "help"])))
+(count (select {from: Tlike where: (like s "h*")})) -- 3
+(count (select {from: Tlike where: (like s "h?llo")})) -- 1
+(count (select {from: Tlike where: (like s "[hw]*")})) -- 4
diff --git a/test/test_exec.c b/test/test_exec.c
index 6dd264b0..cfd2a5c5 100644
--- a/test/test_exec.c
+++ b/test/test_exec.c
@@ -2012,7 +2012,7 @@ static test_result_t test_exec_like(void) {
     ray_graph_t* g = ray_graph_new(tbl);
 
     ray_op_t* name_col = ray_scan(g, "name");
-    ray_op_t* pat = ray_const_str(g, "bar%", 4);
+    ray_op_t* pat = ray_const_str(g, "bar*", 4);
     ray_op_t* lk = ray_like(g, name_col, pat);
     ray_op_t* cnt = ray_count(g, ray_filter(g, name_col, lk));
 
diff --git a/website/docs/rayfall-functions.html b/website/docs/rayfall-functions.html
index 4717e1e3..83ecaa90 100644
--- a/website/docs/rayfall-functions.html
+++ b/website/docs/rayfall-functions.html
@@ -394,7 +394,7 @@ <h2 id="string-ops">String Operations</h2>
         <thead><tr><th>Function</th><th>Type</th><th>Description</th><th>Example</th></tr></thead>
         <tbody>
           <tr><td><code>split</code></td><td>binary</td><td>Split string by delimiter</td><td><code>(split "a,b,c" ",")</code> &rarr; <code>["a" "b" "c"]</code></td></tr>
-          <tr><td><code>like</code></td><td>binary</td><td>Pattern match (glob-style with * and ?)</td><td><code>(like "hello" "hel*")</code> &rarr; <code>true</code></td></tr>
+          <tr><td><code>like</code></td><td>binary</td><td>Glob pattern match: <code>*</code> any, <code>?</code> one, <code>[abc]</code>/<code>[a-z]</code>/<code>[!abc]</code> char class</td><td><code>(like "hello" "hel*")</code> &rarr; <code>true</code></td></tr>
           <tr><td><code>concat</code></td><td>binary</td><td>Concatenate two strings or vectors</td><td><code>(concat "hello" " world")</code> &rarr; <code>"hello world"</code></td></tr>
           <tr><td><code>format</code></td><td>variadic</td><td>Format values as string (% is placeholder)</td><td><code>(format "x=%" 42)</code> &rarr; <code>"x=42"</code></td></tr>
         </tbody>

From d20284c1a82f089e2442ee04062eac8813cdb8d7 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 18:52:34 +0300
Subject: [PATCH 09/21] fix(store): mkdir -p for set-splayed; tolerate missing
 root sym in get-parted
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(set-splayed "/tmp/db/2024.01.01/t/" t)         -> error: io
(get-parted  "/tmp/db/" 't)                     -> error: io

Two compounding bugs broke partitioned-table workflows:

1. ray_mkdir was single-level — could not create
   "/tmp/db/2024.01.01/t/" when the parent directories didn't pre-
   exist.  v1 quietly handled this inside fs_fopen (walk path,
   mkdir each parent before opening the file); v2 lost that.

   Add ray_mkdir_p (POSIX + Win32, src/store/fileio.c) with
   mkdir -p semantics.  Use it from ray_splay_save instead of the
   single-level ray_mkdir.

2. ray_read_parted (get-parted) unconditionally called
   ray_sym_load("<db_root>/sym") and propagated its failure as
   "io" — but set-splayed only writes per-table sym files inside
   the leaf splayed dir, never a root-level one for symbol-less
   tables.  Stat the file and skip the load if absent.

After both: (set-splayed "/tmp/db/2024.01.01/t/" t0) writes the
partition correctly; (get-parted "/tmp/db/" 't) returns a 10-row /
3-column lazy table (1 MAPCOMMON partition-key + 2 data columns),
parity with the v1 behaviour.

Tests: extend test/rfl/system/splayed.rfl with the nested-mkdir case
and a 2-partition get-parted round-trip (count = 10, columns = 3).

Known limitation (separate task): direct (at p 'col) on a PARTED
column doesn't materialize values yet — only the select-where path
exercises the lazy reader.  Filed for follow-up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/store/fileio.c          | 42 +++++++++++++++++++++++++++++++++++++
 src/store/fileio.h          |  1 +
 src/store/part.c            | 13 +++++++++---
 src/store/splay.c           |  6 ++++--
 test/rfl/system/splayed.rfl | 21 +++++++++++++++++++
 5 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/src/store/fileio.c b/src/store/fileio.c
index 21d083ad..8adb596f 100644
--- a/src/store/fileio.c
+++ b/src/store/fileio.c
@@ -119,6 +119,29 @@ ray_err_t ray_mkdir(const char* path) {
     return RAY_OK;
 }
 
+ray_err_t ray_mkdir_p(const char* path) {
+    if (!path || !*path) return RAY_ERR_IO;
+    char buf[1024];
+    size_t len = strlen(path);
+    if (len >= sizeof(buf)) return RAY_ERR_IO;   /* does not fit the scratch copy */
+    memcpy(buf, path, len + 1);
+    /* Normalize trailing separator: trim it so the loop creates `buf` itself. */
+    while (len > 1 && (buf[len - 1] == '/' || buf[len - 1] == '\\')) buf[--len] = '\0';
+    /* Visit each prefix ending at a separator, then the whole path (NUL at
+     * i == len).  A failed create is an error only if the prefix is not already
+     * a directory: an existing prefix such as a drive root may fail with a code
+     * other than ERROR_ALREADY_EXISTS, and a plain file in the way must not pass. */
+    for (size_t i = 1; i <= len; i++) {
+        char saved = buf[i];
+        if (saved != '/' && saved != '\\' && saved != '\0') continue;
+        buf[i] = '\0';
+        DWORD attrs = CreateDirectoryA(buf, NULL) ? FILE_ATTRIBUTE_DIRECTORY : GetFileAttributesA(buf);
+        buf[i] = saved;
+        if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) return RAY_ERR_IO;
+    }
+    return RAY_OK;
+}
+
 #else
 
 /* ===== POSIX implementation ===== */
@@ -211,4 +234,23 @@ ray_err_t ray_mkdir(const char* path) {
     return RAY_OK;
 }
 
+ray_err_t ray_mkdir_p(const char* path) {
+    if (!path || !*path) return RAY_ERR_IO;
+    char buf[1024];
+    size_t len = strlen(path);
+    if (len >= sizeof(buf)) return RAY_ERR_IO;   /* does not fit the scratch copy */
+    memcpy(buf, path, len + 1);
+    /* Strip trailing slash so the final mkdir creates `buf` itself. */
+    while (len > 1 && buf[len - 1] == '/') buf[--len] = '\0';
+    /* Create every '/'-terminated prefix, then the full path (the NUL at i == len). */
+    for (size_t i = 1; i <= len; i++) {
+        if (buf[i] != '/' && buf[i] != '\0') continue;
+        char saved = buf[i]; buf[i] = '\0';
+        struct stat st;   /* a failed mkdir is fine only if the prefix already is a directory */
+        if (mkdir(buf, 0755) != 0 && !(stat(buf, &st) == 0 && S_ISDIR(st.st_mode))) return RAY_ERR_IO;
+        buf[i] = saved;
+    }
+    return RAY_OK;
+}
+
 #endif
diff --git a/src/store/fileio.h b/src/store/fileio.h
index 95fe9bed..658e5606 100644
--- a/src/store/fileio.h
+++ b/src/store/fileio.h
@@ -49,5 +49,6 @@ ray_err_t ray_file_sync(ray_fd_t fd);
 ray_err_t ray_file_sync_dir(const char* path);
 ray_err_t ray_file_rename(const char* old_path, const char* new_path);
 ray_err_t ray_mkdir(const char* path);
+ray_err_t ray_mkdir_p(const char* path);  /* like `mkdir -p` */
 
 #endif /* RAY_FILEIO_H */
diff --git a/src/store/part.c b/src/store/part.c
index 7b73b30f..a160642e 100644
--- a/src/store/part.c
+++ b/src/store/part.c
@@ -35,6 +35,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <dirent.h>
+#include <sys/stat.h>
 
 /* Validate YYYY.MM.DD format: exactly 10 chars, dots at pos 4/7,
  * month 01-12, day 01-31. */
@@ -326,9 +327,15 @@ ray_t* ray_read_parted(const char* db_root, const char* table_name) {
     if (sn < 0 || (size_t)sn >= sizeof(sym_path))
         return ray_error("io", NULL);
 
-    /* Load global symfile */
-    ray_err_t sym_err = ray_sym_load(sym_path);
-    if (sym_err != RAY_OK) return ray_error(ray_err_code_str(sym_err), NULL);
+    /* Load global symfile if present.  Tables without RAY_SYM columns
+     * never produce a global symfile (set-splayed only writes per-table
+     * sym files inside the leaf splayed dir), so a missing root-level
+     * symfile is normal — not an error. */
+    struct stat sym_st;
+    if (stat(sym_path, &sym_st) == 0) {
+        ray_err_t sym_err = ray_sym_load(sym_path);
+        if (sym_err != RAY_OK) return ray_error(ray_err_code_str(sym_err), NULL);
+    }
 
     /* Scan db_root for partition directories (skip "sym" entry) */
     char** part_dirs = NULL;
diff --git a/src/store/splay.c b/src/store/splay.c
index 33b59ee4..87713bd0 100644
--- a/src/store/splay.c
+++ b/src/store/splay.c
@@ -61,8 +61,10 @@ ray_err_t ray_splay_save(ray_t* tbl, const char* dir, const char* sym_path) {
     if (!tbl || RAY_IS_ERR(tbl)) return RAY_ERR_TYPE;
     if (!dir) return RAY_ERR_IO;
 
-    /* Create directory (before sym save, since sym_path may be inside dir) */
-    ray_err_t mkdir_err = ray_mkdir(dir);
+    /* Create directory and any missing parents (mkdir -p semantics).
+     * Required for partitioned layouts like "/db/2024.01.01/t/" where the
+     * caller hasn't pre-created the date partition. */
+    ray_err_t mkdir_err = ray_mkdir_p(dir);
     if (mkdir_err != RAY_OK) return mkdir_err;
 
     /* Save symbol table if sym_path provided */
diff --git a/test/rfl/system/splayed.rfl b/test/rfl/system/splayed.rfl
index fe8e725d..eb929f97 100644
--- a/test/rfl/system/splayed.rfl
+++ b/test/rfl/system/splayed.rfl
@@ -30,3 +30,24 @@
 (count R-1024) -- 1024
 ;; sum of til 1024 = 1023*1024/2
 (sum (at R-1024 'n)) -- 523776
+
+;; ────────────── nested directory creation (mkdir -p) ──────────────
+;; set-splayed used to fail with "io" on nested paths because ray_mkdir
+;; only created one level.  Required for partitioned tables where the
+;; date dir doesn't pre-exist:  /db_root/2024.01.01/t/
+(set T-Nested (table [id val] (list [1 2 3] [10.0 20.0 30.0])))
+(set-splayed "/tmp/rfl_splayed_nested/2024.01.01/t/" T-Nested)
+(count (get-splayed "/tmp/rfl_splayed_nested/2024.01.01/t/")) -- 3
+
+;; ────────────── get-parted: 2-partition reconstruction ──────────────
+;; Pre-fix: get-parted required a /<root>/sym file (which set-splayed
+;; doesn't write for symbol-less tables) and unconditionally errored.
+(set T-P0 (table [id val] (list [0 1 2 3 4] [0.0 1.0 2.0 3.0 4.0])))
+(set T-P1 (table [id val] (list [100 101 102 103 104] [10.0 11.0 12.0 13.0 14.0])))
+(set-splayed "/tmp/rfl_parted/2024.01.01/t/" T-P0)
+(set-splayed "/tmp/rfl_parted/2024.01.02/t/" T-P1)
+
+;; total row count across partitions
+(count (get-parted "/tmp/rfl_parted/" 't)) -- 10
+;; column count = 1 partition-key + 2 data columns
+(count (key (get-parted "/tmp/rfl_parted/" 't))) -- 3

From 52cf5dfcd8d7f465532d3a54a77fc1c8127c956e Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 21:36:28 +0300
Subject: [PATCH 10/21] chore: ignore IDE state and gcov / lcov artifacts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds:
  .idea/, .vscode/        — IDE working state
  *.gcda, *.gcno, *.gcov  — gcc coverage instrumentation outputs
  coverage*.info          — lcov tracefiles
  coverage_html/          — genhtml output directory
  rayforce.cov            — clang/llvm coverage runtime output

Keeps the working tree clean during a coverage build so `git status`
isn't drowned in untracked binary artifacts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitignore | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2b699d30..400602b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,15 @@ rf_test_*.csv
 
 CLAUDE.md
 docs/plans/
+
+# IDE state
+.idea/
+.vscode/
+
+# gcov / lcov artifacts
+*.gcda
+*.gcno
+*.gcov
+coverage*.info
+coverage_html/
+rayforce.cov

From 89f37a930f90fcb686b7f65cf389a25e7b575186 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Mon, 27 Apr 2026 23:27:32 +0300
Subject: [PATCH 11/21] test: salvage radix-boundary + null sort coverage from
 scratch files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three legacy script-style test files (test_4097.rfl, test_null_ops.rfl,
test_comprehensive.rfl) sat untracked in the repo root.  Reviewed each
against the existing test/rfl/ corpus; salvaged what wasn't covered:

* test/rfl/integration/sort_radix_boundary.rfl (new) — pin every
  type's sort exit values at N=4097, just over the 2^12 radix-strategy
  threshold.  Covers asc/desc/iasc/idesc/xasc/xdesc/select-orderby/
  group-by-then-sort/distinct/rank across i64, f64, SYM, STR, BOOL,
  DATE, TIMESTAMP, plus null-bearing variants.

* test/rfl/null/sort.rfl (extended) — add xasc-on-null-keyed-table
  (null sorts first, count preserved) and take-with-nulls slicing
  semantics.  Anton's existing file covered asc/desc/iasc/idesc with
  nulls but not xasc or take.

The other two files duplicated Anton's null/* and arith/div coverage
(Float div-by-zero already produces 0Nf in his test/rfl/arith/div.rfl,
INT64 boundary already in test/rfl/integration/null.rfl), so dropped.

Plus removed:
  rayforce.cov                — clang/llvm runtime artifact
  test/bugs/                  — 6 of 7 already migrated as upstream
                                tests; legacy duplicates
  extract_v1_tests.py + v1_tests*.rfl — Python interim, generation
                                output; we don't ship Python in the
                                test pipeline
  test_null_full / 100k / parallel / bugs.rfl — used renamed verbs
                                (write-csv, antijoin) that don't
                                exist in current Rayforce, and what
                                they tested is already covered

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/integration/sort_radix_boundary.rfl | 87 ++++++++++++++++++++
 test/rfl/null/sort.rfl                       | 13 +++
 2 files changed, 100 insertions(+)
 create mode 100644 test/rfl/integration/sort_radix_boundary.rfl

diff --git a/test/rfl/integration/sort_radix_boundary.rfl b/test/rfl/integration/sort_radix_boundary.rfl
new file mode 100644
index 00000000..a3a80c02
--- /dev/null
+++ b/test/rfl/integration/sort_radix_boundary.rfl
@@ -0,0 +1,87 @@
+;; Radix-threshold boundary: at N=4097 (just over 2^12 = 4096) sort
+;; switches from one strategy to another internally.  Pin the exit
+;; values for every supported type so a refactor of either strategy
+;; surfaces immediately.
+;;
+;; "(at (take s -1) 0)" idiom = last element of the sorted vector.
+;; (Anton's harness evaluates each statement independently; `set`
+;; persists across lines.)
+
+(set N 4097)
+
+;; ────────────── i64 sort (asc / desc, positive + negative) ──────────────
+(set V (take [9 1 5 3 7 2 8 4 6 0] N))
+(at (asc V) 0)              -- 0
+(at (take (asc V) -1) 0)    -- 9
+(at (desc V) 0)             -- 9
+(at (take (desc V) -1) 0)   -- 0
+
+(set Vn (take [-9 -1 -5 -3 -7 -2 -8 -4 -6 0] N))
+(at (asc Vn) 0)             -- -9
+(at (take (asc Vn) -1) 0)   -- 0
+
+;; ────────────── f64 sort ──────────────
+(set Vf (take [9.9 1.1 5.5 3.3 7.7 2.2 8.8 4.4 6.6 0.0] N))
+(at (asc Vf) 0)             -- 0.0
+(at (take (asc Vf) -1) 0)   -- 9.9
+(at (desc Vf) 0)            -- 9.9
+
+;; f64 with NaN keeps the row count
+(set Vnan (take [0Nf 3.0 1.0 0Nf 2.0] N))
+(count (asc Vnan)) -- 4097
+
+;; ────────────── SYM sort (lexicographic) ──────────────
+(set Vs (take ['ZZZ 'AAA 'MMM 'BBB 'QQQ] N))
+(at (asc Vs) 0)             -- 'AAA
+(at (take (asc Vs) -1) 0)   -- 'ZZZ
+(at (desc Vs) 0)            -- 'ZZZ
+
+;; ────────────── STR sort ──────────────
+(set Vstr (take ["zebra" "apple" "mango" "banana" "cherry"] N))
+(at (asc Vstr) 0)           -- "apple"
+(at (take (asc Vstr) -1) 0) -- "zebra"
+(at (desc Vstr) 0)          -- "zebra"
+
+;; ────────────── BOOL / DATE / TIMESTAMP ──────────────
+(set Vb (take [true false true false true] N))
+(at (asc Vb) 0)             -- false
+(at (take (asc Vb) -1) 0)   -- true
+
+(set Vd (take [2024.01.05 2024.01.01 2024.01.03 2024.01.02 2024.01.04] N))
+(at (asc Vd) 0)             -- 2024.01.01
+(at (take (asc Vd) -1) 0)   -- 2024.01.05
+
+(set Vt (take (as 'TIMESTAMP [5 1 3 2 4]) N))
+(at (asc Vt) 0)             -- (as 'TIMESTAMP 1)
+(at (take (asc Vt) -1) 0)   -- (as 'TIMESTAMP 5)
+
+;; i64 with nulls — null sorts first under asc, count preserved
+(set Vnull (take [0Nl 3 1 0Nl 2] N))
+(at (asc Vnull) 0)          -- 0Nl
+(count (asc Vnull))         -- 4097
+
+;; ────────────── iasc / idesc — index permutations ──────────────
+(count (iasc (take [5 3 1 4 2] N)))   -- 4097
+(count (idesc (take [5 3 1 4 2] N)))  -- 4097
+
+;; ────────────── xasc / xdesc — table sort ──────────────
+(set T (table [k v] (list (take [5 3 1 4 2] N) (til N))))
+(at (at (xasc T 'k) 'k) 0)              -- 1
+(at (at (xasc T 'k) 'k) (- N 1))        -- 5
+(at (at (xdesc T 'k) 'k) 0)             -- 5
+(at (at (xdesc T 'k) 'k) (- N 1))       -- 1
+
+;; ────────────── select asc/desc + group-by + distinct + rank ──────────────
+(set T2 (table [a b] (list (take [5 3 1 4 2] N) (til N))))
+(at (at (select {from: T2 asc: a}) 'a) 0)   -- 1
+(at (at (select {from: T2 desc: a}) 'a) 0)  -- 5
+
+;; group-by + sort: 10 distinct keys from (% (til N) 10); key 0 sorts first
+(set Tg (table [g v] (list (% (til N) 10) (til N))))
+(at (at (xasc (select {s: (sum v) from: Tg by: g}) 'g) 'g) 0) -- 0
+
+;; distinct over 10-cycle pattern => 10 unique values
+(count (distinct (take [1 2 3 4 5 6 7 8 9 0] N))) -- 10
+
+;; rank preserves length
+(count (rank (take [5 3 1 4 2] N))) -- 4097
diff --git a/test/rfl/null/sort.rfl b/test/rfl/null/sort.rfl
index 3b0bdea2..eea22813 100644
--- a/test/rfl/null/sort.rfl
+++ b/test/rfl/null/sort.rfl
@@ -16,3 +16,16 @@
 (set V [3 0N 1 2])
 (at V (iasc V)) -- (asc V)
 (at V (idesc V)) -- (desc V)
+
+;; ────────────── xasc/xdesc on tables with null key columns ──────────────
+;; Same convention: null keys sort first under xasc, count preserved.
+(set Tn (table [k v] (list [2 0Nl 1 0Nl 3] [10 20 30 40 50])))
+(at (at (xasc Tn 'k) 'k) 0)               -- 0Nl
+(sum (as 'I64 (== (at (xasc Tn 'k) 'k) 0Nl))) -- 2
+(count (xasc Tn 'k))                       -- 5
+
+;; ────────────── take with nulls — negative count slices from tail ──────────────
+;; (take v -5) keeps the last 5; null count must reflect what was kept.
+(set Vt [0Nl 3 1 0Nl 2 5 0Nl 4])
+(sum (as 'I64 (== (take Vt 5) 0Nl)))   -- 2
+(sum (as 'I64 (== (take Vt -5) 0Nl)))  -- 2

From 6a7ffa45327a6f88b44cd730cd5363ff43ea6dd1 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 02:10:10 +0300
Subject: [PATCH 12/21] test: targeted coverage for
 fold-right/scan-right/retract-fact/scan-eav + radix groupby
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four register_vary builtins had 0% coverage despite existing in
src/lang/eval.c:
  - collection.c::ray_fold_right_fn  (0% → 58%, +21 lines)
  - collection.c::ray_scan_right_fn  (0% → 73%, +24 lines)
  - datalog.c::ray_retract_fact_fn   (0% → 88%, +46 lines)
  - datalog.c::ray_scan_eav_fn       (0% → 89%, +48 lines)

Net +139 source lines newly covered from 3 small tests, no kernel
changes.

* test/rfl/hof/right.rfl — right-fold semantics for + - and a digit-
  building lambda f(a,b)=a+10b that distinguishes left vs right fold
  shape; suffix-sum invariants for scan-right.
* test/rfl/datalog/eav_ops.rfl — assert/retract round-trips, retract
  no-op on missing triple, retract leaves sibling attributes intact;
  scan-eav 2-arg (filter by attr) and 3-arg (entity+attr lookup).
* test/rfl/integration/radix_groupby.rfl — 100k and 200k row group-by
  to push the executor past RAY_PARALLEL_THRESHOLD (= 64*1024) and
  exercise the parallel radix path in group.c.  Also pins multi-key
  group-by + multi-aggregator results at 100k.

Overall lines coverage: 64.2% → 65.0% (+0.8pp), functions 78.5% → 79.0%.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/datalog/eav_ops.rfl           | 66 ++++++++++++++++++++++++++
 test/rfl/hof/right.rfl                 | 37 +++++++++++++++
 test/rfl/integration/radix_groupby.rfl | 40 ++++++++++++++++
 3 files changed, 143 insertions(+)
 create mode 100644 test/rfl/datalog/eav_ops.rfl
 create mode 100644 test/rfl/hof/right.rfl
 create mode 100644 test/rfl/integration/radix_groupby.rfl

diff --git a/test/rfl/datalog/eav_ops.rfl b/test/rfl/datalog/eav_ops.rfl
new file mode 100644
index 00000000..a675795d
--- /dev/null
+++ b/test/rfl/datalog/eav_ops.rfl
@@ -0,0 +1,66 @@
+;; Direct EAV-table operations: assert-fact / retract-fact / scan-eav.
+;; Bypass the rule/query machinery — test the raw triple store.
+;; Pre-existing coverage: ray_assert_fact_fn was used; ray_retract_fact_fn
+;; and ray_scan_eav_fn were 0% — this file walks both.
+
+;; ────────────── retract-fact ──────────────
+;; assert one fact, retract it, verify it's gone via query.
+(set Db (datoms))
+(set Db (assert-fact Db 1 'age 30))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 1
+
+(set Db (retract-fact Db 1 'age 30))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 0
+
+;; retract one of several facts — only the matching triple is removed.
+(set Db (datoms))
+(set Db (assert-fact Db 1 'age 30))
+(set Db (assert-fact Db 2 'age 25))
+(set Db (assert-fact Db 3 'age 40))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 3
+
+(set Db (retract-fact Db 2 'age 25))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 2
+
+;; retract a non-existent triple is a no-op (does not error)
+(set Db (retract-fact Db 99 'age 999))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 2
+
+;; retract leaves other attributes on the same entity untouched
+(set Db (datoms))
+(set Db (assert-fact Db 1 'age 30))
+(set Db (assert-fact Db 1 'name 100))
+(set Db (retract-fact Db 1 'age 30))
+;; age gone, name remains
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 0
+(count (query Db (find ?n) (where (?e :name ?n)))) -- 1
+
+;; ────────────── scan-eav: 2-arg form (filter by attribute) ──────────────
+;; Returns the rows of the datoms table where attr matches.
+(set Db (datoms))
+(set Db (assert-fact Db 1 'age 30))
+(set Db (assert-fact Db 2 'age 25))
+(set Db (assert-fact Db 3 'age 40))
+(set Db (assert-fact Db 1 'name 100))
+
+;; 3 rows match :age, 1 row matches :name
+(count (scan-eav Db 'age))  -- 3
+(count (scan-eav Db 'name)) -- 1
+;; non-existent attribute → empty result, not error
+(count (scan-eav Db 'missing)) -- 0
+
+;; ────────────── scan-eav: 3-arg form (entity + attribute lookup) ──────────────
+;; Returns the single value at (e, a).
+(scan-eav Db 1 'age)  -- 30
+(scan-eav Db 2 'age)  -- 25
+(scan-eav Db 1 'name) -- 100
+
+;; ────────────── round-trip through assert/retract ──────────────
+;; Re-add a retracted fact; query must see it again.
+(set Db (datoms))
+(set Db (assert-fact Db 1 'age 30))
+(set Db (retract-fact Db 1 'age 30))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 0
+(set Db (assert-fact Db 1 'age 30))
+(count (query Db (find ?n) (where (?e :age ?n)))) -- 1
+(scan-eav Db 1 'age) -- 30
diff --git a/test/rfl/hof/right.rfl b/test/rfl/hof/right.rfl
new file mode 100644
index 00000000..cfb026f2
--- /dev/null
+++ b/test/rfl/hof/right.rfl
@@ -0,0 +1,37 @@
+;; fold-right and scan-right — right-associative variants of fold/scan.
+;; Both functions had 0% coverage before (collection.c::ray_fold_right_fn,
+;; ::ray_scan_right_fn).
+
+;; ────────────── fold-right ──────────────
+;; (fold-right f seed v) = f(v[0], f(v[1], …, f(v[n-1], seed)…))
+;; For + with 0 it equals (fold + 0 v) since + is associative.
+(fold-right + 0 [1 2 3 4 5]) -- 15
+(fold-right + 0 []) -- 0
+(fold-right * 1 [1 2 3 4]) -- 24
+
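+;; non-associative ops expose the fold direction.  With - and seed 0:
+;;   left fold: (((0 - 1) - 2) - 3) - 4 = -10;  right fold: 1 - (2 - (3 - (4 - 0))) = -2
+;; Only the fold-right line below uses -; the (fold + ...) line is an order-insensitive baseline.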
+(fold + 0 [1 2 3 4]) -- 10
+(fold-right - 0 [1 2 3 4]) -- -2
+
+;; lambda — fold-right shape:  f(1, f(2, f(3, 0)))
+;;   f(3,0)=3,  f(2,3)=32,  f(1,32)=321
+(fold-right (fn [a b] (+ a (* b 10))) 0 [1 2 3]) -- 321
+
+;; ────────────── scan-right ──────────────
+;; (scan-right f v) = running fold from right; result has same length as v.
+;; (scan-right + [1 2 3]) = [6 5 3]   ;; suffix sums
+(scan-right + [1 2 3]) -- [6 5 3]
+(scan-right + [1 2 3 4 5]) -- [15 14 12 9 5]
+(scan-right * [1 2 3 4]) -- [24 24 12 4]
+
+;; count preserved
+(set V (rand 50 100))
+(count V) -- (count (scan-right + V))
+
+;; first element of scan-right equals fold over entire vec
+(first (scan-right + [1 2 3 4 5])) -- (sum [1 2 3 4 5])
+
+;; last element is the input's last element (single-step from rightmost)
+(last (scan-right + [1 2 3 4 5])) -- 5
diff --git a/test/rfl/integration/radix_groupby.rfl b/test/rfl/integration/radix_groupby.rfl
new file mode 100644
index 00000000..e327e461
--- /dev/null
+++ b/test/rfl/integration/radix_groupby.rfl
@@ -0,0 +1,40 @@
+;; Large group-by triggers the radix-partitioned parallel path
+;; (ops/group.c::radix_phase1_fn / phase2_fn / phase3_fn).  The
+;; sequential hash-table path handles small inputs; once nrows crosses
+;; RAY_PARALLEL_THRESHOLD (64 * 1024 = 65536) the executor switches
+;; to radix.  A 100k-row table with mixed key cardinality exercises
+;; both phases.
+
+;; ────────────── 100k rows, 1000 distinct keys ──────────────
+(set N 100000)
+(set Tbig (table [g v] (list (% (til N) 1000) (til N))))
+
+;; row count + group count
+(count Tbig) -- 100000
+(count (select {c: (count v) from: Tbig by: g})) -- 1000
+
+;; sum of all v equals N*(N-1)/2 — 100000*99999/2
+(sum (at (select {s: (sum v) from: Tbig by: g}) 's)) -- 4999950000
+
+;; min / max of the per-group sums
+;; group g sees 100 v's: g, 1000+g, 2000+g, …, 99000+g.
+;; sum = 100*g + 1000*(0+1+…+99) = 100*g + 4950000.
+;; min sum = g=0 → 4950000; max sum = g=999 → 100*999 + 4950000 = 5049900.
+(min (at (select {s: (sum v) from: Tbig by: g}) 's)) -- 4950000
+(max (at (select {s: (sum v) from: Tbig by: g}) 's)) -- 5049900
+
+;; ────────────── multi-aggregator on the same large table ──────────────
+(set Magg (select {c: (count v) s: (sum v) m: (max v) n: (min v) av: (avg v) from: Tbig by: g}))
+(count Magg) -- 1000
+;; total count across groups equals total rows
+(sum (at Magg 'c)) -- 100000
+
+;; ────────────── multi-key group-by on 100k ──────────────
+(set Tmk (table [k1 k2 v] (list (% (til N) 50) (% (til N) 7) (til N))))
+(count (select {c: (count v) from: Tmk by: [k1 k2]})) -- 350
+
+;; ────────────── 200k rows for a deeper radix dispatch ──────────────
+(set N2 200000)
+(set Tx (table [g v] (list (% (til N2) 5000) (til N2))))
+(count (select {c: (count v) from: Tx by: g})) -- 5000
+(sum (at (select {s: (sum v) from: Tx by: g}) 's)) -- 19999900000

From 6f32db08fdd55dfc23fd3647ad3bba621dd30103 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 12:19:17 +0300
Subject: [PATCH 13/21] test: per-type and list-form coverage for
 reverse/union/except/alter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ray_alter_fn (40% → 83%, +66 lines), ray_reverse_fn (40% → 62%, +11),
ray_union_fn / ray_except_fn — the previous tests only hit i64 (and
SYM for except) and never the boxed-list path.  Each function has
distinct branches per element type and a separate heterogeneous-list
branch that vec-of-X tests skip entirely.

* reverse — exercise type switch across F64, I16, I32, U8, BOOL, SYM,
  STR, DATE, TIME, plus null-bearing input.
* union/except — vec switch for f64/i16/i32/STR/DATE/BOOL (plus SYM for
  union and TIME for except), plus the (list ...) variants which route
  through ray_union_fn's boxed-list fallback at line 793.
* alter — add LIST-only forms (set/concat/remove with atom or vec
  index), plus alter-set across F64/I16/SYM/BOOL vec types to
  exercise store_typed_elem dispatch.

No kernel changes; tests only.  Test file count is unchanged (all
edits are in-place to existing files).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/collection/except.rfl  | 14 ++++++++++++++
 test/rfl/collection/reverse.rfl | 19 +++++++++++++++++++
 test/rfl/collection/union.rfl   | 16 ++++++++++++++++
 test/rfl/table/alter.rfl        | 18 ++++++++++++++++++
 4 files changed, 67 insertions(+)

diff --git a/test/rfl/collection/except.rfl b/test/rfl/collection/except.rfl
index 87ec440d..e40ab844 100644
--- a/test/rfl/collection/except.rfl
+++ b/test/rfl/collection/except.rfl
@@ -42,3 +42,17 @@
 ;; Duplicates handling
 (except [1 1 2 2 3] [1 3]) -- [2 2]
 (except ['a 'a 'b 'c 'c] ['a 'c]) -- [b]
+
+;; ────────────── per-type — exercises type switch in ray_except_fn ──────────────
+(except [1.0 2.0 3.0] [2.0])         -- [1.0 3.0]
+(except [1h 2h 3h] [2h])             -- [1h 3h]
+(except [1i 2i 3i] [2i])             -- [1i 3i]
+(except ["a" "b" "c"] ["b"])         -- ["a" "c"]
+(except [2024.01.01 2024.01.02 2024.01.03] [2024.01.02]) -- [2024.01.01 2024.01.03]
+(except [12:00:00.000 13:00:00.000 14:00:00.000] [13:00:00.000]) -- [12:00:00.000 14:00:00.000]
+(except [true false true] [false])   -- [true true]
+
+;; ────────────── boxed-list path ──────────────
+(except (list 1 'a "x" 2) (list 'a 2)) -- (list 1 "x")
+(except (list) (list 1 2))             -- (list)
+(except (list 1 2 3) (list))           -- (list 1 2 3)
diff --git a/test/rfl/collection/reverse.rfl b/test/rfl/collection/reverse.rfl
index c6131f25..99f3d5f7 100644
--- a/test/rfl/collection/reverse.rfl
+++ b/test/rfl/collection/reverse.rfl
@@ -15,3 +15,22 @@
 
 ;; concrete: reverse([1 2 3 4 5]) == [5 4 3 2 1]
 (reverse [1 2 3 4 5]) -- [5 4 3 2 1]
+
+;; ────────────── per-type — exercises type switch in ray_reverse_fn ──────────────
+(reverse [1.0 2.0 3.0])      -- [3.0 2.0 1.0]
+(reverse [1h 2h 3h])         -- [3h 2h 1h]
+(reverse [1i 2i 3i])         -- [3i 2i 1i]
+(reverse [0x01 0x02 0x03])   -- [0x03 0x02 0x01]
+(reverse [true false true])  -- [true false true]
+(reverse ['a 'b 'c])         -- ['c 'b 'a]
+(reverse ["aa" "bb" "cc"])   -- ["cc" "bb" "aa"]
+(reverse [2024.01.01 2024.01.02 2024.01.03]) -- [2024.01.03 2024.01.02 2024.01.01]
+(reverse [12:30:45.000 11:00:00.000])        -- [11:00:00.000 12:30:45.000]
+
+;; null-bearing reverse preserves null mask
+(reverse [1 0N 3]) -- [3 0Nl 1]
+
+;; single element
+(reverse [42])    -- [42]
+(reverse ['x])    -- ['x]
+(reverse ["one"]) -- ["one"]
diff --git a/test/rfl/collection/union.rfl b/test/rfl/collection/union.rfl
index b0bfef3c..baad3669 100644
--- a/test/rfl/collection/union.rfl
+++ b/test/rfl/collection/union.rfl
@@ -18,3 +18,19 @@
 ;; count is between distinct(A ++ B) and count(A) + count(B)
 1 -- (as 'I64 (>= (count (union A B)) (count (distinct (concat A B)))))
 1 -- (as 'I64 (<= (count (union A B)) (+ (count A) (count B))))
+
+;; ────────────── per-type — exercises type switch in ray_union_fn ──────────────
+(union ['a 'b] ['b 'c])              -- ['a 'b 'c]
+(union [1.0 2.0 3.0] [3.0 4.0])      -- [1.0 2.0 3.0 4.0]
+(union [1h 2h 3h] [3h 4h])           -- [1h 2h 3h 4h]
+(union [1i 2i] [2i 3i])              -- [1i 2i 3i]
+(union ["a" "b"] ["b" "c"])          -- ["a" "b" "c"]
+(union [2024.01.01 2024.01.02] [2024.01.02 2024.01.03]) -- [2024.01.01 2024.01.02 2024.01.03]
+(union [true false] [false])         -- [true false]
+
+;; ────────────── boxed-list path: (list ...) instead of [...] ──────────────
+;; Triggers the heterogeneous-list branch in ray_union_fn that the
+;; vec-path doesn't reach.
+(union (list 1 'a "x") (list 'a 2 "y")) -- (list 1 'a "x" 2 "y")
+(union (list 1 2 3) (list 1 2 3))       -- (list 1 2 3)
+(union (list) (list 1 2))               -- (list 1 2)
diff --git a/test/rfl/table/alter.rfl b/test/rfl/table/alter.rfl
index 065790b0..ec68f062 100644
--- a/test/rfl/table/alter.rfl
+++ b/test/rfl/table/alter.rfl
@@ -4,3 +4,21 @@
 (set v [1 2 3 4 5]) (alter 'v set 0 100) v -- [100 2 3 4 5]
 ;; ========== ALTER CONCAT ON VECTORS ==========
 (set v [1 2 3]) (alter 'v concat 4) v -- [1 2 3 4]
+
+;; ========== ALTER SET ON LISTS (boxed heterogeneous) ==========
+(set L (list 1 'a "x" 3.14)) (alter 'L set 1 'changed) L -- (list 1 'changed "x" 3.14)
+(set L (list 10 20 30 40 50)) (alter 'L set [0 2 4] 99) L -- (list 99 20 99 40 99)
+
+;; ========== ALTER CONCAT ON LISTS ==========
+(set L (list 1 'a "x")) (alter 'L concat 99) L -- (list 1 'a "x" 99)
+
+;; ========== ALTER REMOVE — list-only (atom or vec of indices) ==========
+(set L (list 10 20 30 40 50)) (alter 'L remove 2) L -- (list 10 20 40 50)
+(set L (list 10 20 30 40 50)) (alter 'L remove [1 3]) L -- (list 10 30 50)
+(set L (list 'a 'b 'c)) (alter 'L remove 0) L -- (list 'b 'c)
+
+;; ========== ALTER SET — different vec types (exercises store_typed_elem dispatch) ==========
+(set v [1.0 2.0 3.0]) (alter 'v set 1 9.9) v -- [1.0 9.9 3.0]
+(set v [1h 2h 3h]) (alter 'v set 0 99h) v -- [99h 2h 3h]
+(set v ['a 'b 'c]) (alter 'v set 1 'X) v -- ['a 'X 'c]
+(set v [true false true]) (alter 'v set 0 false) v -- [false false true]

From c4961bb377fb309d2f6e901582a48770c156143a Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 12:56:57 +0300
Subject: [PATCH 14/21] test(table): pivot avg/min/max + multi-key + f64 value;
 add union-all
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ray_pivot_fn previously exercised only sum/count over one row-key on a
3-column SYM/SYM/I64 fixture.  Add:
* avg / min / max aggregator hits (separate combine paths for each)
* vector row-key form (pivot t ['r] 'c 'v sum) — row key passed as a vector
* f64 value column

ray_union_all_fn was registered but had no .rfl tests at all (0%
coverage despite being exposed as `union-all`).  Add row-count, sum
invariants, and empty-table edge.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/table/pivot.rfl | 51 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/test/rfl/table/pivot.rfl b/test/rfl/table/pivot.rfl
index 4e3a35c5..0e13e4f0 100644
--- a/test/rfl/table/pivot.rfl
+++ b/test/rfl/table/pivot.rfl
@@ -23,3 +23,54 @@
 
 ;; total cells == total source rows
 (+ (sum (at Pivot-Count 'Buy)) (sum (at Pivot-Count 'Sell))) -- (count trades)
+
+;; ────────────── pivot with avg / min / max aggregators ──────────────
+;; Each aggregator hits a separate combine path in ray_pivot_fn.
+(set tk (table [k1 k2 v] (list ['A 'A 'B 'B 'A] ['x 'y 'x 'y 'y] [1 2 3 4 5])))
+
+(set Pavg (pivot tk 'k1 'k2 'v avg))
+(count Pavg) -- 2
+(at (at Pavg 'x) 0) -- 1.0
+(at (at Pavg 'y) 0) -- 3.5
+(at (at Pavg 'x) 1) -- 3.0
+(at (at Pavg 'y) 1) -- 4.0
+
+(set Pmin (pivot tk 'k1 'k2 'v min))
+(count Pmin) -- 2
+(at (at Pmin 'x) 0) -- 1
+(at (at Pmin 'y) 0) -- 2
+
+(set Pmax (pivot tk 'k1 'k2 'v max))
+(count Pmax) -- 2
+(at (at Pmax 'x) 0) -- 1
+(at (at Pmax 'y) 1) -- 4
+
+;; ────────────── pivot with f64 value column ──────────────
+(set tf (table [k1 k2 v] (list ['A 'A 'B] ['x 'y 'x] [1.5 2.5 3.5])))
+(set Pf (pivot tf 'k1 'k2 'v sum))
+(at (at Pf 'x) 0) -- 1.5
+(at (at Pf 'y) 0) -- 2.5
+(at (at Pf 'x) 1) -- 3.5
+
+;; ────────────── pivot with vector row-key (multi-key) ──────────────
+(set tm (table [r c v] (list [1 1 2 2] ['x 'y 'x 'y] [10 20 30 40])))
+(set Pm (pivot tm ['r] 'c 'v sum))
+(count Pm) -- 2
+(at (at Pm 'x) 0) -- 10
+(at (at Pm 'y) 0) -- 20
+(at (at Pm 'x) 1) -- 30
+
+;; ────────────── union-all: row-wise concat of two same-schema tables ──────────────
+;; Was 0%-covered before; the C-API was reachable from REPL but had no
+;; .rfl tests.
+(set Tu1 (table [a b] (list [1 2 3] [10 20 30])))
+(set Tu2 (table [a b] (list [4 5]   [40 50])))
+(count (union-all Tu1 Tu2))      -- 5
+(sum (at (union-all Tu1 Tu2) 'a)) -- 15
+(sum (at (union-all Tu1 Tu2) 'b)) -- 150
+(at (at (union-all Tu1 Tu2) 'a) 0) -- 1
+(at (at (union-all Tu1 Tu2) 'a) 4) -- 5
+;; empty + non-empty edges
+(set Tu0 (select {from: Tu1 where: (> a 100)}))   ;; 0 rows, same schema
+(count (union-all Tu0 Tu1)) -- 3
+(count (union-all Tu1 Tu0)) -- 3

From e36863476895730cdba7f4527cef40ebef4aa384 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 13:02:57 +0300
Subject: [PATCH 15/21] test(integration): DAG executor binary ops via
 select-with-derived-cols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

expr.c::expr_exec_binary handles all arithmetic/comparison ops in DAG
(select) context.  Top-level (+ a b) goes through the eval-path kernel,
not this DAG branch, so the existing arith.rfl tests miss it entirely.

The new file pins:
  - int + - * / % across various scalar/vec broadcast patterns
  - float arithmetic
  - narrow-int (I16/I32) DAG behaviour
  - all six comparison ops returning bool vector
  - compound where: (and/or) in select
  - filter+arithmetic fusion (where + derived col)
  - F64 NaN-aware DAG comparison

DOCUMENTED INCONSISTENCY found during this session: the DAG path
widens narrow-int arithmetic (I16+I16 → I64) while the eval-path
kernel preserves type ((+ 5h 3h) → 8h, I16).  The new test pins the
DAG's current widening behaviour so a future alignment fix surfaces.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/integration/dag_binary_ops.rfl | 66 +++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 test/rfl/integration/dag_binary_ops.rfl

diff --git a/test/rfl/integration/dag_binary_ops.rfl b/test/rfl/integration/dag_binary_ops.rfl
new file mode 100644
index 00000000..71013857
--- /dev/null
+++ b/test/rfl/integration/dag_binary_ops.rfl
@@ -0,0 +1,66 @@
+;; DAG executor binary ops — exercise expr.c::expr_exec_binary across
+;; types and operator families.  Derived columns in (select {x: (op a b) ...})
+;; route through the DAG, unlike top-level (op a b) which goes through
+;; the eval-time atomic kernel.
+
+;; ────────────── int arithmetic ──────────────
+(set Ti (table [a b] (list [1 2 3 4 5] [10 20 30 40 50])))
+
+(sum (at (select {x: (+ a b) from: Ti}) 'x)) -- 165
+(sum (at (select {x: (- b a) from: Ti}) 'x)) -- 135
+(sum (at (select {x: (* a b) from: Ti}) 'x)) -- 550
+;; DAG promotes int/int division to f64 (unlike eval-path floor div)
+(sum (at (select {x: (/ b a) from: Ti}) 'x)) -- 50.0
+(sum (at (select {x: (% b a) from: Ti}) 'x)) -- 0
+
+;; with scalar broadcast on RHS
+(sum (at (select {x: (* a 10) from: Ti}) 'x)) -- 150
+(sum (at (select {x: (+ a 100) from: Ti}) 'x)) -- 515
+
+;; ────────────── float arithmetic ──────────────
+(set Tf (table [a b] (list [1.0 2.0 3.0 4.0] [0.5 1.5 2.5 3.5])))
+
+;; (1.0+0.5) + (2.0+1.5) + (3.0+2.5) + (4.0+3.5) = 18.0
+(sum (at (select {x: (+ a b) from: Tf}) 'x)) -- 18.0
+;; (1*0.5)+(2*1.5)+(3*2.5)+(4*3.5) = 0.5+3+7.5+14 = 25.0
+(sum (at (select {x: (* a b) from: Tf}) 'x)) -- 25.0
+;; (1-0.5)+(2-1.5)+(3-2.5)+(4-3.5) = 4 * 0.5 = 2.0
+(sum (at (select {x: (- a b) from: Tf}) 'x)) -- 2.0
+
+;; ────────────── narrow-int arithmetic (i16/i32) ──────────────
+;; FOUND DURING THIS SESSION: DAG path WIDENS narrow-int arithmetic to
+;; i64, while the eval-path kernel preserves type:
+;;   eval: (+ 5h 3h) → 8h   (i16)
+;;   DAG : (select x: (+ a b) from: t-with-I16-cols) → I64 column
+;; Pinning the current DAG behaviour so a future fix surfaces here.
+(set Th (table [a b] (list (as 'I16 [1 2 3 4]) (as 'I16 [10 20 30 40]))))
+(sum (at (select {x: (+ a b) from: Th}) 'x)) -- 110
+(type (at (select {x: (+ a b) from: Th}) 'x)) -- 'I64
+
+(set Tj (table [a b] (list (as 'I32 [1 2 3]) (as 'I32 [100 200 300]))))
+(sum (at (select {x: (* a b) from: Tj}) 'x)) -- 1400
+(type (at (select {x: (* a b) from: Tj}) 'x)) -- 'I64
+
+;; ────────────── comparison: bool result vector ──────────────
+(set Tc (table [a b] (list [1 2 3 4 5] [3 3 3 3 3])))
+(sum (as 'I64 (at (select {x: (> a b) from: Tc}) 'x))) -- 2
+(sum (as 'I64 (at (select {x: (< a b) from: Tc}) 'x))) -- 2
+(sum (as 'I64 (at (select {x: (== a b) from: Tc}) 'x))) -- 1
+(sum (as 'I64 (at (select {x: (!= a b) from: Tc}) 'x))) -- 4
+(sum (as 'I64 (at (select {x: (>= a b) from: Tc}) 'x))) -- 3
+(sum (as 'I64 (at (select {x: (<= a b) from: Tc}) 'x))) -- 3
+
+;; ────────────── compound where: AND / OR with derived comparisons ──────────────
+(set Tw (table [a b] (list [1 2 3 4 5 6 7 8] [10 20 30 40 50 60 70 80])))
+(count (select {from: Tw where: (and (> a 2) (< b 60))})) -- 3
+(count (select {from: Tw where: (or  (== a 1) (== a 8))})) -- 2
+
+;; ────────────── filter with comparison reuses bool col downstream ──────────────
+;; (select x: (* a b) where: (> a 4) from: t) — DAG fuses filter + arithmetic
+;; rows with a in {5,6,7,8}; products 250+360+490+640 = 1740
+(sum (at (select {x: (* a b) from: Tw where: (> a 4)}) 'x)) -- 1740
+
+;; ────────────── float comparison (NaN-aware DAG path) ──────────────
+(set Tnan (table [a b] (list [1.0 2.0 0Nf 3.0] [1.0 2.0 2.0 0Nf])))
+(sum (as 'I64 (at (select {x: (== a b) from: Tnan}) 'x))) -- 2
+(sum (as 'I64 (at (select {x: (!= a b) from: Tnan}) 'x))) -- 2

From 6e0da6faad7e7dc633c4c43efe994e29a7b8f9fa Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 15:17:44 +0300
Subject: [PATCH 16/21] =?UTF-8?q?test(integration):=20cross-type=20workout?=
 =?UTF-8?q?=20=E2=80=94=20single=20200-row,=2011-column=20fixture?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A broad integration test that builds one 200-row table with 11 columns
covering nine primitive types (I64, I16, I32, F64, SYM, STR, B8, DATE,
TIME, plus a derived F64 price and a signed I64 qty) and runs:

* 12 atomic aggregations (sum/count/avg/min/max/first/last) per column
* 7 comparison-operator selects across SYM/F64/I64/B8 columns
* sort checks: asc/desc/rank on vectors, xasc/xdesc on the table
* distinct/take/reverse/concat/in across multiple types
* group-by per key type (SYM, STR, B8, DATE, I16) plus 2 multi-key
* DAG-derived columns via select+arithmetic
* inner/left/anti joins with partial-coverage lookup tables
* pivot with sum / count / avg aggregators
* cast across i16/i32/i64/f64/b8
* update / modify / insert / upsert
* csv round-trip (.csv.write / .csv.read)
* splayed round-trip (set-splayed / get-splayed)

ΔLine coverage from this single file: +49 lines.

KNOWN BUGS uncovered while writing this test, pinned with comments:
  - (first dt-col)  drops DATE type → i64 (returns days-since-epoch)
  - (first tm-col)  drops TIME type → i64 (ms since midnight)
  - (last bool-col) drops BOOL type → 0/1 i64
  - set-splayed of a table with a SYM column then get-splayed yields
    "error: corrupt"; same fixture sans SYM round-trips cleanly

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/integration/cross_type_workout.rfl | 207 ++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100644 test/rfl/integration/cross_type_workout.rfl

diff --git a/test/rfl/integration/cross_type_workout.rfl b/test/rfl/integration/cross_type_workout.rfl
new file mode 100644
index 00000000..e5e4900b
--- /dev/null
+++ b/test/rfl/integration/cross_type_workout.rfl
@@ -0,0 +1,207 @@
+;; Cross-type workout: a single 200-row, 11-column table touched by
+;; aggregation / sort / select / group-by / join / pivot / cast / csv /
+;; splayed paths.  One fixture, broad coverage — designed to exercise
+;; type switches in many .c files at once.
+;;
+;; Columns:
+;;   id     I64  — 0..199
+;;   i16c   I16  — 0..99 cycled twice (id % 100)
+;;   i32c   I32  — 0..199 (id % 1000, never wraps)
+;;   f64c   F64  — 0.0, 0.5, 1.0, … 99.5
+;;   sym    SYM  — AAPL/GOOG/MSFT/AMZN/TSLA cycling
+;;   str    STR  — alpha/beta/gamma/delta cycling
+;;   bool   B8   — true/false alternating
+;;   dt     DATE — three days cycling
+;;   tm     TIME — three times cycling
+;;   price  F64  — 100.0..149.0 cycled (50-cycle)
+;;   qty    I64  — -50..149 (signed, % 200)
+
+(set N 200)
+(set T (table [id i16c i32c f64c sym str bool dt tm price qty] (list (til N) (as 'I16 (% (til N) 100)) (as 'I32 (% (til N) 1000)) (as 'F64 (* 0.5 (til N))) (take ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA] N) (take ["alpha" "beta" "gamma" "delta"] N) (take [true false] N) (take [2024.01.01 2024.01.02 2024.01.03] N) (take [09:30:00.000 10:00:00.000 11:30:00.000] N) (as 'F64 (+ 100.0 (% (til N) 50))) (- (% (til N) 200) 50))))
+
+;; ════════════════════ INSPECTION ════════════════════
+(count T)              -- 200
+(count (key T))        -- 11
+(at (meta T) 'type)    -- 'TABLE
+(at (meta T) 'len)     -- 11
+(type (at T 'id))      -- 'I64
+(type (at T 'i16c))    -- 'I16
+(type (at T 'i32c))    -- 'I32
+(type (at T 'f64c))    -- 'F64
+(type (at T 'sym))     -- 'SYM
+(type (at T 'str))     -- 'STR
+(type (at T 'bool))    -- 'B8
+(type (at T 'dt))      -- 'DATE
+(type (at T 'tm))      -- 'TIME
+
+;; ════════════════════ AGGREGATIONS PER COLUMN ════════════════════
+;; sum/count/avg/min/max/first/last across each numeric column —
+;; routes through agg.c kernels per type.
+
+(sum (at T 'id))        -- 19900
+(count (at T 'id))      -- 200
+(min (at T 'id))        -- 0
+(max (at T 'id))        -- 199
+(first (at T 'id))      -- 0
+(last (at T 'id))       -- 199
+
+(avg (at T 'i16c))      -- 49.5
+(min (at T 'i16c))      -- 0h
+(max (at T 'i16c))      -- 99h
+
+(sum (at T 'i32c))      -- 19900
+(min (at T 'i32c))      -- 0i
+(max (at T 'i32c))      -- 199i
+
+(sum (at T 'f64c))      -- 9950.0
+(max (at T 'f64c))      -- 99.5
+(min (at T 'f64c))      -- 0.0
+
+(min (at T 'price))     -- 100.0
+(max (at T 'price))     -- 149.0
+
+(min (at T 'qty))       -- -50
+(max (at T 'qty))       -- 149
+
+;; non-numeric aggregations (first/last for SYM/STR/DATE/TIME/BOOL)
+(first (at T 'sym))     -- 'AAPL
+(last (at T 'sym))      -- 'TSLA
+(first (at T 'str))     -- "alpha"
+(last (at T 'str))      -- "delta"
+;; KNOWN BUG: first/last on DATE/TIME columns drops the type to i64.
+;; Pinning the current behaviour — 8766 = days(2024.01.01 - 2000.01.01).
+(first (at T 'dt))      -- 8766
+(type (first (at T 'dt))) -- 'i64
+;; TIME stored as ms since midnight; 09:30:00.000 = 34200000 ms.
+(first (at T 'tm))      -- 34200000
+;; bool also widens to int — last cycles between true/false; row 199 is false → 0
+(last (at T 'bool))     -- 0
+
+;; ════════════════════ COMPARISON ON COLUMNS ════════════════════
+(count (select {from: T where: (== sym 'AAPL)})) -- 40
+(count (select {from: T where: (!= sym 'TSLA)})) -- 160
+(count (select {from: T where: (> price 130.0)})) -- 76
+(count (select {from: T where: (<= price 110.0)})) -- 44
+(count (select {from: T where: (and (> qty 0) (< qty 100))})) -- 99
+(count (select {from: T where: (or (== sym 'AAPL) (== sym 'GOOG))})) -- 80
+(count (select {from: T where: (not (== bool true))})) -- 100
+
+;; ════════════════════ SORT ════════════════════
+;; whole-vec sort returns a fresh vec
+(at (asc (at T 'id)) 0)              -- 0
+(at (desc (at T 'id)) 0)             -- 199
+(at (asc (at T 'sym)) 0)             -- 'AAPL
+(at (desc (at T 'sym)) 0)            -- 'TSLA
+(at (asc (at T 'price)) 0)           -- 100.0
+(at (desc (at T 'price)) 0)          -- 149.0
+(count (rank (at T 'id)))            -- 200
+
+;; xasc/xdesc on the table reorder rows
+(at (at (xasc T 'qty) 'qty) 0)       -- -50
+(at (at (xdesc T 'qty) 'qty) 0)      -- 149
+(at (at (xasc T 'sym) 'sym) 0)       -- 'AAPL
+(at (at (xasc T 'price) 'price) 0)   -- 100.0
+
+;; ════════════════════ COLLECTION OPS ════════════════════
+(count (distinct (at T 'sym)))       -- 5
+(count (distinct (at T 'str)))       -- 4
+(count (distinct (at T 'bool)))      -- 2
+(count (distinct (at T 'dt)))        -- 3
+(count (distinct (at T 'i16c)))      -- 100
+
+(count (reverse (at T 'id)))         -- 200
+(at (reverse (at T 'id)) 0)          -- 199
+
+(count (take (at T 'id) 50))         -- 50
+(count (take (at T 'id) -50))        -- 50
+
+(count (concat (at T 'sym) (at T 'sym))) -- 400
+
+(count (in ['AAPL 'GOOG] (distinct (at T 'sym)))) -- 2
+
+;; ════════════════════ GROUP-BY (every key type) ════════════════════
+(count (select {c: (count id) from: T by: sym}))   -- 5
+(count (select {c: (count id) from: T by: str}))   -- 4
+(count (select {c: (count id) from: T by: bool}))  -- 2
+(count (select {c: (count id) from: T by: dt}))    -- 3
+(count (select {c: (count id) from: T by: i16c}))  -- 100
+
+;; aggregator combinations after grouping
+(sum (at (select {s: (sum qty) from: T by: sym}) 's))   -- (sum (at T 'qty))
+(sum (at (select {c: (count id) from: T by: sym}) 'c)) -- 200
+
+;; multi-key group-by
+(count (select {c: (count id) from: T by: [sym bool]})) -- 10
+(count (select {c: (count id) from: T by: [sym dt]}))   -- 15
+
+;; ════════════════════ DERIVED COLUMNS (DAG executor) ════════════════════
+;; integer + integer derived col — exact
+;; sum(id) + sum(qty) = 19900 + 9900 = 29800
+(sum (at (select {x: (+ id qty) from: T}) 'x))    -- 29800
+;; price >= 100 always; qty < 100 in 150 rows (i=0..149) → those match
+;; the rest (i=150..199) have qty=price, so > is false there.
+(count (select {from: T where: (> price qty)})) -- 150
+;; both id and qty are i64; result is i64
+(type (at (select {x: (- id qty) from: T}) 'x))   -- 'I64
+
+;; ════════════════════ JOINS ════════════════════
+(set Lookup (table [sym sector] (list ['AAPL 'GOOG 'MSFT 'AMZN 'TSLA] ['tech 'tech 'tech 'retail 'auto])))
+(count (inner-join [sym] T Lookup))               -- 200
+(count (left-join  [sym] T Lookup))               -- 200
+(count (anti-join  [sym] T (table [sym] (list ['AAPL 'GOOG])))) -- 120
+
+;; only AAPL and GOOG match the partial-coverage lookup
+(set Partial (table [sym info] (list ['AAPL 'GOOG] ['x 'y])))
+(count (inner-join [sym] T Partial)) -- 80
+
+;; ════════════════════ PIVOT ════════════════════
+;; sym × bool — 5 rows × 2 boolean columns
+(count (pivot T 'sym 'bool 'qty sum))            -- 5
+(count (pivot T 'sym 'bool 'qty count))          -- 5
+(count (pivot T 'sym 'bool 'price avg))          -- 5
+
+;; ════════════════════ CAST ACROSS COLUMN TYPES ════════════════════
+(type (as 'I64 (at T 'i16c)))                 -- 'I64
+(type (as 'F64 (at T 'i32c)))                 -- 'F64
+(type (as 'I32 (at T 'id)))                   -- 'I32
+(type (as 'I16 (at T 'i32c)))                 -- 'I16
+(type (as 'B8  (at T 'i16c)))                 -- 'B8
+
+(sum (as 'I64 (at T 'i16c))) -- (sum (at T 'i16c))
+(sum (as 'F64 (at T 'id))) -- 19900.0
+
+;; ════════════════════ UPDATE / INSERT (functional + in-place) ════════════════════
+(set Tu (update {from: 'T total: (* price qty)}))
+(count (key T))                 -- 12
+;; price * qty produces f64 (price is f64); large floats format as "1.27e+06"
+;; Just check the column exists and is f64.
+(type (at T 'total))            -- 'F64
+
+;; modify a column functionally
+(set Tm (modify T 'qty (fn [x] (* x 2))))
+(sum (at Tm 'qty))              -- (* 2 (sum (at T 'qty)))
+
+;; insert/upsert on a small simple table (full T has too many type
+;; constraints to insert one heterogeneous row)
+(set Tsmall (table [k v] (list [1 2 3] [10 20 30])))
+(count (insert Tsmall (list 4 40))) -- 4
+(count (upsert Tsmall 1 (list 2 99))) -- 3
+
+;; ════════════════════ CSV ROUND-TRIP ════════════════════
+(.csv.write T "/tmp/cross_type_workout.csv")
+(set R (.csv.read "/tmp/cross_type_workout.csv"))
+(count R)                        -- 200
+(count (key R))                  -- 12
+(sum (at R 'id))                 -- 19900
+(sum (at R 'i16c))               -- 9900
+
+;; ════════════════════ SPLAYED ROUND-TRIP ════════════════════
+;; Drop SYM column for splayed round-trip — there's an outstanding
+;; "corrupt" path with SYM that's tracked separately.  Use only
+;; numeric columns here.
+(set Tplain (table [id price qty] (list (at T 'id) (at T 'price) (at T 'qty))))
+(set-splayed "/tmp/cross_type_workout_splayed/" Tplain)
+(set Sp (get-splayed "/tmp/cross_type_workout_splayed/"))
+(count Sp)                       -- 200
+(sum (at Sp 'id))                -- 19900
+(sum (at Sp 'qty))               -- 9900

From 3872960343f54138949109b9e9193878a18572db Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 15:48:27 +0300
Subject: [PATCH 17/21] fix(agg): first/last preserve type for
 DATE/TIME/TIMESTAMP/BOOL/U8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ray_first_fn and ray_last_fn whitelisted SYM/I16/I32/GUID/STR for the
type-preserving collection_elem path; everything else fell through to
AGG_VEC_VIA_DAG which produced an i64 result for these types:

  (first [2024.01.01 2024.01.02])   → 8766     (expected: 'date 2024.01.01)
  (first [09:30:00.000 ...])        → 34200000 (expected: 'time 09:30:00.000)
  (last  [true false])              → 0        (expected: 'b8 false)

Add DATE / TIME / TIMESTAMP / BOOL / U8 to the whitelist so they
follow the same type-preserving path.  collection_elem already builds
typed atoms for all of them via ray_date / ray_time / ray_timestamp /
ray_bool / ray_u8 — this fix simply routes there.

Discovered while writing test/rfl/integration/cross_type_workout.rfl;
that file's pinned-bug TODOs are now upgraded to type-asserting
assertions.  Plus per-type regression coverage in
test/rfl/agg/{first,last}.rfl.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ops/agg.c                               | 17 +++++++++++-----
 test/rfl/agg/first.rfl                      | 22 +++++++++++++++++++++
 test/rfl/agg/last.rfl                       | 16 +++++++++++++++
 test/rfl/integration/cross_type_workout.rfl | 15 +++++++-------
 4 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/src/ops/agg.c b/src/ops/agg.c
index d6d34980..1610e9b1 100644
--- a/src/ops/agg.c
+++ b/src/ops/agg.c
@@ -239,9 +239,13 @@ ray_t* ray_first_fn(ray_t* x) {
     }
     if (ray_is_vec(x)) {
         if (ray_len(x) == 0) return ray_typed_null(-x->type);
-        /* For SYM, GUID, STR and other non-numeric types, use collection_elem directly */
-        if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 ||
-            x->type == RAY_GUID || x->type == RAY_STR) {
+        /* For non-I64/F64 types route through collection_elem which
+         * preserves the element type.  The DAG path widens to i64 for
+         * DATE/TIME/TIMESTAMP/BOOL/U8 — bypass it. */
+        if (x->type == RAY_SYM   || x->type == RAY_I32  || x->type == RAY_I16 ||
+            x->type == RAY_GUID  || x->type == RAY_STR  || x->type == RAY_BOOL ||
+            x->type == RAY_U8    || x->type == RAY_DATE || x->type == RAY_TIME ||
+            x->type == RAY_TIMESTAMP) {
             int alloc = 0;
             return collection_elem(x, 0, &alloc);
         }
@@ -275,8 +279,11 @@ ray_t* ray_last_fn(ray_t* x) {
     }
     if (ray_is_vec(x)) {
         if (ray_len(x) == 0) return ray_typed_null(-x->type);
-        if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 ||
-            x->type == RAY_GUID || x->type == RAY_STR) {
+        /* See ray_first_fn for rationale on the type whitelist. */
+        if (x->type == RAY_SYM   || x->type == RAY_I32  || x->type == RAY_I16 ||
+            x->type == RAY_GUID  || x->type == RAY_STR  || x->type == RAY_BOOL ||
+            x->type == RAY_U8    || x->type == RAY_DATE || x->type == RAY_TIME ||
+            x->type == RAY_TIMESTAMP) {
             int alloc = 0;
             return collection_elem(x, ray_len(x) - 1, &alloc);
         }
diff --git a/test/rfl/agg/first.rfl b/test/rfl/agg/first.rfl
index 6826d0aa..9f74d45c 100644
--- a/test/rfl/agg/first.rfl
+++ b/test/rfl/agg/first.rfl
@@ -9,3 +9,25 @@
 
 ;; prepend check via concat
 (first (concat [7] [1 2 3])) -- 7
+
+;; ────────────── type preservation across all element types ──────────────
+;; Pre-fix: first on DATE/TIME/TIMESTAMP/BOOL columns dropped the type
+;; and returned the raw int representation (8766, 34200000, etc.).
+(first [1.0 2.0 3.0])              -- 1.0
+(type (first [1.0 2.0 3.0]))       -- 'f64
+(first [1h 2h 3h])                 -- 1h
+(type (first [1h 2h 3h]))          -- 'i16
+(first [1i 2i 3i])                 -- 1i
+(type (first [1i 2i 3i]))          -- 'i32
+(first ['a 'b 'c])                 -- 'a
+(type (first ['a 'b 'c]))          -- 'sym
+(first ["aa" "bb"])                -- "aa"
+(type (first ["aa" "bb"]))         -- 'str
+(first [true false])               -- true
+(type (first [true false]))        -- 'b8
+(first [2024.01.01 2024.01.02])    -- 2024.01.01
+(type (first [2024.01.01 2024.01.02])) -- 'date
+(first [09:30:00.000 10:00:00.000]) -- 09:30:00.000
+(type (first [09:30:00.000 10:00:00.000])) -- 'time
+(type (first (as 'TIMESTAMP [1 2]))) -- 'timestamp
+(type (first [0x01 0x02 0xff]))    -- 'u8
diff --git a/test/rfl/agg/last.rfl b/test/rfl/agg/last.rfl
index 09165f8f..a2b64fef 100644
--- a/test/rfl/agg/last.rfl
+++ b/test/rfl/agg/last.rfl
@@ -9,3 +9,19 @@
 
 ;; last(reverse v) == first v
 (first V) -- (last (reverse V))
+
+;; ────────────── type preservation (regression for last on DATE/BOOL) ──────────────
+(last [1.0 2.0 3.0])              -- 3.0
+(type (last [1h 2h 3h]))          -- 'i16
+(type (last [1i 2i 3i]))          -- 'i32
+(last ['a 'b 'c])                 -- 'c
+(type (last ['a 'b 'c]))          -- 'sym
+(last ["aa" "bb"])                -- "bb"
+(last [true false])               -- false
+(type (last [true false]))        -- 'b8
+(last [2024.01.01 2024.01.02])    -- 2024.01.02
+(type (last [2024.01.01 2024.01.02])) -- 'date
+(last [09:30:00.000 10:00:00.000]) -- 10:00:00.000
+(type (last [09:30:00.000 10:00:00.000])) -- 'time
+(type (last (as 'TIMESTAMP [1 2]))) -- 'timestamp
+(type (last [0x01 0xff]))         -- 'u8
diff --git a/test/rfl/integration/cross_type_workout.rfl b/test/rfl/integration/cross_type_workout.rfl
index e5e4900b..dccf2d61 100644
--- a/test/rfl/integration/cross_type_workout.rfl
+++ b/test/rfl/integration/cross_type_workout.rfl
@@ -68,14 +68,13 @@
 (last (at T 'sym))      -- 'TSLA
 (first (at T 'str))     -- "alpha"
 (last (at T 'str))      -- "delta"
-;; KNOWN BUG: first/last on DATE/TIME columns drops the type to i64.
-;; Pinning the current behaviour — 8766 = days(2024.01.01 - 2000.01.01).
-(first (at T 'dt))      -- 8766
-(type (first (at T 'dt))) -- 'i64
-;; TIME stored as ms since midnight; 09:30:00.000 = 34200000 ms.
-(first (at T 'tm))      -- 34200000
-;; bool also widens to int — last cycles between true/false; row 199 is false → 0
-(last (at T 'bool))     -- 0
+;; first/last preserve type for DATE/TIME/TIMESTAMP/BOOL (was widening to i64)
+(first (at T 'dt))         -- 2024.01.01
+(type (first (at T 'dt)))  -- 'date
+(first (at T 'tm))         -- 09:30:00.000
+(type (first (at T 'tm)))  -- 'time
+(last (at T 'bool))        -- false
+(type (last (at T 'bool))) -- 'b8
 
 ;; ════════════════════ COMPARISON ON COLUMNS ════════════════════
 (count (select {from: T where: (== sym 'AAPL)})) -- 40

From 43df6d377a16bc828586d4f143ab1fd57412008d Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 18:28:06 +0300
Subject: [PATCH 18/21] test(integration): groupby + per-key-type + diverse
 aggregators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A 1000-row three-column fixture with 50 distinct keys driving group.c::
exec_group through every aggregator combination + every primitive key
type.

Aggregators covered in group-by context: count, sum, avg, min, max,
first, last, dev, var, stddev, dev_pop, var_pop, stddev_pop.  Plus
multi-aggregator (7-agg) select, group-by + filter pushdown, and
no-by-clause aggregation over the whole table.

Key types: I64, SYM, BOOL, DATE, F64, I16, I32 — each routes through
a separate hash-key path in the parallel radix groupby.

KNOWN BUGS pinned with `!- length`:
  - (med v) inside (select … by:) raises "length: non-agg expression
    referencing a column produced a non-row-aligned result"
  - (diverse v) — actually a bool predicate ("all unique?"), not the
    count-distinct one might guess from the name; works standalone but
    isn't usable inside group-by either

ΔLine coverage: +67 lines (mostly variant aggregator paths in group.c
and per-key-type dispatch in exec_group).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/integration/groupby_aggregators.rfl | 88 ++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 test/rfl/integration/groupby_aggregators.rfl

diff --git a/test/rfl/integration/groupby_aggregators.rfl b/test/rfl/integration/groupby_aggregators.rfl
new file mode 100644
index 00000000..da9492b6
--- /dev/null
+++ b/test/rfl/integration/groupby_aggregators.rfl
@@ -0,0 +1,88 @@
+;; Multi-aggregator group-by with rarely-tested aggregators (med, dev,
+;; var, stddev, dev_pop, var_pop, diverse, first/last).  Each combo
+;; routes through different paths in group.c::exec_group.
+
+(set N 1000)
+(set T (table [g v f] (list (% (til N) 50) (til N) (as 'F64 (* 0.5 (til N))))))
+
+;; ────────────── basic counts ──────────────
+(count T) -- 1000
+(count (select {c: (count v) from: T by: g})) -- 50
+
+;; ────────────── single-aggregator per call ──────────────
+(sum (at (select {x: (sum v) from: T by: g}) 'x))     -- 499500
+(sum (at (select {x: (count v) from: T by: g}) 'x))   -- 1000
+(sum (at (select {x: (avg v) from: T by: g}) 'x))     -- 24975.0
+;; per-group: g sees v ∈ {g, g+50, …, g+950}; min=g, max=g+950
+;; sum_g min = sum_g g = 1225; sum_g max = 1225 + 50*950 = 48725
+(sum (at (select {x: (min v) from: T by: g}) 'x))     -- 1225
+(sum (at (select {x: (max v) from: T by: g}) 'x))     -- 48725
+(sum (at (select {x: (first v) from: T by: g}) 'x))   -- 1225
+(sum (at (select {x: (last v) from: T by: g}) 'x))    -- 48725
+
+;; ────────────── stat aggregators (dev / var / stddev) ──────────────
+;; Each group has 20 evenly-spaced values, so variance/stddev is the
+;; same in every group.  The values themselves are not asserted here;
+;; we only check that each select yields one row per group (50).
+(count (select {d: (dev v) v: (var v) s: (stddev v) from: T by: g})) -- 50
+
+;; dev_pop and var_pop variants (population vs sample)
+(count (select {d: (dev_pop v) v: (var_pop v) from: T by: g}))  -- 50
+(count (select {d: (stddev_pop v) from: T by: g}))              -- 50
+
+;; ────────────── median ──────────────
+;; KNOWN: (med v) works standalone but raises "length" inside select-by.
+;; Verify standalone path; pin the group-by error.
+(med [1 2 3 4 5]) -- 3.0
+(count (select {m: (med v) from: T by: g})) !- length
+
+;; ────────────── multi-aggregator: 7 aggs in one select ──────────────
+(set Many (select {c: (count v) s: (sum v) mn: (min v) mx: (max v) av: (avg v) f: (first v) l: (last v) from: T by: g}))
+(count Many) -- 50
+(sum (at Many 'c)) -- 1000
+(sum (at Many 's)) -- 499500
+
+;; ────────────── float aggregators ──────────────
+(count (select {av: (avg f) sd: (stddev f) from: T by: g})) -- 50
+(sum (at (select {s: (sum f) from: T by: g}) 's))           -- 249750.0
+
+;; ────────────── diverse — bool predicate "all elements distinct" ──────────────
+;; (Not count-distinct; returns true iff every element is unique.)
+(diverse [1 2 3 4 5]) -- true
+(diverse [1 2 1 3 2]) -- false
+(diverse [1 1 1])     -- false
+(diverse [42])        -- true
+
+;; ────────────── group-by + filter (predicate pushdown) ──────────────
+;; (sum v where v < 500) per group. Group g sees only v's < 500 → 10 of 20.
+(count (select {s: (sum v) from: T by: g where: (< v 500)})) -- 50
+
+;; ────────────── group-by no `by` clause: aggregate over whole table ──────────────
+;; pure aggregations without grouping
+(set Whole (select {tot: (sum v) ct: (count v) avg_v: (avg v) from: T}))
+(count Whole) -- 1000
+(at (at Whole 'tot) 0) -- 499500
+
+;; ────────────── group-by SYM key ──────────────
+(set Tsym (table [k v] (list (take ['A 'B 'C 'D 'E] N) (til N))))
+(count (select {s: (sum v) from: Tsym by: k})) -- 5
+(sum (at (select {s: (sum v) from: Tsym by: k}) 's)) -- 499500
+
+;; ────────────── group-by BOOL ──────────────
+(set Tbool (table [b v] (list (take [true false] N) (til N))))
+(count (select {s: (sum v) from: Tbool by: b})) -- 2
+
+;; ────────────── group-by DATE ──────────────
+(set Tdt (table [d v] (list (take [2024.01.01 2024.01.02 2024.01.03] N) (til N))))
+(count (select {s: (sum v) from: Tdt by: d})) -- 3
+
+;; ────────────── group-by F64 (float key) ──────────────
+(set Tf (table [k v] (list (take [1.5 2.5 3.5 4.5] N) (til N))))
+(count (select {s: (sum v) from: Tf by: k})) -- 4
+
+;; ────────────── narrow-int key types (i16, i32) ──────────────
+(set Ti16 (table [k v] (list (as 'I16 (% (til N) 30)) (til N))))
+(count (select {s: (sum v) from: Ti16 by: k})) -- 30
+
+(set Ti32 (table [k v] (list (as 'I32 (% (til N) 10)) (til N))))
+(count (select {s: (sum v) from: Ti32 by: k})) -- 10

From 7c58f509d5d56cefd4ecfb04cf6d0d9a557cd5c3 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 18:34:59 +0300
Subject: [PATCH 19/21] test(table): pivot multi-key, I64/DATE keys,
 missing-cell semantics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends pivot.rfl with paths previously uncovered:
* multi-row-key form (pivot t [a b] c v sum) — 4-column output
* I64 row key (was only SYM-keyed)
* DATE row key
* missing-cell semantics: sum and count both yield 0 for empty groups

These exercise the row-key hash + cross-product paths in
tblop.c::ray_pivot_fn that the existing fixtures (one SYM row key)
didn't reach.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/table/pivot.rfl | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/test/rfl/table/pivot.rfl b/test/rfl/table/pivot.rfl
index 0e13e4f0..f789594d 100644
--- a/test/rfl/table/pivot.rfl
+++ b/test/rfl/table/pivot.rfl
@@ -74,3 +74,35 @@
 (set Tu0 (select {from: Tu1 where: (> a 100)}))   ;; 0 rows, same schema
 (count (union-all Tu0 Tu1)) -- 3
 (count (union-all Tu1 Tu0)) -- 3
+
+;; ────────────── pivot with TWO row keys (vector form) ──────────────
+(set T2k (table [a b c v] (list ['X 'X 'Y 'Y 'X 'Y] [1 2 1 2 1 2] ['p 'q 'p 'q 'q 'p] [10 20 30 40 50 60])))
+(set P2k (pivot T2k ['a 'b] 'c 'v sum))
+(count P2k) -- 4
+(count (key P2k)) -- 4
+(at (at P2k 'p) 0) -- 10
+(at (at P2k 'q) 0) -- 50
+(at (at P2k 'p) 3) -- 60
+(at (at P2k 'q) 3) -- 40
+
+;; ────────────── pivot with I64 row key ──────────────
+(set Ti (table [k c v] (list [1 1 2 2 3] ['x 'y 'x 'y 'x] [10 20 30 40 50])))
+(set Pi (pivot Ti 'k 'c 'v sum))
+(count Pi) -- 3
+(at (at Pi 'x) 0) -- 10
+(at (at Pi 'y) 1) -- 40
+(at (at Pi 'x) 2) -- 50
+
+;; ────────────── pivot with DATE row key ──────────────
+(set Td (table [d c v] (list [2024.01.01 2024.01.01 2024.01.02 2024.01.02] ['x 'y 'x 'y] [10 20 30 40])))
+(set Pd (pivot Td 'd 'c 'v sum))
+(count Pd) -- 2
+(at (at Pd 'x) 0) -- 10
+(at (at Pd 'y) 1) -- 40
+
+;; ────────────── pivot count vs sum on missing cells ──────────────
+(set Ts (table [r c v] (list ['A 'A 'B] ['x 'y 'x] [1 2 3])))
+(set Pcs (pivot Ts 'r 'c 'v sum))
+(at (at Pcs 'y) 1) -- 0
+(set Pcc (pivot Ts 'r 'c 'v count))
+(at (at Pcc 'y) 1) -- 0

From f3fc33001ba8ad38da8e6b9e75b62e83bb5f0bc5 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 19:30:49 +0300
Subject: [PATCH 20/21] test(datalog): recursive ancestor rule + multi-clause
 body forms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends datalog/rule.rfl with paths missed by the original 13 cases:
* recursive (anc ?x ?y) — depth-3 chain plus a disjoint pair (5→6),
  exercising the rule fixed-point loop in dl_compile_rule
* two-clause derivation (cofriend) — duplicate triggers de-dupe
* multi-constant body (dept 10 ∧ level 'senior) — separate filter
  branches in dl_parse_body_clause

Should hit some of the 247 uncovered lines in datalog.c::dl_compile_rule.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/rfl/datalog/rule.rfl | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/test/rfl/datalog/rule.rfl b/test/rfl/datalog/rule.rfl
index 332d0690..2560b03c 100644
--- a/test/rfl/datalog/rule.rfl
+++ b/test/rfl/datalog/rule.rfl
@@ -143,3 +143,38 @@
 (set db (assert-fact db 2 'name 200))
 (set db (assert-fact db 2 'manager 1))
 (count (query db (find ?e) (where (?e :name ?n) (not (?e :manager ?m))))) -- 0
+
+;; ────────────── recursive rule: ancestor over a parent chain ──────────────
+(set db (datoms))
+(set db (assert-fact db 1 'parent 2))
+(set db (assert-fact db 2 'parent 3))
+(set db (assert-fact db 3 'parent 4))
+(set db (assert-fact db 5 'parent 6))
+(rule (anc ?x ?y) (?x :parent ?y))
+(rule (anc ?x ?z) (?x :parent ?y) (anc ?y ?z))
+(count (query db (find ?y) (where (anc 1 ?y)))) -- 3
+(count (query db (find ?y) (where (anc 5 ?y)))) -- 1
+;; reachable pairs: 1→{2,3,4}, 2→{3,4}, 3→{4}, 5→{6} = 7
+(count (query db (find ?x ?y) (where (anc ?x ?y)))) -- 7
+
+;; ────────────── two-clause derivation (co-friend via shared friend) ──────────────
+(set db (datoms))
+(set db (assert-fact db 1 'friend 2))
+(set db (assert-fact db 2 'friend 3))
+(set db (assert-fact db 1 'friend 4))
+(set db (assert-fact db 4 'friend 3))
+(rule (cofriend ?x ?z) (?x :friend ?y) (?y :friend ?z))
+;; both 1→2→3 and 1→4→3 yield (1,3); query de-dupes
+(count (query db (find ?x ?z) (where (cofriend ?x ?z)))) -- 1
+
+;; ────────────── multi-constant body ──────────────
+(set db (datoms))
+(set db (assert-fact db 1 'dept 10))
+(set db (assert-fact db 1 'level 'senior))
+(set db (assert-fact db 2 'dept 10))
+(set db (assert-fact db 2 'level 'junior))
+(set db (assert-fact db 3 'dept 20))
+(set db (assert-fact db 3 'level 'senior))
+(count (query db (find ?e) (where (?e :dept 10) (?e :level 'senior)))) -- 1
+(count (query db (find ?e) (where (?e :level 'senior)))) -- 2
+(count (query db (find ?e) (where (?e :dept 10)))) -- 2

From 82fae9d220bf7ec96a32ece40a402743f50dd1a6 Mon Sep 17 00:00:00 2001
From: Serhii Savchuk <ser.vasilich@hotmail.com>
Date: Tue, 28 Apr 2026 20:02:02 +0300
Subject: [PATCH 21/21] review: fix three blockers + bonus tests / docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address Anton's PR #8 review:

[blocker] sym_atom_cmp: NULL ray_sym_str → fall back to comparing raw
interned ids instead of returning 0 (which would silently collapse
distinct symbols if the intern table is corrupted or uninitialised).
Stable, total ordering preserved.

[blocker] neg/abs INT_MIN UB: -INT32_MIN / -INT64_MIN were
signed-overflow UB (the i16 operand is promoted to int first, so that
case only relied on an implementation-defined narrowing conversion).
Negate via unsigned cast — wraparound is defined for unsigned types and
the result wraps back to INT_MIN consistently with binary
`(- 0 INT_MIN)`.  Added regression rows in arith/neg.rfl and
arith/abs.rfl that pin INT16_MIN / INT32_MIN behaviour (via cast since
`-32768h` literal is unrepresentable).

[blocker] ray_mkdir_p path buffer: replace hardcoded `char buf[1024]`
with `RAY_PATH_MAX` (PATH_MAX on POSIX, 4096 on Windows).  Deep
splayed paths like /db/yyyy.mm.dd/leaf/ now fit.

[bonus] glob `%`/`_` literal-match assertions in strop/like.rfl —
makes the SQL→glob consolidation explicit in tests, not just docs.

[bonus] glob.h documents the lenient unterminated-class policy
(matches glibc fnmatch semantics; never produces parse error).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ops/arith.c         | 22 ++++++++++++++--------
 src/ops/cmp.c           | 16 ++++++++++++++--
 src/ops/glob.h          |  9 ++++++++-
 src/store/fileio.c      | 18 ++++++++++++++++--
 test/rfl/arith/abs.rfl  | 11 +++++++++++
 test/rfl/arith/neg.rfl  | 19 +++++++++++++++++++
 test/rfl/strop/like.rfl |  8 ++++++++
 7 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/src/ops/arith.c b/src/ops/arith.c
index 12ae7bfb..e840d015 100644
--- a/src/ops/arith.c
+++ b/src/ops/arith.c
@@ -330,11 +330,15 @@ ray_t* ray_mod_fn(ray_t* a, ray_t* b) {
 
 ray_t* ray_neg_fn(ray_t* x) {
     if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; }
-    if (x->type == -RAY_I64) return make_i64(-x->i64);
     if (x->type == -RAY_F64) return make_f64(-x->f64);
-    /* Narrow ints preserve type — same convention as binary + - mul. */
-    if (x->type == -RAY_I32) return make_i32(-x->i32);
-    if (x->type == -RAY_I16) return make_i16(-x->i16);
+    /* Negate via unsigned to avoid signed-overflow UB on INT_MIN.
+     * Wraparound is defined for unsigned types; (T)(uT)(-(uT)x) yields
+     * the same wrapped value the corresponding two's-complement
+     * arithmetic would produce — so (neg INT_MIN) returns INT_MIN
+     * (overflow-wrap) consistently with binary `(- 0 INT_MIN)`. */
+    if (x->type == -RAY_I64) return make_i64((int64_t)(-(uint64_t)x->i64));
+    if (x->type == -RAY_I32) return make_i32((int32_t)(-(uint32_t)x->i32));
+    if (x->type == -RAY_I16) return make_i16((int16_t)(-(uint16_t)x->i16));
     return ray_error("type", NULL);
 }
 
@@ -362,13 +366,15 @@ ray_t* ray_ceil_fn(ray_t* x) {
     return ray_error("type", NULL);
 }
 
-/* abs: absolute value, preserves type */
+/* abs: absolute value, preserves type.  Uses unsigned-wrap negation
+ * for the negative branch — same overflow-wrap semantics as `neg`,
+ * so (abs INT_MIN) returns INT_MIN rather than UB. */
 ray_t* ray_abs_fn(ray_t* x) {
     if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; }
     if (x->type == -RAY_F64) return make_f64(fabs(x->f64));
-    if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? -x->i64 : x->i64);
-    if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? -x->i32 : x->i32);
-    if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? -x->i16 : x->i16);
+    if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? (int64_t)(-(uint64_t)x->i64) : x->i64);
+    if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? (int32_t)(-(uint32_t)x->i32) : x->i32);
+    if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? (int16_t)(-(uint16_t)x->i16) : x->i16);
     return ray_error("type", NULL);
 }
 
diff --git a/src/ops/cmp.c b/src/ops/cmp.c
index d696e1cb..df47e368 100644
--- a/src/ops/cmp.c
+++ b/src/ops/cmp.c
@@ -43,12 +43,24 @@ int char_str_cmp(ray_t* a, ray_t* b, int *out) {
 /* Lexicographic compare of two SYM atoms.  Fast path: equal interned
  * ids ⇒ identical text ⇒ 0, no global-table lookup.  Slow path: pull
  * the backing STR via ray_sym_str and delegate to ray_str_cmp, which
- * uses the 12-byte SSO inline path for short symbols. */
+ * uses the 12-byte SSO inline path for short symbols.
+ *
+ * If a sym_str lookup fails (NULL — e.g. corrupted intern table or
+ * uninitialised state) we fall back to comparing the raw interned ids
+ * rather than declaring the unequal symbols equal.  Stable, never
+ * silently collapses distinct symbols. */
 int sym_atom_cmp(ray_t* a, ray_t* b) {
     if (a->i64 == b->i64) return 0;
     ray_t* sa = ray_sym_str(a->i64);
     ray_t* sb = ray_sym_str(b->i64);
-    int r = (sa && sb) ? ray_str_cmp(sa, sb) : 0;
+    int r;
+    if (sa && sb) {
+        r = ray_str_cmp(sa, sb);
+    } else {
+        /* Fallback: order by interned id (stable, total).  Same sign
+         * convention as memcmp: negative if a < b, positive if a > b. */
+        r = (a->i64 < b->i64) ? -1 : 1;
+    }
     if (sa) ray_release(sa);
     if (sb) ray_release(sb);
     return r;
diff --git a/src/ops/glob.h b/src/ops/glob.h
index 7fa6bef6..63aa2959 100644
--- a/src/ops/glob.h
+++ b/src/ops/glob.h
@@ -21,7 +21,14 @@
  *
  * `glob_match` is case-sensitive.  `glob_match_ci` lowercases ASCII letters
  * on both sides before comparing (so it matches 'A' against 'a', 'A-Z'
- * range matches both case forms, etc.). */
+ * range matches both case forms, etc.).
+ *
+ * Lenient parsing policy: an unterminated character class (e.g. pattern
+ * "abc[def" with no closing `]`) is accepted — the class consumes input
+ * up to the end of the pattern and the match continues with whatever
+ * `matched` flag accumulated.  Like fnmatch(3) this never raises
+ * `error: parse` mid-search (POSIX fnmatch treats the lone `[` as a
+ * literal instead).  Callers wanting strict validation must pre-validate. */
 bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn);
 bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn);
 
diff --git a/src/store/fileio.c b/src/store/fileio.c
index 8adb596f..8586c13a 100644
--- a/src/store/fileio.c
+++ b/src/store/fileio.c
@@ -23,6 +23,20 @@
 
 #include "fileio.h"
 
+#include <limits.h>
+
+/* PATH_MAX is usually defined by <limits.h> on POSIX (4096 on Linux) but
+ * may be absent; Windows' MAX_PATH is only 260 unless long paths are
+ * enabled.  RAY_PATH_MAX is 4096 on Windows, else PATH_MAX if defined, else
+ * 4096, so deep splayed paths (e.g. /db/yyyy.mm.dd/table/) fit the buffer. */
+#ifdef RAY_OS_WINDOWS
+#  define RAY_PATH_MAX 4096
+#elif defined(PATH_MAX)
+#  define RAY_PATH_MAX PATH_MAX
+#else
+#  define RAY_PATH_MAX 4096
+#endif
+
 #ifdef RAY_OS_WINDOWS
 
 #include <errno.h>
@@ -121,7 +135,7 @@ ray_err_t ray_mkdir(const char* path) {
 
 ray_err_t ray_mkdir_p(const char* path) {
     if (!path || !*path) return RAY_ERR_IO;
-    char buf[1024];
+    char buf[RAY_PATH_MAX];
     size_t len = strlen(path);
     if (len >= sizeof(buf)) return RAY_ERR_IO;
     memcpy(buf, path, len + 1);
@@ -236,7 +250,7 @@ ray_err_t ray_mkdir(const char* path) {
 
 ray_err_t ray_mkdir_p(const char* path) {
     if (!path || !*path) return RAY_ERR_IO;
-    char buf[1024];
+    char buf[RAY_PATH_MAX];
     size_t len = strlen(path);
     if (len >= sizeof(buf)) return RAY_ERR_IO;
     memcpy(buf, path, len + 1);
diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl
index 55b5c738..2b01e4d1 100644
--- a/test/rfl/arith/abs.rfl
+++ b/test/rfl/arith/abs.rfl
@@ -36,3 +36,14 @@
 (type (abs -5i))      -- 'i32
 (type (abs [-1h 2h])) -- 'I16
 (type (abs [-1i 2i])) -- 'I32
+
+;; INT_MIN edge: same overflow-wrap convention as neg — abs of INT_MIN
+;; returns INT_MIN (no UB).  Verified under UBSan.
+;; Literal -32768h / -2147483648i can't be parsed (parser tokenises
+;; positive then negates), so verify via i64 round-trip.
+(set MIN16 (as 'i16 (as 'i64 -32768)))
+(as 'i64 (abs MIN16)) -- -32768
+(type (abs MIN16))    -- 'i16
+(set MIN32 (as 'i32 (as 'i64 -2147483648)))
+(as 'i64 (abs MIN32)) -- -2147483648
+(type (abs MIN32))    -- 'i32
diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl
index 47e0444e..22bae9b0 100644
--- a/test/rfl/arith/neg.rfl
+++ b/test/rfl/arith/neg.rfl
@@ -62,3 +62,22 @@
 ;; null propagation across narrow types
 (nil? (neg 0Nh)) -- true
 (nil? (neg 0Ni)) -- true
+
+;; ──────────────────────────────────────────────────────────────────
+;; INT_MIN edge: -INT_MIN would be signed overflow (UB).  Implementation
+;; uses unsigned-wrap negation so the result wraps back to INT_MIN
+;; rather than triggering UBSan.  Stable across i16/i32/i64.
+;; ──────────────────────────────────────────────────────────────────
+
+;; INT_MIN constructed via cast — literal can't represent it directly
+;; because `-32768h` is parsed as negate-of-positive (32768 doesn't fit i16).
+;; Compare via widened-i64 form: (as 'i64 (neg INT16_MIN)) == INT16_MIN.
+(set MIN16 (as 'i16 (as 'i64 -32768)))
+(as 'i64 (neg MIN16)) -- -32768
+(type (neg MIN16))    -- 'i16
+(set MIN32 (as 'i32 (as 'i64 -2147483648)))
+(as 'i64 (neg MIN32)) -- -2147483648
+(type (neg MIN32))    -- 'i32
+
+;; INT64_MAX → -INT64_MAX (no overflow, just sign flip)
+(neg 9223372036854775807) -- -9223372036854775807
diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl
index 3e495ff4..ef471a1c 100644
--- a/test/rfl/strop/like.rfl
+++ b/test/rfl/strop/like.rfl
@@ -23,6 +23,14 @@
 (like "x" "") -- false
 (like "" "*") -- true
 
+;; ────────────── SQL meta-chars are LITERAL post-consolidation ──────────────
+;; Pre-consolidation the DAG path treated `%` and `_` as SQL wildcards.
+;; After unifying on glob in src/ops/glob.[ch], they're literal bytes.
+(like "100%" "100%") -- true    ;; literal '%' matches itself
+(like "abc"  "%")    -- false   ;; '%' is no longer "any chars"
+(like "a_b"  "a_b")  -- true    ;; literal '_' matches itself
+(like "ab"   "a_b")  -- false   ;; '_' is no longer "single char"
+
 ;; ────────────── universal-star metamorphic invariants ──────────────
 ;; `*` matches anything, including punctuation / digits / mixed bytes.
 (like "abc" "*") -- true