diff --git a/src/lang/eval.c b/src/lang/eval.c index ab63ada..0780202 100644 --- a/src/lang/eval.c +++ b/src/lang/eval.c @@ -2021,8 +2021,11 @@ static void ray_register_builtins(void) { register_binary_op("<=", RAY_FN_ATOMIC, ray_lte_fn, OP_LE); register_binary_op("==", RAY_FN_ATOMIC, ray_eq_fn, OP_EQ); register_binary_op("!=", RAY_FN_ATOMIC, ray_neq_fn, OP_NE); - register_vary("and", RAY_FN_NONE, ray_and_vary_fn); - register_vary("or", RAY_FN_NONE, ray_or_vary_fn); + /* Special-form so args are passed unevaluated and the kernel can + * short-circuit on the first determining scalar (matches v1 and the + * Lisp/Clojure convention). */ + register_vary("and", RAY_FN_SPECIAL_FORM, ray_and_vary_fn); + register_vary("or", RAY_FN_SPECIAL_FORM, ray_or_vary_fn); register_unary_op("not", RAY_FN_NONE, ray_not_fn, OP_NOT); register_unary_op("neg", RAY_FN_ATOMIC, ray_neg_fn, OP_NEG); register_unary("round", RAY_FN_ATOMIC, ray_round_fn); diff --git a/src/ops/arith.c b/src/ops/arith.c index e840d01..72d92ba 100644 --- a/src/ops/arith.c +++ b/src/ops/arith.c @@ -331,14 +331,23 @@ ray_t* ray_mod_fn(ray_t* a, ray_t* b) { ray_t* ray_neg_fn(ray_t* x) { if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } if (x->type == -RAY_F64) return make_f64(-x->f64); - /* Negate via unsigned to avoid signed-overflow UB on INT_MIN. - * Wraparound is defined for unsigned types; (T)(uT)(-(uT)x) yields - * the same wrapped value the corresponding two's-complement - * arithmetic would produce — so (neg INT_MIN) returns INT_MIN - * (overflow-wrap) consistently with binary `(- 0 INT_MIN)`. */ - if (x->type == -RAY_I64) return make_i64((int64_t)(-(uint64_t)x->i64)); - if (x->type == -RAY_I32) return make_i32((int32_t)(-(uint32_t)x->i32)); - if (x->type == -RAY_I16) return make_i16((int16_t)(-(uint16_t)x->i16)); + /* INT_MIN is the lone overflow case for signed negation: -INT_MIN + * doesn't fit in the same width. 
Per k/q convention, surface this + * as a typed null of the same width — preserving type, avoiding UB, + * and giving the caller a `nil?`-detectable signal that overflow + * happened. Consistent with how `(neg 0Ni) → 0Ni` propagates. */ + if (x->type == -RAY_I64) { + if (RAY_UNLIKELY(x->i64 == INT64_MIN)) return ray_typed_null(-RAY_I64); + return make_i64(-x->i64); + } + if (x->type == -RAY_I32) { + if (RAY_UNLIKELY(x->i32 == INT32_MIN)) return ray_typed_null(-RAY_I32); + return make_i32(-x->i32); + } + if (x->type == -RAY_I16) { + if (RAY_UNLIKELY(x->i16 == INT16_MIN)) return ray_typed_null(-RAY_I16); + return make_i16(-x->i16); + } return ray_error("type", NULL); } @@ -366,15 +375,25 @@ ray_t* ray_ceil_fn(ray_t* x) { return ray_error("type", NULL); } -/* abs: absolute value, preserves type. Uses unsigned-wrap negation - * for the negative branch — same overflow-wrap semantics as `neg`, - * so (abs INT_MIN) returns INT_MIN rather than UB. */ +/* abs: absolute value, preserves type. INT_MIN has no representable + * positive in the same width — return a typed null instead (same + * convention as `neg`). Stops `(abs -32768h) → -32768h` (negative + * result from abs!) and `(abs INT_MIN)` UB simultaneously. */ ray_t* ray_abs_fn(ray_t* x) { if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } if (x->type == -RAY_F64) return make_f64(fabs(x->f64)); - if (x->type == -RAY_I64) return make_i64(x->i64 < 0 ? (int64_t)(-(uint64_t)x->i64) : x->i64); - if (x->type == -RAY_I32) return make_i32(x->i32 < 0 ? (int32_t)(-(uint32_t)x->i32) : x->i32); - if (x->type == -RAY_I16) return make_i16(x->i16 < 0 ? (int16_t)(-(uint16_t)x->i16) : x->i16); + if (x->type == -RAY_I64) { + if (RAY_UNLIKELY(x->i64 == INT64_MIN)) return ray_typed_null(-RAY_I64); + return make_i64(x->i64 < 0 ? -x->i64 : x->i64); + } + if (x->type == -RAY_I32) { + if (RAY_UNLIKELY(x->i32 == INT32_MIN)) return ray_typed_null(-RAY_I32); + return make_i32(x->i32 < 0 ? 
-x->i32 : x->i32); + } + if (x->type == -RAY_I16) { + if (RAY_UNLIKELY(x->i16 == INT16_MIN)) return ray_typed_null(-RAY_I16); + return make_i16(x->i16 < 0 ? -x->i16 : x->i16); + } return ray_error("type", NULL); } diff --git a/src/ops/cmp.c b/src/ops/cmp.c index df47e36..f0beae6 100644 --- a/src/ops/cmp.c +++ b/src/ops/cmp.c @@ -22,6 +22,9 @@ */ #include "lang/internal.h" +#include "ops/ops.h" /* RAY_LAZY, ray_is_lazy, ray_lazy_materialize */ + +#include <assert.h> /* Helper: compare char atom vs string atom. * Returns: -1 if no char/string pair, else memcmp-like result via *out. */ @@ -45,24 +48,24 @@ int char_str_cmp(ray_t* a, ray_t* b, int *out) { * the backing STR via ray_sym_str and delegate to ray_str_cmp, which * uses the 12-byte SSO inline path for short symbols. * - * If a sym_str lookup fails (NULL — e.g. corrupted intern table or - * uninitialised state) we fall back to comparing the raw interned ids - * rather than declaring the unequal symbols equal. Stable, never - * silently collapses distinct symbols. */ + * Invariant: any valid SYM atom resolves to its interned string. A + * NULL from ray_sym_str means corruption (uninitialised intern table, + * out-of-range id, or evicted slot) — no defensible total order exists + * in that state. We assert and let the process abort rather than + * fabricate an answer (returning 0 silently collapses distinct symbols; + * returning ±1 by raw id invents a non-lexicographic ordering that + * still lies about the contract). Matches v1 behaviour, which also + * trusts the invariant (and would SIGSEGV via strcmp(NULL,...) if it + * broke). */ int sym_atom_cmp(ray_t* a, ray_t* b) { if (a->i64 == b->i64) return 0; ray_t* sa = ray_sym_str(a->i64); ray_t* sb = ray_sym_str(b->i64); - int r; - if (sa && sb) { - r = ray_str_cmp(sa, sb); - } else { - /* Fallback: order by interned id (stable, total). Same sign - * convention as memcmp: negative if a < b, positive if a > b. */ - r = (a->i64 < b->i64) ?
-1 : 1; - } - if (sa) ray_release(sa); - if (sb) ray_release(sb); + assert(sa && sb && "sym_atom_cmp: corrupted intern table — " + "valid SYM atom must resolve to interned string"); + int r = ray_str_cmp(sa, sb); + ray_release(sa); + ray_release(sb); return r; } @@ -249,30 +252,63 @@ ray_t* ray_or_fn(ray_t* a, ray_t* b) { return make_bool((is_truthy(a) || is_truthy(b)) ? 1 : 0); } -/* Variadic left-fold over the binary kernels. (and a b c) folds as - * (and (and a b) c) — same shape Lisp/Clojure use. */ +/* Special-form variadic AND/OR with short-circuit (matches v1). + * + * `args` are UNEVALUATED AST nodes — registered with RAY_FN_SPECIAL_FORM + * so the evaluator hands us raw forms rather than computed values. We + * call ray_eval per arg ourselves and stop as soon as the result is + * determined: AND on first scalar falsy, OR on first scalar truthy. + * + * Mixed scalar+vector: when the running accumulator becomes a *scalar* + * with the determining truth value, we return it immediately — same + * shape as Lisp/Clojure where short-circuit yields the determinant. + * If the accumulator is a vector we cannot short-circuit (subsequent + * args may be vectors that still need element-wise combination), so we + * fall through to ray_and_fn / ray_or_fn for that step. */ +static ray_t* eval_and_short(ray_t* arg) { + ray_t* v = ray_eval(arg); + if (!v || RAY_IS_ERR(v)) return v; + if (ray_is_lazy(v)) v = ray_lazy_materialize(v); + return v; +} + ray_t* ray_and_vary_fn(ray_t** args, int64_t n) { if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n); - ray_t* acc = ray_and_fn(args[0], args[1]); + ray_t* acc = eval_and_short(args[0]); if (!acc || RAY_IS_ERR(acc)) return acc; - for (int64_t i = 2; i < n; i++) { - ray_t* next = ray_and_fn(acc, args[i]); + /* Short-circuit only when the running result is a *scalar* falsy. 
+ * If acc is a vector, subsequent args still need element-wise + * combination (so `(and vec false)` broadcasts to all-false vector + * of acc's shape rather than a bare scalar). */ + if (ray_is_atom(acc) && !is_truthy(acc)) return acc; + for (int64_t i = 1; i < n; i++) { + ray_t* v = eval_and_short(args[i]); + if (!v || RAY_IS_ERR(v)) { ray_release(acc); return v; } + ray_t* next = ray_and_fn(acc, v); ray_release(acc); + ray_release(v); if (!next || RAY_IS_ERR(next)) return next; acc = next; + if (ray_is_atom(acc) && !is_truthy(acc)) return acc; } return acc; } ray_t* ray_or_vary_fn(ray_t** args, int64_t n) { if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n); - ray_t* acc = ray_or_fn(args[0], args[1]); + ray_t* acc = eval_and_short(args[0]); if (!acc || RAY_IS_ERR(acc)) return acc; - for (int64_t i = 2; i < n; i++) { - ray_t* next = ray_or_fn(acc, args[i]); + /* Short-circuit only on scalar truthy accumulator (see AND comment). */ + if (ray_is_atom(acc) && is_truthy(acc)) return acc; + for (int64_t i = 1; i < n; i++) { + ray_t* v = eval_and_short(args[i]); + if (!v || RAY_IS_ERR(v)) { ray_release(acc); return v; } + ray_t* next = ray_or_fn(acc, v); ray_release(acc); + ray_release(v); if (!next || RAY_IS_ERR(next)) return next; acc = next; + if (ray_is_atom(acc) && is_truthy(acc)) return acc; } return acc; } diff --git a/src/ops/expr.c b/src/ops/expr.c index ba5934b..b0f2da6 100644 --- a/src/ops/expr.c +++ b/src/ops/expr.c @@ -926,6 +926,35 @@ static void expr_full_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t e scratch_free(scratch_hdr); } +/* Post-pass for the fused unary path: |INT64_MIN| and -INT64_MIN don't fit in + * i64 (signed-overflow; k/q convention surfaces this as typed null). The + * element-wise loop uses unsigned wrap, so any overflow position lands as + * INT64_MIN in data. Convert each such position to typed-null: zero data[i] + * (preserve "null position is 0" invariant) and set the null bit. 
Caller + * must invoke single-threaded — after pool dispatch joins. */ +static void mark_i64_overflow_as_null(ray_t* result, int64_t off, int64_t len) { + int64_t* d = (int64_t*)ray_data(result) + off; + for (int64_t i = 0; i < len; i++) { + if (RAY_UNLIKELY(d[i] == INT64_MIN)) { + d[i] = 0; + ray_vec_set_null(result, off + i, true); + } + } +} + +/* The fused unary path may produce INT64_MIN via signed-overflow only for + * OP_NEG and OP_ABS over an i64 source (output type i64). Detect those + * shapes from the last instruction in the compiled expression. */ +static bool expr_last_op_overflows_i64(const ray_expr_t* expr) { + if (expr->out_type != RAY_I64 || expr->n_ins == 0) return false; + const expr_ins_t* last = &expr->ins[expr->n_ins - 1]; + if (last->opcode != OP_NEG && last->opcode != OP_ABS) return false; + if (last->src2 != 0xFF) return false; /* unary only */ + if (expr->regs[last->src1].type != RAY_I64) return false; + if (expr->regs[last->dst].type != RAY_I64) return false; + return true; +} + /* Evaluate compiled expression over parted (segmented) columns. * Iterates segments as outer loop, rebinds data pointers per segment, * then dispatches the existing morsel evaluator per segment. Zero copy. */ @@ -991,6 +1020,8 @@ static ray_t* expr_eval_full_parted(const ray_expr_t* expr, int64_t nrows) { global_off += seg_len; } + if (expr_last_op_overflows_i64(expr)) + mark_i64_overflow_as_null(out, 0, nrows); return out; } @@ -1014,6 +1045,8 @@ ray_t* expr_eval_full(const ray_expr_t* expr, int64_t nrows) { else expr_full_fn(&ctx, 0, 0, nrows); + if (expr_last_op_overflows_i64(expr)) + mark_i64_overflow_as_null(out, 0, nrows); return out; } @@ -1272,6 +1305,13 @@ ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input) { } } + /* OP_NEG/OP_ABS over i64: |INT64_MIN| and -INT64_MIN don't fit — surface + * as typed null (k/q convention). Loop above used unsigned wrap, so + * overflow positions land as INT64_MIN in data; convert them to null. 
*/ + if (out_type == RAY_I64 && in_type == RAY_I64 && + (op->opcode == OP_NEG || op->opcode == OP_ABS)) + mark_i64_overflow_as_null(result, 0, len); + return result; } diff --git a/src/ops/glob.h b/src/ops/glob.h index 63aa295..71bc3a2 100644 --- a/src/ops/glob.h +++ b/src/ops/glob.h @@ -19,6 +19,14 @@ * [a-z] — range * [!abc] — negated class * + * Matching a literal metacharacter — there is no backslash escape; wrap + * the character in a one-element class instead: + * [*] matches a literal '*' + * [?] matches a literal '?' + * [[] matches a literal '[' + * []] matches a literal ']' (']' as first char inside [...] is literal) + * [-] matches a literal '-' (as the sole char, no range to form) + * * `glob_match` is case-sensitive. `glob_match_ci` lowercases ASCII letters * on both sides before comparing (so it matches 'A' against 'a', 'A-Z' * range matches both case forms, etc.). diff --git a/src/ops/query.c b/src/ops/query.c index dd1d756..aed45e9 100644 --- a/src/ops/query.c +++ b/src/ops/query.c @@ -819,6 +819,55 @@ static ray_op_t* compile_expr_dag(ray_graph_t* g, ray_t* expr) { return &g->nodes[chain_id]; } + /* Variadic `and`/`or`: fold into a balanced binary tree. + * `(and a b c d)` → `(and (and a b) (and c d))` — depth log2(N). + * Without this, n>=4 falls through `compile_expr_dag` and the + * caller (e.g. select WHERE) reports "WHERE predicate not + * supported by DAG compiler". The fused-expr executor evaluates + * the resulting tree as a sequence of binary AND/OR instructions + * sharing scratch registers — no extra column allocations vs + * what hand-nested binary forms already do. + * + * Balanced tree (rather than left-fold) keeps the canonical + * shape symmetric and minimises dependency-chain depth, which + * future OoO / parallel-instruction executors can exploit. 
*/ + if (n >= 4) { + bool is_and = (fname_len == 3 && memcmp(fname, "and", 3) == 0); + bool is_or = (fname_len == 2 && memcmp(fname, "or", 2) == 0); + if (is_and || is_or) { + int64_t k = n - 1; + if (k > 64) return NULL; /* depth/space guard */ + uint32_t arg_ids[64]; + for (int64_t i = 0; i < k; i++) { + ray_op_t* a = compile_expr_dag(g, elems[i + 1]); + if (!a) return NULL; + arg_ids[i] = a->id; + } + dag_binary_ctor ctor = is_and ? ray_and : ray_or; + /* Iterative pairwise reduction: at each round, fold + * adjacent pairs into a single node, halving the count. + * Equivalent to recursive bisect but avoids a helper. */ + int64_t cnt = k; + while (cnt > 1) { + int64_t out = 0; + for (int64_t i = 0; i + 1 < cnt; i += 2) { + /* make_binary re-resolves both inputs via stored + * IDs after its own potential realloc, so the + * pointers we pass here are safe to use. */ + ray_op_t* l = &g->nodes[arg_ids[i]]; + ray_op_t* r = &g->nodes[arg_ids[i + 1]]; + ray_op_t* combined = ctor(g, l, r); + if (!combined) return NULL; + arg_ids[out++] = combined->id; + } + if (cnt & 1) /* carry odd tail */ + arg_ids[out++] = arg_ids[cnt - 1]; + cnt = out; + } + return &g->nodes[arg_ids[0]]; + } + } + /* Binary op? */ if (n == 3) { dag_binary_ctor ctor = resolve_binary_dag(fn_sym); diff --git a/test/rfl/arith/abs.rfl b/test/rfl/arith/abs.rfl index 2b01e4d..a3875fb 100644 --- a/test/rfl/arith/abs.rfl +++ b/test/rfl/arith/abs.rfl @@ -37,13 +37,45 @@ (type (abs [-1h 2h])) -- 'I16 (type (abs [-1i 2i])) -- 'I32 -;; INT_MIN edge: same overflow-wrap convention as neg — abs of INT_MIN -;; returns INT_MIN (no UB). Verified under UBSan. -;; Literal -32768h / -2147483648i can't be parsed (parser tokenises -;; positive then negates), so verify via i64 round-trip. 
-(set MIN16 (as 'i16 (as 'i64 -32768))) -(as 'i64 (abs MIN16)) -- -32768 -(type (abs MIN16)) -- 'i16 -(set MIN32 (as 'i32 (as 'i64 -2147483648))) -(as 'i64 (abs MIN32)) -- -2147483648 -(type (abs MIN32)) -- 'i32 +;; INT_MIN edge: |INT_MIN| doesn't fit in the same width — abs +;; returns a typed null of the same width (k/q convention). Stops +;; the broken `(abs -32768h) → -32768h` (negative result from abs!) +;; behaviour and avoids signed-overflow UB. + +;; (- (neg 32767h) 1h) = -32768h = INT16_MIN; (abs INT16_MIN) → 0Nh +(nil? (abs (- (neg 32767h) 1h))) -- true +(type (abs (- (neg 32767h) 1h))) -- 'i16 + +(nil? (abs (- (neg 2147483647i) 1i))) -- true +(type (abs (- (neg 2147483647i) 1i))) -- 'i32 + +(nil? (abs (- (neg 9223372036854775807) 1))) -- true +(type (abs (- (neg 9223372036854775807) 1))) -- 'i64 + +;; values adjacent to INT_MIN that fit — abs works normally +(abs -32767h) -- 32767h +(abs -2147483647i) -- 2147483647i +(abs -9223372036854775807) -- 9223372036854775807 + +;; ────────────────────────────────────────────────────────────────── +;; DAG (fused expression) path — `(select {x: (abs col) from: t})` +;; over a column containing INT64_MIN must surface that row as typed +;; null, not as a negative value (the broken "abs returns INT_MIN" +;; case Anton flagged on PR #8). +;; +;; Vec literal `[...]` only accepts atom literals, so we build the +;; INT64_MIN-bearing column via `concat` of a typed atom. +;; ────────────────────────────────────────────────────────────────── + +(set Va (concat -9223372036854775808 (concat -5 (concat 5 0)))) +(set Ta (table [v] (list Va))) +(set Ra (select {x: (abs v) from: Ta})) +(nil? (at (at Ra 'x) 0)) -- true ;; INT64_MIN row → null +(at (at Ra 'x) 1) -- 5 +(at (at Ra 'x) 2) -- 5 +(at (at Ra 'x) 3) -- 0 + +;; vector eval-path (no fused DAG) — goes through ray_abs_fn per element. +(nil? 
(at (abs Va) 0)) -- true +(at (abs Va) 1) -- 5 +(at (abs Va) 2) -- 5 diff --git a/test/rfl/arith/neg.rfl b/test/rfl/arith/neg.rfl index 22bae9b..f86cb4a 100644 --- a/test/rfl/arith/neg.rfl +++ b/test/rfl/arith/neg.rfl @@ -64,20 +64,45 @@ (nil? (neg 0Ni)) -- true ;; ────────────────────────────────────────────────────────────────── -;; INT_MIN edge: -INT_MIN would be signed overflow (UB). Implementation -;; uses unsigned-wrap negation so the result wraps back to INT_MIN -;; rather than triggering UBSan. Stable across i16/i32/i64. +;; INT_MIN: -INT_MIN doesn't fit in the same signed width (overflow). +;; Per k/q convention neg surfaces this as a typed null of the same +;; width — type preserved, no UB, detectable via `nil?`. ;; ────────────────────────────────────────────────────────────────── -;; INT_MIN constructed via cast — literal can't represent it directly -;; because `-32768h` is parsed as negate-of-positive (32768 doesn't fit i16). -;; Compare via widened-i64 form: (as 'i64 (neg INT16_MIN)) == INT16_MIN. -(set MIN16 (as 'i16 (as 'i64 -32768))) -(as 'i64 (neg MIN16)) -- -32768 -(type (neg MIN16)) -- 'i16 -(set MIN32 (as 'i32 (as 'i64 -2147483648))) -(as 'i64 (neg MIN32)) -- -2147483648 -(type (neg MIN32)) -- 'i32 - -;; INT64_MAX → -INT64_MAX (no overflow, just sign flip) -(neg 9223372036854775807) -- -9223372036854775807 +;; (- (neg 32767h) 1h) = -32768h = INT16_MIN; (neg INT16_MIN) → 0Nh +(nil? (neg (- (neg 32767h) 1h))) -- true +(type (neg (- (neg 32767h) 1h))) -- 'i16 + +(nil? (neg (- (neg 2147483647i) 1i))) -- true +(type (neg (- (neg 2147483647i) 1i))) -- 'i32 + +(nil? 
(neg (- (neg 9223372036854775807) 1))) -- true +(type (neg (- (neg 9223372036854775807) 1))) -- 'i64 + +;; values adjacent to INT_MIN that DON'T overflow — neg works normally +(neg 32767h) -- -32767h +(neg 2147483647i) -- -2147483647i +(neg 9223372036854775807) -- -9223372036854775807 + +;; ────────────────────────────────────────────────────────────────── +;; DAG (fused expression) path — same INT64_MIN → typed null contract +;; through `(select {x: (neg col) from: t})`. Without the post-pass +;; in expr_eval_full, the unsigned-wrap loop returned INT64_MIN as a +;; bare value and the result lacked HAS_NULLS — silent overflow. +;; +;; Vec literal `[...]` only accepts atom literals, so we build the +;; INT64_MIN-bearing column via `concat` of a typed atom. +;; ────────────────────────────────────────────────────────────────── + +(set Vn (concat -9223372036854775808 (concat -5 (concat 5 0)))) +(set Tn (table [v] (list Vn))) +(set Rn (select {x: (neg v) from: Tn})) +(nil? (at (at Rn 'x) 0)) -- true ;; INT64_MIN row → null +(at (at Rn 'x) 1) -- 5 +(at (at Rn 'x) 2) -- -5 +(at (at Rn 'x) 3) -- 0 + +;; vector eval-path (no fused DAG) — `(neg col)` directly over a vec +;; goes through atomic_map_unary → ray_neg_fn per element. +(nil? (at (neg Vn) 0)) -- true +(at (neg Vn) 1) -- 5 diff --git a/test/rfl/cmp/and.rfl b/test/rfl/cmp/and.rfl index 973d2d5..143fbb3 100644 --- a/test/rfl/cmp/and.rfl +++ b/test/rfl/cmp/and.rfl @@ -35,3 +35,17 @@ ;; ── arity boundaries ── (and) !- arity (and true) !- arity + +;; ── short-circuit semantics (matches v1 FN_SPECIAL_FORM) ── +;; PR #8 dropped FN_SPECIAL_FORM, breaking v1's contract. Restored: +;; subsequent args are NOT evaluated once the result is determined. +;; If short-circuit were broken, `undefined-name` would surface as +;; `error: name` and these tests would fail. 
+(and false undefined-name) -- false +(and false true undefined-name) -- false +(and true false undefined-name) -- false +(and true true false undefined-name) -- false + +;; Short-circuit must NOT trigger when the result is still undetermined — +;; full chain runs with all-truthy args. +(and true true 1) -- true diff --git a/test/rfl/cmp/or.rfl b/test/rfl/cmp/or.rfl index cedfb08..a88730f 100644 --- a/test/rfl/cmp/or.rfl +++ b/test/rfl/cmp/or.rfl @@ -40,3 +40,15 @@ ;; ── arity boundaries ── (or) !- arity (or false) !- arity + +;; ── short-circuit semantics (matches v1 FN_SPECIAL_FORM) ── +;; Subsequent args are NOT evaluated once a scalar truthy is seen. +;; If short-circuit were broken, `undefined-name` would surface as +;; `error: name` and these tests would fail. +(or true undefined-name) -- true +(or false true undefined-name) -- true +(or false false true undefined-name) -- true + +;; Short-circuit must NOT trigger when the result is still undetermined — +;; full chain runs with all-falsy args until the final true. +(or false false 1) -- true diff --git a/test/rfl/integration/dag_binary_ops.rfl b/test/rfl/integration/dag_binary_ops.rfl index 7101385..79cf99d 100644 --- a/test/rfl/integration/dag_binary_ops.rfl +++ b/test/rfl/integration/dag_binary_ops.rfl @@ -64,3 +64,35 @@ (set Tnan (table [a b] (list [1.0 2.0 0Nf 3.0] [1.0 2.0 2.0 0Nf]))) (sum (as 'I64 (at (select {x: (== a b) from: Tnan}) 'x))) -- 2 (sum (as 'I64 (at (select {x: (!= a b) from: Tnan}) 'x))) -- 2 + +;; ────────────── variadic AND/OR in WHERE — DAG auto-folds ────────────── +;; PR #8 turned `and`/`or` into eval-time variadic, dropping their +;; binary OP_AND/OP_OR opcode registration. `compile_expr_dag` still +;; recognises 2-arg forms via `resolve_binary_dag` but n>=4 used to +;; bail out with "WHERE predicate not supported by DAG compiler" — +;; outright correctness regression, not the silent perf cliff Anton +;; flagged. 
query.c now folds variadic AND/OR into a balanced binary +;; tree before lowering, so 3+, 4+, ... -arg forms compile to the same +;; fused-expr execution as the hand-nested equivalent. +(set Tand (table [a b c d] (list [1 2 3 4 5 6 7 8 9 10] [10 20 30 40 50 60 70 80 90 100] [100 200 300 400 500 600 700 800 900 1000] [0 1 0 1 0 1 0 1 0 1]))) + +;; 3-arg AND in WHERE — same result as nested (and (and ...) ...) +(count (select {from: Tand where: (and (> a 2) (> b 20) (> c 200))})) -- 8 +(count (select {from: Tand where: (and (and (> a 2) (> b 20)) (> c 200))})) -- 8 + +;; 4-arg AND in WHERE — same result as nested +(count (select {from: Tand where: (and (> a 2) (> b 20) (> c 200) (== d 1))})) -- 4 +(count (select {from: Tand where: (and (and (and (> a 2) (> b 20)) (> c 200)) (== d 1))})) -- 4 + +;; 3-arg OR in WHERE +(count (select {from: Tand where: (or (== a 1) (== a 5) (== a 10))})) -- 3 + +;; 5-arg OR in WHERE — every odd index +(count (select {from: Tand where: (or (== a 1) (== a 3) (== a 5) (== a 7) (== a 9))})) -- 5 + +;; nested AND/OR mix — `(and (or ...) (or ...))` +(count (select {from: Tand where: (and (or (== a 1) (== a 3) (== a 5)) (or (== d 0) (== d 1)))})) -- 3 + +;; correctness pin: variadic `and` must agree with composing two selects +;; (fold semantics is real conjunction). +(count (select {from: Tand where: (and (> a 2) (> b 20) (> c 200))})) -- (count (select {from: (select {from: Tand where: (and (> a 2) (> b 20))}) where: (> c 200)})) diff --git a/test/rfl/strop/like.rfl b/test/rfl/strop/like.rfl index ef471a1..5495cfc 100644 --- a/test/rfl/strop/like.rfl +++ b/test/rfl/strop/like.rfl @@ -31,6 +31,17 @@ (like "a_b" "a_b") -- true ;; literal '_' matches itself (like "ab" "a_b") -- false ;; '_' is no longer "single char" +;; ────────────── escaping metacharacters via [ ... ] ────────────── +;; No backslash escape: wrap the metachar in a one-element class to +;; match it literally. Documented in src/ops/glob.h. 
+(like "abc*" "abc[*]") -- true ;; literal '*' +(like "what?" "what[?]") -- true ;; literal '?' +(like "[error]" "[[]error[]]") -- true ;; literal '[' and ']' +(like "a-z" "a[-]z") -- true ;; literal '-' (sole char in class) +(like "100%" "100[%]") -- true ;; literal '%' (also a literal byte + ;; on its own, but [%] proves the + ;; class-wrap idiom works) + ;; ────────────── universal-star metamorphic invariants ────────────── ;; `*` matches anything, including punctuation / digits / mixed bytes. (like "abc" "*") -- true