Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/rayforce.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,14 @@ int64_t ray_sym_intern(const char* str, size_t len);
int64_t ray_sym_find(const char* str, size_t len);
ray_t* ray_sym_str(int64_t id);
uint32_t ray_sym_count(void);

/* Borrow a snapshot of the sym → string array. Returns a pointer to
* the underlying ray_t** strings table along with its length; valid
* only while no concurrent ray_sym_intern occurs (i.e. read-only
* execution phases). Lock is taken once for the snapshot and dropped
* before return — caller may iterate freely. Both *out_strings and
* *out_count must be non-NULL. */
void ray_sym_strings_borrow(ray_t*** out_strings, uint32_t* out_count);
bool ray_sym_ensure_cap(uint32_t needed);
ray_err_t ray_sym_save(const char* path);
ray_err_t ray_sym_load(const char* path);
Expand Down
27 changes: 25 additions & 2 deletions src/io/csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,14 +595,31 @@ static bool csv_intern_strings(csv_strref_t** str_refs, int n_cols,
int64_t* col_max_ids,
uint8_t** col_nullmaps) {
bool ok = true;

/* Empty TSV/CSV fields are flagged in the parse-time nullmap (see
* CSV_TYPE_STR branch of the parse loop) — that's correct for STR
* columns where the null/empty distinction matters, but for SYM
* columns it conflates with the "no value" sentinel and breaks the
* SQL-style `(!= col "")` filter (which never excludes nulls in the
* q/k value-vs-null comparison kernel). Pre-intern "" once and
* remap null rows to that ID, clearing their null bit so the
* compare kernel takes the both-non-null branch. Net effect: the
* CSV format's "field is empty" — which can't be distinguished from
* "field is missing" anyway — round-trips through Rayforce as the
* empty SYM, matching how DuckDB / Spark / polars treat the same
* input. */
int64_t empty_sym_id = ray_sym_intern_prehashed(
(uint32_t)ray_hash_bytes("", 0), "", 0);
if (empty_sym_id < 0) empty_sym_id = 0; /* fall back to old behavior on intern failure */

for (int c = 0; c < n_cols; c++) {
if (col_types[c] != CSV_TYPE_STR) continue;
/* RAY_STR columns are materialized directly; skip sym interning. */
if (resolved_types[c] == RAY_STR) continue;
csv_strref_t* refs = str_refs[c];
uint32_t* ids = (uint32_t*)col_data[c];
uint8_t* nm = col_nullmaps ? col_nullmaps[c] : NULL;
int64_t max_id = 0;
int64_t max_id = empty_sym_id;

/* Pre-grow: upper bound is n_rows unique strings */
uint32_t current = ray_sym_count();
Expand All @@ -611,7 +628,13 @@ static bool csv_intern_strings(csv_strref_t** str_refs, int n_cols,

for (int64_t r = 0; r < n_rows; r++) {
if (nm && (nm[r >> 3] & (1u << (r & 7)))) {
ids[r] = 0;
ids[r] = (uint32_t)empty_sym_id;
/* Clear the null bit — this row now holds a real value
* (the empty SYM). Without this clear, fmt_raw_elem
* still prints "0Ns" and ray_eq_fn still routes through
* the null-vs-non-null branch (returning false for
* `== ""` and true for `!= ""`). */
nm[r >> 3] &= (uint8_t)~(1u << (r & 7));
continue;
}
uint32_t hash = (uint32_t)ray_hash_bytes(refs[r].ptr, refs[r].len);
Expand Down
101 changes: 96 additions & 5 deletions src/lang/eval.c
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,94 @@ ray_t* atomic_map_binary_op(ray_binary_fn fn, uint16_t dag_opcode, ray_t* left,
}
}
}
/* R7 fast path: (== or !=) of SYM-vec against a SYM atom.
*
* The DAG path above doesn't handle SYM (IS_NUM_TYPE excludes it),
* so without this, ray_neq_fn / ray_eq_fn fan out to one allocation
* per row in the slow loop. At 5M rows the per-element bool atom
* thrash dominates: `(!= URL nu)` standalone takes 113 ms when the
* raw work is one i64 lookup + N width-truncated cmpneq.
*
* Handles either operand order; output is RAY_BOOL. Nulls go
* through the q/k atom-vs-atom rules already in cmp.c (null≠value
* is true for NE) by applying the same logic per element. */
if (!force_boxed && (dag_opcode == OP_EQ || dag_opcode == OP_NE) &&
out_type == RAY_BOOL) {
int l_is_sym_vec = left_coll && ray_is_vec(left) && left->type == RAY_SYM;
int r_is_sym_vec = right_coll && ray_is_vec(right) && right->type == RAY_SYM;
int l_is_sym_atom = !left_coll && left && left->type == -RAY_SYM;
int r_is_sym_atom = !right_coll && right && right->type == -RAY_SYM;
if ((l_is_sym_vec && r_is_sym_atom) || (r_is_sym_vec && l_is_sym_atom)) {
ray_t* vv = l_is_sym_vec ? left : right;
ray_t* atom = l_is_sym_vec ? right : left;
int64_t n = vv->len;

ray_t* out = ray_vec_new(RAY_BOOL, n);
if (out && !RAY_IS_ERR(out)) {
out->len = n;
bool* obuf = (bool*)ray_data(out);
const void* src = ray_data(vv);
int8_t vt = vv->type;
uint8_t va = vv->attrs;
int atom_null = RAY_ATOM_IS_NULL(atom);
int64_t target = atom_null ? 0 : atom->i64;
int vec_has_nulls = (va & RAY_ATTR_HAS_NULLS) ? 1 : 0;
bool invert = (dag_opcode == OP_NE);

if (atom_null && !vec_has_nulls) {
/* Atom is null, vec has no nulls — every row is
* "not equal" to the null atom (== false, != true). */
bool fill = invert; /* != null → true; == null → false */
for (int64_t i = 0; i < n; i++) obuf[i] = fill;
} else if (!atom_null && !vec_has_nulls) {
/* Hot path: tight per-width loop, no per-element
* null checks. This is what ClickBench Q22..Q38
* with R6-cleaned columns actually hit. */
uint8_t w = (uint8_t)(va & RAY_SYM_W_MASK);
if (w == RAY_SYM_W8) {
const uint8_t* d = (const uint8_t*)src;
uint8_t t8 = (uint8_t)target;
for (int64_t i = 0; i < n; i++)
obuf[i] = (d[i] == t8) ^ invert;
} else if (w == RAY_SYM_W16) {
const uint16_t* d = (const uint16_t*)src;
uint16_t t16 = (uint16_t)target;
for (int64_t i = 0; i < n; i++)
obuf[i] = (d[i] == t16) ^ invert;
} else if (w == RAY_SYM_W32) {
const uint32_t* d = (const uint32_t*)src;
uint32_t t32 = (uint32_t)target;
for (int64_t i = 0; i < n; i++)
obuf[i] = (d[i] == t32) ^ invert;
} else { /* RAY_SYM_W64 */
const int64_t* d = (const int64_t*)src;
for (int64_t i = 0; i < n; i++)
obuf[i] = (d[i] == target) ^ invert;
}
} else {
/* General path: vec may have nulls, atom may be null.
* Apply q/k atom-rules per element so semantics match
* the slow path exactly. */
for (int64_t i = 0; i < n; i++) {
int row_null = ray_vec_is_null(vv, i);
int eq;
if (row_null && atom_null) eq = 1;
else if (row_null || atom_null) eq = 0;
else {
int64_t row_id = ray_read_sym(src, i, vt, va);
eq = (row_id == target);
}
obuf[i] = invert ? !eq : eq;
}
}
ray_release(e0);
return out;
}
if (out) ray_release(out);
/* Fall through to slow path on allocation failure. */
}
}

/* SLOW PATH: per-element scalar loop (fallback for mixed types, temporal, etc.) */
if (!force_boxed &&
(out_type == RAY_I64 || out_type == RAY_F64 || out_type == RAY_I32 ||
Expand Down Expand Up @@ -875,10 +963,6 @@ ray_t* gather_by_idx(ray_t* vec, int64_t* idx, int64_t n) {
case 1: for (int64_t i = 0; i < n; i++) dst[i] = src[idx[i]]; break;
default: for (int64_t i = 0; i < n; i++) memcpy(dst + i*esz, src + idx[i]*esz, esz); break;
}
if (vec->sym_dict) {
ray_retain(vec->sym_dict);
result->sym_dict = vec->sym_dict;
}
if (has_nulls) {
for (int64_t i = 0; i < n; i++)
if (ray_vec_is_null(vec, idx[i]))
Expand Down Expand Up @@ -2280,7 +2364,12 @@ static void ray_register_builtins(void) {
register_vary("update", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_update_fn);
register_vary("insert", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_insert_fn);
register_vary("upsert", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_upsert_fn);
register_binary("xbar", RAY_FN_ATOMIC, ray_xbar_fn);
/* xbar is registered NON-atomic so the call path lands in
* ray_xbar_fn(VEC, scalar) directly. ray_xbar_fn handles the
* vector fast path itself (tight per-element loop, no per-atom
* allocation) and recurses through atomic_map_binary for the rare
* (collection, collection) zip case. */
register_binary("xbar", RAY_FN_NONE, ray_xbar_fn);

/* Join operations */
register_vary("left-join", RAY_FN_NONE, ray_left_join_fn);
Expand All @@ -2294,6 +2383,8 @@ static void ray_register_builtins(void) {
register_vary("println", RAY_FN_NONE, ray_println_fn);
register_vary("show", RAY_FN_NONE, ray_show_fn);
register_vary("format", RAY_FN_NONE, ray_format_fn);
register_vary("read-csv", RAY_FN_RESTRICTED, ray_read_csv_fn);
register_vary("write-csv", RAY_FN_RESTRICTED, ray_write_csv_fn);
register_vary(".csv.read", RAY_FN_RESTRICTED, ray_read_csv_fn);
register_vary(".csv.write", RAY_FN_RESTRICTED, ray_write_csv_fn);
register_binary("as", RAY_FN_NONE, ray_cast_fn);
Expand Down
33 changes: 33 additions & 0 deletions src/ops/collection.c
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,39 @@ ray_t* ray_at_fn(ray_t* vec, ray_t* idx) {
return ray_dict_new(keys, vals);
}

/* Table row selection by index vector: apply the row ids to each
* column and return a table. Keep this before the generic collection
* fallback; otherwise a table indexed by millions of row ids becomes
* a LIST of row dictionaries. */
if (vec->type == RAY_TABLE && idx->type == RAY_I64) {
int64_t nrows = ray_table_nrows(vec);
int64_t nidx = ray_len(idx);
int64_t* ids = (int64_t*)ray_data(idx);
for (int64_t i = 0; i < nidx; i++) {
if (ids[i] < 0 || ids[i] >= nrows)
return ray_error("domain", NULL);
}

int64_t ncols = ray_table_ncols(vec);
ray_t* result = ray_table_new(ncols);
if (!result || RAY_IS_ERR(result)) return result ? result : ray_error("oom", NULL);
for (int64_t c = 0; c < ncols; c++) {
ray_t* col = ray_table_get_col_idx(vec, c);
int64_t name = ray_table_col_name(vec, c);
if (!col) continue;
ray_t* gathered = gather_by_idx(col, ids, nidx);
if (!gathered || RAY_IS_ERR(gathered)) {
ray_release(result);
return gathered ? gathered : ray_error("oom", NULL);
}
result = ray_table_add_col(result, name, gathered);
ray_release(gathered);
if (!result || RAY_IS_ERR(result))
return result ? result : ray_error("oom", NULL);
}
return result;
}

/* Dict key access: (at dict key) → value or 0Nl if missing */
if (vec->type == RAY_DICT) {
ray_t* v = ray_dict_get(vec, idx);
Expand Down
96 changes: 96 additions & 0 deletions src/ops/glob.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

#include "ops/glob.h"

#define _GNU_SOURCE
#include <string.h>

/* Lowercase an ASCII byte; non-ASCII passes through unchanged. */
static inline char to_lower(char c) {
return (c >= 'A' && c <= 'Z') ? (char)(c + 32) : c;
Expand Down Expand Up @@ -100,3 +103,96 @@ bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn) {
/* Case-insensitive variant of ray_glob_match: identical matching
 * semantics, but ASCII letters compare case-folded (delegates to
 * glob_impl with its ci flag raised). */
bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn) {
    const bool fold_case = true;
    return glob_impl(s, sn, p, pn, fold_case);
}

/* Classify a glob pattern into one of the simple shapes (exact /
 * prefix / suffix / contains / any). Strips at most one leading and
 * one trailing '*'; if anything between them is a glob metacharacter
 * (`*`, `?`, `[`), the pattern needs the general matcher and the
 * result stays RAY_GLOB_SHAPE_NONE. The returned `lit`/`lit_len`
 * view points into the caller's pattern buffer (no copy). */
ray_glob_compiled_t ray_glob_compile(const char* p, size_t pn) {
    ray_glob_compiled_t out = { RAY_GLOB_SHAPE_NONE, NULL, 0 };

    /* Degenerate pattern: "" is a literal that only matches "". */
    if (pn == 0) {
        out.shape = RAY_GLOB_SHAPE_EXACT;
        out.lit = p;
        out.lit_len = 0;
        return out;
    }

    bool star_front = (p[0] == '*');
    /* A lone "*" must not count as both a leading and a trailing
     * star — hence the pn > 1 guard. */
    bool star_back = (p[pn - 1] == '*') && (pn > 1 || !star_front);

    size_t begin = star_front ? 1 : 0;
    size_t end = star_back ? pn - 1 : pn;

    /* The residual between the stripped stars must be a pure
     * literal; any leftover metacharacter forces the full matcher. */
    for (size_t i = begin; i < end; i++) {
        switch (p[i]) {
            case '*':
            case '?':
            case '[':
                return out; /* shape stays RAY_GLOB_SHAPE_NONE */
            default:
                break;
        }
    }

    out.lit = p + begin;
    out.lit_len = end - begin;

    if (star_front && star_back)
        out.shape = (out.lit_len == 0) ? RAY_GLOB_SHAPE_ANY
                                       : RAY_GLOB_SHAPE_CONTAINS;
    else if (star_front)
        out.shape = RAY_GLOB_SHAPE_SUFFIX;
    else if (star_back)
        out.shape = RAY_GLOB_SHAPE_PREFIX;
    else
        out.shape = RAY_GLOB_SHAPE_EXACT;
    return out;
}

/* Match a single string against a pre-classified simple-shape
 * pattern (see ray_glob_compile). Caller must guarantee
 * c->shape != RAY_GLOB_SHAPE_NONE; NONE falls through to false
 * rather than silently matching everything.
 *
 * Fix: the previous CONTAINS fast path called glibc's memmem(),
 * whose declaration requires _GNU_SOURCE to be defined BEFORE the
 * first libc include — but this file defines it after including
 * ops/glob.h, so memmem could be undeclared (an implicit function
 * declaration: hard error in strict C99/C23 builds). The scan below
 * uses only ISO C memchr/memcmp — both libc-vectorised — so the
 * platform #if is no longer needed and the result is identical. */
bool ray_glob_match_compiled(const ray_glob_compiled_t* c,
                             const char* s, size_t sn) {
    switch (c->shape) {
        case RAY_GLOB_SHAPE_ANY:
            return true;
        case RAY_GLOB_SHAPE_EXACT:
            return sn == c->lit_len &&
                   (c->lit_len == 0 || memcmp(s, c->lit, c->lit_len) == 0);
        case RAY_GLOB_SHAPE_PREFIX:
            return sn >= c->lit_len &&
                   (c->lit_len == 0 || memcmp(s, c->lit, c->lit_len) == 0);
        case RAY_GLOB_SHAPE_SUFFIX:
            return sn >= c->lit_len &&
                   (c->lit_len == 0 ||
                    memcmp(s + sn - c->lit_len, c->lit, c->lit_len) == 0);
        case RAY_GLOB_SHAPE_CONTAINS: {
            if (c->lit_len == 0) return true;
            if (sn < c->lit_len) return false;
            /* First-byte skip scan: memchr jumps to each candidate
             * start, memcmp confirms. The search window excludes
             * positions where the needle could not fit. */
            const char first = c->lit[0];
            const char* cur = s;
            size_t left = sn;
            while (left >= c->lit_len) {
                const char* hit = (const char*)memchr(cur, first,
                                                      left - c->lit_len + 1);
                if (!hit) return false;
                if (memcmp(hit, c->lit, c->lit_len) == 0) return true;
                left -= (size_t)(hit - cur) + 1;
                cur = hit + 1;
            }
            return false;
        }
        case RAY_GLOB_SHAPE_NONE:
        default:
            /* Caller contract violation — refuse to match rather
             * than silently matching everything. */
            return false;
    }
}
43 changes: 43 additions & 0 deletions src/ops/glob.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,47 @@
bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn);
bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn);

/* ---- Pre-compiled pattern fast path -------------------------------------
* Many LIKE workloads have very simple patterns (e.g. `*google*`). When
* the pattern has no metacharacters except (optionally) a leading `*`
* and/or a trailing `*`, the match collapses to a literal substring /
* prefix / suffix / equality test that we can drive with memcmp /
* memmem — both libc-vectorised on modern glibc. Detect the shape once
* up front, then run the entire dictionary (or row vector) through a
* single tight loop.
*
* Shapes:
* RAY_GLOB_SHAPE_NONE — pattern needs the full glob matcher
* RAY_GLOB_SHAPE_EXACT — no `*`/`?`/`[` — literal equality
* RAY_GLOB_SHAPE_PREFIX — `<lit>*` — strncmp prefix
* RAY_GLOB_SHAPE_SUFFIX — `*<lit>` — tail equality
* RAY_GLOB_SHAPE_CONTAINS — `*<lit>*` — memmem
* RAY_GLOB_SHAPE_ANY — pattern is "*" — always true
* The compiled struct caches a pointer/length into the original
* pattern buffer, so the caller must keep the pattern alive while the
* compiled view is in use. */
/* Shape classification for a compiled simple glob pattern.
 * NOTE: enumerator order is load-bearing — ray_glob_compile's
 * brace initializer relies on RAY_GLOB_SHAPE_NONE being 0. */
typedef enum {
    RAY_GLOB_SHAPE_NONE = 0,   /* needs the general glob matcher */
    RAY_GLOB_SHAPE_EXACT,      /* no metachars — literal equality */
    RAY_GLOB_SHAPE_PREFIX,     /* "<lit>*" — head comparison */
    RAY_GLOB_SHAPE_SUFFIX,     /* "*<lit>" — tail comparison */
    RAY_GLOB_SHAPE_CONTAINS,   /* "*<lit>*" — substring search */
    RAY_GLOB_SHAPE_ANY,        /* "*" — matches every string */
} ray_glob_shape_t;

/* Compiled view of a simple-shape pattern. `lit` points INTO the
 * original pattern buffer (no copy is taken), so the caller must
 * keep the pattern alive while this struct is in use. */
typedef struct {
    ray_glob_shape_t shape;
    const char* lit;      /* literal substring inside the pattern */
    size_t lit_len;       /* byte length of that literal */
} ray_glob_compiled_t;

/* Classify a pattern. Returns the simplest matching shape; falls back
* to RAY_GLOB_SHAPE_NONE when the pattern needs the general matcher. */
ray_glob_compiled_t ray_glob_compile(const char* p, size_t pn);

/* Match a single string against a compiled simple-shape pattern.
* Caller must guarantee shape != RAY_GLOB_SHAPE_NONE. */
bool ray_glob_match_compiled(const ray_glob_compiled_t* c,
const char* s, size_t sn);

#endif /* RAY_OPS_GLOB_H */
Loading