diff --git a/CLAUDE.md b/CLAUDE.md index d5cffa5..e5039a8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,8 +6,6 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Rust JSON decoder (`cdylib` + `rlib`) exposed to LuaJIT via FFI. Optimized for parse-once / extract-a-few-fields / discard. The competitive edge over `lua-cjson` comes from **never building a Lua table** — Phase 1 records only structural offsets, Phase 2 lazily decodes the fields the caller actually asks for. Crate name in `Cargo.toml` is `lua-quick-decode`; the compiled artifact is `libquickdecode.so`. -Authoritative design doc: `docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md`. - ## Common commands The `Makefile` is the canonical entry point; `make help` lists targets. diff --git a/README.md b/README.md index b2fdd3d..7aac859 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ Rust-implemented fast JSON decoder exposed to LuaJIT via FFI. Optimized for the common case where a large JSON is parsed once and only a small number of fields are extracted before the document is discarded. -Design document: `docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md`. - ## Status Initial implementation complete: scalar + AVX2/PCLMUL structural scanner, root-path and cursor APIs, escape-decoded strings, integer/float/bool/typeof/len, FFI panic barrier, and a LuaJIT wrapper. Rust unit/integration tests and Lua busted tests run in CI. The benchmark harness compares against lua-cjson but tuning is pending — see `Roadmap / Deferred` below. @@ -38,6 +36,41 @@ local model = body:get_str("model") local temp = body:get_f64("temperature") ``` +### Lazy table API (`qd.decode` / `qd.encode`) + +For callers migrating from `cjson`, an alternative API returns a table-shaped +lazy view. Reads, iteration, and length all work like a `cjson.decode`'d +table; writes materialize the affected level into a plain Lua table. 
+ +```lua +local qd = require("quickdecode") +local cjson = require("cjson") -- optional; provides null / empty_array sentinels + +local t = qd.decode(json_str) + +print(t.model) +for _, m in qd.ipairs(t.messages) do + print(m.role, m.content) +end + +t.extra = "x" + +local s = qd.encode(t) -- drop-in replacement for cjson.encode +``` + +`qd.encode` works on lazy proxies (re-emitting unmodified subtrees as the +original JSON bytes), real Lua tables (matching `cjson.encode` output), and +mixed trees. Callers cannot pass a lazy proxy directly to `cjson.encode` +(cjson bypasses metamethods in C); use `qd.encode` instead, or call +`qd.materialize(t)` to get a plain Lua table that any third-party encoder +can handle. + +**LuaJIT compat-52 caveat.** `for k, v in pairs/ipairs(t)` and `#t` on a lazy +proxy rely on `__pairs` / `__ipairs` / `__len`, which LuaJIT only invokes when +built with `LUAJIT_ENABLE_LUA52COMPAT` (OpenResty's default). On a stock LuaJIT +5.1, use the explicit `qd.pairs(t)`, `qd.ipairs(t)`, and `qd.len(t)` helpers +— they work on both builds. + ## Testing — Lua Requires LuaJIT + busted + lua-cjson installed system-wide. @@ -76,3 +109,17 @@ Items intentionally pushed out of the first implementation. Each will be picked - **`cargo fmt --check` not enforced** — `make lint` runs clippy only. The codebase uses intentional manual column alignment in struct definitions and compact single-line literals that default rustfmt would reflow. Skip rather than reformat until a project-wide style decision is made. - **`validate_brackets` fusion into scan emit loop** — surfaced by profiling: on structurally-dense workloads `validate_brackets` is 65% of parse time (second linear pass over emitted indices). Folding bracket pairing into the scan emit loop via an inline depth stack eliminates that pass. No effect on the current string-heavy bench (0.3% there); a win for config / JSONL / table-shape JSON. 
- **`memchr2` cross-chunk jump for very long string interiors** — the AVX2 in-string fast probe (issue #5) drops per-chunk cost from ~25 to ~10 ops but still pays ALU work for every 64-byte chunk in a string. A `memchr2(b'"', b'\\')` jump can approach memory bandwidth on multi-MB single-string payloads. Deferred until a workload that benefits clearly emerges; needs careful `bs_carry` reasoning across the jump. +- **Stateful O(N) iterator FFI** — current `qd.pairs` and the `__newindex` + materialization path walk the object cursor from the start on every step, + giving O(N²) total cost for full enumeration. Acceptable for the "read a + few keys" use case the library is optimized for; full-iteration workloads + (e.g. encoding a deeply-keyed object that has been materialized) would + benefit from a `qjd_iter_init` / `qjd_iter_next` pair that holds position + state across calls. +- **Lazy-table read overhead vs path API** — `qd.decode + t.field x3` lands + ~30–40% behind `qd.parse:get_str` on small-to-medium payloads, converging + to parity at multi-MB sizes. The gap is structural (per-access `__index` + metamethod dispatch + transient cdata allocation for nested wraps). Worth + attempting if a workload-driven need surfaces; current measured cost is + still 14× faster than `cjson.decode` at 100 KB, so the lazy API is the + right default for migrating callers. 
diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua index 7f52d25..29db44b 100644 --- a/benches/lua_bench.lua +++ b/benches/lua_bench.lua @@ -147,6 +147,18 @@ for _, s in ipairs(scenarios) do local _ = d:get_str("messages[0].role") end) end + + bench("qd.decode + t.field x3", s.iters, function() + local t = qd.decode(s.payload) + local _ = t.model + local _ = t.temperature + local _ = t.messages and t.messages[1] and t.messages[1].role + end) + + bench("qd.decode + qd.encode (unmodified)", s.iters, function() + local t = qd.decode(s.payload) + local _ = qd.encode(t) + end) end -- Interleaved scenario: cycle through several payloads of different sizes @@ -207,4 +219,20 @@ do local _ = d:get_str("messages[0].role") end) end + + next_p = make_cycler(interleaved) + bench("qd.decode + t.field x3", 400, function() + local p = next_p() + local t = qd.decode(p) + local _ = t.model + local _ = t.temperature + local _ = t.messages and t.messages[1] and t.messages[1].role + end) + + next_p = make_cycler(interleaved) + bench("qd.decode + qd.encode (unmodified)", 400, function() + local p = next_p() + local t = qd.decode(p) + local _ = qd.encode(t) + end) end diff --git a/docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md b/docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md deleted file mode 100644 index 64a20c9..0000000 --- a/docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md +++ /dev/null @@ -1,3875 +0,0 @@ -# Rust Quick JSON Decode — Implementation Plan (v1) - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Ship a Rust `cdylib` (`libquickdecode.so`) plus `lua/quickdecode.lua` wrapper that lets LuaJIT extract individual fields from large JSON documents faster than `lua-cjson`, by skipping the full Lua-table construction. 
- -**Architecture:** Two-phase decoder. Phase 1 is a single high-throughput structural scan (scalar fallback + AVX2 fast path with runtime dispatch) that records only byte offsets of structural characters. Phase 2 is lazy: paths are resolved by walking those offsets, with a per-container sibling-skip cache built on first access. String/number decode is deferred to the moment a typed getter is called. - -**Tech Stack:** Rust (stable), `cdylib` + `rlib`, `cargo`, intrinsics for AVX2/PCLMUL via `core::arch::x86_64`, `memchr`, `rustc-hash`, `once_cell`. Tests use `cargo test` + `proptest`. Lua side uses LuaJIT `ffi` and `busted` for tests. - -**Spec:** `docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md` (commit `24990eb`). - ---- - -## File Structure - -Each row below is created or modified by exactly the tasks listed; if a task says "Create", the file does not exist yet at that point. - -``` -Cargo.toml — Task 1 (create) -README.md — Tasks 1, 15, 17 (modify) -include/lua_quick_decode.h — Tasks 1 (skeleton), 14 (finalize) - -src/ -├── lib.rs — Task 1 (create) -├── error.rs — Task 1 (create) -├── ffi.rs — Tasks 3, 9, 10, 11, 12 (FFI surface) -├── doc.rs — Task 3 (create), 6 (extend with cache) -├── path.rs — Task 4 (create) -├── cursor.rs — Task 5 (create), 6 (extend) -├── skip_cache.rs — Task 6 (create) -├── scan/ -│ ├── mod.rs — Task 2 (create trait), 16 (add dispatch) -│ ├── scalar.rs — Task 2 (create) -│ └── avx2.rs — Tasks 13, 14, 15, 16 (create + extend) -└── decode/ - ├── mod.rs — Task 7 (create) - ├── string.rs — Task 7 (create) - └── number.rs — Task 8 (create) - -tests/ -├── ffi_smoke.rs — Task 3 (create) -├── ffi_strings.rs — Task 10 (create) -├── ffi_numbers.rs — Task 10 (create) -├── ffi_cursor.rs — Task 11 (create) -├── ffi_panic_safety.rs — Task 12 (create) -├── scanner_crosscheck.rs — Task 16 (create) -└── lua/ - ├── basic_spec.lua — Task 17 (create) - ├── escape_spec.lua — Task 17 (create) - └── cjson_compat_spec.lua — Task 17 (create) 
- -lua/ -└── quickdecode.lua — Task 15 (create) - -benches/ -├── lua_bench.lua — Task 18 (create) -└── fixtures/ - ├── small_api.json — Task 18 (create) - ├── medium_resp.json — Task 18 (create) - └── large_dump.json — Task 18 (create or generate) -``` - -The crate is a single package, not a workspace. Files are split by responsibility (scanner / decode / cursor / FFI / wrapper); each unit can be reasoned about without reading the others. - ---- - -## Task 1: Project scaffold + error codes - -**Files:** -- Create: `Cargo.toml` -- Create: `src/lib.rs` -- Create: `src/error.rs` -- Create: `include/lua_quick_decode.h` (skeleton) -- Modify: `README.md` - -- [ ] **Step 1: Write `Cargo.toml`** - -```toml -[package] -name = "lua-quick-decode" -version = "0.1.0" -edition = "2021" -publish = false - -[lib] -name = "quickdecode" -crate-type = ["cdylib", "rlib"] - -[dependencies] -memchr = "2" -rustc-hash = "2" -once_cell = "1" - -[dev-dependencies] -proptest = "1" - -[profile.release] -opt-level = 3 -lto = "thin" -codegen-units = 1 -panic = "abort" -``` - -- [ ] **Step 2: Write `src/error.rs`** - -```rust -#![allow(non_camel_case_types)] - -#[repr(C)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum qjd_err { - QJD_OK = 0, - QJD_PARSE_ERROR = 1, - QJD_NOT_FOUND = 2, - QJD_TYPE_MISMATCH = 3, - QJD_OUT_OF_RANGE = 4, - QJD_DECODE_FAILED = 5, - QJD_INVALID_PATH = 6, - QJD_INVALID_ARG = 7, - QJD_OOM = 8, -} - -#[repr(C)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum qjd_type { - QJD_T_NULL = 0, - QJD_T_BOOL = 1, - QJD_T_NUM = 2, - QJD_T_STR = 3, - QJD_T_ARR = 4, - QJD_T_OBJ = 5, -} - -pub fn strerror(code: qjd_err) -> &'static str { - match code { - qjd_err::QJD_OK => "ok", - qjd_err::QJD_PARSE_ERROR => "JSON parse error", - qjd_err::QJD_NOT_FOUND => "path not found", - qjd_err::QJD_TYPE_MISMATCH => "type mismatch at path", - qjd_err::QJD_OUT_OF_RANGE => "numeric out of range", - qjd_err::QJD_DECODE_FAILED => "decode failed", - qjd_err::QJD_INVALID_PATH => 
"invalid path syntax", - qjd_err::QJD_INVALID_ARG => "invalid argument", - qjd_err::QJD_OOM => "out of memory", - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn strerror_covers_every_variant() { - for code in [ - qjd_err::QJD_OK, qjd_err::QJD_PARSE_ERROR, qjd_err::QJD_NOT_FOUND, - qjd_err::QJD_TYPE_MISMATCH, qjd_err::QJD_OUT_OF_RANGE, - qjd_err::QJD_DECODE_FAILED, qjd_err::QJD_INVALID_PATH, - qjd_err::QJD_INVALID_ARG, qjd_err::QJD_OOM, - ] { - assert!(!strerror(code).is_empty()); - } - } -} -``` - -- [ ] **Step 3: Write `src/lib.rs`** - -```rust -//! lua-quick-decode: Rust JSON decoder for LuaJIT FFI consumers. -//! See docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md - -pub mod error; -``` - -- [ ] **Step 4: Write `include/lua_quick_decode.h` skeleton** - -```c -#ifndef LUA_QUICK_DECODE_H -#define LUA_QUICK_DECODE_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - QJD_OK = 0, - QJD_PARSE_ERROR = 1, - QJD_NOT_FOUND = 2, - QJD_TYPE_MISMATCH = 3, - QJD_OUT_OF_RANGE = 4, - QJD_DECODE_FAILED = 5, - QJD_INVALID_PATH = 6, - QJD_INVALID_ARG = 7, - QJD_OOM = 8 -} qjd_err; - -typedef enum { - QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, - QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5 -} qjd_type; - -const char* qjd_strerror(int code); - -/* Forward declarations; full prototypes filled in Task 14. */ - -#ifdef __cplusplus -} -#endif - -#endif -``` - -- [ ] **Step 5: Update `README.md` with Building section** - -Insert under existing content: - -```markdown -## Building - -```sh -cargo build --release -# Output: target/release/libquickdecode.so -``` - -## Testing - -```sh -cargo test -``` -``` - -- [ ] **Step 6: Run tests** - -```sh -cargo test -``` - -Expected: 1 test passes (`strerror_covers_every_variant`). Crate compiles as `cdylib` and `rlib`. 
- - [ ] **Step 7: Commit** - -```sh -git add Cargo.toml src/ include/ README.md -git commit -m "Scaffold crate with error codes and C header skeleton" -``` - ---- - -## Task 2: ScalarScanner — Phase 1 structural scan - -**Files:** -- Create: `src/scan/mod.rs` -- Create: `src/scan/scalar.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Write `src/scan/mod.rs`** - -```rust -pub(crate) mod scalar; - -/// A structural scanner: given a JSON byte buffer, append the byte offset of -/// every structural character (`{` `}` `[` `]` `:` `,` `"`) that is NOT inside -/// a string literal to `out`. On shallow validation failure (unclosed string, -/// unmatched bracket), returns `Err(offset)` where `offset` is the byte -/// position the failure was detected at. The offset is informational and not -/// exposed via FFI in v1. -pub(crate) trait Scanner { - fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize>; -} - -pub(crate) use scalar::ScalarScanner; -``` - -- [ ] **Step 2: Write failing tests in `src/scan/scalar.rs`** - -```rust -use super::Scanner; - -pub(crate) struct ScalarScanner; - -impl Scanner for ScalarScanner { - fn scan(_buf: &[u8], _out: &mut Vec<u32>) -> Result<(), usize> { - unimplemented!() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn scan(input: &[u8]) -> Result<Vec<u32>, usize> { - let mut v = Vec::new(); - ScalarScanner::scan(input, &mut v).map(|_| v) - } - - #[test] - fn empty_object() { - assert_eq!(scan(b"{}"), Ok(vec![0, 1])); - } - - #[test] - fn empty_array() { - assert_eq!(scan(b"[]"), Ok(vec![0, 1])); - } - - #[test] - fn simple_object() { - // {"a":1} - // ^ ^ - // 012345 6 - assert_eq!(scan(b"{\"a\":1}"), Ok(vec![0, 1, 3, 4, 6])); - // { " " : } - } - - #[test] - fn nested_object() { - // {"a":{"b":2}} - // 0 4 9 10 11 12 - let r = scan(b"{\"a\":{\"b\":2}}").unwrap(); - // Positions of: { " " : { " " : } } - assert_eq!(r, vec![0, 1, 3, 4, 5, 6, 8, 9, 11, 12]); - } - - #[test] - fn array_with_strings() { - // ["a","b"] - // 0 12 3 4 56 7 8 - let r = 
scan(b"[\"a\",\"b\"]").unwrap(); - assert_eq!(r, vec![0, 1, 3, 4, 5, 7, 8]); - } - - #[test] - fn escape_double_quote_in_string() { - // {"a":"x\"y"} - // 0 12 3 4 5 678 9 10 11 - let r = scan(b"{\"a\":\"x\\\"y\"}").unwrap(); - assert_eq!(r, vec![0, 1, 3, 4, 5, 10, 11]); - } - - #[test] - fn escape_backslash_then_quote() { - // {"a":"x\\"} (string content is `x\`) - // 0 12 3 4 5 678 9 10 - let r = scan(b"{\"a\":\"x\\\\\"}").unwrap(); - assert_eq!(r, vec![0, 1, 3, 4, 5, 9, 10]); - } - - #[test] - fn unclosed_string_is_error() { - assert!(scan(b"{\"a\":\"foo").is_err()); - } - - #[test] - fn unmatched_closer_is_error() { - assert!(scan(b"]").is_err()); - } - - #[test] - fn mismatched_bracket_type_is_error() { - assert!(scan(b"{]").is_err()); - } - - #[test] - fn deeply_nested() { - let mut buf = Vec::new(); - for _ in 0..100 { buf.push(b'['); } - for _ in 0..100 { buf.push(b']'); } - let r = scan(&buf).unwrap(); - assert_eq!(r.len(), 200); - } -} -``` - -- [ ] **Step 3: Run tests to verify they fail (unimplemented)** - -```sh -cargo test scan::scalar -``` - -Expected: tests panic with `unimplemented!()`. - -- [ ] **Step 4: Implement `ScalarScanner::scan`** - -Replace the body in `src/scan/scalar.rs`: - -```rust -impl Scanner for ScalarScanner { - fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { - out.reserve(buf.len() / 6); - - let mut i = 0usize; - let mut in_str = false; - let mut stack: Vec = Vec::with_capacity(32); - - while i < buf.len() { - let b = buf[i]; - - if in_str { - if b == b'\\' { - // Skip the escape and the next byte unconditionally. - // Anything in a string cannot be a structural char. 
- i += 2; - continue; - } - if b == b'"' { - in_str = false; - out.push(i as u32); - } - i += 1; - continue; - } - - match b { - b'"' => { - in_str = true; - out.push(i as u32); - } - b'{' | b'[' => { - stack.push(b); - out.push(i as u32); - } - b'}' => { - match stack.pop() { - Some(b'{') => {} - _ => return Err(i), - } - out.push(i as u32); - } - b']' => { - match stack.pop() { - Some(b'[') => {} - _ => return Err(i), - } - out.push(i as u32); - } - b',' | b':' => out.push(i as u32), - _ => {} - } - i += 1; - } - - if in_str { return Err(buf.len()); } - if !stack.is_empty() { return Err(buf.len()); } - Ok(()) - } -} -``` - -- [ ] **Step 5: Run tests to verify pass** - -```sh -cargo test scan::scalar -``` - -Expected: all 10 tests pass. - -- [ ] **Step 6: Wire module into `src/lib.rs`** - -```rust -pub mod error; -mod scan; -``` - -- [ ] **Step 7: Commit** - -```sh -git add src/lib.rs src/scan/ -git commit -m "Add ScalarScanner with shallow JSON validation" -``` - ---- - -## Task 3: Document + qjd_parse / qjd_free FFI - -**Files:** -- Create: `src/doc.rs` -- Create: `src/ffi.rs` -- Create: `tests/ffi_smoke.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Write `src/doc.rs`** - -```rust -use crate::error::qjd_err; -use crate::scan::{Scanner, ScalarScanner}; - -pub struct Document<'a> { - pub(crate) buf: &'a [u8], - pub(crate) indices: Vec, - pub(crate) scratch: Vec, -} - -impl<'a> Document<'a> { - pub fn parse(buf: &'a [u8]) -> Result { - let mut indices = Vec::new(); - ScalarScanner::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; - // Sentinel simplifies boundary checks during Phase 2. 
- indices.push(u32::MAX); - Ok(Self { buf, indices, scratch: Vec::new() }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parses_simple_object() { - let doc = Document::parse(b"{\"a\":1}").unwrap(); - assert!(doc.indices.len() >= 5); - assert_eq!(*doc.indices.last().unwrap(), u32::MAX); - } - - #[test] - fn parse_error_on_malformed() { - assert!(Document::parse(b"{").is_err()); - } -} -``` - -- [ ] **Step 2: Write `src/ffi.rs`** - -```rust -//! C ABI surface. Every public function is `unsafe extern "C"`. -//! All public symbols use the `qjd_` prefix. - -#![allow(non_camel_case_types)] - -use std::os::raw::{c_char, c_int}; -use std::ptr; - -use crate::doc::Document; -use crate::error::{qjd_err, strerror}; - -/// Opaque type exported to C as `qjd_doc*`. -#[allow(dead_code)] -pub struct qjd_doc(Document<'static>); - -#[no_mangle] -pub unsafe extern "C" fn qjd_strerror(code: c_int) -> *const c_char { - // Map int back to enum; fall back to OK message for unknown. - let e = match code { - 0 => qjd_err::QJD_OK, - 1 => qjd_err::QJD_PARSE_ERROR, - 2 => qjd_err::QJD_NOT_FOUND, - 3 => qjd_err::QJD_TYPE_MISMATCH, - 4 => qjd_err::QJD_OUT_OF_RANGE, - 5 => qjd_err::QJD_DECODE_FAILED, - 6 => qjd_err::QJD_INVALID_PATH, - 7 => qjd_err::QJD_INVALID_ARG, - 8 => qjd_err::QJD_OOM, - _ => return c"unknown error code".as_ptr(), - }; - // strerror returns a &'static str; we need NUL-terminated C strings. - // Hardcoded NUL-terminated map below to avoid runtime allocation. 
- match e { - qjd_err::QJD_OK => c"ok".as_ptr(), - qjd_err::QJD_PARSE_ERROR => c"JSON parse error".as_ptr(), - qjd_err::QJD_NOT_FOUND => c"path not found".as_ptr(), - qjd_err::QJD_TYPE_MISMATCH => c"type mismatch at path".as_ptr(), - qjd_err::QJD_OUT_OF_RANGE => c"numeric out of range".as_ptr(), - qjd_err::QJD_DECODE_FAILED => c"decode failed".as_ptr(), - qjd_err::QJD_INVALID_PATH => c"invalid path syntax".as_ptr(), - qjd_err::QJD_INVALID_ARG => c"invalid argument".as_ptr(), - qjd_err::QJD_OOM => c"out of memory".as_ptr(), - } - // Touch strerror to keep it linked (used elsewhere later). - // let _ = strerror; -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_parse( - buf: *const u8, - len: usize, - err_out: *mut c_int, -) -> *mut qjd_doc { - if buf.is_null() || err_out.is_null() { - if !err_out.is_null() { *err_out = qjd_err::QJD_INVALID_ARG as c_int; } - return ptr::null_mut(); - } - let slice: &'static [u8] = std::slice::from_raw_parts(buf, len); - match Document::parse(slice) { - Ok(d) => { - *err_out = qjd_err::QJD_OK as c_int; - Box::into_raw(Box::new(qjd_doc(d))) - } - Err(e) => { - *err_out = e as c_int; - ptr::null_mut() - } - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_free(doc: *mut qjd_doc) { - if doc.is_null() { return; } - let _ = Box::from_raw(doc); -} - -// Suppress dead_code warning during this task; later tasks consume it. 
-#[allow(dead_code)] -pub(crate) fn _link_strerror() { let _ = strerror; } -``` - -- [ ] **Step 3: Wire modules in `src/lib.rs`** - -```rust -pub mod error; -mod scan; -mod doc; -pub mod ffi; -``` - -- [ ] **Step 4: Write `tests/ffi_smoke.rs`** - -```rust -use std::ffi::CStr; -use std::os::raw::c_int; - -use quickdecode::ffi::{qjd_doc, qjd_free, qjd_parse, qjd_strerror}; - -#[test] -fn parse_and_free_roundtrip() { - let json = b"{\"a\":1}"; - let mut err: c_int = -1; - let doc: *mut qjd_doc = unsafe { qjd_parse(json.as_ptr(), json.len(), &mut err) }; - assert!(!doc.is_null()); - assert_eq!(err, 0); - unsafe { qjd_free(doc); } -} - -#[test] -fn parse_error_returns_null() { - let bad = b"{"; - let mut err: c_int = -1; - let doc = unsafe { qjd_parse(bad.as_ptr(), bad.len(), &mut err) }; - assert!(doc.is_null()); - assert_eq!(err, 1); // QJD_PARSE_ERROR -} - -#[test] -fn parse_null_buffer_returns_invalid_arg() { - let mut err: c_int = -1; - let doc = unsafe { qjd_parse(std::ptr::null(), 0, &mut err) }; - assert!(doc.is_null()); - assert_eq!(err, 7); // QJD_INVALID_ARG -} - -#[test] -fn free_null_is_safe() { - unsafe { qjd_free(std::ptr::null_mut()); } -} - -#[test] -fn strerror_returns_non_empty() { - for code in 0..=8 { - let p = unsafe { qjd_strerror(code) }; - assert!(!p.is_null()); - let s = unsafe { CStr::from_ptr(p) }.to_str().unwrap(); - assert!(!s.is_empty(), "code {}", code); - } -} -``` - -- [ ] **Step 5: Run tests** - -```sh -cargo test -``` - -Expected: all tests pass (unit + integration). `target/release/libquickdecode.so` exports `qjd_parse`, `qjd_free`, `qjd_strerror`. 
- - [ ] **Step 6: Commit** - -```sh -git add src/doc.rs src/ffi.rs src/lib.rs tests/ffi_smoke.rs -git commit -m "Add Document and qjd_parse/qjd_free/qjd_strerror FFI" -``` - ---- - -## Task 4: Path string parser - -**Files:** -- Create: `src/path.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Write failing tests in `src/path.rs`** - -```rust -use crate::error::qjd_err; - -#[derive(Debug, PartialEq, Eq)] -pub(crate) enum PathSeg<'a> { - Key(&'a [u8]), - Idx(u32), -} - -pub(crate) struct PathIter<'a> { - rest: &'a [u8], -} - -impl<'a> PathIter<'a> { - pub(crate) fn new(path: &'a [u8]) -> Self { Self { rest: path } } -} - -impl<'a> Iterator for PathIter<'a> { - type Item = Result<PathSeg<'a>, qjd_err>; - fn next(&mut self) -> Option<Self::Item> { unimplemented!() } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn parse(p: &[u8]) -> Result<Vec<PathSeg>, qjd_err> { - PathIter::new(p).collect() - } - - #[test] - fn empty_path_yields_no_segs() { - assert_eq!(parse(b""), Ok(vec![])); - } - - #[test] - fn single_key() { - assert_eq!(parse(b"body"), Ok(vec![PathSeg::Key(b"body")])); - } - - #[test] - fn dotted_keys() { - assert_eq!( - parse(b"body.model"), - Ok(vec![PathSeg::Key(b"body"), PathSeg::Key(b"model")]), - ); - } - - #[test] - fn array_index_after_key() { - assert_eq!( - parse(b"messages[0]"), - Ok(vec![PathSeg::Key(b"messages"), PathSeg::Idx(0)]), - ); - } - - #[test] - fn complex_path() { - assert_eq!( - parse(b"body.messages[42].role"), - Ok(vec![ - PathSeg::Key(b"body"), - PathSeg::Key(b"messages"), - PathSeg::Idx(42), - PathSeg::Key(b"role"), - ]), - ); - } - - #[test] - fn consecutive_indices() { - assert_eq!( - parse(b"data[3][1]"), - Ok(vec![PathSeg::Key(b"data"), PathSeg::Idx(3), PathSeg::Idx(1)]), - ); - } - - #[test] - fn leading_index() { - assert_eq!(parse(b"[5]"), Ok(vec![PathSeg::Idx(5)])); - } - - #[test] - fn unterminated_index_is_error() { - assert_eq!(parse(b"a[3"), Err(qjd_err::QJD_INVALID_PATH)); - } - - #[test] - fn non_digit_in_index_is_error() { - 
assert_eq!(parse(b"a[abc]"), Err(qjd_err::QJD_INVALID_PATH)); - } - - #[test] - fn trailing_dot_is_error() { - assert_eq!(parse(b"a."), Err(qjd_err::QJD_INVALID_PATH)); - } -} -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```sh -cargo test path::tests -``` - -Expected: panic with `unimplemented!()`. - -- [ ] **Step 3: Implement `PathIter::next`** - -Replace the body in `src/path.rs`: - -```rust -impl<'a> Iterator for PathIter<'a> { - type Item = Result, qjd_err>; - - fn next(&mut self) -> Option { - if self.rest.is_empty() { - return None; - } - - let first = self.rest[0]; - - if first == b'[' { - // Index segment: [digits] - let close = match self.rest.iter().position(|&c| c == b']') { - Some(p) => p, - None => return Some(Err(qjd_err::QJD_INVALID_PATH)), - }; - let digits = &self.rest[1..close]; - if digits.is_empty() || !digits.iter().all(|c| c.is_ascii_digit()) { - return Some(Err(qjd_err::QJD_INVALID_PATH)); - } - let mut n: u32 = 0; - for &c in digits { - n = n.checked_mul(10) - .and_then(|x| x.checked_add((c - b'0') as u32)) - .unwrap_or(u32::MAX); - if n == u32::MAX { - return Some(Err(qjd_err::QJD_INVALID_PATH)); - } - } - self.rest = &self.rest[close + 1..]; - return Some(Ok(PathSeg::Idx(n))); - } - - if first == b'.' { - // Separator before a key. Skip it then require a key. - self.rest = &self.rest[1..]; - if self.rest.is_empty() { - return Some(Err(qjd_err::QJD_INVALID_PATH)); - } - return self.next(); - } - - // Key segment: read until '.' or '[' or end. - let end = self.rest.iter() - .position(|&c| c == b'.' || c == b'[') - .unwrap_or(self.rest.len()); - if end == 0 { - return Some(Err(qjd_err::QJD_INVALID_PATH)); - } - let key = &self.rest[..end]; - self.rest = &self.rest[end..]; - Some(Ok(PathSeg::Key(key))) - } -} -``` - -- [ ] **Step 4: Run tests to verify pass** - -```sh -cargo test path::tests -``` - -Expected: all 10 tests pass. 
- - [ ] **Step 5: Wire module into `src/lib.rs`** - -```rust -pub mod error; -mod scan; -mod doc; -mod path; -pub mod ffi; -``` - -- [ ] **Step 6: Commit** - -```sh -git add src/path.rs src/lib.rs -git commit -m "Add zero-alloc PathIter for path string parsing" -``` - ---- - -## Task 5: Cursor core + brute-force resolve - -This task implements a working `Cursor::resolve` without any skip cache. Task 6 adds the cache on top. - -**Files:** -- Create: `src/cursor.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Write failing tests in `src/cursor.rs`** - -```rust -use crate::doc::Document; -use crate::error::qjd_err; -use crate::path::{PathIter, PathSeg}; - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub(crate) struct Cursor { - /// Position in doc.indices of the opening '{' or '[', or the value's - /// first-byte structural marker (e.g. opening '"' for a string). - pub(crate) idx_start: u32, - /// One past the closing '}' / ']' in doc.indices. For scalar values, - /// idx_end == idx_start + 1. - pub(crate) idx_end: u32, -} - -impl Cursor { - pub(crate) fn root(doc: &Document) -> Self { - Cursor { idx_start: 0, idx_end: (doc.indices.len() as u32) - 1 } - } - - pub(crate) fn resolve(self, doc: &Document, path: &[u8]) -> Result<Cursor, qjd_err> { - let mut cur = self; - for seg in PathIter::new(path) { - let seg = seg?; - cur = step(doc, cur, &seg)?; - } - Ok(cur) - } -} - -fn step(_doc: &Document, _cur: Cursor, _seg: &PathSeg) -> Result<Cursor, qjd_err> { - unimplemented!() -} - -#[cfg(test)] -mod tests { - use super::*; - - fn doc_of(s: &[u8]) -> Document<'_> { Document::parse(s).unwrap() } - - #[test] - fn root_path_returns_root() { - let d = doc_of(b"{\"a\":1}"); - let c = Cursor::root(&d).resolve(&d, b"").unwrap(); - assert_eq!(c, Cursor::root(&d)); - } - - #[test] - fn simple_key() { - let d = doc_of(b"{\"a\":1}"); - let c = Cursor::root(&d).resolve(&d, b"a").unwrap(); - // 'a' points at the value '1', which is between ':' and '}'. - // Value starts after the ':' (indices position 3). 
- // Cursor for a scalar value: idx_start at the value's leading marker - // (here we use the next structural index, which is the closing '}'). - // We pick the convention: idx_start = position in indices array of - // the value's first-byte marker. For scalars without their own - // structural char, idx_start points at the position AFTER the ':' - // in indices, with idx_end == idx_start. - // Concretely the indices for {"a":1} are [0, 1, 3, 4, 6, MAX]: - // { " " : } - // Position 4 in indices is the ':' index (byte offset 4). The value - // starts at byte 5 and ends before byte 6 (the '}'). We set: - // idx_start = 4 + 1 = 5 (= position in indices of value's end) - // idx_end = 5 - // Actually for scalars our convention: idx_start == idx_end == - // position in indices of the structural char that ENDS the value. - assert_ne!(c, Cursor::root(&d)); - } - - #[test] - fn nested_key() { - let d = doc_of(b"{\"a\":{\"b\":2}}"); - let _ = Cursor::root(&d).resolve(&d, b"a.b").unwrap(); - } - - #[test] - fn missing_key_is_not_found() { - let d = doc_of(b"{\"a\":1}"); - let r = Cursor::root(&d).resolve(&d, b"b"); - assert_eq!(r, Err(qjd_err::QJD_NOT_FOUND)); - } - - #[test] - fn type_mismatch_on_index_into_object() { - let d = doc_of(b"{\"a\":1}"); - let r = Cursor::root(&d).resolve(&d, b"[0]"); - assert_eq!(r, Err(qjd_err::QJD_TYPE_MISMATCH)); - } - - #[test] - fn type_mismatch_on_key_into_array() { - let d = doc_of(b"[1,2,3]"); - let r = Cursor::root(&d).resolve(&d, b"a"); - assert_eq!(r, Err(qjd_err::QJD_TYPE_MISMATCH)); - } - - #[test] - fn array_index() { - let d = doc_of(b"[10,20,30]"); - let _ = Cursor::root(&d).resolve(&d, b"[1]").unwrap(); - } - - #[test] - fn array_out_of_bounds() { - let d = doc_of(b"[10,20]"); - let r = Cursor::root(&d).resolve(&d, b"[5]"); - assert_eq!(r, Err(qjd_err::QJD_NOT_FOUND)); - } -} -``` - -Note: cursor representation is internal. 
The tests above verify behavior (success / error variant), not exact field values, so we can refine the representation in Task 6 without rewriting tests. - -- [ ] **Step 2: Run tests to verify they fail** - -```sh -cargo test cursor::tests -``` - -Expected: tests panic with `unimplemented!()`. - -- [ ] **Step 3: Implement `step` and supporting helpers** - -Replace `step` and add helpers in `src/cursor.rs`: - -```rust -fn step(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result { - // The cursor must point at a container. - let opener_byte = container_opener_byte(doc, cur) - .ok_or(qjd_err::QJD_TYPE_MISMATCH)?; - match (seg, opener_byte) { - (PathSeg::Key(_), b'{') | (PathSeg::Idx(_), b'[') => {} - _ => return Err(qjd_err::QJD_TYPE_MISMATCH), - } - - walk_children(doc, cur, seg) -} - -/// If `cur` points at a container, return its opener byte (`{` or `[`). -/// Returns None for scalars. -fn container_opener_byte(doc: &Document, cur: Cursor) -> Option { - if cur.idx_start as usize >= doc.indices.len() { return None; } - let pos = doc.indices[cur.idx_start as usize] as usize; - let b = *doc.buf.get(pos)?; - if b == b'{' || b == b'[' { Some(b) } else { None } -} - -/// Iterate children of the container at `cur` and return a Cursor for the -/// matching child. `seg` is either a Key (object children) or Idx (array -/// children). -fn walk_children(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result { - let mut i = cur.idx_start + 1; // skip opener - let end = cur.idx_end; // closer position in indices - let mut arr_idx: u32 = 0; - let is_obj = matches!(seg, PathSeg::Key(_)); - - while i < end { - // i now points at the start of a child entry. - // For object: i points at the key's opening '"'. - // For array: i points at the value's first structural marker. - - let child_key_match = if is_obj { - // Read the key: from quote at indices[i] to closing quote at indices[i+1]. 
- let key_open = doc.indices[i as usize] as usize; - let key_close = doc.indices[(i + 1) as usize] as usize; - if doc.buf.get(key_open).copied() != Some(b'"') { - return Err(qjd_err::QJD_PARSE_ERROR); - } - let key_bytes = &doc.buf[key_open + 1 .. key_close]; - match seg { - PathSeg::Key(want) => key_bytes == *want, - _ => unreachable!(), - } - } else { - match seg { - PathSeg::Idx(want) => arr_idx == *want, - _ => unreachable!(), - } - }; - - // Advance i past the key + ':' (for object), to the value's first marker. - let value_idx_start = if is_obj { i + 3 } else { i }; - // ^ i: key '"' open; i+1: key '"' close; i+2: ':' ; i+3: value marker - // Determine value range. value_idx_end depends on value type. - let value_idx_end = find_value_end(doc, value_idx_start)?; - - if child_key_match { - return Ok(Cursor { idx_start: value_idx_start, idx_end: value_idx_end }); - } - - // Move past this child: value_idx_end points at ',' or closing bracket. - // If at ',' continue; if at closing bracket we're at end. - let after_pos = doc.indices[value_idx_end as usize] as usize; - if after_pos >= doc.buf.len() { return Err(qjd_err::QJD_NOT_FOUND); } - match doc.buf[after_pos] { - b',' => { i = value_idx_end + 1; arr_idx += 1; } - b'}' | b']' => return Err(qjd_err::QJD_NOT_FOUND), - _ => return Err(qjd_err::QJD_PARSE_ERROR), - } - } - Err(qjd_err::QJD_NOT_FOUND) -} - -/// Given the indices position of a value's first marker, return the indices -/// position of the structural character immediately following the value: -/// - for object/array values, the matching closer (one past it == sibling) -/// - for string values, the closing quote -/// - for scalars (numbers / true / false / null), the next structural char -fn find_value_end(doc: &Document, start: u32) -> Result { - let pos = doc.indices[start as usize] as usize; - let b = *doc.buf.get(pos).ok_or(qjd_err::QJD_PARSE_ERROR)?; - match b { - b'{' | b'[' => { - // Brace-count to matching closer. 
- let want_close = if b == b'{' { b'}' } else { b']' }; - let mut depth: i32 = 1; - let mut k = start + 1; - while (k as usize) < doc.indices.len() { - let cb = doc.buf[doc.indices[k as usize] as usize]; - match cb { - b'{' | b'[' => depth += 1, - b'}' | b']' => { - depth -= 1; - if depth == 0 { - if cb != want_close { return Err(qjd_err::QJD_PARSE_ERROR); } - return Ok(k); - } - } - _ => {} - } - k += 1; - } - Err(qjd_err::QJD_PARSE_ERROR) - } - b'"' => { - // String value: the indices array has both opening and closing quotes. - Ok(start + 1) - } - _ => { - // Scalar: end at next structural char. - Ok(start + 1) - } - } -} -``` - -Note: this implementation works for Cursor::root if we set `idx_start` to `0` (the outer opener) and `idx_end` to the matching closer's position in `indices`. Update `Cursor::root`: - -```rust -impl Cursor { - pub(crate) fn root(doc: &Document) -> Self { - // Find the closing index of the outermost container. - // indices has a u32::MAX sentinel at the end. - let n = doc.indices.len() as u32; - debug_assert!(n >= 2); - Cursor { idx_start: 0, idx_end: n - 2 } - } -} -``` - -- [ ] **Step 4: Run tests to verify pass** - -```sh -cargo test cursor::tests -``` - -Expected: all 8 tests pass. - -- [ ] **Step 5: Wire module into `src/lib.rs`** - -```rust -pub mod error; -mod scan; -mod doc; -mod path; -mod cursor; -pub mod ffi; -``` - -- [ ] **Step 6: Commit** - -```sh -git add src/cursor.rs src/lib.rs -git commit -m "Add Cursor with brute-force path resolution" -``` - ---- - -## Task 6: SkipCache (lazy fill) - -This task adds the per-container sibling-skip cache that makes repeated access of the same container O(N_keys) instead of O(N_keys × subtree_size). 
- -**Files:** -- Create: `src/skip_cache.rs` -- Modify: `src/doc.rs` (add cache to Document) -- Modify: `src/cursor.rs` (use cache in walk_children) -- Modify: `src/lib.rs` - -- [ ] **Step 1: Write `src/skip_cache.rs`** - -```rust -use rustc_hash::FxHashMap; - -#[derive(Default)] -pub(crate) struct SkipCache { - /// Slot 0 reserved as "no cache" marker. - slots: Vec<SkipSlot>, - /// Map from a container's opener position-in-indices (idx_start) to slot index. - by_opener: FxHashMap<u32, u32>, -} - -pub(crate) struct SkipSlot { - /// child_starts[i] = position in doc.indices of the i-th child's leading - /// marker. For object children this is the key's opening '"'; for array - /// children, the value's first marker. - pub(crate) child_starts: Vec<u32>, -} - -impl SkipCache { - pub(crate) fn new() -> Self { - Self { slots: vec![SkipSlot { child_starts: Vec::new() }], by_opener: FxHashMap::default() } - } - - pub(crate) fn get_or_insert(&mut self, opener_idx: u32) -> (u32, bool) { - if let Some(&slot) = self.by_opener.get(&opener_idx) { - return (slot, true); - } - let new = self.slots.len() as u32; - self.slots.push(SkipSlot { child_starts: Vec::new() }); - self.by_opener.insert(opener_idx, new); - (new, false) - } - - pub(crate) fn slot_mut(&mut self, n: u32) -> &mut SkipSlot { - &mut self.slots[n as usize] - } - - pub(crate) fn slot(&self, n: u32) -> &SkipSlot { - &self.slots[n as usize] - } -} -``` - -- [ ] **Step 2: Add cache to `Document`** - -In `src/doc.rs`: - -```rust -use crate::skip_cache::SkipCache; - -pub struct Document<'a> { - pub(crate) buf: &'a [u8], - pub(crate) indices: Vec<u32>, - pub(crate) scratch: Vec<u8>, - pub(crate) skip: std::cell::RefCell<SkipCache>, -} - -impl<'a> Document<'a> { - pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> { - let mut indices = Vec::new(); - ScalarScanner::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; - indices.push(u32::MAX); - Ok(Self { - buf, - indices, - scratch: Vec::new(), - skip: std::cell::RefCell::new(SkipCache::new()), - }) - } -} -``` - -We
use `RefCell` because cursors take `&Document` but the cache mutates. Single-threaded use means `RefCell` is fine; multi-threading is explicitly out of scope (spec §7.5). - -- [ ] **Step 3: Modify `walk_children` to use the cache** - -Replace `walk_children` in `src/cursor.rs`: - -```rust -fn walk_children(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result { - let is_obj = matches!(seg, PathSeg::Key(_)); - let mut cache = doc.skip.borrow_mut(); - let (slot_n, was_cached) = cache.get_or_insert(cur.idx_start); - - if was_cached { - // Fast path: iterate cached child_starts. - let starts = cache.slot(slot_n).child_starts.clone(); - // ^ small clone; alternative: drop borrow then iterate. We keep - // semantics simple at the cost of a Vec clone per match attempt; - // optimization deferred. - drop(cache); - return resolve_in_known_children(doc, &starts, is_obj, seg); - } - - // Slow path: walk for the first time, populate cache as we go. - let mut starts: Vec = Vec::new(); - let mut i = cur.idx_start + 1; - let end = cur.idx_end; - let mut arr_idx: u32 = 0; - - while i < end { - starts.push(i); - - let value_idx_start = if is_obj { i + 3 } else { i }; - let value_idx_end = find_value_end(doc, value_idx_start)?; - - let matched = if is_obj { - let key_open = doc.indices[i as usize] as usize; - let key_close = doc.indices[(i + 1) as usize] as usize; - let key_bytes = &doc.buf[key_open + 1 .. key_close]; - match seg { - PathSeg::Key(want) => key_bytes == *want, - _ => unreachable!(), - } - } else { - match seg { - PathSeg::Idx(want) => arr_idx == *want, - _ => unreachable!(), - } - }; - - if matched { - // Continue populating cache fully before returning, so subsequent - // siblings benefit too. Walk remaining children without further - // matching. 
- let result = Cursor { idx_start: value_idx_start, idx_end: value_idx_end }; - - let mut j = value_idx_end; - loop { - let after = doc.buf[doc.indices[j as usize] as usize]; - match after { - b',' => { j += 1; starts.push(j); j = find_value_end(doc, if is_obj { j + 3 } else { j })?; } - b'}' | b']' => break, - _ => return Err(qjd_err::QJD_PARSE_ERROR), - } - } - - cache.slot_mut(slot_n).child_starts = starts; - return Ok(result); - } - - let after = doc.buf[doc.indices[value_idx_end as usize] as usize]; - match after { - b',' => { i = value_idx_end + 1; arr_idx += 1; } - b'}' | b']' => { - cache.slot_mut(slot_n).child_starts = starts; - return Err(qjd_err::QJD_NOT_FOUND); - } - _ => return Err(qjd_err::QJD_PARSE_ERROR), - } - } - - cache.slot_mut(slot_n).child_starts = starts; - Err(qjd_err::QJD_NOT_FOUND) -} - -fn resolve_in_known_children( - doc: &Document, starts: &[u32], is_obj: bool, seg: &PathSeg, -) -> Result { - for (k, &i) in starts.iter().enumerate() { - let matched = if is_obj { - let key_open = doc.indices[i as usize] as usize; - let key_close = doc.indices[(i + 1) as usize] as usize; - let key_bytes = &doc.buf[key_open + 1 .. 
key_close]; - matches!(seg, PathSeg::Key(want) if key_bytes == *want) - } else { - matches!(seg, PathSeg::Idx(want) if (k as u32) == *want) - }; - if matched { - let value_idx_start = if is_obj { i + 3 } else { i }; - let value_idx_end = find_value_end(doc, value_idx_start)?; - return Ok(Cursor { idx_start: value_idx_start, idx_end: value_idx_end }); - } - } - Err(qjd_err::QJD_NOT_FOUND) -} -``` - -- [ ] **Step 4: Wire skip_cache into `src/lib.rs`** - -```rust -pub mod error; -mod scan; -mod skip_cache; -mod doc; -mod path; -mod cursor; -pub mod ffi; -``` - -- [ ] **Step 5: Add a test that exercises the cache hit path** - -Append to `src/cursor.rs` tests: - -```rust -#[test] -fn cache_hit_on_repeated_access() { - let d = doc_of(b"{\"a\":1,\"b\":2,\"c\":3}"); - let r1 = Cursor::root(&d).resolve(&d, b"a").unwrap(); - let r2 = Cursor::root(&d).resolve(&d, b"b").unwrap(); - let r3 = Cursor::root(&d).resolve(&d, b"c").unwrap(); - // Just assert all succeed; cache correctness verified by sharing impl. - assert_ne!(r1, r2); - assert_ne!(r2, r3); - // Verify only one slot exists for the root container. - let cache = d.skip.borrow(); - // `by_opener` has one entry per cached container; the reserved slot 0 - // has no entry in the map, so the root container alone gives 1 (not 2). - assert_eq!(cache.by_opener.len(), 1); -} -``` - -- [ ] **Step 6: Run tests** - -```sh -cargo test -``` - -Expected: all previous tests + new cache test pass.
- -- [ ] **Step 7: Commit** - -```sh -git add src/skip_cache.rs src/doc.rs src/cursor.rs src/lib.rs -git commit -m "Add lazy sibling-skip cache for cursor path resolution" -``` - ---- - -## Task 7: String escape decode - -**Files:** -- Create: `src/decode/mod.rs` -- Create: `src/decode/string.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Write `src/decode/mod.rs`** - -```rust -pub(crate) mod string; -pub(crate) mod number; -``` - -- [ ] **Step 2: Write failing tests in `src/decode/string.rs`** - -```rust -use crate::error::qjd_err; - -/// Decode the JSON string between `start` and `end` (exclusive of the -/// surrounding quotes) into `scratch` if escapes are present. Returns -/// (ptr, len) pointing into either `buf` (no escapes) or `scratch`. -pub(crate) fn decode_string( - buf: &[u8], start: usize, end: usize, scratch: &mut Vec, -) -> Result<(*const u8, usize), qjd_err> { - let _ = (buf, start, end, scratch); - unimplemented!() -} - -#[cfg(test)] -mod tests { - use super::*; - - fn d(s: &[u8]) -> Result, qjd_err> { - let mut scratch = Vec::new(); - let (p, n) = decode_string(s, 0, s.len(), &mut scratch)?; - Ok(unsafe { std::slice::from_raw_parts(p, n) }.to_vec()) - } - - #[test] - fn no_escape_returns_input() { - assert_eq!(d(b"hello").unwrap(), b"hello".to_vec()); - } - - #[test] - fn escaped_quote() { - assert_eq!(d(b"a\\\"b").unwrap(), b"a\"b".to_vec()); - } - - #[test] - fn escaped_backslash() { - assert_eq!(d(b"a\\\\b").unwrap(), b"a\\b".to_vec()); - } - - #[test] - fn escaped_newline() { - assert_eq!(d(b"a\\nb").unwrap(), b"a\nb".to_vec()); - } - - #[test] - fn escaped_tab() { - assert_eq!(d(b"a\\tb").unwrap(), b"a\tb".to_vec()); - } - - #[test] - fn escaped_unicode_ascii() { - // A = 'A' - assert_eq!(d(b"a\\u0041b").unwrap(), b"aAb".to_vec()); - } - - #[test] - fn escaped_unicode_2byte() { - // é = 'é' = 0xC3 0xA9 - assert_eq!(d(b"\\u00e9").unwrap(), vec![0xC3, 0xA9]); - } - - #[test] - fn escaped_unicode_3byte() { - // 中 = '中' = 0xE4 0xB8 0xAD - 
assert_eq!(d(b"\\u4e2d").unwrap(), vec![0xE4, 0xB8, 0xAD]); - } - - #[test] - fn surrogate_pair() { - // 😀 = '😀' = U+1F600 = 0xF0 0x9F 0x98 0x80 - assert_eq!( - d(b"\\uD83D\\uDE00").unwrap(), - vec![0xF0, 0x9F, 0x98, 0x80], - ); - } - - #[test] - fn lone_high_surrogate_fails() { - assert_eq!(d(b"\\uD83D").unwrap_err(), qjd_err::QJD_DECODE_FAILED); - } - - #[test] - fn invalid_hex_in_unicode_fails() { - assert_eq!(d(b"\\uZZZZ").unwrap_err(), qjd_err::QJD_DECODE_FAILED); - } - - #[test] - fn unknown_escape_fails() { - assert_eq!(d(b"\\q").unwrap_err(), qjd_err::QJD_DECODE_FAILED); - } - - #[test] - fn dangling_backslash_fails() { - assert_eq!(d(b"a\\").unwrap_err(), qjd_err::QJD_DECODE_FAILED); - } -} -``` - -- [ ] **Step 3: Run tests to verify they fail** - -```sh -cargo test decode::string -``` - -Expected: panics with `unimplemented!()`. - -- [ ] **Step 4: Implement `decode_string`** - -Replace in `src/decode/string.rs`: - -```rust -pub(crate) fn decode_string( - buf: &[u8], start: usize, end: usize, scratch: &mut Vec, -) -> Result<(*const u8, usize), qjd_err> { - let slice = &buf[start..end]; - if memchr::memchr(b'\\', slice).is_none() { - return Ok((slice.as_ptr(), slice.len())); - } - - scratch.clear(); - scratch.reserve(slice.len()); - - let mut i = 0; - while i < slice.len() { - let b = slice[i]; - if b != b'\\' { - scratch.push(b); - i += 1; - continue; - } - // Escape. - if i + 1 >= slice.len() { return Err(qjd_err::QJD_DECODE_FAILED); } - match slice[i + 1] { - b'"' => { scratch.push(b'"'); i += 2; } - b'\\' => { scratch.push(b'\\'); i += 2; } - b'/' => { scratch.push(b'/'); i += 2; } - b'b' => { scratch.push(0x08); i += 2; } - b'f' => { scratch.push(0x0C); i += 2; } - b'n' => { scratch.push(b'\n'); i += 2; } - b'r' => { scratch.push(b'\r'); i += 2; } - b't' => { scratch.push(b'\t'); i += 2; } - b'u' => { - if i + 6 > slice.len() { return Err(qjd_err::QJD_DECODE_FAILED); } - let h = parse_hex4(&slice[i + 2 .. 
i + 6])?; - i += 6; - let cp = if (0xD800..=0xDBFF).contains(&h) { - // High surrogate, expect \uDXXX low surrogate next. - if i + 6 > slice.len() || &slice[i..i + 2] != b"\\u" { - return Err(qjd_err::QJD_DECODE_FAILED); - } - let l = parse_hex4(&slice[i + 2 .. i + 6])?; - if !(0xDC00..=0xDFFF).contains(&l) { - return Err(qjd_err::QJD_DECODE_FAILED); - } - i += 6; - 0x10000 + ((h - 0xD800) << 10) + (l - 0xDC00) - } else if (0xDC00..=0xDFFF).contains(&h) { - // Unmatched low surrogate. - return Err(qjd_err::QJD_DECODE_FAILED); - } else { - h - }; - encode_utf8(cp, scratch); - } - _ => return Err(qjd_err::QJD_DECODE_FAILED), - } - } - - Ok((scratch.as_ptr(), scratch.len())) -} - -fn parse_hex4(bytes: &[u8]) -> Result { - let mut v: u32 = 0; - for &b in bytes { - v <<= 4; - v |= match b { - b'0'..=b'9' => (b - b'0') as u32, - b'a'..=b'f' => (b - b'a' + 10) as u32, - b'A'..=b'F' => (b - b'A' + 10) as u32, - _ => return Err(qjd_err::QJD_DECODE_FAILED), - }; - } - Ok(v) -} - -fn encode_utf8(cp: u32, out: &mut Vec) { - if cp < 0x80 { - out.push(cp as u8); - } else if cp < 0x800 { - out.push(0xC0 | (cp >> 6) as u8); - out.push(0x80 | (cp & 0x3F) as u8); - } else if cp < 0x10000 { - out.push(0xE0 | (cp >> 12) as u8); - out.push(0x80 | ((cp >> 6) & 0x3F) as u8); - out.push(0x80 | (cp & 0x3F) as u8); - } else { - out.push(0xF0 | (cp >> 18) as u8); - out.push(0x80 | ((cp >> 12) & 0x3F) as u8); - out.push(0x80 | ((cp >> 6) & 0x3F) as u8); - out.push(0x80 | (cp & 0x3F) as u8); - } -} -``` - -- [ ] **Step 5: Run tests to verify pass** - -```sh -cargo test decode::string -``` - -Expected: all 13 tests pass. 
- -- [ ] **Step 6: Wire module into `src/lib.rs`** - -```rust -pub mod error; -mod scan; -mod skip_cache; -mod doc; -mod path; -mod cursor; -mod decode; -pub mod ffi; -``` - -- [ ] **Step 7: Commit** - -```sh -git add src/decode/ src/lib.rs -git commit -m "Add lazy string escape decode with surrogate-pair handling" -``` - ---- - -## Task 8: Number decode (i64 and f64) - -**Files:** -- Create: `src/decode/number.rs` - -- [ ] **Step 1: Write failing tests in `src/decode/number.rs`** - -```rust -use crate::error::qjd_err; - -pub(crate) fn parse_i64(bytes: &[u8]) -> Result { - let _ = bytes; unimplemented!() -} - -pub(crate) fn parse_f64(bytes: &[u8]) -> Result { - let _ = bytes; unimplemented!() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] fn i64_zero() { assert_eq!(parse_i64(b"0"), Ok(0)); } - #[test] fn i64_positive() { assert_eq!(parse_i64(b"42"), Ok(42)); } - #[test] fn i64_negative() { assert_eq!(parse_i64(b"-7"), Ok(-7)); } - #[test] fn i64_max() { assert_eq!(parse_i64(b"9223372036854775807"), Ok(i64::MAX)); } - #[test] fn i64_min() { assert_eq!(parse_i64(b"-9223372036854775808"), Ok(i64::MIN)); } - - #[test] - fn i64_overflow() { - assert_eq!(parse_i64(b"9223372036854775808"), Err(qjd_err::QJD_OUT_OF_RANGE)); - } - - #[test] - fn i64_rejects_decimal() { - assert_eq!(parse_i64(b"1.5"), Err(qjd_err::QJD_TYPE_MISMATCH)); - } - - #[test] - fn i64_rejects_exponent() { - assert_eq!(parse_i64(b"1e5"), Err(qjd_err::QJD_TYPE_MISMATCH)); - } - - #[test] - fn i64_rejects_empty() { - assert_eq!(parse_i64(b""), Err(qjd_err::QJD_DECODE_FAILED)); - } - - #[test] fn f64_zero() { assert_eq!(parse_f64(b"0.0").unwrap(), 0.0); } - #[test] fn f64_pi() { assert!((parse_f64(b"3.14").unwrap() - 3.14).abs() < 1e-12); } - #[test] fn f64_negative(){ assert_eq!(parse_f64(b"-1.5").unwrap(), -1.5); } - #[test] fn f64_exponent(){ assert_eq!(parse_f64(b"1e2").unwrap(), 100.0); } - - #[test] - fn f64_rejects_garbage() { - assert_eq!(parse_f64(b"hello"), 
Err(qjd_err::QJD_DECODE_FAILED)); - } -} -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```sh -cargo test decode::number -``` - -Expected: panics with `unimplemented!()`. - -- [ ] **Step 3: Implement `parse_i64` and `parse_f64`** - -Replace in `src/decode/number.rs`: - -```rust -pub(crate) fn parse_i64(bytes: &[u8]) -> Result<i64, qjd_err> { - if bytes.is_empty() { - return Err(qjd_err::QJD_DECODE_FAILED); - } - // Reject non-integer JSON numbers. - if bytes.iter().any(|&b| b == b'.' || b == b'e' || b == b'E') { - return Err(qjd_err::QJD_TYPE_MISMATCH); - } - let (neg, rest) = match bytes[0] { - b'-' => (true, &bytes[1..]), - _ => (false, bytes), - }; - if rest.is_empty() || !rest.iter().all(|c| c.is_ascii_digit()) { - return Err(qjd_err::QJD_DECODE_FAILED); - } - let mut v: i64 = 0; - for &c in rest { - let d = (c - b'0') as i64; - v = match v.checked_mul(10).and_then(|x| { - if neg { x.checked_sub(d) } else { x.checked_add(d) } - }) { - Some(n) => n, - None => return Err(qjd_err::QJD_OUT_OF_RANGE), - }; - } - Ok(v) -} - -pub(crate) fn parse_f64(bytes: &[u8]) -> Result<f64, qjd_err> { - if bytes.is_empty() { - return Err(qjd_err::QJD_DECODE_FAILED); - } - let s = std::str::from_utf8(bytes).map_err(|_| qjd_err::QJD_DECODE_FAILED)?; - s.parse::<f64>().map_err(|_| qjd_err::QJD_DECODE_FAILED) -} -``` - -- [ ] **Step 4: Run tests to verify pass** - -```sh -cargo test decode::number -``` - -Expected: all 14 tests pass. - -- [ ] **Step 5: Commit** - -```sh -git add src/decode/number.rs -git commit -m "Add lazy i64/f64 number decode with overflow checking" -``` - ---- - -## Task 9: qjd_typeof, qjd_is_null, qjd_len - -**Files:** -- Modify: `src/ffi.rs` -- Modify: `src/doc.rs` (add helper for cursor → value type) -- Create: `tests/ffi_typeof.rs` - -- [ ] **Step 1: Add helper in `src/doc.rs`** - -Append to `src/doc.rs`: - -```rust -use crate::cursor::Cursor; -use crate::error::qjd_type; - -impl<'a> Document<'a> { - /// Inspect the byte at the cursor's value start to determine type.
- pub(crate) fn type_of(&self, cur: Cursor) -> Result { - let pos = *self.indices.get(cur.idx_start as usize) - .ok_or(qjd_err::QJD_PARSE_ERROR)? as usize; - // For values that have a leading structural marker: - // '"' → string '{' → object '[' → array - // For scalars (numbers/true/false/null), idx_start points at the - // following structural char (e.g. ','/'}'/']'); we have to look at - // the byte BEFORE that position which begins the scalar. - let lead = self.buf.get(pos).copied().ok_or(qjd_err::QJD_PARSE_ERROR)?; - match lead { - b'"' => Ok(qjd_type::QJD_T_STR), - b'{' => Ok(qjd_type::QJD_T_OBJ), - b'[' => Ok(qjd_type::QJD_T_ARR), - // Otherwise this index points at a separator following a scalar. - _ => { - // Find the scalar's first non-whitespace byte in buf. - // The scalar lives between (previous index byte + 1) and pos. - let scalar_start = self.find_scalar_start(cur.idx_start)?; - match self.buf.get(scalar_start).copied() { - Some(b't') | Some(b'f') => Ok(qjd_type::QJD_T_BOOL), - Some(b'n') => Ok(qjd_type::QJD_T_NULL), - Some(b'-') | Some(b'0'..=b'9') => Ok(qjd_type::QJD_T_NUM), - _ => Err(qjd_err::QJD_PARSE_ERROR), - } - } - } - } - - pub(crate) fn find_scalar_start(&self, idx: u32) -> Result { - // Look at indices[idx-1] which marks the character immediately before - // this scalar (typically ':' or ',' or opening bracket); the scalar's - // first non-whitespace byte is at indices[idx-1] + 1 plus any whitespace. 
- if idx == 0 { return Err(qjd_err::QJD_PARSE_ERROR); } - let prev = self.indices[(idx - 1) as usize] as usize; - let mut p = prev + 1; - while p < self.buf.len() && matches!(self.buf[p], b' '|b'\t'|b'\n'|b'\r') { - p += 1; - } - Ok(p) - } - - pub(crate) fn cursor_len(&self, cur: Cursor) -> Result { - let pos = self.indices[cur.idx_start as usize] as usize; - match self.buf.get(pos).copied() { - Some(b'{') | Some(b'[') => {} - _ => return Err(qjd_err::QJD_TYPE_MISMATCH), - } - // Use the same brace-counting walk as in cursor::find_value_end, - // but counting children instead. - let mut depth = 1i32; - let mut count = 0usize; - let mut at_start = true; - let mut i = cur.idx_start + 1; - let end = cur.idx_end; - while i < end { - let b = self.buf[self.indices[i as usize] as usize]; - match b { - b'{' | b'[' => { if depth == 1 && at_start { count += 1; at_start = false; } depth += 1; } - b'}' | b']' => depth -= 1, - b',' if depth == 1 => { at_start = true; } - b'"' | b't' | b'f' | b'n' if depth == 1 && at_start => { - count += 1; at_start = false; - } - _ => { - if depth == 1 && at_start && (b == b':' ) { - // object key was already what made us count, ':' separates - } - } - } - i += 1; - } - Ok(count) - } -} -``` - -Note: the `cursor_len` implementation above is approximate; refine in this task until tests pass. The reference behavior: count direct children of the container. 
- -- [ ] **Step 2: Write `tests/ffi_typeof.rs`** - -```rust -use std::os::raw::c_int; -use quickdecode::ffi::*; - -fn parse(s: &[u8]) -> *mut qjd_doc { - let mut err: c_int = -1; - let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; - assert!(!d.is_null()); - d -} - -#[test] -fn typeof_string() { - let d = parse(b"{\"a\":\"hi\"}"); - let mut t: c_int = -1; - let p = b"a"; - let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; - assert_eq!(rc, 0); - assert_eq!(t, 3); // QJD_T_STR - unsafe { qjd_free(d) }; -} - -#[test] -fn typeof_number() { - let d = parse(b"{\"a\":42}"); - let mut t: c_int = -1; - let p = b"a"; - let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; - assert_eq!(rc, 0); - assert_eq!(t, 2); // QJD_T_NUM - unsafe { qjd_free(d) }; -} - -#[test] -fn typeof_bool() { - let d = parse(b"{\"a\":true}"); - let mut t: c_int = -1; - let p = b"a"; - let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; - assert_eq!(rc, 0); - assert_eq!(t, 1); - unsafe { qjd_free(d) }; -} - -#[test] -fn typeof_null() { - let d = parse(b"{\"a\":null}"); - let mut t: c_int = -1; - let p = b"a"; - let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; - assert_eq!(rc, 0); - assert_eq!(t, 0); - unsafe { qjd_free(d) }; -} - -#[test] -fn is_null_true() { - let d = parse(b"{\"a\":null}"); - let mut b: c_int = -1; - let p = b"a"; - let rc = unsafe { qjd_is_null(d, p.as_ptr() as *const i8, p.len(), &mut b) }; - assert_eq!(rc, 0); - assert_ne!(b, 0); - unsafe { qjd_free(d) }; -} - -#[test] -fn len_object() { - let d = parse(b"{\"a\":1,\"b\":2,\"c\":3}"); - let mut n: usize = 0; - let p = b""; - let rc = unsafe { qjd_len(d, p.as_ptr() as *const i8, p.len(), &mut n) }; - assert_eq!(rc, 0); - assert_eq!(n, 3); - unsafe { qjd_free(d) }; -} - -#[test] -fn len_array() { - let d = parse(b"[10,20,30,40]"); - let mut n: usize = 0; - let p = b""; - let rc = unsafe { qjd_len(d, p.as_ptr() as *const 
i8, p.len(), &mut n) }; - assert_eq!(rc, 0); - assert_eq!(n, 4); - unsafe { qjd_free(d) }; -} - -#[test] -fn typeof_not_found() { - let d = parse(b"{\"a\":1}"); - let mut t: c_int = -1; - let p = b"b"; - let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; - assert_eq!(rc, 2); // NOT_FOUND - unsafe { qjd_free(d) }; -} -``` - -- [ ] **Step 3: Run tests to see them fail (undeclared symbols)** - -```sh -cargo test ffi_typeof -``` - -Expected: link errors for `qjd_typeof`, `qjd_is_null`, `qjd_len`. - -- [ ] **Step 4: Add FFI exports in `src/ffi.rs`** - -Append: - -```rust -use crate::cursor::Cursor; -use crate::error::qjd_type; - -unsafe fn resolve_root_path( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, -) -> Result<(&'static Document<'static>, Cursor), qjd_err> { - if doc.is_null() || (path.is_null() && path_len != 0) { - return Err(qjd_err::QJD_INVALID_ARG); - } - let d: &Document = &(*doc).0; - let p: &[u8] = if path.is_null() { - &[] - } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = Cursor::root(d).resolve(d, p)?; - // SAFETY: caller holds doc alive; we re-erase lifetime for return. 
- Ok((std::mem::transmute(d), cur)) -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_typeof( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, type_out: *mut c_int, -) -> c_int { - if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - match resolve_root_path(doc, path, path_len) { - Ok((d, cur)) => match d.type_of(cur) { - Ok(t) => { *type_out = t as c_int; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - }, - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_is_null( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut c_int, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - match resolve_root_path(doc, path, path_len) { - Ok((d, cur)) => match d.type_of(cur) { - Ok(qjd_type::QJD_T_NULL) => { *out = 1; qjd_err::QJD_OK as c_int } - Ok(_) => { *out = 0; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - }, - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_len( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut usize, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - match resolve_root_path(doc, path, path_len) { - Ok((d, cur)) => match d.cursor_len(cur) { - Ok(n) => { *out = n; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - }, - Err(e) => e as c_int, - } -} -``` - -- [ ] **Step 5: Run tests** - -```sh -cargo test --test ffi_typeof -``` - -Expected: all 8 tests pass.
- -- [ ] **Step 6: Commit** - -```sh -git add src/ffi.rs src/doc.rs tests/ffi_typeof.rs -git commit -m "Add qjd_typeof / qjd_is_null / qjd_len FFI" -``` - ---- - -## Task 10: Typed root-path getters (str/i64/f64/bool) - -**Files:** -- Modify: `src/ffi.rs` -- Create: `tests/ffi_strings.rs` -- Create: `tests/ffi_numbers.rs` - -- [ ] **Step 1: Write failing tests in `tests/ffi_strings.rs`** - -```rust -use std::ffi::CStr; -use std::os::raw::c_int; -use quickdecode::ffi::*; - -fn parse(s: &[u8]) -> *mut qjd_doc { - let mut err: c_int = -1; - let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; - assert!(!d.is_null()); - d -} - -#[test] -fn get_str_simple() { - let d = parse(b"{\"a\":\"hello\"}"); - let mut p: *const u8 = std::ptr::null(); - let mut n: usize = 0; - let path = b"a"; - let rc = unsafe { qjd_get_str(d, path.as_ptr() as *const i8, path.len(), &mut p, &mut n) }; - assert_eq!(rc, 0); - let s = unsafe { std::slice::from_raw_parts(p, n) }; - assert_eq!(s, b"hello"); - unsafe { qjd_free(d) }; -} - -#[test] -fn get_str_with_escape() { - let d = parse(b"{\"a\":\"he\\nlo\"}"); - let mut p: *const u8 = std::ptr::null(); - let mut n: usize = 0; - let path = b"a"; - let rc = unsafe { qjd_get_str(d, path.as_ptr() as *const i8, path.len(), &mut p, &mut n) }; - assert_eq!(rc, 0); - let s = unsafe { std::slice::from_raw_parts(p, n) }; - assert_eq!(s, b"he\nlo"); - unsafe { qjd_free(d) }; -} - -#[test] -fn get_str_type_mismatch() { - let d = parse(b"{\"a\":42}"); - let mut p: *const u8 = std::ptr::null(); - let mut n: usize = 0; - let path = b"a"; - let rc = unsafe { qjd_get_str(d, path.as_ptr() as *const i8, path.len(), &mut p, &mut n) }; - assert_eq!(rc, 3); // TYPE_MISMATCH - unsafe { qjd_free(d) }; -} -``` - -- [ ] **Step 2: Write failing tests in `tests/ffi_numbers.rs`** - -```rust -use std::os::raw::c_int; -use quickdecode::ffi::*; - -fn parse(s: &[u8]) -> *mut qjd_doc { - let mut err: c_int = -1; - let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; 
- assert!(!d.is_null()); - d -} - -#[test] -fn get_i64_basic() { - let d = parse(b"{\"a\":42}"); - let mut v: i64 = 0; - let p = b"a"; - let rc = unsafe { qjd_get_i64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; - assert_eq!(rc, 0); - assert_eq!(v, 42); - unsafe { qjd_free(d) }; -} - -#[test] -fn get_i64_negative() { - let d = parse(b"{\"a\":-7}"); - let mut v: i64 = 0; - let p = b"a"; - unsafe { qjd_get_i64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; - assert_eq!(v, -7); - unsafe { qjd_free(d) }; -} - -#[test] -fn get_i64_overflow() { - let d = parse(b"{\"a\":99999999999999999999}"); - let mut v: i64 = 0; - let p = b"a"; - let rc = unsafe { qjd_get_i64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; - assert_eq!(rc, 4); // OUT_OF_RANGE - unsafe { qjd_free(d) }; -} - -#[test] -fn get_f64_basic() { - let d = parse(b"{\"a\":3.14}"); - let mut v: f64 = 0.0; - let p = b"a"; - unsafe { qjd_get_f64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; - assert!((v - 3.14).abs() < 1e-12); - unsafe { qjd_free(d) }; -} - -#[test] -fn get_bool() { - let d = parse(b"{\"a\":true,\"b\":false}"); - let mut v: c_int = -1; - let p = b"a"; - unsafe { qjd_get_bool(d, p.as_ptr() as *const i8, p.len(), &mut v) }; - assert_ne!(v, 0); - let p = b"b"; - unsafe { qjd_get_bool(d, p.as_ptr() as *const i8, p.len(), &mut v) }; - assert_eq!(v, 0); - unsafe { qjd_free(d) }; -} -``` - -- [ ] **Step 3: Run tests to verify they fail** - -```sh -cargo test --test ffi_strings --test ffi_numbers -``` - -Expected: compile errors (cannot find function) for `qjd_get_str`, `qjd_get_i64`, `qjd_get_f64`, `qjd_get_bool`.
- -- [ ] **Step 4: Implement getters in `src/ffi.rs`** - -Append: - -```rust -use crate::decode::{number, string}; - -#[no_mangle] -pub unsafe extern "C" fn qjd_get_str( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, - out_ptr: *mut *const u8, out_len: *mut usize, -) -> c_int { - if out_ptr.is_null() || out_len.is_null() { - return qjd_err::QJD_INVALID_ARG as c_int; - } - let (d, cur) = match resolve_root_path(doc, path, path_len) { - Ok(x) => x, Err(e) => return e as c_int, - }; - let pos = d.indices[cur.idx_start as usize] as usize; - if d.buf.get(pos).copied() != Some(b'"') { - return qjd_err::QJD_TYPE_MISMATCH as c_int; - } - // String ends at next index: indices[cur.idx_start + 1]. - let close = d.indices[(cur.idx_start + 1) as usize] as usize; - - // SAFETY: scratch is part of doc which the caller pins via Lua reference. - // We need mutable access to scratch; transmute to &mut. - let scratch = &mut *(&d.scratch as *const _ as *mut Vec); - match string::decode_string(d.buf, pos + 1, close, scratch) { - Ok((p, n)) => { *out_ptr = p; *out_len = n; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_get_i64( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut i64, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match resolve_root_path(doc, path, path_len) { - Ok(x) => x, Err(e) => return e as c_int, - }; - let bytes = match scalar_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, - }; - match number::parse_i64(bytes) { - Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_get_f64( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut f64, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match resolve_root_path(doc, path, path_len) { - Ok(x) => x, Err(e) => return e as c_int, - }; - let bytes = 
match scalar_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, - }; - match number::parse_f64(bytes) { - Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_get_bool( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut c_int, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match resolve_root_path(doc, path, path_len) { - Ok(x) => x, Err(e) => return e as c_int, - }; - let bytes = match scalar_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, - }; - match bytes { - b"true" => { *out = 1; qjd_err::QJD_OK as c_int } - b"false" => { *out = 0; qjd_err::QJD_OK as c_int } - _ => qjd_err::QJD_TYPE_MISMATCH as c_int, - } -} - -/// Return the byte slice for a scalar value (number, true, false, null). -unsafe fn scalar_bytes<'d>(d: &'d Document, cur: Cursor) -> Result<&'d [u8], qjd_err> { - // Scalar's start: find first non-whitespace after previous index. - let start = d.find_scalar_start(cur.idx_start)?; - // Scalar's end: position of structural char at cur.idx_start. - let end = d.indices[cur.idx_start as usize] as usize; - if end < start { return Err(qjd_err::QJD_PARSE_ERROR); } - let mut e = end; - while e > start && matches!(d.buf[e - 1], b' '|b'\t'|b'\n'|b'\r') { e -= 1; } - Ok(&d.buf[start..e]) -} -``` - -- [ ] **Step 5: Run tests to verify pass** - -```sh -cargo test ffi_strings ffi_numbers -``` - -Expected: all 8 tests pass. 
- -- [ ] **Step 6: Commit** - -```sh -git add src/ffi.rs tests/ffi_strings.rs tests/ffi_numbers.rs -git commit -m "Add qjd_get_str / get_i64 / get_f64 / get_bool FFI getters" -``` - ---- - -## Task 11: Cursor C ABI (qjd_open + qjd_cursor_*) - -**Files:** -- Modify: `src/ffi.rs` -- Create: `tests/ffi_cursor.rs` - -- [ ] **Step 1: Define `qjd_cursor` in `src/ffi.rs`** - -Append: - -```rust -#[repr(C)] -#[derive(Copy, Clone)] -pub struct qjd_cursor { - pub doc: *const qjd_doc, - pub idx_start: u32, - pub idx_end: u32, - pub cache_slot: u32, - pub _pad: u32, -} - -unsafe fn cursor_to_internal(c: *const qjd_cursor) -> Result<(&'static Document<'static>, Cursor), qjd_err> { - if c.is_null() { return Err(qjd_err::QJD_INVALID_ARG); } - let cc = &*c; - if cc.doc.is_null() { return Err(qjd_err::QJD_INVALID_ARG); } - let d: &Document = &(*(cc.doc as *mut qjd_doc)).0; - Ok((std::mem::transmute(d), Cursor { idx_start: cc.idx_start, idx_end: cc.idx_end })) -} - -fn internal_to_cursor(doc: *const qjd_doc, cur: Cursor) -> qjd_cursor { - qjd_cursor { - doc, idx_start: cur.idx_start, idx_end: cur.idx_end, - cache_slot: 0, _pad: 0, - } -} -``` - -- [ ] **Step 2: Add cursor open + getters** - -Append: - -```rust -#[no_mangle] -pub unsafe extern "C" fn qjd_open( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut qjd_cursor, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - match resolve_root_path(doc, path, path_len) { - Ok((_, cur)) => { *out = internal_to_cursor(doc as *const qjd_doc, cur); qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_open( - c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut qjd_cursor, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if path.is_null() { &[] } else { - std::slice::from_raw_parts(path 
as *const u8, path_len) - }; - match cur.resolve(d, p) { - Ok(child) => { *out = internal_to_cursor((*c).doc, child); qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_field( - c: *const qjd_cursor, key: *const c_char, key_len: usize, out: *mut qjd_cursor, -) -> c_int { - if out.is_null() || (key.is_null() && key_len != 0) { - return qjd_err::QJD_INVALID_ARG as c_int; - } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let k = if key.is_null() { &[][..] } else { std::slice::from_raw_parts(key as *const u8, key_len) }; - // Use PathSeg::Key directly via walk_children, but our public surface is - // resolve. Emulate single-segment key via path that has no separators. - // For keys containing '.' or '[', this is the intended escape hatch. - let child = match crate::cursor::resolve_single_key(d, cur, k) { - Ok(x) => x, Err(e) => return e as c_int, - }; - *out = internal_to_cursor((*c).doc, child); - qjd_err::QJD_OK as c_int -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_index( - c: *const qjd_cursor, i: usize, out: *mut qjd_cursor, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - if i > u32::MAX as usize { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let child = match crate::cursor::resolve_single_idx(d, cur, i as u32) { - Ok(x) => x, Err(e) => return e as c_int, - }; - *out = internal_to_cursor((*c).doc, child); - qjd_err::QJD_OK as c_int -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_get_str( - c: *const qjd_cursor, path: *const c_char, path_len: usize, - out_ptr: *mut *const u8, out_len: *mut usize, -) -> c_int { - if out_ptr.is_null() || out_len.is_null() { - return qjd_err::QJD_INVALID_ARG as c_int; - } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if 
path.is_null() { &[] } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let pos = d.indices[cur.idx_start as usize] as usize; - if d.buf.get(pos).copied() != Some(b'"') { - return qjd_err::QJD_TYPE_MISMATCH as c_int; - } - let close = d.indices[(cur.idx_start + 1) as usize] as usize; - let scratch = &mut *(&d.scratch as *const _ as *mut Vec); - match string::decode_string(d.buf, pos + 1, close, scratch) { - Ok((p, n)) => { *out_ptr = p; *out_len = n; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -// Mirror qjd_cursor_get_i64 / get_f64 / get_bool / typeof / len following the -// same pattern: resolve, then dispatch on value byte. - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_get_i64( - c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut i64, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if path.is_null() { &[] } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; - match number::parse_i64(bytes) { - Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_get_f64( - c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut f64, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if path.is_null() { &[] } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match scalar_bytes(d, 
cur) { Ok(b) => b, Err(e) => return e as c_int }; - match number::parse_f64(bytes) { - Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_get_bool( - c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut c_int, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if path.is_null() { &[] } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; - match bytes { - b"true" => { *out = 1; qjd_err::QJD_OK as c_int } - b"false" => { *out = 0; qjd_err::QJD_OK as c_int } - _ => qjd_err::QJD_TYPE_MISMATCH as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_typeof( - c: *const qjd_cursor, path: *const c_char, path_len: usize, type_out: *mut c_int, -) -> c_int { - if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if path.is_null() { &[] } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - match d.type_of(cur) { - Ok(t) => { *type_out = t as c_int; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} - -#[no_mangle] -pub unsafe extern "C" fn qjd_cursor_len( - c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut usize, -) -> c_int { - if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; - let p: &[u8] = if path.is_null() { &[] } else { - std::slice::from_raw_parts(path as *const u8, path_len) - }; - let cur = match 
cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - match d.cursor_len(cur) { - Ok(n) => { *out = n; qjd_err::QJD_OK as c_int } - Err(e) => e as c_int, - } -} -``` - -- [ ] **Step 3: Add `resolve_single_key` / `resolve_single_idx` helpers in `src/cursor.rs`** - -Append to `src/cursor.rs`: - -```rust -pub(crate) fn resolve_single_key(doc: &Document, cur: Cursor, key: &[u8]) -> Result { - step(doc, cur, &PathSeg::Key(key)) -} - -pub(crate) fn resolve_single_idx(doc: &Document, cur: Cursor, idx: u32) -> Result { - step(doc, cur, &PathSeg::Idx(idx)) -} -``` - -- [ ] **Step 4: Write `tests/ffi_cursor.rs`** - -```rust -use std::os::raw::c_int; -use quickdecode::ffi::*; - -fn parse(s: &[u8]) -> *mut qjd_doc { - let mut err: c_int = -1; - let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; - assert!(!d.is_null()); - d -} - -#[test] -fn open_object_then_get_field() { - let d = parse(b"{\"body\":{\"model\":\"gpt\",\"temperature\":0.5}}"); - let mut c = std::mem::MaybeUninit::::uninit(); - let p = b"body"; - let rc = unsafe { qjd_open(d, p.as_ptr() as *const i8, p.len(), c.as_mut_ptr()) }; - assert_eq!(rc, 0); - let c = unsafe { c.assume_init() }; - - let mut pp: *const u8 = std::ptr::null(); - let mut nn: usize = 0; - let k = b"model"; - let rc = unsafe { qjd_cursor_get_str(&c, k.as_ptr() as *const i8, k.len(), &mut pp, &mut nn) }; - assert_eq!(rc, 0); - let s = unsafe { std::slice::from_raw_parts(pp, nn) }; - assert_eq!(s, b"gpt"); - - let mut f: f64 = 0.0; - let k = b"temperature"; - let rc = unsafe { qjd_cursor_get_f64(&c, k.as_ptr() as *const i8, k.len(), &mut f) }; - assert_eq!(rc, 0); - assert!((f - 0.5).abs() < 1e-12); - - unsafe { qjd_free(d) }; -} - -#[test] -fn cursor_index_array() { - let d = parse(b"[\"a\",\"b\",\"c\"]"); - let mut c = std::mem::MaybeUninit::::uninit(); - let p = b""; - unsafe { qjd_open(d, p.as_ptr() as *const i8, 0, c.as_mut_ptr()) }; - let c = unsafe { c.assume_init() }; - - let mut sub = 
std::mem::MaybeUninit::::uninit(); - let rc = unsafe { qjd_cursor_index(&c, 1, sub.as_mut_ptr()) }; - assert_eq!(rc, 0); - let sub = unsafe { sub.assume_init() }; - - let mut pp: *const u8 = std::ptr::null(); - let mut nn: usize = 0; - let empty = b""; - let rc = unsafe { qjd_cursor_get_str(&sub, empty.as_ptr() as *const i8, 0, &mut pp, &mut nn) }; - assert_eq!(rc, 0); - assert_eq!(unsafe { std::slice::from_raw_parts(pp, nn) }, b"b"); - - unsafe { qjd_free(d) }; -} - -#[test] -fn cursor_field_with_dotted_key() { - let d = parse(b"{\"a.b\":42}"); - let mut c = std::mem::MaybeUninit::::uninit(); - let p = b""; - unsafe { qjd_open(d, p.as_ptr() as *const i8, 0, c.as_mut_ptr()) }; - let c = unsafe { c.assume_init() }; - - let mut sub = std::mem::MaybeUninit::::uninit(); - let key = b"a.b"; - let rc = unsafe { qjd_cursor_field(&c, key.as_ptr() as *const i8, key.len(), sub.as_mut_ptr()) }; - assert_eq!(rc, 0); - - let sub = unsafe { sub.assume_init() }; - let mut v: i64 = 0; - let empty = b""; - let rc = unsafe { qjd_cursor_get_i64(&sub, empty.as_ptr() as *const i8, 0, &mut v) }; - assert_eq!(rc, 0); - assert_eq!(v, 42); - - unsafe { qjd_free(d) }; -} -``` - -- [ ] **Step 5: Run tests to verify pass** - -```sh -cargo test ffi_cursor -``` - -Expected: all 3 tests pass. - -- [ ] **Step 6: Commit** - -```sh -git add src/ffi.rs src/cursor.rs tests/ffi_cursor.rs -git commit -m "Add qjd_cursor type and qjd_open / qjd_cursor_* FFI" -``` - ---- - -## Task 12: panic::catch_unwind boundary - -**Files:** -- Modify: `src/ffi.rs` -- Create: `tests/ffi_panic_safety.rs` - -- [ ] **Step 1: Create a wrapper macro** - -In `src/ffi.rs`, add at the top: - -```rust -macro_rules! 
ffi_catch { - ($body:block) => {{ - let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| $body)); - match r { - Ok(code) => code, - Err(_) => qjd_err::QJD_OOM as c_int, - } - }}; -} -``` - -- [ ] **Step 2: Wrap every `pub unsafe extern "C" fn` returning `c_int`** - -For each of `qjd_typeof`, `qjd_is_null`, `qjd_len`, `qjd_get_str`, `qjd_get_i64`, `qjd_get_f64`, `qjd_get_bool`, `qjd_open`, `qjd_cursor_*`, wrap their bodies: - -```rust -#[no_mangle] -pub unsafe extern "C" fn qjd_typeof( - doc: *mut qjd_doc, path: *const c_char, path_len: usize, type_out: *mut c_int, -) -> c_int { - ffi_catch!({ - if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } - // ... existing body ... - }) -} -``` - -Apply the same transformation to every FFI function returning `c_int`. Functions returning `*mut qjd_doc` or `*const c_char` are not wrapped (they cannot return error codes the same way; for `qjd_parse` we keep the existing error-out parameter and just wrap separately): - -```rust -#[no_mangle] -pub unsafe extern "C" fn qjd_parse( - buf: *const u8, len: usize, err_out: *mut c_int, -) -> *mut qjd_doc { - let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - // ... existing body ... 
- })); - match r { - Ok(p) => p, - Err(_) => { - if !err_out.is_null() { *err_out = qjd_err::QJD_OOM as c_int; } - std::ptr::null_mut() - } - } -} -``` - -- [ ] **Step 3: Add a Rust-only panic injection for testing** - -Add to `src/ffi.rs` (only when `cfg(test)` or behind a feature): - -```rust -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn qjd_test_panic() -> c_int { - ffi_catch!({ - panic!("forced panic for test"); - }) -} -``` - -- [ ] **Step 4: Write `tests/ffi_panic_safety.rs`** - -```rust -#[cfg(test)] -use quickdecode::ffi::qjd_test_panic; - -#[test] -fn panic_does_not_unwind_through_ffi() { - let rc = unsafe { qjd_test_panic() }; - assert_eq!(rc, 8); // QJD_OOM -} -``` - -- [ ] **Step 5: Run tests** - -```sh -cargo test -``` - -Expected: all previously passing tests + panic-safety test pass. No tests panic-unwind through the FFI boundary. - -- [ ] **Step 6: Commit** - -```sh -git add src/ffi.rs tests/ffi_panic_safety.rs -git commit -m "Wrap FFI entry points in catch_unwind to prevent UB on panic" -``` - ---- - -## Task 13: Avx2Scanner — structural mask only (no string handling) - -This task establishes the AVX2 scaffolding and produces a correct scanner for input that contains **no strings**. Task 14 adds quote-and-escape handling. Task 15 adds the PCLMUL inside-string mask. Task 16 wires up multi-chunk state and registers in the dispatch. - -The implementation follows the simdjson approach. Reference: `simdjson/src/generic/stage1/json_structural_indexer.h` and `simdjson/src/generic/stage1/buf_block_reader.h` for the chunking and bitmask emission patterns. 
- -**Files:** -- Create: `src/scan/avx2.rs` -- Modify: `src/scan/mod.rs` - -- [ ] **Step 1: Create skeleton in `src/scan/avx2.rs`** - -```rust -#![cfg(target_arch = "x86_64")] - -use core::arch::x86_64::*; -use super::Scanner; - -pub(crate) struct Avx2Scanner; - -impl Scanner for Avx2Scanner { - fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { - if buf.is_empty() { return Ok(()); } - out.reserve(buf.len() / 6); - unsafe { scan_avx2_impl(buf, out) } - } -} - -#[target_feature(enable = "avx2,pclmulqdq")] -unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { - // Task 13: structural mask only; assumes no strings/escapes. - let mut i: usize = 0; - while i + 64 <= buf.len() { - let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); - let chunk_hi = _mm256_loadu_si256(buf.as_ptr().add(i + 32) as *const __m256i); - - let struct_mask = structural_mask_chunk(chunk_lo, chunk_hi); - emit_bits(struct_mask, i as u32, out); - - i += 64; - } - - // Tail: scalar fallback for the remainder. - super::ScalarScanner::scan(&buf[i..], &mut Vec::new()).ok(); - // Append tail offsets (offset by i). - let mut tail = Vec::new(); - super::ScalarScanner::scan(&buf[i..], &mut tail).map_err(|p| p + i)?; - out.extend(tail.into_iter().map(|p| p + i as u32)); - Ok(()) -} - -#[inline(always)] -unsafe fn structural_mask_chunk(lo: __m256i, hi: __m256i) -> u64 { - // For each byte, set 1 if byte is one of: { } [ ] : , " - // We use byte-wise equality compares OR'd together. 
- let chars = [b'{', b'}', b'[', b']', b':', b',', b'"']; - let mut mask_lo: i32 = 0; - let mut mask_hi: i32 = 0; - for c in chars { - let v = _mm256_set1_epi8(c as i8); - let eq_lo = _mm256_cmpeq_epi8(lo, v); - let eq_hi = _mm256_cmpeq_epi8(hi, v); - mask_lo |= _mm256_movemask_epi8(eq_lo); - mask_hi |= _mm256_movemask_epi8(eq_hi); - } - (mask_lo as u32 as u64) | ((mask_hi as u32 as u64) << 32) -} - -#[inline(always)] -fn emit_bits(mut mask: u64, base: u32, out: &mut Vec) { - while mask != 0 { - let tz = mask.trailing_zeros(); - out.push(base + tz); - mask &= mask - 1; // clear lowest bit - } -} -``` - -- [ ] **Step 2: Add a unit test in `src/scan/avx2.rs`** - -```rust -#[cfg(test)] -mod tests { - use super::*; - use crate::scan::{Scanner, ScalarScanner}; - - fn parity(input: &[u8]) { - let mut a = Vec::new(); - let mut b = Vec::new(); - ScalarScanner::scan(input, &mut a).unwrap(); - Avx2Scanner::scan(input, &mut b).unwrap(); - assert_eq!(a, b, "mismatch on input {:?}", std::str::from_utf8(input)); - } - - #[test] - fn no_strings_matches_scalar() { - // Pure structural inputs (no strings) — Task 13 only handles these correctly. - parity(b"{}"); - parity(b"[]"); - parity(b"[{}]"); - parity(b"[[[]]]"); - parity(b"[1,2,3,4,5,6,7,8,9,0]"); - parity(b"{1:2,3:4,5:6,7:8,9:0,1:2}"); // illegal JSON keys, but valid scan - } -} -``` - -- [ ] **Step 3: Run tests** - -```sh -cargo test scan::avx2 -``` - -Expected: tests pass on AVX2-capable hosts. Will fail to compile on non-x86_64 (gated). - -- [ ] **Step 4: Wire module into `src/scan/mod.rs`** - -```rust -#[cfg(target_arch = "x86_64")] -pub(crate) mod avx2; -``` - -- [ ] **Step 5: Commit** - -```sh -git add src/scan/avx2.rs src/scan/mod.rs -git commit -m "Add AVX2 scanner skeleton with structural mask kernel" -``` - ---- - -## Task 14: Avx2Scanner — quote and escape masks - -This task adds correct handling of strings inside the AVX2 kernel via the simdjson backslash-escape algorithm. 
After this task, the AVX2 scanner produces correct results on any input that fits in a single 64-byte chunk **plus** correctly handles within-chunk strings; multi-chunk state carry is added in Task 16. - -**Files:** -- Modify: `src/scan/avx2.rs` - -- [ ] **Step 1: Add escape-aware quote mask helper** - -Append to `src/scan/avx2.rs`: - -```rust -/// Compute the mask of escaped bytes inside a 64-byte chunk: for any backslash -/// followed by a byte, that byte is "escaped". Consecutive backslashes alternate. -/// Algorithm: identify odd-length backslash runs; the byte immediately after -/// each odd run is escaped. -#[inline(always)] -unsafe fn find_escape_mask(backslash_mask: u64) -> u64 { - // From simdjson: identify starts of backslash runs. - let starts = backslash_mask & !(backslash_mask << 1); - // For each start, determine parity by xor-running. Encode start position - // parity via odd/even bit patterns of the runs. - // For chunk-local computation (no cross-chunk carry yet): - let even_bits: u64 = 0x5555_5555_5555_5555; - let odd_bits: u64 = 0xAAAA_AAAA_AAAA_AAAA; - let even_starts = starts & even_bits; - let odd_starts = starts & odd_bits; - // Use carry arithmetic to find run ends. - let even_carries = backslash_mask.wrapping_add(even_starts); - let odd_carries = backslash_mask.wrapping_add(odd_starts); - let even_carry_ends = even_carries & !backslash_mask; - let odd_carry_ends = odd_carries & !backslash_mask; - // odd-length runs starting at even position end at odd parity; - // odd-length runs starting at odd position end at even parity. - let odd_run_ends = (even_carry_ends & odd_bits) | (odd_carry_ends & even_bits); - // Each odd_run_end bit is the position right after an odd-length run; the - // byte at that position is escaped. - odd_run_ends -} -``` - -This computation is the standard simdjson kernel; see `find_escape_mask` in simdjson's source for the canonical derivation. For chunk-local correctness it's enough; cross-chunk carry comes in Task 16. 
- -- [ ] **Step 2: Update `scan_avx2_impl` to use quote mask** - -Replace `scan_avx2_impl`: - -```rust -#[target_feature(enable = "avx2,pclmulqdq")] -unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { - let mut i: usize = 0; - let mut in_string: u64 = 0; // 1 if chunk-start is inside a string - - while i + 64 <= buf.len() { - let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); - let chunk_hi = _mm256_loadu_si256(buf.as_ptr().add(i + 32) as *const __m256i); - - let backslash = byte_mask(chunk_lo, chunk_hi, b'\\'); - let quote = byte_mask(chunk_lo, chunk_hi, b'"'); - let escaped = find_escape_mask(backslash); - let real_quote = quote & !escaped; - - // inside_string_mask is computed in Task 15 via PCLMUL. For Task 14 - // we just emit structural chars excluding bytes inside strings using - // a scalar in-string flag carried across this chunk only. - // This is a placeholder bridge — Task 15 replaces it with PCLMUL. - let mut inside: u64 = if in_string != 0 { !0u64 } else { 0 }; - let mut bit = 1u64; - let mut in_str_cur = in_string != 0; - for _ in 0..64 { - if (real_quote & bit) != 0 { - in_str_cur = !in_str_cur; - inside ^= bit; - } - if in_str_cur { inside |= bit; } else { inside &= !bit; } - bit <<= 1; - if bit == 0 { break; } - } - in_string = if in_str_cur { 1 } else { 0 }; - - let struct_mask = structural_mask_chunk(chunk_lo, chunk_hi); - let final_mask = (struct_mask & !inside) | real_quote; - emit_bits(final_mask, i as u32, out); - - i += 64; - } - - // Tail - let mut tail = Vec::new(); - super::ScalarScanner::scan(&buf[i..], &mut tail).map_err(|p| p + i)?; - out.extend(tail.into_iter().map(|p| p + i as u32)); - Ok(()) -} - -#[inline(always)] -unsafe fn byte_mask(lo: __m256i, hi: __m256i, c: u8) -> u64 { - let v = _mm256_set1_epi8(c as i8); - let eq_lo = _mm256_cmpeq_epi8(lo, v); - let eq_hi = _mm256_cmpeq_epi8(hi, v); - let mlo = _mm256_movemask_epi8(eq_lo) as u32 as u64; - let mhi = _mm256_movemask_epi8(eq_hi) 
as u32 as u64; - mlo | (mhi << 32) -} -``` - -- [ ] **Step 3: Extend the parity tests** - -In the test module: - -```rust -#[test] -fn within_chunk_strings_match_scalar() { - parity(b"{\"a\":\"hello\"}"); - parity(b"{\"a\":\"he\\nlo\"}"); - parity(b"{\"a\":\"he\\\"lo\"}"); - parity(b"[\"x\",\"y\",\"z\"]"); -} -``` - -- [ ] **Step 4: Run tests** - -```sh -cargo test scan::avx2 -``` - -Expected: pass on AVX2 hosts. Inputs spanning more than 64 bytes may still mismatch — that's covered in Task 16. - -- [ ] **Step 5: Commit** - -```sh -git add src/scan/avx2.rs -git commit -m "AVX2 scanner: chunk-local quote and escape masks" -``` - ---- - -## Task 15: Avx2Scanner — PCLMUL inside-string mask - -Replace the scalar in-string flag bridge with the PCLMUL prefix-XOR algorithm that simdjson uses. This makes the kernel branchless per chunk. - -**Files:** -- Modify: `src/scan/avx2.rs` - -- [ ] **Step 1: Add the PCLMUL helper** - -In `src/scan/avx2.rs`: - -```rust -/// Given the mask of true (non-escaped) quotes and the prior in-string state, -/// return (inside_string_mask, new_in_string). -/// Algorithm: prefix XOR via carry-less multiplication. -#[inline(always)] -#[target_feature(enable = "avx2,pclmulqdq")] -unsafe fn inside_string_mask(real_quote: u64, prev_in_string: u64) -> (u64, u64) { - // Prefix XOR: for each bit, the result bit equals the XOR of all bits up - // to and including this position. Carry-less multiply by all-ones produces - // exactly this prefix XOR. - let ones = _mm_set1_epi64x(-1i64); - let q = _mm_set_epi64x(0, real_quote as i64); - let prefix = _mm_clmulepi64_si128::<0>(q, ones); - let mut mask = _mm_cvtsi128_si64(prefix) as u64; - // XOR with prior state so that a chunk starting inside a string flips polarity. 
- if prev_in_string != 0 { mask = !mask; } - let new_state = mask >> 63; - (mask, new_state & 1) -} -``` - -- [ ] **Step 2: Replace the scalar bridge in `scan_avx2_impl`** - -Replace the `// inside_string_mask is computed in Task 15` block with: - -```rust - let (inside, new_in_string) = inside_string_mask(real_quote, in_string); - in_string = new_in_string; -``` - -(Remove the bit-by-bit scalar loop.) - -- [ ] **Step 3: Add tests with multi-quote strings** - -```rust -#[test] -fn pclmul_inside_string_correct() { - parity(b"{\"a\":\"foo\",\"b\":\"bar\"}"); - parity(b"[\"a\",\"b\",\"c\",\"d\",\"e\"]"); - // Adversarial: nested escapes - parity(b"{\"a\":\"\\\\\\\\\\\"\"}"); -} -``` - -- [ ] **Step 4: Run tests** - -```sh -cargo test scan::avx2 -``` - -Expected: pass on AVX2+PCLMUL hosts. - -- [ ] **Step 5: Commit** - -```sh -git add src/scan/avx2.rs -git commit -m "AVX2 scanner: PCLMUL prefix-XOR for inside-string mask" -``` - ---- - -## Task 16: Multi-chunk state, runtime dispatch, proptest cross-check - -**Files:** -- Modify: `src/scan/avx2.rs` -- Modify: `src/scan/mod.rs` -- Modify: `src/doc.rs` -- Create: `tests/scanner_crosscheck.rs` - -- [ ] **Step 1: Add cross-chunk carry to backslash escape** - -The chunk-local `find_escape_mask` is incorrect at chunk boundaries when a backslash run crosses the boundary. 
Fix: - -```rust -#[inline(always)] -unsafe fn find_escape_mask_with_carry( - backslash_mask: u64, prev_carry: &mut u64, -) -> u64 { - let bs = backslash_mask; - let starts = bs & !(bs << 1 | *prev_carry); - let even_bits: u64 = 0x5555_5555_5555_5555; - let odd_bits: u64 = 0xAAAA_AAAA_AAAA_AAAA; - let even_starts = starts & even_bits; - let odd_starts = starts & odd_bits; - let even_carries = bs.wrapping_add(even_starts); - let odd_carries = bs.wrapping_add(odd_starts).wrapping_add(*prev_carry); - let even_carry_ends = even_carries & !bs; - let odd_carry_ends = odd_carries & !bs; - let odd_run_ends = (even_carry_ends & odd_bits) | (odd_carry_ends & even_bits); - // Update carry for next chunk: 1 if the chunk ended mid-run with odd parity. - *prev_carry = (bs >> 63) & 1; - odd_run_ends -} -``` - -Update `scan_avx2_impl` to keep `let mut bs_carry: u64 = 0;` across iterations and call `find_escape_mask_with_carry(backslash, &mut bs_carry)` instead of `find_escape_mask`. - -- [ ] **Step 2: Set up runtime dispatch in `src/scan/mod.rs`** - -```rust -use once_cell::sync::OnceCell; - -static SCAN_FN: OnceCell) -> Result<(), usize>> - = OnceCell::new(); - -pub(crate) fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { - let f = *SCAN_FN.get_or_init(|| { - #[cfg(target_arch = "x86_64")] - { - if std::is_x86_feature_detected!("avx2") - && std::is_x86_feature_detected!("pclmulqdq") - { - return avx2::Avx2Scanner::scan; - } - } - ScalarScanner::scan - }); - f(buf, out) -} -``` - -- [ ] **Step 3: Wire `Document::parse` to call `scan::scan` instead of `ScalarScanner::scan`** - -In `src/doc.rs`: - -```rust -pub fn parse(buf: &'a [u8]) -> Result { - let mut indices = Vec::new(); - crate::scan::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; - indices.push(u32::MAX); - Ok(Self { - buf, - indices, - scratch: Vec::new(), - skip: std::cell::RefCell::new(crate::skip_cache::SkipCache::new()), - }) -} -``` - -- [ ] **Step 4: Write `tests/scanner_crosscheck.rs`** - 
-```rust -use proptest::prelude::*; -use quickdecode::error::qjd_err; - -// We need access to the internal scanners. Expose them via a test-only path -// through pub(crate). Easiest: add a pub-test export. -// -// In src/lib.rs add: -// #[doc(hidden)] pub mod __test_api { -// pub use crate::scan::{ScalarScanner, Scanner}; -// #[cfg(target_arch="x86_64")] pub use crate::scan::avx2::Avx2Scanner; -// } -use quickdecode::__test_api::{Scanner, ScalarScanner}; -#[cfg(target_arch = "x86_64")] -use quickdecode::__test_api::Avx2Scanner; - -#[cfg(target_arch = "x86_64")] -proptest! { - #![proptest_config(ProptestConfig::with_cases(2000))] - - #[test] - fn scalar_avx2_bit_identical(input in valid_jsonish()) { - if !std::is_x86_feature_detected!("avx2") - || !std::is_x86_feature_detected!("pclmulqdq") { - return Ok(()); - } - let mut a = Vec::new(); - let mut b = Vec::new(); - let ra = ScalarScanner::scan(input.as_bytes(), &mut a); - let rb = Avx2Scanner::scan(input.as_bytes(), &mut b); - prop_assert_eq!(ra.is_err(), rb.is_err(), - "scalar/avx2 disagree on validity for {:?}", input); - if ra.is_ok() { - prop_assert_eq!(a, b, "mismatch on {:?}", input); - } - } -} - -/// Generate strings that exercise structural and quote/escape edge cases. -fn valid_jsonish() -> impl Strategy { - // Mix of structural bytes, escape sequences, multi-byte UTF-8. 
- proptest::collection::vec( - prop_oneof![ - Just("{".to_string()), - Just("}".to_string()), - Just("[".to_string()), - Just("]".to_string()), - Just(",".to_string()), - Just(":".to_string()), - Just("\"a\"".to_string()), - Just("\"\\\\\"".to_string()), - Just("\"\\\"\"".to_string()), - Just("\"\\u00e9\"".to_string()), - Just("\"中文\"".to_string()), - Just("123".to_string()), - ], - 0..200, - ).prop_map(|v| v.concat()) -} -``` - -Also add to `src/lib.rs`: - -```rust -#[doc(hidden)] -pub mod __test_api { - pub use crate::scan::{ScalarScanner, Scanner}; - #[cfg(target_arch = "x86_64")] - pub use crate::scan::avx2::Avx2Scanner; -} -``` - -- [ ] **Step 5: Run cross-check** - -```sh -cargo test scanner_crosscheck --release -``` - -Expected: 2000 proptest cases pass with no scalar/AVX2 divergence. - -- [ ] **Step 6: Commit** - -```sh -git add src/scan/avx2.rs src/scan/mod.rs src/doc.rs src/lib.rs tests/scanner_crosscheck.rs -git commit -m "AVX2 scanner cross-chunk carry, runtime dispatch, proptest cross-check" -``` - ---- - -## Task 17: Public C header (finalize) + LuaJIT wrapper - -**Files:** -- Modify: `include/lua_quick_decode.h` -- Create: `lua/quickdecode.lua` -- Modify: `README.md` - -- [ ] **Step 1: Finalize `include/lua_quick_decode.h`** - -Replace placeholder with full prototypes matching the FFI surface: - -```c -#ifndef LUA_QUICK_DECODE_H -#define LUA_QUICK_DECODE_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - QJD_OK = 0, - QJD_PARSE_ERROR = 1, - QJD_NOT_FOUND = 2, - QJD_TYPE_MISMATCH = 3, - QJD_OUT_OF_RANGE = 4, - QJD_DECODE_FAILED = 5, - QJD_INVALID_PATH = 6, - QJD_INVALID_ARG = 7, - QJD_OOM = 8 -} qjd_err; - -typedef enum { - QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, - QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5 -} qjd_type; - -typedef struct qjd_doc qjd_doc; - -typedef struct { - const qjd_doc* doc; - uint32_t idx_start; - uint32_t idx_end; - uint32_t cache_slot; - uint32_t _pad; -} qjd_cursor; - -const char* 
qjd_strerror(int code); - -qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); -void qjd_free (qjd_doc* doc); - -int qjd_get_str (qjd_doc*, const char* path, size_t path_len, - const uint8_t** out_ptr, size_t* out_len); -int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); -int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); -int qjd_get_bool (qjd_doc*, const char* path, size_t path_len, int* out); -int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); -int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* type_out); -int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); - -int qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); -int qjd_cursor_open (const qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); -int qjd_cursor_field (const qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); -int qjd_cursor_index (const qjd_cursor*, size_t i, qjd_cursor* out); - -int qjd_cursor_get_str (const qjd_cursor*, const char* path, size_t path_len, - const uint8_t** out_ptr, size_t* out_len); -int qjd_cursor_get_i64 (const qjd_cursor*, const char* path, size_t path_len, int64_t* out); -int qjd_cursor_get_f64 (const qjd_cursor*, const char* path, size_t path_len, double* out); -int qjd_cursor_get_bool (const qjd_cursor*, const char* path, size_t path_len, int* out); -int qjd_cursor_typeof (const qjd_cursor*, const char* path, size_t path_len, int* out); -int qjd_cursor_len (const qjd_cursor*, const char* path, size_t path_len, size_t* out); - -#ifdef __cplusplus -} -#endif - -#endif -``` - -- [ ] **Step 2: Create `lua/quickdecode.lua`** - -```lua -local ffi = require("ffi") - -ffi.cdef[[ -typedef enum { - QJD_OK = 0, QJD_PARSE_ERROR = 1, QJD_NOT_FOUND = 2, - QJD_TYPE_MISMATCH = 3, QJD_OUT_OF_RANGE = 4, QJD_DECODE_FAILED = 5, - QJD_INVALID_PATH = 6, QJD_INVALID_ARG = 7, QJD_OOM = 8 -} qjd_err; - -typedef struct qjd_doc 
qjd_doc; -typedef struct { - const qjd_doc* doc; - uint32_t idx_start, idx_end, cache_slot, _pad; -} qjd_cursor; - -const char* qjd_strerror(int code); -qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); -void qjd_free(qjd_doc* doc); - -int qjd_get_str (qjd_doc*, const char* path, size_t path_len, const uint8_t** p, size_t* n); -int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); -int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); -int qjd_get_bool(qjd_doc*, const char* path, size_t path_len, int* out); -int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); -int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* out); -int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); - -int qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); -int qjd_cursor_open (const qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); -int qjd_cursor_field(const qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); -int qjd_cursor_index(const qjd_cursor*, size_t i, qjd_cursor* out); - -int qjd_cursor_get_str (const qjd_cursor*, const char*, size_t, const uint8_t**, size_t*); -int qjd_cursor_get_i64 (const qjd_cursor*, const char*, size_t, int64_t*); -int qjd_cursor_get_f64 (const qjd_cursor*, const char*, size_t, double*); -int qjd_cursor_get_bool(const qjd_cursor*, const char*, size_t, int*); -int qjd_cursor_typeof (const qjd_cursor*, const char*, size_t, int*); -int qjd_cursor_len (const qjd_cursor*, const char*, size_t, size_t*); -]] - -local C = ffi.load("quickdecode") - -local err_box = ffi.new("int[1]") -local i64_box = ffi.new("int64_t[1]") -local f64_box = ffi.new("double[1]") -local bool_box = ffi.new("int[1]") -local size_box = ffi.new("size_t[1]") -local type_box = ffi.new("int[1]") -local strp_box = ffi.new("const uint8_t*[1]") -local cur_box = ffi.new("qjd_cursor[1]") - -local NOT_FOUND = 2 - -local _M = { - T_NULL = 0, 
T_BOOL = 1, T_NUM = 2, - T_STR = 3, T_ARR = 4, T_OBJ = 5, -} - -local Doc = {}; Doc.__index = Doc -local Cursor = {}; Cursor.__index = Cursor - -local function check_err(rc) - if rc == 0 then return true end - if rc == NOT_FOUND then return false end - error("quickdecode: " .. ffi.string(C.qjd_strerror(rc))) -end - -function _M.parse(json_str) - local ptr = C.qjd_parse(json_str, #json_str, err_box) - if ptr == nil then - error("quickdecode: " .. ffi.string(C.qjd_strerror(err_box[0]))) - end - return setmetatable({ - _ptr = ffi.gc(ptr, C.qjd_free), - _hold = json_str, - }, Doc) -end - -function Doc:get_str(path) - local rc = C.qjd_get_str(self._ptr, path, #path, strp_box, size_box) - if not check_err(rc) then return nil end - return ffi.string(strp_box[0], size_box[0]) -end - -function Doc:get_i64(path) - local rc = C.qjd_get_i64(self._ptr, path, #path, i64_box) - if not check_err(rc) then return nil end - return tonumber(i64_box[0]) -end - -function Doc:get_f64(path) - local rc = C.qjd_get_f64(self._ptr, path, #path, f64_box) - if not check_err(rc) then return nil end - return f64_box[0] -end - -function Doc:get_bool(path) - local rc = C.qjd_get_bool(self._ptr, path, #path, bool_box) - if not check_err(rc) then return nil end - return bool_box[0] ~= 0 -end - -function Doc:is_null(path) - local rc = C.qjd_is_null(self._ptr, path, #path, bool_box) - if not check_err(rc) then return nil end - return bool_box[0] ~= 0 -end - -function Doc:typeof(path) - local rc = C.qjd_typeof(self._ptr, path, #path, type_box) - if not check_err(rc) then return nil end - return type_box[0] -end - -function Doc:len(path) - local rc = C.qjd_len(self._ptr, path, #path, size_box) - if not check_err(rc) then return nil end - return tonumber(size_box[0]) -end - -function Doc:open(path) - local rc = C.qjd_open(self._ptr, path, #path, cur_box) - if not check_err(rc) then return nil end - return setmetatable({ _cur = cur_box[0], _doc = self }, Cursor) -end - -function Cursor:get_str(path) - 
local rc = C.qjd_cursor_get_str(self._cur, path or "", path and #path or 0, strp_box, size_box) - if not check_err(rc) then return nil end - return ffi.string(strp_box[0], size_box[0]) -end - -function Cursor:get_i64(path) - local rc = C.qjd_cursor_get_i64(self._cur, path or "", path and #path or 0, i64_box) - if not check_err(rc) then return nil end - return tonumber(i64_box[0]) -end - -function Cursor:get_f64(path) - local rc = C.qjd_cursor_get_f64(self._cur, path or "", path and #path or 0, f64_box) - if not check_err(rc) then return nil end - return f64_box[0] -end - -function Cursor:get_bool(path) - local rc = C.qjd_cursor_get_bool(self._cur, path or "", path and #path or 0, bool_box) - if not check_err(rc) then return nil end - return bool_box[0] ~= 0 -end - -function Cursor:typeof(path) - local rc = C.qjd_cursor_typeof(self._cur, path or "", path and #path or 0, type_box) - if not check_err(rc) then return nil end - return type_box[0] -end - -function Cursor:len(path) - local rc = C.qjd_cursor_len(self._cur, path or "", path and #path or 0, size_box) - if not check_err(rc) then return nil end - return tonumber(size_box[0]) -end - -function Cursor:open(path) - local out = ffi.new("qjd_cursor[1]") - local rc = C.qjd_cursor_open(self._cur, path, #path, out) - if not check_err(rc) then return nil end - return setmetatable({ _cur = out[0], _doc = self._doc }, Cursor) -end - -function Cursor:field(key) - local out = ffi.new("qjd_cursor[1]") - local rc = C.qjd_cursor_field(self._cur, key, #key, out) - if not check_err(rc) then return nil end - return setmetatable({ _cur = out[0], _doc = self._doc }, Cursor) -end - -function Cursor:index(i) - local out = ffi.new("qjd_cursor[1]") - local rc = C.qjd_cursor_index(self._cur, i, out) - if not check_err(rc) then return nil end - return setmetatable({ _cur = out[0], _doc = self._doc }, Cursor) -end - -return _M -``` - -- [ ] **Step 3: Update README with LuaJIT usage example** - -Append to `README.md`: - -```markdown -## 
LuaJIT Usage - -```lua -local qd = require("quickdecode") -local doc = qd.parse(json_str) - --- Root-path getter: -local model = doc:get_str("body.model") - --- Cursor (avoid re-walking shared prefix): -local body = doc:open("body") -local model = body:get_str("model") -local temp = body:get_f64("temperature") -``` -``` - -- [ ] **Step 4: Commit** - -```sh -git add include/lua_quick_decode.h lua/quickdecode.lua README.md -git commit -m "Finalize C header and add LuaJIT wrapper module" -``` - ---- - -## Task 18: Lua integration tests (busted) + benchmark vs lua-cjson - -**Files:** -- Create: `tests/lua/basic_spec.lua` -- Create: `tests/lua/escape_spec.lua` -- Create: `tests/lua/cjson_compat_spec.lua` -- Create: `benches/lua_bench.lua` -- Create: `benches/fixtures/small_api.json` -- Create: `benches/fixtures/medium_resp.json` -- Modify: `README.md` - -- [ ] **Step 1: Write `tests/lua/basic_spec.lua`** - -```lua -local qd = require("quickdecode") - -describe("quickdecode basic", function() - it("parses an object and gets a string field", function() - local d = qd.parse('{"a":"hello"}') - assert.are.equal("hello", d:get_str("a")) - end) - - it("returns nil on missing path", function() - local d = qd.parse('{"a":1}') - assert.is_nil(d:get_str("b")) - end) - - it("errors on type mismatch", function() - local d = qd.parse('{"a":1}') - assert.has_error(function() d:get_str("a") end) - end) - - it("supports nested paths", function() - local d = qd.parse('{"body":{"model":"gpt"}}') - assert.are.equal("gpt", d:get_str("body.model")) - end) - - it("supports array indexing", function() - local d = qd.parse('{"xs":[10,20,30]}') - assert.are.equal(20, d:get_i64("xs[1]")) - end) - - it("cursor reuses shared prefix", function() - local d = qd.parse('{"body":{"a":1,"b":"two"}}') - local b = d:open("body") - assert.are.equal(1, b:get_i64("a")) - assert.are.equal("two", b:get_str("b")) - end) - - it("typeof reports correct types", function() - local d = 
qd.parse('{"s":"x","n":1,"f":1.5,"b":true,"z":null,"a":[],"o":{}}') - assert.are.equal(qd.T_STR, d:typeof("s")) - assert.are.equal(qd.T_NUM, d:typeof("n")) - assert.are.equal(qd.T_NUM, d:typeof("f")) - assert.are.equal(qd.T_BOOL, d:typeof("b")) - assert.are.equal(qd.T_NULL, d:typeof("z")) - assert.are.equal(qd.T_ARR, d:typeof("a")) - assert.are.equal(qd.T_OBJ, d:typeof("o")) - end) - - it("len for objects and arrays", function() - local d = qd.parse('{"o":{"a":1,"b":2,"c":3},"a":[1,2,3,4]}') - assert.are.equal(3, d:len("o")) - assert.are.equal(4, d:len("a")) - end) -end) -``` - -- [ ] **Step 2: Write `tests/lua/escape_spec.lua`** - -```lua -local qd = require("quickdecode") - -describe("quickdecode strings", function() - it("decodes simple escape", function() - local d = qd.parse('{"a":"he\\nlo"}') - assert.are.equal("he\nlo", d:get_str("a")) - end) - - it("decodes unicode escape", function() - local d = qd.parse('{"a":"\\u00e9"}') - assert.are.equal("\xc3\xa9", d:get_str("a")) - end) - - it("decodes surrogate pair", function() - local d = qd.parse('{"a":"\\uD83D\\uDE00"}') - assert.are.equal("\xF0\x9F\x98\x80", d:get_str("a")) - end) - - it("zero-copy for unescaped strings", function() - local d = qd.parse('{"a":"plain"}') - assert.are.equal("plain", d:get_str("a")) - end) -end) -``` - -- [ ] **Step 3: Write `tests/lua/cjson_compat_spec.lua`** - -```lua -local qd = require("quickdecode") -local cjson = require("cjson") - -local function expect_eq(qd_doc, cjson_obj, paths) - for _, p in ipairs(paths) do - local got = qd_doc:get_str(p) or qd_doc:get_f64(p) or qd_doc:get_bool(p) - -- Walk cjson result. - local want = cjson_obj - for seg in p:gmatch("[^%.]+") do - want = want[seg] or want[tonumber(seg)] - end - assert.are.equal(want, got, "path " .. 
p) - end -end - -describe("quickdecode vs lua-cjson", function() - it("agrees on simple object fields", function() - local s = '{"a":"x","b":42,"c":1.5,"d":true}' - expect_eq(qd.parse(s), cjson.decode(s), {"a","b","c","d"}) - end) -end) -``` - -- [ ] **Step 4: Write `benches/fixtures/small_api.json`** - -A representative ~5KB JSON. Concrete content (a single LLM API request shape): - -```json -{ - "model": "gpt-4", - "temperature": 0.7, - "max_tokens": 1024, - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Hello, how are you?"} - ], - "metadata": { - "user_id": "u_123", - "session_id": "s_abc", - "tags": ["a", "b", "c"] - } -} -``` - -(Real fixture should be ~5KB; pad messages content or add more keys to reach ~5KB. Same shape for medium_resp.json at ~200KB with more messages.) - -- [ ] **Step 5: Write `benches/lua_bench.lua`** - -```lua -package.path = package.path .. ";./lua/?.lua" -package.cpath = package.cpath .. ";./target/release/lib?.so" - -local qd = require("quickdecode") -local cjson = require("cjson") - -local function read_file(p) - local f = assert(io.open(p, "rb")) - local s = f:read("*a") - f:close() - return s -end - -local function bench(name, iters, fn) - collectgarbage("collect") - local mem_before = collectgarbage("count") - local t0 = os.clock() - for i = 1, iters do fn() end - local t1 = os.clock() - local mem_after = collectgarbage("count") - print(string.format("%-40s %.2fms total %.2fµs/op +%.1fKB", - name, (t1 - t0) * 1000, (t1 - t0) * 1e6 / iters, - mem_after - mem_before)) -end - -local fixtures = { - small = read_file("benches/fixtures/small_api.json"), - medium = read_file("benches/fixtures/medium_resp.json"), -} - -for size, payload in pairs(fixtures) do - print("=== " .. size .. " (" .. #payload .. 
" bytes) ===") - - bench("cjson.decode + access 3 fields", 1000, function() - local obj = cjson.decode(payload) - local _ = obj.model - local _ = obj.temperature - local _ = obj.messages[1].role - end) - - bench("quickdecode.parse + access 3 fields", 1000, function() - local d = qd.parse(payload) - local _ = d:get_str("model") - local _ = d:get_f64("temperature") - local _ = d:get_str("messages[0].role") - end) -end -``` - -- [ ] **Step 6: Update README with how to run tests/benchmarks** - -Append: - -```markdown -## Testing - -```sh -cargo test # Rust unit + integration -cargo build --release # build the .so -busted tests/lua --lpath='./lua/?.lua' \ - --cpath='./target/release/lib?.so' # Lua-side tests -``` - -## Benchmarking vs lua-cjson - -```sh -cargo build --release -luajit benches/lua_bench.lua -``` - -Expected: quickdecode is 3-10× faster than lua-cjson on the "decode + extract few fields" pattern. See spec §9.3 for targets. -``` - -- [ ] **Step 7: Run all tests** - -```sh -cargo build --release -busted tests/lua --lpath='./lua/?.lua' --cpath='./target/release/lib?.so' -luajit benches/lua_bench.lua -``` - -Expected: all Lua tests pass; benchmark shows quickdecode beating cjson. 
- -- [ ] **Step 8: Commit** - -```sh -git add tests/lua/ benches/ README.md -git commit -m "Add Lua integration tests and lua-cjson benchmark" -``` - ---- - -## Self-Review - -**Spec coverage** (against `2026-05-15-rust-quick-json-decode-design.md`): - -| Spec section | Task(s) | -|---|---| -| §3.1 Module layout | Tasks 1-7, 13, 15, 17 | -| §3.2 Data flow | Tasks 3-11 | -| §3.3 Invariants | Tasks 6, 10 (scratch invalidate), 16 | -| §4 C ABI types & errors | Tasks 1, 3 | -| §4.3 qjd_parse / qjd_free | Task 3 | -| §4.4 Root-path getters | Tasks 9, 10 | -| §4.5 Cursor API | Task 11 | -| §4.6 Path syntax | Task 4 | -| §4.7 String pointer lifetime | Task 10 (scratch handling) | -| §5 ScalarScanner | Task 2 | -| §5 Avx2Scanner | Tasks 13-16 | -| §5.4 Runtime dispatch | Task 16 | -| §5.6 Shallow validation | Task 2 | -| §6 Cursor + skip cache | Tasks 5, 6 | -| §6.5 String escape decode | Task 7 | -| §6.6 Number decode | Task 8 | -| §7.3 catch_unwind | Task 12 | -| §8 Lua wrapper | Task 17 | -| §9 Tests / Benchmark | Tasks 16 (proptest), 18 | - -**Items not covered by individual tasks (acknowledged):** -- §5.5 SmallVec fast path for <4KB — deferred per spec Roadmap. -- §6.5 SIMD backslash search — deferred per spec Roadmap. -- §6.6 `lexical` fast float parser — deferred per spec Roadmap. -- §7.4 NEON backend — deferred per spec Roadmap. -- CI workflow (GitHub Actions) — handled in deployment; not in V1 implementation plan. - -**Type consistency:** -- `qjd_cursor.cache_slot` (C side) matches `SkipCache.slots` indexing in Rust (Task 6, 11). -- `Cursor::idx_start` / `idx_end` consistent across Tasks 5, 6, 9, 10, 11. -- FFI symbol names match header in Task 17. - -**No placeholders:** every step has runnable code or exact commands. AVX2 tasks (13-16) reference simdjson algorithms by name with full kernel code shown. - ---- - -Plan complete and saved to `docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md`. Two execution options: - -1. 
**Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, review between tasks, fast iteration. -2. **Inline Execution** — Execute tasks in this session using executing-plans, batch execution with checkpoints. - -Which approach? diff --git a/docs/superpowers/specs/2026-05-15-avx2-memchr-string-skip-design.md b/docs/superpowers/specs/2026-05-15-avx2-memchr-string-skip-design.md deleted file mode 100644 index d125cc9..0000000 --- a/docs/superpowers/specs/2026-05-15-avx2-memchr-string-skip-design.md +++ /dev/null @@ -1,195 +0,0 @@ -# AVX2 scanner: cheaper in-string fast path - -**Status**: design approved, ready for implementation plan -**Issue**: [#5 perf(scan): memchr-based fast path for in-string content](https://github.com/membphis/lua-quick-decode/issues/5) -**Touches**: `src/scan/avx2.rs`, `benches/lua_bench.lua`, `README.md` (Roadmap / Deferred) - -## Problem - -The AVX2 scanner's current in-string fast path (`src/scan/avx2.rs:34-43`, added in PR #3) detects when a 64-byte chunk lies fully inside a string and skips the structural-mask + PCLMUL prefix-XOR work. The condition is `in_string != 0 && real_quote == 0`, which still requires computing both the backslash mask and the escape mask before it can fire. - -Per-chunk cost when the current fast path *fires*: - -- 2 × `loadu` (free, needed for any path) -- `backslash` byte mask: ~6 ops -- `quote` byte mask: ~6 ops -- `find_escape_mask_with_carry`: ~10 scalar ALU ops + several branches -- final `real_quote == 0` test - -≈ 25 ops per "skip" chunk. On string-heavy payloads — e.g. a multimodal-shaped JSON whose `data` field is ~10 MB of base64 — ~95% of chunks hit this path, making it the dominant scanner cost. - -## Goal - -Lower per-chunk cost on string-interior chunks from ~25 ops to ~10 ops, by replacing the current fast-path *condition* with a cheaper probe that detects "chunk has no `"` and no `\`" directly, before computing the escape mask. 
- -Estimated speedup on a 10 MB string-heavy payload: ~3× scan-phase throughput (op-count analysis; the implementation will validate via `make bench` against a synthetic fixture). - -This proposal is the chunk-granularity step (Option 1 in brainstorming). Cross-chunk `memchr2` jumps for very long string interiors are deferred (see Roadmap / Deferred). - -## Non-goals - -- Touching the scalar scanner (`src/scan/scalar.rs`). The hot path for the targeted workloads is the AVX2 backend. -- Changing validation semantics. Every byte still gets scanned for well-formedness; bracket balance still validated at end. -- Adding a new cargo feature. The change rides on the existing `avx2` feature. -- Cross-chunk jumps (`memchr2` jump path). Deferred — see Roadmap / Deferred. - -## Design - -### Code change - -Single file: `src/scan/avx2.rs::scan_avx2_impl`. The chunk loop body becomes: - -```rust -while i + 64 <= buf.len() { - let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); - let chunk_hi = _mm256_loadu_si256(buf.as_ptr().add(i + 32) as *const __m256i); - - // in_string fast-probe: only enter when previous chunk left us inside - // a string. Cheap quote-or-backslash mask; if zero, the chunk is pure - // string interior and we can skip ALL mask computation including the - // escape-run scan. - if in_string != 0 { - let interesting = quote_or_backslash_mask(chunk_lo, chunk_hi); - if interesting == 0 { - // No `"` or `\` in chunk → no escapes can originate here, so - // bs_carry must be 0 leaving this chunk. in_string stays 1. - bs_carry = 0; - i += 64; - continue; - } - } - - // Slow path unchanged below. 
- let backslash = byte_mask(chunk_lo, chunk_hi, b'\\'); - let quote = byte_mask(chunk_lo, chunk_hi, b'"'); - let escaped = find_escape_mask_with_carry(backslash, &mut bs_carry); - let real_quote = quote & !escaped; - - let (inside, new_in_string) = inside_string_mask(real_quote, in_string); - in_string = new_in_string; - - let struct_mask = structural_mask_chunk(chunk_lo, chunk_hi); - let final_mask = (struct_mask & !inside) | real_quote; - - emit_bits(final_mask, i as u32, out); - - i += 64; -} -``` - -The current fast-path branch (`if in_string != 0 && real_quote == 0 { i += 64; continue; }`) is **removed** — the new probe is a true subset of its trigger condition (proof in §"Correctness"), so removing the late fast path costs nothing and the code reads more linearly. - -### New helper - -```rust -#[inline(always)] -unsafe fn quote_or_backslash_mask(lo: __m256i, hi: __m256i) -> u64 { - let vq = _mm256_set1_epi8(b'"' as i8); - let vb = _mm256_set1_epi8(b'\\' as i8); - let lo_or = _mm256_or_si256(_mm256_cmpeq_epi8(lo, vq), _mm256_cmpeq_epi8(lo, vb)); - let hi_or = _mm256_or_si256(_mm256_cmpeq_epi8(hi, vq), _mm256_cmpeq_epi8(hi, vb)); - let mlo = _mm256_movemask_epi8(lo_or) as u32 as u64; - let mhi = _mm256_movemask_epi8(hi_or) as u32 as u64; - mlo | (mhi << 32) -} -``` - -Matches the style of existing helpers (`byte_mask`, `structural_mask_chunk`): `#[inline(always)] unsafe fn` with no explicit `#[target_feature]` annotation — the caller `scan_avx2_impl` carries `#[target_feature(enable = "avx2,pclmulqdq")]` and inlining propagates the feature set. - -Op count: 4 `cmpeq` + 2 `or` + 2 `movemask` + 1 shift + 1 or = ~10 vector ops, no scalar ALU, no branches. 
- -### Op-count comparison - -| chunk shape | current path | new path | delta | -|---|---|---|---| -| not in_string | full mask path (~25 ops, no fast path) | unchanged | 0 | -| in_string, chunk pure string interior | ~25 ops (current fast path) | ~10 ops (new probe) | **−60%** | -| in_string, chunk has `\` or `"` | ~25 ops slow path | ~10 ops probe + ~25 slow = ~35 | +40% | - -Net effect on a 10 MB base64-style payload (~95% pure-interior chunks): probe-hit case dominates; expected ~3× scan throughput. Mixed payloads with frequent escapes inside strings see a smaller win or slight regression on the in-string-with-escapes chunks; bench will measure the crossover. - -## Correctness - -The new fast path fires when `in_string == 1 ∧ chunk contains no '"' and no '\'`. We must prove that taking the branch (skip 64 bytes, set `bs_carry = 0`, keep `in_string = 1`) produces output identical to letting the slow path run. - -### (a) `bs_carry` leaves the chunk as 0 - -`bs_carry` represents whether the trailing backslash run of the current chunk has odd parity (and thus escapes byte 0 of the next chunk). With `backslash == 0`: - -- `trailing_bs = 0` in `find_escape_mask_with_carry` -- Falls into the `else` branch: `new_carry = 0 & 1 = 0` - -So slow-path `bs_carry` after this chunk is 0, regardless of incoming `bs_carry`. Setting it to 0 explicitly is equivalent. - -### (b) `in_string` stays 1 - -With `real_quote == 0` (which follows from `quote == 0`), `inside_string_mask` computes: - -- `q = 0`, prefix-XOR via `_mm_clmulepi64_si128` = 0 -- If `prev_in_string != 0`, `mask = !0 = u64::MAX` -- `new_state = (u64::MAX >> 63) & 1 = 1` - -Slow path leaves `in_string = 1`. Explicit retention is equivalent. - -### (c) No structural offsets are emitted for this chunk - -Slow path: `final_mask = (struct_mask & !inside) | real_quote`. With the whole chunk inside the string (`inside = u64::MAX`) and `real_quote = 0`, `final_mask = 0`. Zero offsets emitted. 
Skipping the chunk emits nothing. Equivalent. - -### (d) New condition is strictly narrower than current fast path - -Current condition `in_string != 0 ∧ real_quote == 0` fires when `quote & !escaped == 0`. New condition fires when `quote == 0 ∧ backslash == 0`. The new condition implies `quote == 0 ⇒ real_quote == 0`, so any chunk hit by the new path was also hit by the current fast path. The reverse is not true: a chunk with `quote != 0` where every quote bit is escaped (preceded by an odd backslash run) hits the current fast path but not the new one. Those chunks now go through the slow path — correctness unchanged, performance unchanged (slow path is the same code). - -### Edge cases - -| scenario | behavior | -|---|---| -| Entering chunk with `bs_carry == 1`, chunk byte 0 is `\` | `backslash != 0` → probe miss → slow path → `pc=1` handled by `find_escape_mask_with_carry` as before | -| Entering chunk with `bs_carry == 1`, chunk has no `"` or `\` | Probe hit → `bs_carry := 0`, equivalent to slow path's `else` branch returning `new_carry = 0` | -| 64-aligned input ending mid-string | Unchanged — main loop exits with `i == buf.len()`, existing post-loop `if i < buf.len() ... else if in_string != 0 { return Err(buf.len()) }` still flags unterminated | -| Non-aligned tail with `bs_carry=1` from probe-hit chunk | `bs_carry = 0` after probe hit, so `scalar_start = i` (existing logic), correct | - -## Bench fixture - -`benches/lua_bench.lua` gains a synthetic "string-heavy" scenario. **Fixture is generated at run time, not committed.** - -- Top-level shape: `{"id": "...", "ts": <int>, "data": "<base64 payload>"}` -- `data` value: `QJD_BENCH_BIG_MB` MB (default 10) of characters drawn from `A-Za-z0-9+/`. Guaranteed no `"` or `\` in the payload. Deterministic seed for reproducibility. -- Bench reports fixture size + three-run median for: - - `lua-cjson` full parse - - `quickdecode` parse + single-field extract on `data` - -Bench is a manual `make bench` target. 
**Not a CI gate.** Its output goes into the PR description and a Performance section update in `README.md`. - -## Tests - -Rust unit tests in `src/scan/avx2.rs::tests`. The host-AVX2 guard pattern (`if !host_supports_avx2() { return; }`) is preserved. - -| test | new / modified | purpose | -|---|---|---| -| `long_string_engages_skip_fastpath` | modified | bump from ~10 KB to ≥1 MB string interior — multiple probe-hit chunks in a row | -| `long_string_with_periodic_backslash` | **new** | every ~5 chunks inject `\\n` / `\\\"` escape sequences; alternates probe-hit and slow path, asserts parity with scalar | -| `bs_carry_one_at_pure_string_chunk_boundary` | **new** | construct prior chunk ending in odd-length backslash run (`bs_carry=1`), next chunk fully pure string interior with no `"`/`\`; assert parity (verifies §(a)) | -| `escaped_quotes_remain_correct_with_fastpath` | unchanged | existing test, still passes | -| `scanner_crosscheck` (proptest, `tests/scanner_crosscheck.rs`) | unchanged | 2000-case property test; if shrinking finds a regression case, `.proptest-regressions` gets committed | - -## CI matrix - -Unchanged. No new cargo features, no new test binaries. - -1. `cargo test --release` — exercises new path (host AVX2 required) -2. `cargo test --release --no-default-features` — scalar-only, new code excluded by `#![cfg(target_arch = "x86_64")]` + feature gate -3. `cargo test --features test-panic --release` — FFI panic barrier unchanged -4. Lua busted suite under LuaJIT — unchanged - -## Roadmap / Deferred - -After landing, add to `README.md` under Roadmap / Deferred: - -> - **memchr2 jump for ≥N consecutive in-string chunks** — current chunk-per-chunk probe leaves ~10 vector ops/chunk on the table for very large string-interior runs (≥1 MB single string). A `memchr2(b'"', b'\\')` jump path can approach memory bandwidth; deferred until a workload that benefits clearly emerges. - -## Out of scope - -- Scalar scanner changes. 
-- Auto-tuning the probe threshold or making the probe optional. -- Reworking `find_escape_mask_with_carry` (its cost is paid only on slow-path chunks now). -- Cross-chunk `memchr2` jumps (Option 2 from brainstorming; tracked in Roadmap). diff --git a/docs/superpowers/specs/2026-05-15-makefile-design.md b/docs/superpowers/specs/2026-05-15-makefile-design.md deleted file mode 100644 index c0c01a5..0000000 --- a/docs/superpowers/specs/2026-05-15-makefile-design.md +++ /dev/null @@ -1,48 +0,0 @@ -# Makefile for lua-quick-decode - -Add a root-level `Makefile` that wraps the common Rust + LuaJIT workflows so contributors don't have to remember the env-var dance for bench/test. - -## Targets - -| Target | Action | -|---|---| -| `help` (default) | Print each target and its `## ` doc-comment via awk | -| `build` | `cargo build --release` (produces `target/release/libquickdecode.so`) | -| `test` | Depends on `build`. Runs `cargo test --release`, then `busted tests/lua --lua=$(LUAJIT) --lpath='./lua/?.lua'` with `LD_LIBRARY_PATH=$(CURDIR)/target/release` and `LUA_CPATH=$(LUA_CPATH)` exported | -| `lint` | `cargo clippy --release --all-targets -- -D warnings` then `cargo fmt --check` | -| `bench` | Depends on `build`. Runs `$(LUAJIT) benches/lua_bench.lua` with the same env exports as `test` | -| `clean` | `cargo clean` | - -All targets are `.PHONY`. - -## Overridable variables - -```make -LUAJIT ?= $(shell command -v luajit 2>/dev/null || echo /usr/local/openresty/luajit/bin/luajit) -LUA_CPATH ?= ./?.so;/usr/local/openresty/lualib/?.so;/usr/local/lib/lua/5.1/?.so;/usr/local/openresty/luajit/lib/lua/5.1/?.so -``` - -- `LUAJIT` autodetects: prefers `luajit` on `PATH` (apt/CI install), falls back to OpenResty's path (local dev box). -- `LUA_CPATH` includes OpenResty's `lualib` (where `cjson.so` lives on the local box) plus the standard LuaJIT 5.1 search paths. 
The default value is intentionally absolute, not appended to LuaJIT's built-in default, so the Makefile is reproducible regardless of which LuaJIT build is invoked. -- Users override per invocation: `make bench LUAJIT=/path/to/luajit`. - -## Help format - -Each target line carries a `## description` trailing comment. The `help` target greps targets with `## ` and pretty-prints `target — description` via awk. This pattern lets `help` stay in sync automatically when a new target is added. - -## Out of scope - -- **`luacheck` lint for Lua sources.** Neither the local box nor CI has it installed; adding it now would be dead code. Track in README if/when desired. -- **Separate `release` / `debug` build targets.** The repo only ships release artifacts (bench and Lua FFI tests both require release). Add later if a debug workflow appears. -- **Cross-target dependency on `test` from `bench`.** Bench depends only on `build`; running tests as part of bench would slow down iterative perf work. - -## Non-goals - -- Replacing CI. The Makefile mirrors CI commands but is not invoked by CI (CI keeps its explicit steps for cache-key clarity). -- Cross-platform. macOS/Windows are not supported; the OpenResty path defaults are Linux-specific. PRs welcome but not required for v1. - -## Failure modes (intentional, loud) - -- `busted` not installed → `test` fails with a clear `command not found`. Fix: `sudo luarocks install busted`. -- `luajit` not on PATH and OpenResty fallback missing → `bench` and `test` fail at the luajit invocation. Fix: install LuaJIT or pass `LUAJIT=...`. -- `target/release/libquickdecode.so` missing → impossible by construction; `bench` and `test` depend on `build`. 
diff --git a/docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md b/docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md deleted file mode 100644 index 196e9f1..0000000 --- a/docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md +++ /dev/null @@ -1,623 +0,0 @@ -# Rust Quick JSON Decode — Design (v1) - -**Date:** 2026-05-15 -**Status:** Design approved, awaiting implementation plan -**Project:** `lua-quick-decode` - ---- - -## 1. Purpose & Non-Goals - -### Purpose - -A Rust-implemented JSON decoder exposed to LuaJIT via FFI, optimized for the case where: - -- A large-ish JSON (1 KB – 64 MB) is parsed **once**. -- The caller extracts a **small number of fields** (typically 5–20) via dynamic paths. -- The document is then **discarded**. - -The library's competitive advantage over `lua-cjson` is that it skips: - -- Constructing a full Lua `table` for the parsed document. -- Copying / interning every string value. -- Allocating GC objects for every nested object/array. - -It does so by performing a **single fast SIMD structural scan** in Phase 1 (only recording byte offsets of structural characters) and then **lazily decoding** only the fields the caller actually requests in Phase 2. - -### Non-Goals - -- Full JSON RFC 8259 validation. We perform shallow structural validation only; value-level errors (invalid escapes, malformed numbers, invalid UTF-8 in `\u` sequences) are deferred to lazy decode and surfaced only if the offending field is accessed. -- Building a Lua table representation. The library never produces a Lua table from JSON; callers must request fields explicitly. -- Streaming / incremental parse. The whole input must be available as a contiguous `&[u8]` before parsing begins. -- Thread safety. A `qjd_doc` is single-threaded. Document objects must not be shared across threads. -- JSON encoding / serialization. Decode only. - ---- - -## 2. 
Confirmed Decisions - -| Aspect | Decision | -|---|---| -| Output artifact | Rust `cdylib` → `.so`, plus `rlib` for Rust-side tests/integration | -| Caller binding | LuaJIT via `ffi.cdef` + `ffi.load`; **no** dependency on `lua.h` | -| Access pattern | Fully dynamic, runtime path resolution | -| Access frequency | One parse, few accesses, then discard | -| Input size | 1 KB – 64 MB (32-bit offsets sufficient) | -| Portability | Scalar fallback required; SIMD as runtime-detected acceleration | -| Validation level | Shallow (brace/quote/structure only); value validity deferred to lazy decode | -| Buffer ownership | Borrow `&[u8]`; `Document` holds a reference for its lifetime | -| Field API | Both root-path (`get_str(doc, "body.model")`) and cursor (`open(doc, "body")` → sub-cursor) | -| Error model | `errcode` return + output pointer; static `qjd_strerror(code)` for descriptions | -| Lua wrapper | Full `quickdecode.lua` shipped as deliverable | -| Benchmark targets | 100 KB – 1 MB / 5-20 fields; 10 MB – 64 MB / few fields | -| Backends in v1 | `ScalarScanner` (fallback) + `Avx2Scanner` (x86_64); NEON deferred | - ---- - -## 3. 
Architecture - -### 3.1 Module Layout - -``` -src/ -├── lib.rs — crate root, re-exports -├── ffi.rs — pub extern "C" symbols (C ABI layer) -├── doc.rs — Document type (Phase 1 + container helpers) -├── cursor.rs — Cursor, path resolution, skip-cache walk -├── path.rs — path string parse (zero-alloc iterator) -├── error.rs — error / type enums -├── scan/ -│ ├── mod.rs — Scanner trait + runtime dispatch (OnceCell-cached) -│ ├── scalar.rs — scalar fallback -│ └── avx2.rs — x86_64 AVX2 + PCLMUL (gated by `avx2` feature) -├── decode/ -│ ├── mod.rs -│ ├── number.rs — lazy i64/f64 parse -│ └── string.rs — lazy escape decode + UTF-8 check on \u -└── skip_cache.rs — Phase 2 sibling-skip cache - -lua/ -└── quickdecode.lua — LuaJIT wrapper module - -tests/ -├── integration.rs — Rust-side C ABI tests -└── lua/ — busted Lua tests - -benches/ -├── rust_bench.rs — Rust criterion benches -├── lua_bench.lua — Lua bench vs lua-cjson -└── fixtures/ — JSON fixtures - -include/ -└── lua_quick_decode.h — public C header - -docs/ -└── superpowers/specs/ - └── 2026-05-15-rust-quick-json-decode-design.md (this file) -``` - -### 3.2 Layered Data Flow - -``` -caller buf:&[u8] - │ - ▼ ffi::qjd_parse - Document::parse - │ - ▼ scan::dispatch (cached function pointer) - { ScalarScanner | Avx2Scanner } - │ - ▼ - indices: Vec<u32> (Phase 1 complete) - │ -caller: doc:get_str("body.model") - │ - ▼ path::parse (zero-alloc iterator) - Cursor::resolve - │ │ - │ └─► skip_cache (lazy fill) - ▼ - decode::string - │ - ▼ - (ptr, len) → LuaJIT side ffi.string() -``` - -### 3.3 Key Invariants - -- `Document<'a>` borrows `'a` from the caller's input buffer; the FFI layer erases `'a` to `'static`, and the LuaJIT wrapper enforces lifetime via Lua-side strong references. -- `indices: Vec<u32>` is write-once: filled during Phase 1, read-only thereafter. -- `skip_cache` lives on `Document`; populated lazily during Phase 2 access. -- `scratch: Vec<u8>` (for escape decode) lives on `Document`. 
**Invariant: only the most recent `get_str` result's pointer is valid.** The LuaJIT wrapper calls `ffi.string(ptr, len)` immediately to copy into a Lua string. -- `indices` records only byte offsets, **not** token types. Type is recovered from `buf[indices[i]]`. This saves 25% memory vs storing a type tag. - ---- - -## 4. C ABI - -Public header: `include/lua_quick_decode.h`. Symbols all `extern "C"`, `#[no_mangle]`. - -### 4.1 Types - -```c -typedef struct qjd_doc qjd_doc; /* opaque */ - -typedef struct { - const qjd_doc* doc; - uint32_t idx_start; /* opener position in doc.indices */ - uint32_t idx_end; /* one past closer */ - uint32_t _reserved0; /* reserved for future fast-path */ - uint32_t _reserved1; /* reserved / padding */ -} qjd_cursor; /* 24 bytes, by-value, no allocation */ -``` - -### 4.2 Error Codes - -```c -typedef enum { - QJD_OK = 0, - QJD_PARSE_ERROR = 1, /* Phase 1 structural failure */ - QJD_NOT_FOUND = 2, /* path does not exist */ - QJD_TYPE_MISMATCH = 3, /* path target is wrong JSON type for getter */ - QJD_OUT_OF_RANGE = 4, /* numeric overflow for requested integer type */ - QJD_DECODE_FAILED = 5, /* malformed escape / UTF-8 / number */ - QJD_INVALID_PATH = 6, /* path string syntax error */ - QJD_INVALID_ARG = 7, /* NULL pointer etc. */ - QJD_OOM = 8, -} qjd_err; - -const char* qjd_strerror(int code); /* static; caller must not free */ -``` - -### 4.3 Phase 1 - -```c -qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); -void qjd_free (qjd_doc* doc); -``` - -Returns NULL on failure with `*err_out` set. `qjd_free(NULL)` is a no-op. The caller must keep `buf` valid for the lifetime of the returned `qjd_doc`. 
- -### 4.4 Phase 2 — Root-Path API - -```c -int qjd_get_str (qjd_doc*, const char* path, size_t path_len, - const uint8_t** out_ptr, size_t* out_len); -int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); -int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); -int qjd_get_bool (qjd_doc*, const char* path, size_t path_len, int* out); -int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); - -typedef enum { - QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, - QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5, -} qjd_type; -int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* type_out); -int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); -``` - -### 4.5 Phase 2 — Cursor API - -```c -int qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); - -int qjd_cursor_get_str (qjd_cursor*, const char* path, size_t path_len, - const uint8_t** out_ptr, size_t* out_len); -int qjd_cursor_get_i64 (qjd_cursor*, const char* path, size_t path_len, int64_t* out); -int qjd_cursor_get_f64 (qjd_cursor*, const char* path, size_t path_len, double* out); -int qjd_cursor_get_bool (qjd_cursor*, const char* path, size_t path_len, int* out); -int qjd_cursor_typeof (qjd_cursor*, const char* path, size_t path_len, int* out); -int qjd_cursor_len (qjd_cursor*, const char* path, size_t path_len, size_t* out); - -/* sub-cursor; key/index avoids path-string composition */ -int qjd_cursor_open (qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); -int qjd_cursor_field (qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); -int qjd_cursor_index (qjd_cursor*, size_t i, qjd_cursor* out); -``` - -### 4.6 Path Syntax - -``` -path := segment ( '.' segment | '[' digit+ ']' )* -segment := key | '[' digit+ ']' -key := characters not containing '.' or '[' -``` - -Empty path / NULL path = root. 
- -Keys containing `.` or `[` are **not supported** via path strings — use `qjd_cursor_field()` instead. Attempting to parse such a path returns `QJD_INVALID_PATH`. - -### 4.7 String Output Pointer Lifetime - -The `out_ptr` returned by `qjd_get_str` / `qjd_cursor_get_str` points to either: - -1. The original input buffer (when the string contains no escape sequences); or -2. A document-internal scratch buffer (when escape decode was required). - -**The caller must consume the result before the next call to any `*_get_str` function on the same document.** Any subsequent `get_str` may invalidate prior pointers. The LuaJIT wrapper handles this by calling `ffi.string(ptr, len)` immediately, copying into a Lua string. - ---- - -## 5. Phase 1 — Structural Scan - -### 5.1 Goal - -Given `buf: &[u8]`, produce `indices: Vec<u32>` listing the byte offset of every structural character (`{`, `}`, `[`, `]`, `:`, `,`, `"`) that is **not inside a string literal**. - -### 5.2 Quote Handling - -The hard part is correctly identifying which `"` characters open/close strings versus being escaped. We use the classical SIMD algorithm (simdjson): - -For each 64-byte chunk: - -1. Build `quote_mask` (bit per byte = `"`) -2. Build `backslash_mask` (bit per byte = `\`) -3. Build `structural_mask` (bit per byte = one of `{}[]:,`) -4. Compute `escaped_quote_mask` from `backslash_mask` using bit arithmetic that accounts for consecutive backslash runs (odd-length run = next char escaped; even-length = next char literal). -5. `real_quote_mask = quote_mask & ~escaped_quote_mask` -6. Use PCLMUL (or scalar prefix XOR on fallback) to turn `real_quote_mask` into `inside_string_mask` (1 between consecutive quote pairs). -7. `output_mask = structural_mask & ~inside_string_mask`, plus `real_quote_mask` itself (strings' boundaries are also structural). -8. Iterate set bits in `output_mask` and append byte offsets to `indices`. 
- -The "carry-over" state across chunks: whether the chunk begins inside a string, and the trailing backslash count of the previous chunk. - -### 5.3 Backend Trait - -```rust -pub(crate) trait StructScanner { - /// Scan `buf`, appending offsets to `out`. - /// On shallow validation failure (unclosed string, unmatched bracket), - /// returns `Err(byte_offset)` (offset not exposed in v1 errors). - fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize>; -} - -pub(crate) struct ScalarScanner; -#[cfg(target_arch = "x86_64")] pub(crate) struct Avx2Scanner; -``` - -### 5.4 Runtime Dispatch - -```rust -static SCAN_FN: OnceCell<fn(&[u8], &mut Vec<u32>) -> Result<(), usize>> - = OnceCell::new(); - -fn dispatch() -> fn(&[u8], &mut Vec<u32>) -> Result<(), usize> { - *SCAN_FN.get_or_init(|| { - #[cfg(target_arch = "x86_64")] - if is_x86_feature_detected!("avx2") - && is_x86_feature_detected!("pclmulqdq") - { - return Avx2Scanner::scan; - } - ScalarScanner::scan - }) -} -``` - -First call detects CPU features; subsequent calls use a cached function pointer (no `cpuid` overhead). - -### 5.5 Indices Capacity - -Initial capacity = `buf.len() / 6` (≈17 % of input bytes). Empirically structural characters make up 5–25 % of a typical JSON. Under-allocation triggers `Vec` doubling, costing one realloc; over-allocation wastes ≤17 % of input size. - -For very small documents (< 4 KB), the wasted bytes are negligible. A stack-allocated SmallVec fast path is **deferred to Roadmap**. - -### 5.6 Shallow Validation Coverage - -Phase 1 detects and rejects: - -- Unclosed string at end of buffer -- Mismatched bracket types (`{` paired with `]` etc.) -- Unbalanced closers (more `}` than `{` etc.) 
- -Phase 1 does **not** check: - -- Semantic position of `:` `,` (extraneous commas, missing colons) -- Escape sequence validity inside strings -- UTF-8 validity (multi-byte UTF-8 cannot be confused with ASCII structural chars) -- Number format validity -- Duplicate keys - -### 5.7 Expected Throughput - -| Backend | Target | -|---|---| -| Scalar | 500 MB/s – 1 GB/s | -| AVX2 (+ PCLMUL) | 3 – 6 GB/s | - ---- - -## 6. Phase 2 — Path Resolution & Cursor - -### 6.1 Cursor Internal Representation - -```rust -#[derive(Copy, Clone)] -pub(crate) struct Cursor<'d> { - doc: &'d Document<'d>, - /// Slice of doc.indices covered by this cursor. - /// idx_start points at '{' or '['; idx_end points one past matching '}' / ']'. - idx_start: u32, - idx_end: u32, -} -``` - -The published `qjd_cursor` carries two `_reservedN` slots beyond `idx_start`/`idx_end`; they are unused in v1 but reserved so a future per-cursor skip-cache fast-path can be added without breaking the ABI. - -`Cursor` is `Copy` and never allocates. `open()`, `field()`, `index()` return new cursors by value. - -### 6.2 Resolution Algorithm - -```text -for seg in path: - Confirm cursor points at correct container type: - seg=Key → require '{' at cursor opener; else TYPE_MISMATCH - seg=Idx → require '[' at cursor opener; else TYPE_MISMATCH - - Walk children of the container: - - If cache_slot is populated: directly read child_starts[i] / - probe child_starts for matching key. - - Otherwise: brace-counting scan from opener+1 to find each child, - populating cache_slot as we go (incremental fill). - - On match: advance cursor to child's [idx_start, idx_end). - On exhaustion: NOT_FOUND. 
-``` - -### 6.3 Sibling-Skip Cache - -```rust -pub(crate) struct SkipCache { - slots: Vec<SkipSlot>, // slot 0 reserved - by_opener: rustc_hash::FxHashMap<u32, u32>,// opener idx → slot number -} - -pub(crate) struct SkipSlot { - /// child_starts[i] = position in doc.indices where i-th child begins - /// (for object: pointing at the key's opening '"'; - /// for array: pointing at the value's first token). - child_starts: Vec<u32>, - /// child_ends[i] = idx_end for a Cursor pointing at the i-th child's value. - /// Storing this lets cache-hit lookups skip the brace-counting walk. - child_ends: Vec<u32>, -} -``` - -**Build-on-first-access:** when a container is entered for the first time, its `SkipSlot` is built incrementally as the resolver walks its children. The walk uses brace-counting (the cheap operation on the `indices` array, not on the original buffer). Subsequent accesses to the same container are O(N_keys) field comparisons with no brace counting. - -**Memory cost analysis:** worst case is when the caller enters every child of a large array (e.g. iterates 100 `messages[i]` and descends into each). Each entered container costs roughly `8 * num_children` bytes. For a 1 MB / 100-message JSON this stays below 5 MB total — acceptable. No LRU eviction in v1. - -### 6.4 Field-Type Dispatch - -Typed getters (`get_str`, `get_i64`, ...) inspect `buf[doc.indices[cursor.idx_start]]` after path resolution: - -| First byte | Inferred type | Behavior | -|---|---|---| -| `"` | string | `get_str` → decode; `get_i64`/`get_f64`/`get_bool` → TYPE_MISMATCH | -| `0`-`9`, `-` | number | `get_i64`/`get_f64` → parse; others → TYPE_MISMATCH | -| `t`, `f` | bool | `get_bool` → parse; others → TYPE_MISMATCH | -| `n` | null | `is_null` → true; others → TYPE_MISMATCH | -| `{` | object | `typeof` → OBJ; getters → TYPE_MISMATCH | -| `[` | array | `typeof` → ARR; getters → TYPE_MISMATCH | - -`qjd_typeof` only inspects the first byte; no value decoding. 
- -`qjd_typeof` on a non-existent path returns `QJD_NOT_FOUND`, **not** `QJD_T_NULL`. The two are distinct. - -### 6.5 String Escape Decode - -```rust -fn decode_string( - buf: &[u8], start: usize, end: usize, - scratch: &mut Vec<u8>, -) -> Result<(*const u8, usize), qjd_err> { - // Fast path: no backslash in range → return original slice. - if memchr::memchr(b'\\', &buf[start..end]).is_none() { - return Ok((buf.as_ptr().wrapping_add(start), end - start)); - } - // Slow path: decode into scratch. - scratch.clear(); - // Handle: \" \\ \/ \b \f \n \r \t \u XXXX with surrogate pair join - // ... - Ok((scratch.as_ptr(), scratch.len())) -} -``` - -UTF-8 validity of `\u XXXX` sequences (correct surrogate pairing) is checked here and surfaced as `QJD_DECODE_FAILED`. Other bytes are passed through without UTF-8 validation, consistent with our shallow-validation policy. - -A SIMD-accelerated backslash search in the fast path is **deferred to Roadmap**. - -### 6.6 Number Decode - -- `get_i64`: hand-written fast parse, accepts JSON-number integer form (`-?[0-9]+`), rejects `.`, `e`, `E`. Overflow → `QJD_OUT_OF_RANGE`. -- `get_f64`: `core::str::FromStr` on a verified-ASCII slice. If first benchmark shows this dominating, switch to `lexical` — **deferred to Roadmap**. -- Integers > 2⁵³ requested via `get_f64` will return with precision loss per IEEE 754 (no error). Integers > i64 range via `get_i64` return `QJD_OUT_OF_RANGE`. - -A "lossless integer" mode returning `int64_t` as cdata (preserving full precision on the Lua side) is **deferred to Roadmap**. - ---- - -## 7. 
Memory Management & Safety - -### 7.1 Document Layout - -```rust -pub struct Document<'a> { - buf: &'a [u8], - indices: Vec<u32>, // appended sentinel u32::MAX at end - scratch: Vec<u8>, // lazy; populated on first escape-decode - skip: SkipCache, // lazy; populated on first Phase 2 access -} -``` - -### 7.2 Allocation Budget - -| Phase | Item | Count | -|---|---|---| -| Phase 1 | `Box<Document>` | 1 | -| Phase 1 | `indices` initial reserve | 1 | -| Phase 1 | `indices` doubling (worst case) | 0–2 | -| Phase 2 | `scratch` first escape | 0 or 1 | -| Phase 2 | `skip.slots[i].child_starts` per first-entered container | 1 each | -| Phase 2 | path parse / cursor ops | 0 | - -### 7.3 FFI Safety - -All FFI entry points: - -- Reject NULL pointers with `QJD_INVALID_ARG` (no panic, no UB). -- Trust `len` (cannot validate at runtime). -- Wrap their body in `std::panic::catch_unwind` to prevent unwinding across the C boundary. Internal panics convert to `QJD_OOM`. -- Use `unsafe extern "C"`. - -Rust internal code is panic-free in steady state: no `.unwrap()`, no `.expect()`, no array indexing where bounds aren't pre-validated. Errors propagate via `Result<_, qjd_err>` to the FFI layer. - -### 7.4 Lifetime Erasure - -The FFI layer materializes a `Document<'static>` from a `&'static [u8]` made via `slice::from_raw_parts`. The actual lifetime equals the caller's input buffer, which Rust cannot enforce. The LuaJIT wrapper (§8) enforces it by holding a strong reference to the original Lua string. - -### 7.5 Threading - -Single-threaded per `qjd_doc`. No internal locking. Documented in the public header. - ---- - -## 8. LuaJIT Wrapper (`lua/quickdecode.lua`) - -### 8.1 Responsibilities - -1. Declare the C ABI via `ffi.cdef`. -2. Load the shared library via `ffi.load("quickdecode")`. -3. Wrap raw C calls into OO-style methods on `Doc` and `Cursor`. -4. **Strong-hold the original JSON string** to prevent GC while the document is alive. -5. Register `qjd_free` via `ffi.gc` for automatic cleanup. -6. 
Translate `QJD_NOT_FOUND` to Lua `nil`; other errors to `error(qjd_strerror(code))`. -7. Call `ffi.string(ptr, len)` immediately on string results, eliminating the scratch-invalidate hazard. - -### 8.2 API Surface - -`Doc` methods: `get_str`, `get_i64`, `get_f64`, `get_bool`, `is_null`, `typeof`, `len`, `open(path)`. - -`Cursor` methods: same set + `open(path)`, `field(key)`, `index(i)`. - -`#cursor` via `__len` is **not** implemented (Lua 5.1 / LuaJIT compatibility). Use `cursor:len("")`. - -### 8.3 Output-Box Reuse - -Module-level pre-allocated `ffi.new` buffers (`err_box`, `i64_box`, `strp_box`, `cur_box`, ...) are reused across all calls. New cdata allocation in the hot path would abort LuaJIT traces. - -### 8.4 Lifetime Holding - -```lua -function _M.parse(json_str) - local err = err_box - local ptr = C.qjd_parse(json_str, #json_str, err) - if ptr == nil then error(...) end - return setmetatable({ - _ptr = ffi.gc(ptr, C.qjd_free), - _hold = json_str, -- strong ref keeps buffer alive - }, Doc) -end -``` - -Cursors hold a back-reference to their `Doc` to prevent the `Doc` (and therefore the buffer) from being collected while cursors exist. - -### 8.5 Integer Precision Caveat - -`tonumber(int64_t)` truncates to double; values exceeding 2⁵³ lose precision silently. Documented in the wrapper. A lossless-integer mode returning cdata is on the Roadmap. - ---- - -## 9. Testing & Benchmarking - -### 9.1 Test Layers - -| Layer | Framework | Approx Cases | -|---|---|---| -| Rust unit (`#[cfg(test)]`) | `cargo test` | ~100 | -| Rust integration (`tests/integration.rs`) | `cargo test` | ~30 | -| Property / fuzz | `proptest`, `cargo-fuzz` | ongoing | -| Lua integration (`tests/lua/`) | `busted` | ~50 | - -### 9.2 Critical Test Matrix - -**Phase 1 correctness:** -- ScalarScanner vs Avx2Scanner produce **bit-identical** `indices` on the same input. Enforced by proptest cross-check. -- Buffer length boundaries: `len % 64 ∈ {0, 1, 31, 32, 33, 63}`. 
-- Pure ASCII vs multi-byte UTF-8 content in strings. -- Adversarial escape patterns: `\\\"`, `\\\\\"`, `\\\\\\"`, long runs of backslashes. -- Extreme depth (stack tolerance). -- Extreme width (10K+ keys / array elements). - -**Phase 2 correctness:** -- Path syntax variants and parsing failures. -- Non-existence at each path depth. -- Type-mismatch at each typed getter. -- Full escape-decode coverage including surrogate pairs (`😀`). -- Numeric boundaries: `INT64_MIN`, `INT64_MAX`, `2^63`, `1.7e308`, JSON-illegal forms. -- Wide objects (5K keys) → skip-cache correctness. - -**FFI boundary:** -- Every entry point handles NULL pointers gracefully. -- `qjd_free(NULL)` is a no-op. -- `qjd_parse` failure path correctly populates `err_out`. -- Internal panic surfaces as `QJD_OOM`, not unwinding. - -**Lua wrapper (busted):** -- `nil` on `NOT_FOUND`, `error()` on other failures. -- GC of `Doc` triggers `qjd_free`. -- Original JSON string is held against premature GC. -- Same-fixture value-equivalence with `lua-cjson`. - -### 9.3 Benchmark - -`benches/lua_bench.lua` directly compares against `lua-cjson` on the same fixtures using `os.clock()` and `collectgarbage('count')` for allocation pressure. No busted involvement (busted overhead is unsuitable for microbenchmarks). - -**Fixtures:** -- `small_api.json` (~5 KB, LLM API request shape) -- `medium_resp.json` (~200 KB) -- `large_dump.json` (~20 MB) -- `deep_nest.json` (depth stress test) - -**Acceptance targets (first cut; revise after measurement):** - -| Scenario | Target | vs lua-cjson | -|---|---|---| -| 200 KB / 5 fields | Phase 1 ≥ 800 MB/s | 3-5× faster | -| 20 MB / 5 fields | Phase 1 ≥ 2 GB/s (AVX2) | 5-10× faster | -| Cursor repeated access | < 200 ns / get_str (AVX2) | — | - -### 9.4 CI - -- `cargo test --features default` (scalar + AVX2) -- `cargo test --no-default-features` (scalar only, simulates non-AVX2 host) -- `busted tests/lua/` after building the `.so` -- Short fuzz runs (1–5 min) per push - ---- - -## 10. 
Roadmap / Deferred - -Tracked in `README.md` and to be picked up individually. Items deferred from this design: - -- **ARM64 NEON scanner backend** — for Apple Silicon, Graviton, 鲲鹏. -- **SmallVec fast path for small documents** (< 4 KB) — avoid heap allocation for `indices` on tiny inputs. -- **SIMD-accelerated backslash search** in the `decode_string` fast path. -- **`lexical` fast float parser** if `<f64 as FromStr>::from_str` benchmarks as a bottleneck. -- **Lossless 64-bit integer mode** — return cdata `int64_t` to preserve precision > 2⁵³. -- **Skip-cache LRU eviction** — only if memory pressure on huge documents proves problematic in practice. -- **Path-position info on Phase 1 errors** — currently only an opaque `QJD_PARSE_ERROR`. - ---- - -## 11. Open Questions for Implementation Plan - -The implementation plan (next phase) should resolve: - -1. Exact crate features and Cargo.toml shape (workspace vs single crate? feature flags for scalar-only builds?). -2. Choice of `proptest` vs `quickcheck`. -3. Whether to vendor `memchr` and `rustc-hash` or add as direct dependencies. -4. Whether `cargo fuzz` integration runs in CI or only on-demand. -5. Build flow for LuaJIT tests (must build `.so` first; how to chain `cargo build` → `busted`). - -These are tactical decisions deferred to the implementation plan. 
diff --git a/include/lua_quick_decode.h b/include/lua_quick_decode.h index 0c0c0a0..e3aeab2 100644 --- a/include/lua_quick_decode.h +++ b/include/lua_quick_decode.h @@ -61,6 +61,10 @@ int qjd_cursor_get_f64 (const qjd_cursor*, const char* path, size_t path_len, d int qjd_cursor_get_bool (const qjd_cursor*, const char* path, size_t path_len, int* out); int qjd_cursor_typeof (const qjd_cursor*, const char* path, size_t path_len, int* out); int qjd_cursor_len (const qjd_cursor*, const char* path, size_t path_len, size_t* out); +int qjd_cursor_bytes (const qjd_cursor*, size_t* byte_start, size_t* byte_end); +int qjd_cursor_object_entry_at(const qjd_cursor*, size_t i, + const uint8_t** key_ptr, size_t* key_len, + qjd_cursor* value_out); #ifdef __cplusplus } diff --git a/lua/quickdecode.lua b/lua/quickdecode.lua index 9675231..0851895 100644 --- a/lua/quickdecode.lua +++ b/lua/quickdecode.lua @@ -30,6 +30,10 @@ int qjd_cursor_get_f64 (const qjd_cursor*, const char*, size_t, double*); int qjd_cursor_get_bool(const qjd_cursor*, const char*, size_t, int*); int qjd_cursor_typeof (const qjd_cursor*, const char*, size_t, int*); int qjd_cursor_len (const qjd_cursor*, const char*, size_t, size_t*); +int qjd_cursor_bytes(const qjd_cursor*, size_t* byte_start, size_t* byte_end); +int qjd_cursor_object_entry_at(const qjd_cursor*, size_t i, + const uint8_t** key_ptr, size_t* key_len, + qjd_cursor* value_out); ]] local C = ffi.load("quickdecode") @@ -178,4 +182,17 @@ function Cursor:index(i) return setmetatable({ _cur = cur_box[0], _doc = self._doc }, Cursor) end +-- Lazy table API (cjson-shaped surface). See lua/quickdecode/table.lua. 
+local _lazy = require("quickdecode.table") +_M.decode = _lazy.decode +_M.encode = _lazy.encode +_M.materialize = _lazy.materialize +_M.pairs = _lazy.pairs +_M.ipairs = _lazy.ipairs +_M.len = _lazy.len +_M.null = _lazy.null +_M.empty_array_mt = _lazy.empty_array_mt +_M._LazyObject = _lazy._LazyObject +_M._LazyArray = _lazy._LazyArray + return _M diff --git a/lua/quickdecode/table.lua b/lua/quickdecode/table.lua new file mode 100644 index 0000000..9a8b93d --- /dev/null +++ b/lua/quickdecode/table.lua @@ -0,0 +1,556 @@ +-- Lazy table view + cjson-compatible encoder for quickdecode. +-- +-- This module relies on the FFI cdef set up by `lua/quickdecode.lua`, so +-- callers must `require("quickdecode")` (transitively or directly) before +-- they require this module. + +local ffi = require("ffi") +local C = ffi.load("quickdecode") +-- Defer the require to avoid a circular dependency when quickdecode.lua +-- re-exports this module. By the time _M.decode is called, quickdecode +-- is already registered in package.loaded. +local function get_qd() + return require("quickdecode") +end + +-- Optional cjson bridge: reuse its sentinels when available so callers' +-- `v == cjson.null` comparisons keep working unchanged. +local has_cjson, cjson = pcall(require, "cjson") + +local _M = {} + +if has_cjson then + _M.null = cjson.null + _M.empty_array_mt = cjson.empty_array_mt +else + _M.null = setmetatable({}, { __tostring = function() return "null" end }) + _M.empty_array_mt = { __jsontype = "array" } +end + +-- Box scratch used for one-shot FFI returns. Reused across calls to avoid +-- per-call allocation; safe because the parent Doc / lazy view holds the +-- buffer alive and these are read-and-copy. 
local err_box = ffi.new("int[1]")
local i64_box = ffi.new("int64_t[1]")
local f64_box = ffi.new("double[1]")
local bool_box = ffi.new("int[1]")
local size_box = ffi.new("size_t[1]")
local type_box = ffi.new("int[1]")
local strp_box = ffi.new("const uint8_t*[1]")
local cur_box = ffi.new("qjd_cursor[1]")
local child_box = ffi.new("qjd_cursor[1]")
-- Out-parameters for qjd_cursor_bytes (byte_start / byte_end).
local sz_a = ffi.new("size_t[1]")
local sz_b = ffi.new("size_t[1]")

-- Return codes and type tags used by this module; the numeric values are
-- passed to / compared against results of the C calls below.
local QJD_OK = 0
local QJD_NOT_FOUND = 2
local T_NULL = 0
local T_BOOL = 1
local T_NUM = 2
local T_STR = 3
local T_ARR = 4
local T_OBJ = 5

-- Classify an FFI return code.
-- Returns true for QJD_OK and false for QJD_NOT_FOUND; any other code raises
-- a Lua error carrying the library's qjd_strerror() message.
local function check(rc)
    if rc == QJD_OK then return true end
    if rc == QJD_NOT_FOUND then return false end
    error("quickdecode: " .. ffi.string(C.qjd_strerror(rc)))
end

-- Metatables for the two lazy view kinds; their metamethods are attached
-- further down in this file.
local LazyObject = {}
local LazyArray = {}

-- Build a new lazy view for a child container cursor.
-- src_box is an FFI cdata `qjd_cursor[1]`; src_box[0] is the cursor whose
-- data we copy into a fresh per-view allocation so the new view's _cur
-- survives later overwrites of src_box.
-- Returns the bare view table (no metatable); the caller attaches LazyObject
-- or LazyArray. Raises if the byte span of the cursor cannot be obtained.
local function wrap_child(parent_view, src_box)
    -- The return code must be checked: on failure sz_a / sz_b still hold the
    -- values written by an earlier call, and recording those as this view's
    -- byte span would be silently wrong. _M.decode checks the same call for
    -- the root cursor; this keeps children consistent with it.
    local rc = C.qjd_cursor_bytes(src_box[0], sz_a, sz_b)
    if not check(rc) then
        error("quickdecode: child byte-span failed")
    end
    local own_box = ffi.new("qjd_cursor[1]")
    ffi.copy(own_box, src_box, ffi.sizeof("qjd_cursor"))
    return {
        _doc = parent_view._doc,
        _cur_box = own_box, -- keep cdata alive
        _cur = own_box[0],  -- stable reference into own_box
        _bs = tonumber(sz_a[0]),
        _be = tonumber(sz_b[0]),
    }
end

-- Decode the value at src_box[0] into a Lua value.
-- src_box is a `qjd_cursor[1]`; for container types, a new view is created
-- via wrap_child so the caller's box can be freely reused afterwards.
local function decode_cursor(parent_view, src_box)
    local trc = C.qjd_cursor_typeof(src_box[0], "", 0, type_box)
    if not check(trc) then return nil end
    local t = type_box[0]
    if t == T_STR then
        local rrc = C.qjd_cursor_get_str(src_box[0], "", 0, strp_box, size_box)
        if not check(rrc) then return nil end
        -- Copy immediately into a Lua string before strp_box is reused.
        return ffi.string(strp_box[0], size_box[0])
    elseif t == T_NUM then
        local rrc = C.qjd_cursor_get_f64(src_box[0], "", 0, f64_box)
        if not check(rrc) then return nil end
        return f64_box[0]
    elseif t == T_BOOL then
        local rrc = C.qjd_cursor_get_bool(src_box[0], "", 0, bool_box)
        if not check(rrc) then return nil end
        return bool_box[0] ~= 0
    elseif t == T_NULL then
        return _M.null
    elseif t == T_OBJ then
        return setmetatable(wrap_child(parent_view, src_box), LazyObject)
    elseif t == T_ARR then
        return setmetatable(wrap_child(parent_view, src_box), LazyArray)
    end
    return nil
end

-- Resolve a child cursor at `key` (object) and decode it into a Lua value.
-- Returns nil for missing keys (cjson semantics) and for non-string keys.
-- Container results (lazy proxies) are rawset-cached into `self` so that
-- subsequent accesses return the same Lua table object. This is required for
-- `t.a.x = v` to propagate back: __newindex materializes `t.a` in-place, and
-- the next `t.a` lookup retrieves the already-materialized table from the
-- raw table rather than creating a fresh proxy.
local function read_object_field(self, key)
    if type(key) ~= "string" then return nil end
    -- Use child_box so the lookup result does not alias self._cur (which is
    -- itself stored in root_box's backing memory in the decode caller).
    local rc = C.qjd_cursor_field(self._cur, key, #key, child_box)
    if not check(rc) then return nil end
    local v = decode_cursor(self, child_box)
    -- Cache containers so identity is stable and materialization sticks.
    if type(v) == "table" then rawset(self, key, v) end
    return v
end

LazyObject.__index = read_object_field

-- Resolve a child cursor at integer index `key` (1-based) and decode it.
-- Returns nil for missing/out-of-range indices and non-integer keys
-- (including NaN and +/-infinity).
-- Container results are rawset-cached for the same identity-stability reason
-- as read_object_field.
local function read_array_index(self, key)
    if type(key) ~= "number" then return nil end
    -- 1-based external, 0-based internal
    local i = key - 1
    -- math.huge passes the floor() integrality test (floor(inf) == inf), but
    -- converting it to size_t for the FFI call is an out-of-range
    -- float-to-integer conversion whose result is not dependable. Reject it
    -- here; NaN is already rejected because NaN ~= floor(NaN).
    if i < 0 or i == math.huge or i ~= math.floor(i) then return nil end
    local rc = C.qjd_cursor_index(self._cur, i, child_box)
    if not check(rc) then return nil end
    local v = decode_cursor(self, child_box)
    -- Cache containers so identity is stable and materialization sticks.
    if type(v) == "table" then rawset(self, key, v) end
    return v
end

LazyArray.__index = read_array_index

-- Iterator function for lazy_object_iter: advances through object entries by
-- integer index, returning key/value pairs in source order.
local lazy_object_iter -- forward declaration; assigned after the cache helpers it uses

-- The set of keys reserved by the lazy view bookkeeping; user-supplied JSON
-- keys with these names would collide (minor, deferred). Centralized here so
-- the dirty check, the iterators, materialize and __newindex share the list.
local INTERNAL_KEYS = {
    _doc = true, _cur_box = true, _cur = true, _bs = true, _be = true,
}

-- Read-only lookup of a raw-cached child of a lazy view.
-- Returns nil when nothing is cached under `key`, or when `key` names one of
-- the bookkeeping fields (those are never user data).
local function cached_child(view, key)
    if INTERNAL_KEYS[key] then return nil end
    return rawget(view, key)
end

-- Return the identity-stable value for child `key` of `view`.
-- A previously cached container wins, so writes made through an earlier
-- reference stay visible. Otherwise the cursor in src_box is decoded and,
-- when the result is a container, cached under `key` exactly as the
-- __index handlers do. NOTE: this makes the parent view retain every
-- container it has handed out during iteration.
local function stable_child(view, key, src_box)
    local hit = cached_child(view, key)
    if hit ~= nil then return hit end
    local v = decode_cursor(view, src_box)
    if type(v) == "table" and not INTERNAL_KEYS[key] then
        rawset(view, key, v)
    end
    return v
end

lazy_object_iter = function(state, _prev_key)
    local view, i = state.view, state.i
    state.i = i + 1
    local rc = C.qjd_cursor_object_entry_at(
        view._cur, i, strp_box, size_box, child_box
    )
    if rc == QJD_NOT_FOUND then return nil end
    check(rc)
    -- Copy the key out before decoding the value: string values reuse
    -- strp_box / size_box.
    local k = ffi.string(strp_box[0], size_box[0])
    return k, stable_child(view, k, child_box)
end

-- Generic-for triple iterating a lazy object's entries in source order.
function LazyObject.__pairs(t)
    return lazy_object_iter, { view = t, i = 0 }, nil
end

-- Iterator for lazy arrays: yields (1-based index, value) until the C side
-- reports that the 0-based position is out of range.
local function lazy_array_iter(state, _prev_i)
    local view, i = state.view, state.i
    local rc = C.qjd_cursor_index(view._cur, i, child_box)
    if rc == QJD_NOT_FOUND then return nil end
    check(rc)
    state.i = i + 1
    return i + 1, stable_child(view, i + 1, child_box)
end

function LazyArray.__ipairs(t)
    return lazy_array_iter, { view = t, i = 0 }, 0
end

-- ipairs replacement that works on lazy arrays even when the runtime does
-- not honour __ipairs; falls back to the builtin for everything else.
function _M.ipairs(t)
    local mt = getmetatable(t)
    if mt == LazyArray then
        return LazyArray.__ipairs(t)
    end
    return ipairs(t)
end

-- pairs replacement that works on lazy objects and arrays even when the
-- runtime does not honour __pairs; falls back to the builtin otherwise.
function _M.pairs(t)
    local mt = getmetatable(t)
    if mt == LazyObject then
        return LazyObject.__pairs(t)
    elseif mt == LazyArray then
        return _M.ipairs(t)
    end
    return pairs(t)
end

-- Number of entries in the container behind a lazy view.
local function lazy_len(self)
    local rc = C.qjd_cursor_len(self._cur, "", 0, size_box)
    check(rc)
    return tonumber(size_box[0])
end

LazyObject.__len = lazy_len
LazyArray.__len = lazy_len

-- Public fallback for `#t` on a lazy proxy. Vanilla LuaJIT 5.1 does not invoke
-- __len on tables (only userdata) unless built with LUAJIT_ENABLE_LUA52COMPAT
-- (OpenResty's default). Callers running on a non-compat LuaJIT must use
-- qd.len(t) — same role qd.pairs / qd.ipairs play for __pairs / __ipairs.
function _M.len(t)
    local mt = getmetatable(t)
    if mt == LazyObject or mt == LazyArray then
        return lazy_len(t)
    end
    return #t
end

-- Materialize all key/value pairs from a LazyObject view into a plain list.
-- Returns a sequence of {k, v} pairs. The view is not mutated here; mutation
-- happens in __newindex after the walk completes successfully. Values are
-- decoded fresh from the document; callers merge the raw cache themselves.
local function materialize_object_contents(view)
    local i = 0
    local pairs_out = {}
    while true do
        local rc = C.qjd_cursor_object_entry_at(view._cur, i, strp_box, size_box, child_box)
        if rc == QJD_NOT_FOUND then break end
        check(rc)
        local k = ffi.string(strp_box[0], size_box[0])
        local v = decode_cursor(view, child_box)
        pairs_out[#pairs_out + 1] = { k, v }
        i = i + 1
    end
    return pairs_out
end

-- Materialize all elements from a LazyArray view into a plain sequence.
-- Returns a sequence indexed 1..n. The view is not mutated here.
local function materialize_array_contents(view)
    local i = 0
    local out = {}
    while true do
        local rc = C.qjd_cursor_index(view._cur, i, child_box)
        if rc == QJD_NOT_FOUND then break end
        check(rc)
        out[i + 1] = decode_cursor(view, child_box)
        i = i + 1
    end
    return out
end

-- On first write, walk all existing key/value pairs into a plain table,
-- strip the lazy metatable, then apply the new assignment. Any FFI error
-- during the walk leaves `t` in its original lazy state.
-- Existing rawget-cached entries (e.g. previously returned child proxies)
-- are preserved so callers' references remain valid.
LazyObject.__newindex = function(t, k, v)
    local contents = materialize_object_contents(t)
    -- Snapshot user-key cache BEFORE nilling internals.
    -- Iterate with next(): pairs() may invoke __pairs on lazy tables, walking
    -- the full JSON via FFI instead of the Lua-side rawget cache.
    local cache = {}
    for ck, cv in next, t do
        if not INTERNAL_KEYS[ck] then
            cache[ck] = cv
        end
    end
    t._doc, t._cur_box, t._cur, t._bs, t._be = nil, nil, nil, nil, nil
    setmetatable(t, nil)
    for _, kv in ipairs(contents) do
        rawset(t, kv[1], cache[kv[1]] or kv[2])
    end
    rawset(t, k, v)
end

-- On first write, walk all existing elements into a plain sequence,
-- switch to empty_array_mt (no lazy machinery), then apply the assignment.
-- Existing rawget-cached entries are preserved so callers' references remain valid.
LazyArray.__newindex = function(t, k, v)
    local contents = materialize_array_contents(t)
    -- Snapshot integer-key cache BEFORE nilling internals (raw iteration for
    -- the same reason as in LazyObject.__newindex).
    local cache = {}
    for ck, cv in next, t do
        if type(ck) == "number" then
            cache[ck] = cv
        end
    end
    t._doc, t._cur_box, t._cur, t._bs, t._be = nil, nil, nil, nil, nil
    setmetatable(t, _M.empty_array_mt)
    for i, x in ipairs(contents) do
        rawset(t, i, cache[i] or x)
    end
    rawset(t, k, v)
end

-- Parse `json_str` and return a lazy view of its top-level container.
-- Raises if parsing fails or if the top-level value is not an object/array.
function _M.decode(json_str)
    -- Reuse the existing qd.parse path to get a Doc with stable buffer hold.
    local doc = get_qd().parse(json_str)
    -- Open the root cursor into cur_box, then copy into a dedicated box owned
    -- by the view so that later child lookups (which reuse child_box) do not
    -- alias the root cursor's backing storage.
    local rc = C.qjd_open(doc._ptr, "", 0, cur_box)
    if not check(rc) then
        error("quickdecode: open root failed")
    end
    local root_box = ffi.new("qjd_cursor[1]")
    ffi.copy(root_box, cur_box, ffi.sizeof("qjd_cursor"))
    -- Determine root container kind (object/array) and wrap accordingly.
    -- Both have meaningful byte spans for encode.
    local trc = C.qjd_cursor_typeof(root_box[0], "", 0, type_box)
    if not check(trc) then
        error("quickdecode: root typeof failed")
    end
    local rt = type_box[0]
    local brc = C.qjd_cursor_bytes(root_box[0], sz_a, sz_b)
    if not check(brc) then
        error("quickdecode: root byte-span failed")
    end
    local view = {
        _doc = doc,
        _cur_box = root_box, -- keep the box alive; _cur is a stable reference
        _cur = root_box[0],
        _bs = tonumber(sz_a[0]),
        _be = tonumber(sz_b[0]),
    }
    if rt == T_OBJ then
        return setmetatable(view, LazyObject)
    elseif rt == T_ARR then
        return setmetatable(view, LazyArray)
    else
        error("quickdecode: top-level JSON value is not an object or array")
    end
end

-- True when `v` is a lazy view or a plain table that (transitively) holds
-- one. Only tables with no metatable or with empty_array_mt are descended
-- into; any other metatable marks a foreign object (or the fallback null
-- sentinel) that is treated as opaque. `seen` guards against cycles.
local function contains_lazy(v, seen)
    if type(v) ~= "table" then return false end
    local mt = getmetatable(v)
    if mt == LazyObject or mt == LazyArray then return true end
    if mt ~= nil and mt ~= _M.empty_array_mt then return false end
    if seen[v] then return false end
    seen[v] = true
    for _, x in next, v do
        if contains_lazy(x, seen) then return true end
    end
    return false
end

-- Build a fully plain deep copy of `v`.
-- Lazy views are walked through the FFI, but a raw-cached child always wins
-- over the freshly decoded one so earlier writes (`t.a.x = 1`) are kept.
-- Plain levels are copied so the input is never mutated. `seen` maps every
-- visited table to its copy, preserving shared references and cycles.
local function deep_plain(v, seen)
    if type(v) ~= "table" then return v end
    local mt = getmetatable(v)
    local is_obj, is_arr = mt == LazyObject, mt == LazyArray
    if not is_obj and not is_arr and mt ~= nil and mt ~= _M.empty_array_mt then
        return v -- opaque: foreign object or null sentinel
    end
    local out = seen[v]
    if out ~= nil then return out end
    out = {}
    seen[v] = out
    if is_obj then
        for _, kv in ipairs(materialize_object_contents(v)) do
            local k, child = kv[1], cached_child(v, kv[1])
            if child == nil then child = kv[2] end
            out[k] = deep_plain(child, seen)
        end
    elseif is_arr then
        for i, x in ipairs(materialize_array_contents(v)) do
            local child = rawget(v, i)
            if child == nil then child = x end
            out[i] = deep_plain(child, seen)
        end
        if #out == 0 then
            setmetatable(out, _M.empty_array_mt)
        end
    else
        for k, x in next, v do
            out[k] = deep_plain(x, seen)
        end
        if mt ~= nil then
            setmetatable(out, mt) -- keep the empty-array marker
        end
    end
    return out
end

-- Convert `v` into a tree of plain Lua tables with no lazy proxies left.
-- Non-table values, and plain tables that contain no lazy view anywhere, are
-- returned unchanged (same object). Otherwise a new tree is returned and the
-- input is left untouched. Modifications made through previously obtained
-- children are honoured.
local function materialize(v)
    if not contains_lazy(v, {}) then return v end
    return deep_plain(v, {})
end

_M.materialize = materialize

local string_byte = string.byte
local string_format = string.format

-- Minimal JSON string escaper covering the cjson default set.
+-- Bytes that have a short JSON escape; every other control byte below 0x20
+-- falls back to the \u00XX form in escape_byte.
+local JSON_ESCAPES = {
+    ['"']  = '\\"',
+    ['\\'] = '\\\\',
+    ['\n'] = '\\n',
+    ['\r'] = '\\r',
+    ['\t'] = '\\t',
+    ['\b'] = '\\b',
+    ['\f'] = '\\f',
+}
+
+-- gsub callback: map one byte that needs escaping to its JSON escape.
+local function escape_byte(c)
+    return JSON_ESCAPES[c] or string_format('\\u%04x', string_byte(c))
+end
+
+-- Quote `s` as a JSON string. Only `"`, `\` and bytes 0x00-0x1F are escaped;
+-- all other bytes are copied through unchanged. A single gsub pass replaces
+-- the former per-byte table build, which allocated one string per input byte.
+local function encode_string(s)
+    local escaped = string.gsub(s, '[%z\1-\31"\\]', escape_byte)
+    return '"' .. escaped .. '"'
+end
+
+-- Format a Lua number as JSON. Integral values with magnitude below 1e15 are
+-- printed with %d, everything else with %.14g. Raises on NaN and +/-inf,
+-- which JSON cannot represent.
+local function encode_number(n)
+    if n ~= n or n == math.huge or n == -math.huge then
+        error("qd.encode: cannot encode non-finite number")
+    end
+    if n == math.floor(n) and math.abs(n) < 1e15 then
+        return string_format("%d", n)
+    end
+    return string_format("%.14g", n)
+end
+
+-- A lazy subtree is "dirty" if any cached descendant has been materialized
+-- (no longer carries Lazy* metatable). Non-cached descendants are guaranteed
+-- untouched, so we only need to walk the rawget-cached entries.
+local function is_dirty(v)
+    if type(v) ~= "table" then return false end
+    local mt = getmetatable(v)
+    if mt ~= LazyObject and mt ~= LazyArray then
+        return true -- materialized
+    end
+    -- Use next() for raw table iteration: pairs() would invoke __pairs on
+    -- lazy tables, walking the full JSON via FFI instead of the Lua cache.
+    local k, child = next(v)
+    while k ~= nil do
+        if not INTERNAL_KEYS[k] then
+            if is_dirty(child) then return true end
+        end
+        k, child = next(v, k)
+    end
+    return false
+end
+
+-- Forward declaration so encode_lazy_object_walking, encode_lazy_array_walking,
+-- and encode_array/encode_object can reference encode before its definition is
+-- complete (Lua resolves upvalues at call time, but the slot must be declared first).
+local encode
+
+-- Walk a dirty LazyObject and emit JSON, preferring cached children (which
+-- may be materialized) over freshly resolved cursors. Non-cached children
+-- emit through a fresh proxy and naturally fast-path their unmodified subtree.
+local function encode_lazy_object_walking(t)
+    local parts = {}
+    local i = 0
+    while true do
+        local rc = C.qjd_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box)
+        if rc == QJD_NOT_FOUND then break end
+        check(rc)
+        local k = ffi.string(strp_box[0], size_box[0])
+        local v
+        local cached = rawget(t, k)
+        if cached ~= nil and not INTERNAL_KEYS[k] then
+            v = cached
+        else
+            v = decode_cursor(t, child_box)
+        end
+        i = i + 1
+        parts[i] = encode_string(k) .. ":" .. encode(v)
+    end
+    return "{" .. table.concat(parts, ",") .. "}"
+end
+
+-- Array counterpart of encode_lazy_object_walking. The cache is consulted
+-- first so the FFI lookup only happens for elements that are not cached.
+local function encode_lazy_array_walking(t)
+    local rc = C.qjd_cursor_len(t._cur, "", 0, size_box)
+    check(rc)
+    local n = tonumber(size_box[0])
+    local parts = {}
+    for i = 1, n do
+        local v = rawget(t, i)
+        if v == nil then
+            -- Lua index i is element i - 1 on the FFI side.
+            check(C.qjd_cursor_index(t._cur, i - 1, child_box))
+            v = decode_cursor(t, child_box)
+        end
+        parts[i] = encode(v)
+    end
+    return "[" .. table.concat(parts, ",") .. "]"
+end
+
+-- Encode a lazy proxy: untouched subtrees are sliced straight out of the
+-- original buffer, dirty ones are walked entry by entry.
+local function encode_proxy(t)
+    if not is_dirty(t) then
+        -- Fast path: no mutations — slice the original buffer bytes.
+        return t._doc._hold:sub(t._bs + 1, t._be)
+    end
+    if getmetatable(t) == LazyObject then
+        return encode_lazy_object_walking(t)
+    end
+    return encode_lazy_array_walking(t)
+end
+
+-- Decide whether a plain table encodes as a JSON array: either it carries
+-- empty_array_mt, or it is non-empty and its keys are exactly 1..#t.
+-- A plain empty table is therefore encoded as an object.
+local function is_array(t)
+    if getmetatable(t) == _M.empty_array_mt then return true end
+    local n = #t
+    if n == 0 then return false end
+    local count = 0
+    for k in pairs(t) do
+        if type(k) ~= "number" or k < 1 or k > n or k ~= math.floor(k) then
+            return false
+        end
+        count = count + 1
+    end
+    return count == n
+end
+
+local function encode_array(t)
+    local parts = {}
+    for i = 1, #t do
+        parts[i] = encode(t[i])
+    end
+    return "[" .. table.concat(parts, ",") .. "]"
+end
+
+-- Encode a plain table as a JSON object. String keys are escaped; numeric
+-- keys are written as their quoted number form (the way lua-cjson treats
+-- them) instead of being rejected. Any other key type raises.
+local function encode_object(t)
+    local parts = {}
+    for k, v in pairs(t) do
+        local tk = type(k)
+        local key
+        if tk == "string" then
+            key = encode_string(k)
+        elseif tk == "number" then
+            key = '"' .. encode_number(k) .. '"'
+        else
+            error("qd.encode: object key must be a string or number, got " .. tk)
+        end
+        parts[#parts + 1] = key .. ":" .. encode(v)
+    end
+    return "{" .. table.concat(parts, ",") .. "}"
+end
+
+-- Encode any supported value: qd.null, strings, numbers, booleans, lazy
+-- proxies and plain tables. Raises on every other type.
+encode = function(v)
+    if rawequal(v, _M.null) then
+        return "null"
+    end
+    local tv = type(v)
+    if tv == "string" then
+        return encode_string(v)
+    elseif tv == "number" then
+        return encode_number(v)
+    elseif tv == "boolean" then
+        return v and "true" or "false"
+    elseif tv == "table" then
+        local mt = getmetatable(v)
+        if mt == LazyObject or mt == LazyArray then
+            return encode_proxy(v)
+        end
+        if is_array(v) then
+            return encode_array(v)
+        end
+        return encode_object(v)
+    end
+    error("qd.encode: unsupported value type: " .. tv)
+end
+
+_M.encode = encode
+
+-- Debug convenience: tostring(lazy_view) returns the original JSON bytes.
+-- Not the canonical encoder — callers should still use qd.encode for output.
+LazyObject.__tostring = encode_proxy
+LazyArray.__tostring = encode_proxy
+
+-- Test-only exports for metatable identity checks.
+_M._LazyObject = LazyObject
+_M._LazyArray = LazyArray
+
+return _M
diff --git a/src/cursor.rs b/src/cursor.rs
index cc21556..b2cd890 100644
--- a/src/cursor.rs
+++ b/src/cursor.rs
@@ -77,7 +77,27 @@ fn walk_children(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result = None;
-    while i < end {
+    // Empty-container guard: if the byte immediately after the opener (ignoring
+    // whitespace) is the closing bracket/brace, there are no children.
+    {
+        let opener_byte_pos = doc.indices[cur.idx_start as usize] as usize;
+        let closer_byte_pos = doc.indices[end as usize] as usize;
+        let mut p = opener_byte_pos + 1;
+        while p < closer_byte_pos && matches!(doc.buf[p], b' ' | b'\t' | b'\n' | b'\r') {
+            p += 1;
+        }
+        if p == closer_byte_pos {
+            let slot = cache.slot_mut(slot_n);
+            slot.child_starts = starts;
+            slot.child_ends = ends;
+            return Err(qjd_err::QJD_NOT_FOUND);
+        }
+    }
+
+    // Use `<= end` so trailing scalar elements (which have no structural
+    // marker of their own — `indices[end]` is the parent closer) are visited.
+    // The `b'}' | b']' => break` arm below terminates the loop after them.
+    while i <= end {
         starts.push(i);
 
         let value_idx_start = if is_obj { i + 3 } else { i };
diff --git a/src/doc.rs b/src/doc.rs
index 7c6ecda..707bb44 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -66,6 +66,53 @@ impl<'a> Document<'a> {
         Ok(p)
     }
 
+    /// Find the n-th (0-based) key/value entry of an object cursor. Returns
+    /// the indices position of the key (so the caller can decode it via the
+    /// existing string-decode path) and the value's `Cursor`.
+    ///
+    /// Returns `QJD_TYPE_MISMATCH` for non-object cursors, `QJD_NOT_FOUND`
+    /// when `n` is past the end (including every `n` on an empty object).
+    pub(crate) fn nth_object_entry(&self, cur: Cursor, n: usize) -> Result<(u32, Cursor), qjd_err> {
+        let pos = self.indices[cur.idx_start as usize] as usize;
+        let b = *self.buf.get(pos).ok_or(qjd_err::QJD_PARSE_ERROR)?;
+        if b != b'{' {
+            return Err(qjd_err::QJD_TYPE_MISMATCH);
+        }
+        // Mirror cursor_len's walk, but stop at the n-th child rather than counting.
+        let closer_pos = self.indices[cur.idx_end as usize] as usize;
+        let mut p = pos + 1;
+        while p < closer_pos && matches!(self.buf[p], b' '|b'\t'|b'\n'|b'\r') {
+            p += 1;
+        }
+        if p == closer_pos {
+            return Err(qjd_err::QJD_NOT_FOUND);
+        }
+        let mut i = cur.idx_start + 1;
+        let end = cur.idx_end;
+        let mut count: usize = 0;
+        loop {
+            // For objects, the key occupies indices[i..=i+1] (open & close quote);
+            // the value cursor starts at i+3 (after the colon at i+2).
+            let key_idx_start = i;
+            let value_idx_start = i + 3;
+            let (cursor_end, skip_end) = crate::cursor::find_value_span(self, value_idx_start)?;
+            if count == n {
+                return Ok((key_idx_start, Cursor { idx_start: value_idx_start, idx_end: cursor_end }));
+            }
+            count += 1;
+            let after_pos = self.indices[skip_end as usize] as usize;
+            if after_pos >= self.buf.len() { return Err(qjd_err::QJD_PARSE_ERROR); }
+            match self.buf[after_pos] {
+                b',' => {
+                    i = skip_end + 1;
+                    if i > end { return Err(qjd_err::QJD_NOT_FOUND); }
+                }
+                b'}' => return Err(qjd_err::QJD_NOT_FOUND),
+                _ => return Err(qjd_err::QJD_PARSE_ERROR),
+            }
+        }
+    }
+
     /// Count direct children of the container at `cur`.
     /// Returns QJD_TYPE_MISMATCH for non-container cursors.
     pub(crate) fn cursor_len(&self, cur: Cursor) -> Result {
diff --git a/src/ffi.rs b/src/ffi.rs
index 09d4094..b5110b1 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -341,19 +341,26 @@ pub unsafe extern "C" fn qjd_get_bool(
     })
 }
 
-/// Return the byte slice for a scalar value (number, true, false, null).
-/// Uses the cursor convention: cur.idx_start is the position in indices of
-/// the structural char AFTER the scalar (a separator or closer).
-unsafe fn scalar_bytes<'d>(d: &'d Document<'d>, cur: Cursor) -> Result<&'d [u8], qjd_err> {
-    // First byte: just after the previous structural char (skip whitespace).
+/// Compute the byte range of a scalar value (number / true / false / null).
+/// Uses the cursor convention: `cur.idx_start` is the position in indices
+/// of the structural char AFTER the scalar (a separator or closer); the
+/// scalar's bytes sit between `find_scalar_start(cur.idx_start)` and that
+/// structural char, with trailing whitespace stripped.
+unsafe fn scalar_byte_range(d: &Document<'_>, cur: Cursor) -> Result<(usize, usize), qjd_err> {
     let start = d.find_scalar_start(cur.idx_start)?;
-    // End byte: position of the structural char at cur.idx_start (exclusive).
     let end = d.indices[cur.idx_start as usize] as usize;
     if end < start { return Err(qjd_err::QJD_PARSE_ERROR); }
-    // Strip trailing whitespace.
     let mut e = end;
     while e > start && matches!(d.buf[e - 1], b' '|b'\t'|b'\n'|b'\r') { e -= 1; }
-    Ok(&d.buf[start..e])
+    Ok((start, e))
+}
+
+/// Return the byte slice for a scalar value (number, true, false, null).
+/// Thin wrapper over `scalar_byte_range`, which documents the cursor
+/// convention; this only turns the returned range into a slice of `d.buf`.
+unsafe fn scalar_bytes<'d>(d: &'d Document<'d>, cur: Cursor) -> Result<&'d [u8], qjd_err> {
+    let (s, e) = scalar_byte_range(d, cur)?;
+    Ok(&d.buf[s..e])
 }
 
 // ── qjd_cursor type and cursor-based FFI ────────────────────────────────────
@@ -668,6 +675,105 @@ pub unsafe extern "C" fn qjd_cursor_len(
     })
 }
 
+/// Write the original-buffer byte range `[byte_start, byte_end)` that the
+/// cursor's value occupies. For containers and strings, the range runs from
+/// the opening bracket/quote through the matching closer inclusive (so
+/// `byte_end` is one past the close char). For other scalars, surrounding
+/// whitespace and separators are stripped (same convention as `scalar_bytes`).
+///
+/// # Safety
+///
+/// See the module-level [shared safety contract](self#shared-safety-contract).
+/// `c` must point to a cursor produced by an earlier `qjd_*` call whose
+/// document is still alive; `byte_start` and `byte_end` must be non-NULL
+/// and writable.
+#[no_mangle]
+pub unsafe extern "C" fn qjd_cursor_bytes(
+    c: *const qjd_cursor, byte_start: *mut usize, byte_end: *mut usize,
+) -> c_int {
+    ffi_catch!({
+        if byte_start.is_null() || byte_end.is_null() {
+            return qjd_err::QJD_INVALID_ARG as c_int;
+        }
+        let (d, cur) = match cursor_to_internal(c) {
+            Ok(x) => x, Err(e) => return e as c_int,
+        };
+        let pos = d.indices[cur.idx_start as usize] as usize;
+        let lead = match d.buf.get(pos) {
+            Some(b) => *b,
+            None => return qjd_err::QJD_PARSE_ERROR as c_int,
+        };
+        match lead {
+            b'{' | b'[' | b'"' => {
+                // Container or string: span runs from opener to the matching
+                // closer, inclusive.
+                let end = d.indices[cur.idx_end as usize] as usize;
+                if end >= d.buf.len() {
+                    return qjd_err::QJD_PARSE_ERROR as c_int;
+                }
+                *byte_start = pos;
+                *byte_end = end + 1;
+                qjd_err::QJD_OK as c_int
+            }
+            _ => {
+                // Scalar: delegate to scalar_byte_range.
+                let (s, e) = match scalar_byte_range(d, cur) {
+                    Ok(x) => x, Err(e) => return e as c_int,
+                };
+                *byte_start = s;
+                *byte_end = e;
+                qjd_err::QJD_OK as c_int
+            }
+        }
+    })
+}
+
+/// Write the i-th (0-based) object entry's key (decoded into the doc's
+/// scratch buffer) and value cursor into the out parameters.
+///
+/// Returns `QJD_TYPE_MISMATCH` when the cursor is not an object, or
+/// `QJD_NOT_FOUND` when `i` is past the end.
+///
+/// # Safety
+///
+/// See the module-level [shared safety contract](self#shared-safety-contract).
+/// `c` must point to a live cursor; `key_ptr`, `key_len`, and `value_out`
+/// must be non-NULL and writable. The `(*key_ptr, *key_len)` pair is
+/// invalidated by the next `qjd_get_str` / `qjd_cursor_get_str` /
+/// `qjd_cursor_object_entry_at` call on the same document (scratch reuse).
+#[no_mangle]
+pub unsafe extern "C" fn qjd_cursor_object_entry_at(
+    c: *const qjd_cursor, i: usize,
+    key_ptr: *mut *const u8, key_len: *mut usize,
+    value_out: *mut qjd_cursor,
+) -> c_int {
+    ffi_catch!({
+        if key_ptr.is_null() || key_len.is_null() || value_out.is_null() {
+            return qjd_err::QJD_INVALID_ARG as c_int;
+        }
+        let (d, cur) = match cursor_to_internal(c) {
+            Ok(x) => x, Err(e) => return e as c_int,
+        };
+        let (key_idx_start, value_cur) = match d.nth_object_entry(cur, i) {
+            Ok(x) => x, Err(e) => return e as c_int,
+        };
+        // Decode the key: it sits at indices[key_idx_start..=key_idx_start+1]
+        // — open quote at key_idx_start, close quote at key_idx_start+1.
+        let open_pos = d.indices[key_idx_start as usize] as usize;
+        let close_pos = d.indices[(key_idx_start + 1) as usize] as usize;
+        let mut scratch = d.scratch.borrow_mut();
+        match string::decode_string(d.buf, open_pos + 1, close_pos, &mut scratch) {
+            Ok((p, n)) => {
+                *key_ptr = p;
+                *key_len = n;
+                *value_out = internal_to_cursor((*c).doc, value_cur);
+                qjd_err::QJD_OK as c_int
+            }
+            Err(e) => e as c_int,
+        }
+    })
+}
+
 /// Test-only export that forces a Rust panic to verify the FFI panic barrier
 /// converts it to `QJD_OOM` instead of unwinding across the boundary.
 ///
diff --git a/src/lib.rs b/src/lib.rs
index 43a07ca..90215cd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,4 @@
 //! lua-quick-decode: Rust JSON decoder for LuaJIT FFI consumers.
-//! See docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md
 
 pub mod error;
 pub(crate) mod scan;
diff --git a/tests/ffi_cursor.rs b/tests/ffi_cursor.rs
index dde276d..722bc42 100644
--- a/tests/ffi_cursor.rs
+++ b/tests/ffi_cursor.rs
@@ -79,3 +79,53 @@ fn cursor_field_with_dotted_key() {
 
     unsafe { qjd_free(d) };
 }
+
+// Regression: walk_children must visit the trailing scalar (last element has no
+// structural marker of its own — `indices[end]` is the parent closer).
+// Before the fix, `while i < end` stopped one step early and index 2 returned
+// QJD_NOT_FOUND for a 3-element all-scalar array.
+#[test]
+fn walk_children_trailing_scalar_integer() {
+    let d = parse(b"[10,20,30]");
+    let mut c = std::mem::MaybeUninit::<qjd_cursor>::uninit();
+    let empty = b"";
+    assert_eq!(unsafe { qjd_open(d, empty.as_ptr() as *const i8, 0, c.as_mut_ptr()) }, 0);
+    let c = unsafe { c.assume_init() };
+
+    // Index 2 is the trailing element `30`.
+    let mut sub = std::mem::MaybeUninit::<qjd_cursor>::uninit();
+    let rc = unsafe { qjd_cursor_index(&c, 2, sub.as_mut_ptr()) };
+    assert_eq!(rc, 0, "qjd_cursor_index([2]) must succeed");
+    let sub = unsafe { sub.assume_init() };
+
+    let mut v: i64 = 0;
+    let rc = unsafe { qjd_cursor_get_i64(&sub, empty.as_ptr() as *const i8, 0, &mut v) };
+    assert_eq!(rc, 0, "qjd_cursor_get_i64 on trailing element must succeed");
+    assert_eq!(v, 30);
+
+    unsafe { qjd_free(d) };
+}
+
+// Regression: trailing scalar with non-numeric type — ensures walk_children
+// visits it and type information is correct (not silently skipped).
+#[test]
+fn walk_children_trailing_scalar_bool() {
+    let d = parse(b"[1,\"x\",true]");
+    let mut c = std::mem::MaybeUninit::<qjd_cursor>::uninit();
+    let empty = b"";
+    assert_eq!(unsafe { qjd_open(d, empty.as_ptr() as *const i8, 0, c.as_mut_ptr()) }, 0);
+    let c = unsafe { c.assume_init() };
+
+    // Index 2 is the trailing element `true`.
+    let mut sub = std::mem::MaybeUninit::<qjd_cursor>::uninit();
+    let rc = unsafe { qjd_cursor_index(&c, 2, sub.as_mut_ptr()) };
+    assert_eq!(rc, 0, "qjd_cursor_index([2]) must succeed");
+    let sub = unsafe { sub.assume_init() };
+
+    let mut b: c_int = -1;
+    let rc = unsafe { qjd_cursor_get_bool(&sub, empty.as_ptr() as *const i8, 0, &mut b) };
+    assert_eq!(rc, 0, "qjd_cursor_get_bool on trailing `true` must succeed");
+    assert_eq!(b, 1);
+
+    unsafe { qjd_free(d) };
+}
diff --git a/tests/ffi_cursor_bytes.rs b/tests/ffi_cursor_bytes.rs
new file mode 100644
index 0000000..c7d4821
--- /dev/null
+++ b/tests/ffi_cursor_bytes.rs
@@ -0,0 +1,90 @@
+use std::os::raw::c_int;
+use std::ptr;
+
+use quickdecode::error::qjd_err;
+use quickdecode::ffi::{
+    qjd_cursor, qjd_cursor_bytes, qjd_cursor_field, qjd_doc, qjd_free, qjd_open, qjd_parse,
+};
+
+unsafe fn open_root(json: &[u8]) -> (*mut qjd_doc, qjd_cursor) {
+    let mut err: c_int = -1;
+    let doc = qjd_parse(json.as_ptr(), json.len(), &mut err);
+    assert!(!doc.is_null());
+    let mut cur: qjd_cursor = std::mem::zeroed();
+    let rc = qjd_open(doc, ptr::null(), 0, &mut cur);
+    assert_eq!(rc, 0);
+    (doc, cur)
+}
+
+#[test]
+fn bytes_of_root_object_covers_full_json() {
+    let json = br#"{"a":1,"b":[2,3]}"#;
+    unsafe {
+        let (doc, cur) = open_root(json);
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjd_cursor_bytes(&cur, &mut bs, &mut be);
+        assert_eq!(rc, 0);
+        assert_eq!(&json[bs..be], json.as_ref());
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn bytes_of_string_value_is_quoted_span() {
+    let json = br#"{"k":"hello"}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjd_cursor = std::mem::zeroed();
+        let rc = qjd_cursor_field(&root, b"k".as_ptr() as *const i8, 1, &mut child);
+        assert_eq!(rc, 0);
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjd_cursor_bytes(&child, &mut bs, &mut be);
+        assert_eq!(rc, 0);
+        assert_eq!(&json[bs..be], br#""hello""#);
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn bytes_of_number_value_strips_separators() {
+    let json = br#"{"k": 42 ,"x":1}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut child: qjd_cursor = std::mem::zeroed();
+        let rc = qjd_cursor_field(&root, b"k".as_ptr() as *const i8, 1, &mut child);
+        assert_eq!(rc, 0);
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjd_cursor_bytes(&child, &mut bs, &mut be);
+        assert_eq!(rc, 0);
+        assert_eq!(&json[bs..be], b"42");
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn bytes_with_null_out_pointer_returns_invalid_arg() {
+    let json = br#"{"a":1}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let rc = qjd_cursor_bytes(&root, ptr::null_mut(), ptr::null_mut());
+        assert_eq!(rc, qjd_err::QJD_INVALID_ARG as c_int);
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn bytes_of_root_array_covers_full_json() {
+    let json = br#"[1,"two",true]"#;
+    unsafe {
+        let (doc, cur) = open_root(json);
+        let mut bs: usize = 0;
+        let mut be: usize = 0;
+        let rc = qjd_cursor_bytes(&cur, &mut bs, &mut be);
+        assert_eq!(rc, 0);
+        assert_eq!(&json[bs..be], json.as_ref());
+        qjd_free(doc);
+    }
+}
diff --git a/tests/ffi_object_iter.rs b/tests/ffi_object_iter.rs
new file mode 100644
index 0000000..622abf8
--- /dev/null
+++ b/tests/ffi_object_iter.rs
@@ -0,0 +1,83 @@
+use std::os::raw::c_int;
+use std::ptr;
+
+use quickdecode::error::qjd_err;
+use quickdecode::ffi::{
+    qjd_cursor, qjd_cursor_object_entry_at, qjd_doc, qjd_free, qjd_open, qjd_parse,
+};
+
+// Parse `json` and open a cursor on its root value. Both steps are asserted so
+// a failing parse/open stops the test here instead of surfacing later as a
+// confusing error on a zeroed cursor.
+unsafe fn open_root(json: &[u8]) -> (*mut qjd_doc, qjd_cursor) {
+    let mut err: c_int = -1;
+    let doc = qjd_parse(json.as_ptr(), json.len(), &mut err);
+    assert!(!doc.is_null());
+    let mut cur: qjd_cursor = std::mem::zeroed();
+    let rc = qjd_open(doc, ptr::null(), 0, &mut cur);
+    assert_eq!(rc, 0);
+    (doc, cur)
+}
+
+// Fetch entry `i` of the object at `root`, copying the key out of the doc's
+// scratch buffer right away (the next entry lookup reuses that buffer).
+unsafe fn entry_at(root: &qjd_cursor, i: usize) -> (String, qjd_cursor) {
+    let mut kp: *const u8 = ptr::null();
+    let mut kn: usize = 0;
+    let mut vc: qjd_cursor = std::mem::zeroed();
+    let rc = qjd_cursor_object_entry_at(root, i, &mut kp, &mut kn, &mut vc);
+    assert_eq!(rc, 0, "entry_at({}) failed with rc={}", i, rc);
+    let key = std::slice::from_raw_parts(kp, kn);
+    (String::from_utf8(key.to_vec()).unwrap(), vc)
+}
+
+#[test]
+fn three_keys_in_order() {
+    let json = br#"{"a":1,"b":"x","c":[2,3]}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let (k0, _) = entry_at(&root, 0);
+        let (k1, _) = entry_at(&root, 1);
+        let (k2, _) = entry_at(&root, 2);
+        assert_eq!(k0, "a");
+        assert_eq!(k1, "b");
+        assert_eq!(k2, "c");
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn key_with_escape_decodes() {
+    // The key `"a\nb"` (3 chars: a, newline, b) — verifies the FFI runs the
+    // string-decode scratch path rather than handing back raw escaped bytes.
+    let json = b"{\"a\\nb\":1}";
+    unsafe {
+        let (doc, root) = open_root(json);
+        let (k0, _) = entry_at(&root, 0);
+        assert_eq!(k0, "a\nb");
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn out_of_range_returns_not_found() {
+    let json = br#"{"a":1}"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut kp: *const u8 = ptr::null();
+        let mut kn: usize = 0;
+        let mut vc: qjd_cursor = std::mem::zeroed();
+        let rc = qjd_cursor_object_entry_at(&root, 5, &mut kp, &mut kn, &mut vc);
+        assert_eq!(rc, qjd_err::QJD_NOT_FOUND as c_int);
+        qjd_free(doc);
+    }
+}
+
+#[test]
+fn array_cursor_returns_type_mismatch() {
+    let json = br#"[1,2,3]"#;
+    unsafe {
+        let (doc, root) = open_root(json);
+        let mut kp: *const u8 = ptr::null();
+        let mut kn: usize = 0;
+        let mut vc: qjd_cursor = std::mem::zeroed();
+        let rc = qjd_cursor_object_entry_at(&root, 0, &mut kp, &mut kn, &mut vc);
+        assert_eq!(rc, qjd_err::QJD_TYPE_MISMATCH as c_int);
+        qjd_free(doc);
+    }
+}
diff --git a/tests/lua/lazy_table_spec.lua b/tests/lua/lazy_table_spec.lua
new file mode 100644
index 0000000..b0bf909
--- /dev/null
+++ b/tests/lua/lazy_table_spec.lua
@@ -0,0 +1,394 @@
+local qd = require("quickdecode")
+local qt = qd  -- keep tests reading naturally
+
+describe("LazyObject __index — scalars", function()
+    it("reads a string field", function()
+        local t = qt.decode('{"k":"hello"}')
+        assert.are.equal("hello", t.k)
+    end)
+
+    it("reads a number field", function()
+        local t = qt.decode('{"n":42.5}')
+        assert.are.equal(42.5, t.n)
+    end)
+
+    it("reads a boolean field", function()
+        local t = qt.decode('{"b":true,"c":false}')
+        assert.is_true(t.b)
+        assert.is_false(t.c)
+    end)
+
+    it("returns nil for missing key", function()
+        local t = qt.decode('{"a":1}')
+        assert.is_nil(t.missing)
+    end)
+end)
+
+describe("LazyObject __index — nested containers", function()
+    it("returns a LazyObject for a nested object", function()
+        local t = qt.decode('{"a":{"b":"x"}}')
+        local inner = t.a
+        assert.is_table(inner)
+        assert.are.equal("x", inner.b)
+    end)
+
+    it("returns a LazyArray for a nested array", function()
+        local t = qt.decode('{"xs":[10,20]}')
+        local xs = t.xs
+        assert.is_table(xs)
+        assert.are.equal(qt._LazyArray, getmetatable(xs))
+        assert.are.equal(10, xs[1])
+    end)
+end)
+
+describe("LazyArray __index", function()
+    it("reads scalar elements by integer index (1-based)", function()
+        local t = qt.decode('[10,"x",true,null]')
+        assert.are.equal(10, t[1])
+        assert.are.equal("x", t[2])
+        assert.is_true(t[3])
+        assert.are.equal(qt.null, t[4])
+    end)
+
+    it("returns nil for out-of-range index", function()
+        local t = qt.decode('[1,2,3]')
+        assert.is_nil(t[0])
+        assert.is_nil(t[4])
+    end)
+
+    it("returns nil for non-integer key", function()
+        local t = qt.decode('[1,2,3]')
+        assert.is_nil(t.foo)
+        assert.is_nil(t[1.5])
+    end)
+
+    it("returns a nested LazyObject", function()
+        local t = qt.decode('[{"a":1},{"a":2}]')
+        assert.are.equal(1, t[1].a)
+        assert.are.equal(2, t[2].a)
+    end)
+end)
+
+-- LuaJIT 5.1 only invokes __len on userdata; it ignores the metamethod on
+-- tables unless built with LUAJIT_ENABLE_LUA52COMPAT (OpenResty's default).
+-- Probe once so the `#t` cases only run where they can pass; qt.len(t) is
+-- the supported path everywhere.
+local LJ52_LEN = (#setmetatable({}, {__len = function() return 99 end}) == 99)
+
+describe("qt.len", function()
+    it("counts object keys", function()
+        local t = qt.decode('{"a":1,"b":2,"c":3}')
+        assert.are.equal(3, qt.len(t))
+    end)
+
+    it("counts array elements", function()
+        local t = qt.decode('[10,20,30,40]')
+        assert.are.equal(4, qt.len(t))
+    end)
+
+    it("returns 0 for empty containers", function()
+        assert.are.equal(0, qt.len(qt.decode('{}')))
+        assert.are.equal(0, qt.len(qt.decode('[]')))
+    end)
+
+    it("falls back to # on a plain table", function()
+        assert.are.equal(3, qt.len({10, 20, 30}))
+    end)
+end)
+
+describe("__len (LJ52 only)", function()
+    it("counts object keys via #t", function()
+        if not LJ52_LEN then return pending("LuaJIT built without LUAJIT_ENABLE_LUA52COMPAT") end
+        local t = qt.decode('{"a":1,"b":2,"c":3}')
+        assert.are.equal(3, #t)
+    end)
+
+    it("counts array elements via #t", function()
+        if not LJ52_LEN then return pending("LuaJIT built without LUAJIT_ENABLE_LUA52COMPAT") end
+        local t = qt.decode('[10,20,30,40]')
+        assert.are.equal(4, #t)
+    end)
+
+    it("returns 0 for empty containers via #t", function()
+        if not LJ52_LEN then return pending("LuaJIT built without LUAJIT_ENABLE_LUA52COMPAT") end
+        assert.are.equal(0, #qt.decode('{}'))
+        assert.are.equal(0, #qt.decode('[]'))
+    end)
+end)
+
+describe("__pairs / qd.pairs over LazyObject", function()
+    it("iterates string keys in source order", function()
+        local t = qt.decode('{"a":1,"b":2,"c":3}')
+        local keys = {}
+        local values = {}
+        for k, v in qt.pairs(t) do
+            keys[#keys+1] = k
+            values[#values+1] = v
+        end
+        assert.are.same({"a","b","c"}, keys)
+        assert.are.same({1, 2, 3}, values)
+    end)
+
+    it("returns nested containers as lazy proxies, not materialized", function()
+        local t = qt.decode('{"a":{"x":1}}')
+        for _, v in qt.pairs(t) do
+            assert.are.equal(qt._LazyObject, getmetatable(v))
+            assert.are.equal(1, v.x)
+        end
+    end)
+
+    it("handles empty object", function()
+        local count = 0
+        for _ in qt.pairs(qt.decode('{}')) do count = count + 1 end
+        assert.are.equal(0, count)
+    end)
+end)
+
+describe("__ipairs / qd.ipairs over LazyArray", function()
+    it("iterates elements 1..n in order", function()
+        local t = qt.decode('[10,20,30]')
+        local got = {}
+        for i, v in qt.ipairs(t) do got[i] = v end
+        assert.are.same({10,20,30}, got)
+    end)
+
+    it("yields lazy proxies for nested containers", function()
+        local t = qt.decode('[{"a":1},{"a":2}]')
+        local seen = {}
+        for _, v in qt.ipairs(t) do
+            assert.are.equal(qt._LazyObject, getmetatable(v))
+            seen[#seen+1] = v.a
+        end
+        assert.are.same({1, 2}, seen)
+    end)
+
+    it("handles empty array", function()
+        local count = 0
+        for _ in qt.ipairs(qt.decode('[]')) do count = count + 1 end
+        assert.are.equal(0, count)
+    end)
+end)
+
+describe("__newindex — first-write materialization", function()
+    it("converts LazyObject into a plain table preserving existing keys", function()
+        local t = qt.decode('{"a":1,"b":2}')
+        t.c = 3
+        assert.is_nil(getmetatable(t))
+        assert.are.equal(1, t.a)
+        assert.are.equal(2, t.b)
+        assert.are.equal(3, t.c)
+    end)
+
+    it("nested containers remain lazy after parent materialization", function()
+        local t = qt.decode('{"inner":{"x":1}}')
+        t.extra = "y"
+        assert.is_nil(getmetatable(t))
+        local inner = t.inner
+        assert.are.equal(qt._LazyObject, getmetatable(inner))
+        assert.are.equal(1, inner.x)
+    end)
+
+    it("LazyArray materializes preserving empty_array_mt", function()
+        local t = qt.decode('[]')
+        t[1] = "x"
+        assert.are.equal(qt.empty_array_mt, getmetatable(t))
+        assert.are.equal("x", t[1])
+    end)
+
+    it("simple write leaves other keys intact", function()
+        local t = qt.decode('{"a":1}')
+        t.b = 2
+        assert.are.equal(1, t.a)
+        assert.are.equal(2, t.b)
+    end)
+end)
+
+describe("qt.materialize", function()
+    it("converts a LazyObject and its nested containers into real tables", function()
+        local m = qt.materialize(qt.decode('{"a":1,"b":{"c":[10,20]}}'))
+        assert.is_nil(getmetatable(m))
+        assert.are.equal(1, m.a)
+        assert.is_nil(getmetatable(m.b))
+        assert.are.equal(10, m.b.c[1])
+        assert.are.equal(20, m.b.c[2])
+    end)
+
+    it("tags empty arrays with empty_array_mt", function()
+        local m = qt.materialize(qt.decode('[]'))
+        assert.are.equal(qt.empty_array_mt, getmetatable(m))
+    end)
+
+    it("preserves cjson.null", function()
+        local m = qt.materialize(qt.decode('{"x":null}'))
+        assert.are.equal(qt.null, m.x)
+    end)
+
+    it("passes through scalars and plain tables unchanged", function()
+        assert.are.equal(42, qt.materialize(42))
+        assert.are.equal("hi", qt.materialize("hi"))
+        local raw = {1, 2, 3}
+        assert.are.equal(raw, qt.materialize(raw))
+    end)
+end)
+
+describe("qd.encode — lazy proxy substring fast path", function()
+    it("re-emits the original JSON for an unmodified LazyObject", function()
+        local src = '{"a":1,"b":[2,3],"c":"x"}'
+        local t = qt.decode(src)
+        assert.are.equal(src, qt.encode(t))
+    end)
+
+    it("re-emits the original JSON for an unmodified LazyArray", function()
+        local src = '[10,20,{"k":"v"}]'
+        local t = qt.decode(src)
+        assert.are.equal(src, qt.encode(t))
+    end)
+
+    it("trims leading/trailing whitespace at the boundary", function()
+        local src = ' {"a":1} '
+        local t = qt.decode(src)
+        -- byte span is the value, not its outer whitespace.
+        assert.are.equal('{"a":1}', qt.encode(t))
+    end)
+end)
+
+describe("qd.encode — scalars", function()
+    it("encodes strings with JSON escapes", function()
+        assert.are.equal('"hello"', qt.encode("hello"))
+        assert.are.equal('"a\\nb"', qt.encode("a\nb"))
+        assert.are.equal('"a\\"b"', qt.encode('a"b'))
+        assert.are.equal('"a\\\\b"', qt.encode("a\\b"))
+    end)
+
+    it("encodes booleans", function()
+        assert.are.equal("true", qt.encode(true))
+        assert.are.equal("false", qt.encode(false))
+    end)
+
+    it("encodes numbers", function()
+        assert.are.equal("42", qt.encode(42))
+        assert.are.equal("-3.14", qt.encode(-3.14))
+    end)
+
+    it("encodes qt.null as JSON null", function()
+        assert.are.equal("null", qt.encode(qt.null))
+    end)
+
+    it("errors on unsupported values", function()
+        assert.has_error(function() qt.encode(function() end) end)
+    end)
+end)
+
+describe("qd.encode — real and mixed tables", function()
+    it("encodes a real Lua object", function()
+        local cjson = require("cjson")
+        local s = qt.encode({a = 1, b = "x"})
+        assert.are.same({a = 1, b = "x"}, cjson.decode(s))
+    end)
+
+    it("encodes a real Lua array", function()
+        assert.are.equal("[1,2,3]", qt.encode({1,2,3}))
+    end)
+
+    it("encodes a hand-built empty array with empty_array_mt", function()
+        local arr = setmetatable({}, qt.empty_array_mt)
+        assert.are.equal("[]", qt.encode(arr))
+    end)
+
+    it("encodes mixed lazy + materialized", function()
+        local t = qt.decode('{"keep":{"x":1},"changed":{"y":2}}')
+        t.changed = "now a string"
+        local out = qt.encode(t)
+        local cjson = require("cjson")
+        local parsed = cjson.decode(out)
+        assert.are.same({x=1}, parsed.keep)
+        assert.are.equal("now a string", parsed.changed)
+    end)
+end)
+
+local cjson = require("cjson")
+
+-- Structural deep-equal; metatables are ignored and sentinels compare with ==.
+local function deep_equal(a, b)
+    if a == b then return true end
+    if type(a) ~= "table" or type(b) ~= "table" then return false end
+    for k, v in pairs(a) do
+        if not deep_equal(v, b[k]) then return false end
+    end
+    for k in pairs(b) do
+        if a[k] == nil then return false end
+    end
+    return true
+end
+
+describe("cjson round-trip equivalence", function()
+    local fixtures = {
+        '{"a":1,"b":"x","c":null,"d":true,"e":false,"f":[1,2,3],"g":{"h":4.5}}',
+        '[1,"x",true,null,{},[]]',
+        '{"messages":[{"role":"user","content":"hi"},{"role":"assistant","content":"hello"}]}',
+        '{}',
+        '[]',
+        '{"escapes":"a\\nb\\tc\\\"d\\\\e"}',
+    }
+    for _, src in ipairs(fixtures) do
+        it("materialize matches cjson.decode for: " .. src:sub(1, 40), function()
+            local from_qd = qd.materialize(qd.decode(src))
+            local from_cj = cjson.decode(src)
+            assert.is_true(deep_equal(from_qd, from_cj))
+        end)
+
+        it("encode round-trips for: " .. src:sub(1, 40), function()
+            local out = qd.encode(qd.decode(src))
+            local back_qd = cjson.decode(out)
+            local back_cj = cjson.decode(src)
+            assert.is_true(deep_equal(back_qd, back_cj))
+        end)
+    end
+end)
+
+describe("sentinel handling", function()
+    it("JSON null reads as qd.null and encodes back", function()
+        local t = qd.decode('{"x":null}')
+        assert.are.equal(qd.null, t.x)
+        assert.are.equal('{"x":null}', qd.encode(t))
+    end)
+
+    it("empty array stays an array through materialize and encode", function()
+        local t = qd.decode('{"xs":[]}')
+        local m = qd.materialize(t)
+        assert.are.equal(qd.empty_array_mt, getmetatable(m.xs))
+        assert.are.equal('{"xs":[]}', qd.encode(t))
+    end)
+end)
+
+describe("qd.encode — nested mutations propagate", function()
+    it("emits nested object mutation, not original bytes", function()
+        local cjson = require("cjson")
+        local t = qd.decode('{"a":{"b":{"c":1}},"d":2}')
+        t.a.b.c = 999
+        local out = qd.encode(t)
+        local parsed = cjson.decode(out)
+        assert.are.equal(999, parsed.a.b.c)
+        assert.are.equal(2, parsed.d)
+    end)
+
+
it("emits nested array mutation", function() + local cjson = require("cjson") + local t = qd.decode('{"xs":[10,20,30]}') + t.xs[2] = 999 + local out = qd.encode(t) + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.xs[1]) + assert.are.equal(999, parsed.xs[2]) + assert.are.equal(30, parsed.xs[3]) + end) + + it("preserves cached proxy identity across parent materialization", function() + local t = qd.decode('{"a":{"x":1}}') + local inner = t.a + t.c = 3 + assert.are.equal(inner, t.a) + inner.x = 99 + assert.are.equal(99, t.a.x) + end) +end)