diff --git a/Makefile b/Makefile index 417a09b..53a9422 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,9 @@ LUA_ENV := LD_LIBRARY_PATH=$(LIB_DIR) LUA_CPATH='$(LUA_CPATH)' .PHONY: help build test lint bench clean help: ## Show this help - @awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z_-]+:.*## / {printf " \033[36m%-10s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) + @# FS uses [^#]* (not .*) so a description containing `##` isn't truncated. + @# Consequence: targets whose prerequisite list contains `#` won't render — none today. + @awk 'BEGIN {FS = ":[^#]*## "} /^[a-zA-Z_-]+:[^#]*## / {printf " \033[36m%-10s\033[0m — %s\n", $$1, $$2}' $(MAKEFILE_LIST) build: ## Build the release cdylib (target/release/libquickdecode.so) cargo build --release diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua index e75579c..5293dbf 100644 --- a/benches/lua_bench.lua +++ b/benches/lua_bench.lua @@ -14,8 +14,25 @@ end -- Shape: a multimodal chat-completion request with one ~1.5K text question -- and N base64-encoded image parts (each 50-500 KB) until the payload reaches -- target_bytes. Mirrors the production case the bench is meant to reflect. +-- +-- Image sizes are drawn from a deterministic Park-Miller LCG (not math.random, +-- which delegates to libc rand() and varies across machines) so the same +-- target_bytes produces byte-identical output on any LuaJIT 2.1 host. +-- +-- Size accuracy: the normal-branch upper is `min(500K, remaining)` so the +-- loop cannot overshoot during steady state. When fewer than 50 KB remain +-- the final image falls through to `math.max(1024, remaining)` — undershoot +-- is at most a few hundred bytes; worst-case overshoot is ~1 KB (only when +-- `remaining < 1024`, which the seed=42 walk does not hit for our ladder). local function make_payload(target_bytes) - math.randomseed(42) + local rng_state = 42 + local function rng_range(lo, hi) + -- Park-Miller minimal-standard LCG: a=48271, m=2^31-1. Multiplication + -- fits in double precision (48271 * 2^31 < 2^53). + rng_state = (rng_state * 48271) % 2147483647 + return lo + (rng_state % (hi - lo + 1)) + end + local text = string.rep("Q", 1500) local text_part = '{"type":"text","text":"' .. text .. '"}' local parts = { text_part } @@ -23,9 +40,17 @@ local function make_payload(target_bytes) while current < target_bytes do local remaining = target_bytes - current - local upper = math.min(500 * 1024, math.max(50 * 1024, remaining + 50 * 1024)) - local lower = math.min(50 * 1024, upper) - local img_size = math.random(lower, upper) + local img_size + if remaining < 50 * 1024 then + -- Final image: shrink below the 50 KB floor so the label matches + -- the actual payload size. Bench iters all see the same payload + -- regardless, so the smaller tail blob doesn't change what's + -- being measured. + img_size = math.max(1024, remaining) + else + local upper = math.min(500 * 1024, remaining) + img_size = rng_range(50 * 1024, upper) + end local b64 = string.rep("A", img_size) local img_part = '{"type":"image_url","image_url":{"url":"data:image/jpeg;base64,' .. b64 .. '"}}' @@ -57,9 +82,9 @@ local scenarios = { {name = "200k", iters = 50, payload = make_payload(200 * 1024)}, {name = "500k", iters = 20, payload = make_payload(500 * 1024)}, {name = "1m", iters = 15, payload = make_payload(1024 * 1024)}, - {name = "2m", iters = 10, payload = make_payload(2 * 1024 * 1024)}, - {name = "5m", iters = 10, payload = make_payload(5 * 1024 * 1024)}, - {name = "10m", iters = 10, payload = make_payload(10 * 1024 * 1024)}, + {name = "2m", iters = 20, payload = make_payload(2 * 1024 * 1024)}, + {name = "5m", iters = 20, payload = make_payload(5 * 1024 * 1024)}, + {name = "10m", iters = 20, payload = make_payload(10 * 1024 * 1024)}, } for _, s in ipairs(scenarios) do diff --git a/src/scan/avx2.rs b/src/scan/avx2.rs index 0b9f09e..d98d8db 100644 --- a/src/scan/avx2.rs +++ b/src/scan/avx2.rs @@ -63,17 +63,21 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { // skip it (treat as an escaped data byte, not a structural). Outside // a string backslashes are plain characters and bs_carry has no effect. if i < buf.len() { + // Invariant: scalar_start ∈ {i, i+1} and i < buf.len(), so + // scalar_start <= buf.len(). The boundary case scalar_start == + // buf.len() only fires when i == buf.len()-1 AND in_string != 0 + // AND bs_carry != 0; scan_emit_resume handles it by entering with + // an empty loop body and returning Err(buf.len()) from its + // post-loop `if in_str` check. let scalar_start = if in_string != 0 && bs_carry != 0 { i + 1 } else { i }; - if scalar_start <= buf.len() { - super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?; - } else if in_string != 0 { - return Err(buf.len()); - } + super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?; } else if in_string != 0 { + // 64-aligned input that ended mid-string: tail handler never runs, + // so flag the unterminated string here. return Err(buf.len()); } @@ -281,11 +285,11 @@ mod tests { parity(&buf); } - /// String contains escaped quotes — the fast path must NOT fire when - /// `real_quote != 0` even though we may still be inside a string at - /// the chunk boundary. + /// String contains escaped quotes — the parity output must still + /// match scalar. (We cannot directly observe whether the fast path + /// took the branch; parity asserts equivalence either way.) #[test] - fn escaped_quotes_do_not_trip_fastpath() { + fn escaped_quotes_remain_correct_with_fastpath() { if !host_supports_avx2() { return; } let mut buf = Vec::new(); buf.extend_from_slice(b"{\"k\":\""); diff --git a/src/scan/mod.rs b/src/scan/mod.rs index 85f9874..84598d7 100644 --- a/src/scan/mod.rs +++ b/src/scan/mod.rs @@ -36,9 +36,11 @@ pub(crate) fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { /// Walk a sequence of already-emitted structural offsets and verify that /// `{`/`}` and `[`/`]` are properly paired. String quotes toggle an -/// `in_string` flag and are otherwise skipped — well-formed emit paths -/// never push structural chars from inside strings, but the check is -/// defensive. +/// `in_string` flag and are otherwise skipped. This pass trusts the emit +/// phase: a forged quote in the index list would flip `in_string` and +/// mask subsequent bracket mismatches, so the function is correctness- +/// coupled with the scanner that produced `indices`, not defensive +/// against arbitrary inputs. /// /// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched /// openers at end of input, returns `Err(buf.len())`. diff --git a/tests/scanner_crosscheck.rs b/tests/scanner_crosscheck.rs index 66ee338..209ac5a 100644 --- a/tests/scanner_crosscheck.rs +++ b/tests/scanner_crosscheck.rs @@ -17,13 +17,16 @@ proptest! { let mut a = Vec::new(); let mut b = Vec::new(); let ra = ScalarScanner::scan(input.as_bytes(), &mut a); - let _rb = Avx2Scanner::scan(input.as_bytes(), &mut b); - // Only compare positions when scalar says the input is valid. - // AVX2 does not validate bracket matching (only structural positions), - // so we cannot assert error agreement for structurally invalid inputs. - if ra.is_ok() { - prop_assert_eq!(a, b, "mismatch on {:?}", input); - } + let rb = Avx2Scanner::scan(input.as_bytes(), &mut b); + // Both paths run the same scan_emit_resume + validate_brackets + // pipeline, so Result equality is required: same Ok/Err verdict + // AND same error offset when Err. + prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input); + // Indices are produced entirely by scan_emit_resume (which walks + // through end-of-buffer before any Err) and are not modified by + // validate_brackets, so both `a` and `b` reflect the full emit + // regardless of whether the final result was Ok or Err. + prop_assert_eq!(&a, &b, "indices differ for {:?}", input); } }