From 8756c794c8b0beb2fce04dc697cd256e0a4e7df1 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 15 May 2026 16:28:27 +0000 Subject: [PATCH 1/2] chore: address PR #3 review hygiene items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five Important findings + four small Minor follow-ups from the PR #3 local code review: Important - tests/scanner_crosscheck.rs — drop the stale "AVX2 does not validate brackets" comment and tighten the proptest to require full Result equality (Ok/Err verdict + error offset) and indices equality on every case, not just on Ok. After the tail-bypass fix scalar and AVX2 run the same scan_emit_resume + validate_brackets pipeline, so this is now enforceable. Still passes the 2000-case proptest. - benches/lua_bench.lua — switch image-size RNG from math.random (which delegates to libc rand() and varies across machines) to a deterministic Park-Miller LCG. Same target_bytes now produces byte-identical output on any LuaJIT 2.1 host. - benches/lua_bench.lua — tighten the loop so the actual payload size matches its label. Cap the per-iteration `upper` at `remaining` and allow the last image to shrink below the 50 KB floor when fewer bytes remain. Observed: every scenario now lands within ~0.1% of its label (100k -> 102351 bytes, 1m -> 1048527, 10m -> 10485711) vs up to +49% before. - src/scan/avx2.rs — remove the dead `else if in_string != 0` branch inside the tail handler. `i < buf.len()` makes the `scalar_start <= buf.len()` check trivially true, and scan_emit_resume already returns Err(buf.len()) when start == buf.len() and in_string is set. Replace the unreachable branch with a comment that documents the invariant. - src/scan/mod.rs — drop the inaccurate "the check is defensive" wording from validate_brackets. The function is correctness-coupled with the scanner that produced its index list; a forged quote would flip in_string and mask later mismatches. Minor - Makefile — match the spec's "target — description" help format (em-dash separator) and tighten the awk FS pattern from `:.*## ` to `:[^#]*## ` so descriptions containing `##` aren't truncated by the greedy `.*`. - benches/lua_bench.lua — bump 2m / 5m / 10m iters from 10 to 20 so bigger-payload measurements ride out one-shot allocator / page-fault noise. - src/scan/avx2.rs — rename `escaped_quotes_do_not_trip_fastpath` to `escaped_quotes_remain_correct_with_fastpath`. The test asserts parity with scalar, not that the branch was taken (we have no counter to observe that), so the name should reflect what's actually checked. cargo test --release: 70+3+10+1+5+3+12+1+1 = 106 unit/integration tests plus the 2000-case proptest, all pass. cargo test --release --no-default-features (scalar-only build): 60+3+10+1+5+3+12+1+1 = 96 tests, all pass. --- Makefile | 2 +- benches/lua_bench.lua | 35 ++++++++++++++++++++++++++++------- src/scan/avx2.rs | 19 ++++++++++--------- src/scan/mod.rs | 8 +++++--- tests/scanner_crosscheck.rs | 15 ++++++++------- 5 files changed, 52 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index 417a09b..83283df 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ LUA_ENV := LD_LIBRARY_PATH=$(LIB_DIR) LUA_CPATH='$(LUA_CPATH)' .PHONY: help build test lint bench clean help: ## Show this help - @awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z_-]+:.*## / {printf " \033[36m%-10s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) + @awk 'BEGIN {FS = ":[^#]*## "} /^[a-zA-Z_-]+:[^#]*## / {printf " \033[36m%-10s\033[0m — %s\n", $$1, $$2}' $(MAKEFILE_LIST) build: ## Build the release cdylib (target/release/libquickdecode.so) cargo build --release diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua index e75579c..177da5f 100644 --- a/benches/lua_bench.lua +++ b/benches/lua_bench.lua @@ -14,8 +14,21 @@ end -- Shape: a multimodal chat-completion request with one ~1.5K text question -- and N base64-encoded image parts (each 50-500 KB) until the payload reaches -- target_bytes. Mirrors the production case the bench is meant to reflect. +-- +-- Image sizes are drawn from a deterministic Park-Miller LCG (not math.random, +-- which delegates to libc rand() and varies across machines) so the same +-- target_bytes produces byte-identical output on any LuaJIT 2.1 host. The +-- upper bound is capped at `remaining + slack` so the final overshoot vs +-- target stays inside ~10 KB. local function make_payload(target_bytes) - math.randomseed(42) + local rng_state = 42 + local function rng_range(lo, hi) + -- Park-Miller minimal-standard LCG: a=48271, m=2^31-1. Multiplication + -- fits in double precision (48271 * 2^31 < 2^53). + rng_state = (rng_state * 48271) % 2147483647 + return lo + (rng_state % (hi - lo + 1)) + end + local text = string.rep("Q", 1500) local text_part = '{"type":"text","text":"' .. text .. '"}' local parts = { text_part } @@ -23,9 +36,17 @@ local function make_payload(target_bytes) while current < target_bytes do local remaining = target_bytes - current - local upper = math.min(500 * 1024, math.max(50 * 1024, remaining + 50 * 1024)) - local lower = math.min(50 * 1024, upper) - local img_size = math.random(lower, upper) + local img_size + if remaining < 50 * 1024 then + -- Final image: shrink below the 50 KB floor so the label matches + -- the actual payload size. Bench iters all see the same payload + -- regardless, so the smaller tail blob doesn't change what's + -- being measured. + img_size = math.max(1024, remaining) + else + local upper = math.min(500 * 1024, remaining) + img_size = rng_range(50 * 1024, upper) + end local b64 = string.rep("A", img_size) local img_part = '{"type":"image_url","image_url":{"url":"data:image/jpeg;base64,' .. b64 .. '"}}' @@ -57,9 +78,9 @@ local scenarios = { {name = "200k", iters = 50, payload = make_payload(200 * 1024)}, {name = "500k", iters = 20, payload = make_payload(500 * 1024)}, {name = "1m", iters = 15, payload = make_payload(1024 * 1024)}, - {name = "2m", iters = 10, payload = make_payload(2 * 1024 * 1024)}, - {name = "5m", iters = 10, payload = make_payload(5 * 1024 * 1024)}, - {name = "10m", iters = 10, payload = make_payload(10 * 1024 * 1024)}, + {name = "2m", iters = 20, payload = make_payload(2 * 1024 * 1024)}, + {name = "5m", iters = 20, payload = make_payload(5 * 1024 * 1024)}, + {name = "10m", iters = 20, payload = make_payload(10 * 1024 * 1024)}, } for _, s in ipairs(scenarios) do diff --git a/src/scan/avx2.rs b/src/scan/avx2.rs index 0b9f09e..064cab8 100644 --- a/src/scan/avx2.rs +++ b/src/scan/avx2.rs @@ -63,17 +63,18 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { // skip it (treat as an escaped data byte, not a structural). Outside // a string backslashes are plain characters and bs_carry has no effect. if i < buf.len() { + // i < buf.len() and the only adjustment below is +1, so + // scalar_start <= buf.len() always; scan_emit_resume itself returns + // Err(buf.len()) when start == buf.len() and in_string is true. let scalar_start = if in_string != 0 && bs_carry != 0 { i + 1 } else { i }; - if scalar_start <= buf.len() { - super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?; - } else if in_string != 0 { - return Err(buf.len()); - } + super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?; } else if in_string != 0 { + // 64-aligned input that ended mid-string: tail handler never runs, + // so flag the unterminated string here. return Err(buf.len()); } @@ -281,11 +282,11 @@ mod tests { parity(&buf); } - /// String contains escaped quotes — the fast path must NOT fire when - /// `real_quote != 0` even though we may still be inside a string at - /// the chunk boundary. + /// String contains escaped quotes — the parity output must still + /// match scalar. (We cannot directly observe whether the fast path + /// took the branch; parity asserts equivalence either way.) #[test] - fn escaped_quotes_do_not_trip_fastpath() { + fn escaped_quotes_remain_correct_with_fastpath() { if !host_supports_avx2() { return; } let mut buf = Vec::new(); buf.extend_from_slice(b"{\"k\":\""); diff --git a/src/scan/mod.rs b/src/scan/mod.rs index 85f9874..84598d7 100644 --- a/src/scan/mod.rs +++ b/src/scan/mod.rs @@ -36,9 +36,11 @@ pub(crate) fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { /// Walk a sequence of already-emitted structural offsets and verify that /// `{`/`}` and `[`/`]` are properly paired. String quotes toggle an -/// `in_string` flag and are otherwise skipped — well-formed emit paths -/// never push structural chars from inside strings, but the check is -/// defensive. +/// `in_string` flag and are otherwise skipped. This pass trusts the emit +/// phase: a forged quote in the index list would flip `in_string` and +/// mask subsequent bracket mismatches, so the function is correctness- +/// coupled with the scanner that produced `indices`, not defensive +/// against arbitrary inputs. /// /// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched /// openers at end of input, returns `Err(buf.len())`. diff --git a/tests/scanner_crosscheck.rs b/tests/scanner_crosscheck.rs index 66ee338..9fe4ed2 100644 --- a/tests/scanner_crosscheck.rs +++ b/tests/scanner_crosscheck.rs @@ -17,13 +17,14 @@ proptest! { let mut a = Vec::new(); let mut b = Vec::new(); let ra = ScalarScanner::scan(input.as_bytes(), &mut a); - let _rb = Avx2Scanner::scan(input.as_bytes(), &mut b); - // Only compare positions when scalar says the input is valid. - // AVX2 does not validate bracket matching (only structural positions), - // so we cannot assert error agreement for structurally invalid inputs. - if ra.is_ok() { - prop_assert_eq!(a, b, "mismatch on {:?}", input); - } + let rb = Avx2Scanner::scan(input.as_bytes(), &mut b); + // Both paths run the same scan_emit_resume + validate_brackets + // pipeline, so Result equality is required: same Ok/Err verdict + // AND same error offset when Err. + prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input); + // Emit order is also expected to be identical; both phases push + // through end-of-buffer before any potential Err is returned. + prop_assert_eq!(&a, &b, "indices differ for {:?}", input); } } From 4021449a427a8a7f4bfd6a45e5eb3b604341072f Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Fri, 15 May 2026 16:35:46 +0000 Subject: [PATCH 2/2] docs: align review-followup comments with actual behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the Important #1 + three Minor findings from the local review of PR #4 (the docs drifted from the code introduced in the same PR): - benches/lua_bench.lua: rewrite the size-accuracy comment. The prior draft referenced a `remaining + slack` upper-bound expression that the final code does not use; replace with the two-branch behaviour actually implemented (normal `min(500K, remaining)`; final image falls through to `max(1024, remaining)`) and the real worst-case overshoot (~1 KB, not "~10 KB"). - Makefile: explain the `:[^#]*## ` FS choice and its tradeoff (targets with `#` in the prerequisite list won't render — none today). - tests/scanner_crosscheck.rs: clarify that the indices assertion holds for both Ok and Err cases because scan_emit_resume always completes emission before any potential Err, and validate_brackets does not modify the index list. - src/scan/avx2.rs: spell out the scalar_start invariant including the exact boundary case (scalar_start == buf.len() when i == buf.len()-1 && in_string != 0 && bs_carry != 0) that scan_emit_resume's post-loop in_str check covers. No code changes. All tests still pass. --- Makefile | 2 ++ benches/lua_bench.lua | 10 +++++++--- src/scan/avx2.rs | 9 ++++++--- tests/scanner_crosscheck.rs | 6 ++++-- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 83283df..53a9422 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ LUA_ENV := LD_LIBRARY_PATH=$(LIB_DIR) LUA_CPATH='$(LUA_CPATH)' .PHONY: help build test lint bench clean help: ## Show this help + @# FS uses [^#]* (not .*) so a description containing `##` isn't truncated. + @# Consequence: targets whose prerequisite list contains `#` won't render — none today. @awk 'BEGIN {FS = ":[^#]*## "} /^[a-zA-Z_-]+:[^#]*## / {printf " \033[36m%-10s\033[0m — %s\n", $$1, $$2}' $(MAKEFILE_LIST) build: ## Build the release cdylib (target/release/libquickdecode.so) diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua index 177da5f..5293dbf 100644 --- a/benches/lua_bench.lua +++ b/benches/lua_bench.lua @@ -17,9 +17,13 @@ end -- -- Image sizes are drawn from a deterministic Park-Miller LCG (not math.random, -- which delegates to libc rand() and varies across machines) so the same --- target_bytes produces byte-identical output on any LuaJIT 2.1 host. The --- upper bound is capped at `remaining + slack` so the final overshoot vs --- target stays inside ~10 KB. +-- target_bytes produces byte-identical output on any LuaJIT 2.1 host. +-- +-- Size accuracy: the normal-branch upper is `min(500K, remaining)` so the +-- loop cannot overshoot during steady state. When fewer than 50 KB remain +-- the final image falls through to `math.max(1024, remaining)` — undershoot +-- is at most a few hundred bytes; worst-case overshoot is ~1 KB (only when +-- `remaining < 1024`, which the seed=42 walk does not hit for our ladder). local function make_payload(target_bytes) local rng_state = 42 local function rng_range(lo, hi) diff --git a/src/scan/avx2.rs b/src/scan/avx2.rs index 064cab8..d98d8db 100644 --- a/src/scan/avx2.rs +++ b/src/scan/avx2.rs @@ -63,9 +63,12 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { // skip it (treat as an escaped data byte, not a structural). Outside // a string backslashes are plain characters and bs_carry has no effect. if i < buf.len() { - // i < buf.len() and the only adjustment below is +1, so - // scalar_start <= buf.len() always; scan_emit_resume itself returns - // Err(buf.len()) when start == buf.len() and in_string is true. + // Invariant: scalar_start ∈ {i, i+1} and i < buf.len(), so + // scalar_start <= buf.len(). The boundary case scalar_start == + // buf.len() only fires when i == buf.len()-1 AND in_string != 0 + // AND bs_carry != 0; scan_emit_resume handles it by entering with + // an empty loop body and returning Err(buf.len()) from its + // post-loop `if in_str` check. let scalar_start = if in_string != 0 && bs_carry != 0 { i + 1 } else { diff --git a/tests/scanner_crosscheck.rs b/tests/scanner_crosscheck.rs index 9fe4ed2..209ac5a 100644 --- a/tests/scanner_crosscheck.rs +++ b/tests/scanner_crosscheck.rs @@ -22,8 +22,10 @@ proptest! { // pipeline, so Result equality is required: same Ok/Err verdict // AND same error offset when Err. prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input); - // Emit order is also expected to be identical; both phases push - // through end-of-buffer before any potential Err is returned. + // Indices are produced entirely by scan_emit_resume (which walks + // through end-of-buffer before any Err) and are not modified by + // validate_brackets, so both `a` and `b` reflect the full emit + // regardless of whether the final result was Ok or Err. prop_assert_eq!(&a, &b, "indices differ for {:?}", input); } }