diff --git a/Makefile b/Makefile
index 417a09b..53a9422 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,9 @@ LUA_ENV := LD_LIBRARY_PATH=$(LIB_DIR) LUA_CPATH='$(LUA_CPATH)'
 .PHONY: help build test lint bench clean
 
 help: ## Show this help
-	@awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z_-]+:.*## / {printf "  \033[36m%-10s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+	@# FS uses [^#]* (not .*) so a description containing `##` isn't truncated.
+	@# Consequence: targets whose prerequisite list contains `#` won't render — none today.
+	@awk 'BEGIN {FS = ":[^#]*## "} /^[a-zA-Z_-]+:[^#]*## / {printf "  \033[36m%-10s\033[0m — %s\n", $$1, $$2}' $(MAKEFILE_LIST)
 
 build: ## Build the release cdylib (target/release/libquickdecode.so)
 	cargo build --release
diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua
index e75579c..5293dbf 100644
--- a/benches/lua_bench.lua
+++ b/benches/lua_bench.lua
@@ -14,8 +14,25 @@ end
 -- Shape: a multimodal chat-completion request with one ~1.5K text question
 -- and N base64-encoded image parts (each 50-500 KB) until the payload reaches
 -- target_bytes. Mirrors the production case the bench is meant to reflect.
+--
+-- Image sizes are drawn from a deterministic Park-Miller LCG (not math.random,
+-- whose generator is runtime-specific: libc rand() in PUC Lua 5.1, a built-in
+-- PRNG in LuaJIT) so a given target_bytes yields byte-identical output on any host.
+--
+-- Size accuracy: the normal-branch upper is `min(500K, remaining)` so the
+-- loop cannot overshoot during steady state. When fewer than 50 KB remain
+-- the final image falls through to `math.max(1024, remaining)` — undershoot
+-- is at most a few hundred bytes; worst-case overshoot is ~1 KB (only when
+-- `remaining < 1024`, which the seed=42 walk does not hit for our ladder).
 local function make_payload(target_bytes)
-    math.randomseed(42)
+    local rng_state = 42
+    local function rng_range(lo, hi)
+        -- Park-Miller minimal-standard LCG: a=48271, m=2^31-1. Multiplication
+        -- fits in double precision (48271 * 2^31 < 2^53).
+        rng_state = (rng_state * 48271) % 2147483647
+        return lo + (rng_state % (hi - lo + 1))
+    end
+
     local text = string.rep("Q", 1500)
     local text_part = '{"type":"text","text":"' .. text .. '"}'
     local parts = { text_part }
@@ -23,9 +40,17 @@ local function make_payload(target_bytes)
 
     while current < target_bytes do
         local remaining = target_bytes - current
-        local upper = math.min(500 * 1024, math.max(50 * 1024, remaining + 50 * 1024))
-        local lower = math.min(50 * 1024, upper)
-        local img_size = math.random(lower, upper)
+        local img_size
+        if remaining < 50 * 1024 then
+            -- Final image: shrink below the 50 KB floor so the label matches
+            -- the actual payload size. Bench iters all see the same payload
+            -- regardless, so the smaller tail blob doesn't change what's
+            -- being measured.
+            img_size = math.max(1024, remaining)
+        else
+            local upper = math.min(500 * 1024, remaining)
+            img_size = rng_range(50 * 1024, upper)
+        end
         local b64 = string.rep("A", img_size)
         local img_part = '{"type":"image_url","image_url":{"url":"data:image/jpeg;base64,'
             .. b64 .. '"}}'
@@ -57,9 +82,9 @@ local scenarios = {
     {name = "200k",   iters = 50,   payload = make_payload(200 * 1024)},
     {name = "500k",   iters = 20,   payload = make_payload(500 * 1024)},
     {name = "1m",     iters = 15,   payload = make_payload(1024 * 1024)},
-    {name = "2m",     iters = 10,   payload = make_payload(2 * 1024 * 1024)},
-    {name = "5m",     iters = 10,   payload = make_payload(5 * 1024 * 1024)},
-    {name = "10m",    iters = 10,   payload = make_payload(10 * 1024 * 1024)},
+    {name = "2m",     iters = 20,   payload = make_payload(2 * 1024 * 1024)},
+    {name = "5m",     iters = 20,   payload = make_payload(5 * 1024 * 1024)},
+    {name = "10m",    iters = 20,   payload = make_payload(10 * 1024 * 1024)},
 }
 
 for _, s in ipairs(scenarios) do
diff --git a/src/scan/avx2.rs b/src/scan/avx2.rs
index 0b9f09e..d98d8db 100644
--- a/src/scan/avx2.rs
+++ b/src/scan/avx2.rs
@@ -63,17 +63,21 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {
     // skip it (treat as an escaped data byte, not a structural). Outside
     // a string backslashes are plain characters and bs_carry has no effect.
     if i < buf.len() {
+        // Invariant: scalar_start ∈ {i, i+1} and i < buf.len(), so
+        // scalar_start <= buf.len(). The boundary case scalar_start ==
+        // buf.len() only arises when i == buf.len()-1 AND in_string != 0
+        // AND bs_carry != 0; scan_emit_resume handles it because its loop
+        // then runs zero iterations and its post-loop `if in_str` check
+        // returns Err(buf.len()).
         let scalar_start = if in_string != 0 && bs_carry != 0 {
             i + 1
         } else {
             i
         };
-        if scalar_start <= buf.len() {
-            super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?;
-        } else if in_string != 0 {
-            return Err(buf.len());
-        }
+        super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?;
     } else if in_string != 0 {
+        // 64-aligned input that ended mid-string: tail handler never runs,
+        // so flag the unterminated string here.
         return Err(buf.len());
     }
 
@@ -281,11 +285,11 @@ mod tests {
         parity(&buf);
     }
 
-    /// String contains escaped quotes — the fast path must NOT fire when
-    /// `real_quote != 0` even though we may still be inside a string at
-    /// the chunk boundary.
+    /// String contains escaped quotes — the parity output must still
+    /// match scalar. (We cannot directly observe whether the fast path
+    /// took the branch; parity asserts equivalence either way.)
     #[test]
-    fn escaped_quotes_do_not_trip_fastpath() {
+    fn escaped_quotes_remain_correct_with_fastpath() {
         if !host_supports_avx2() { return; }
         let mut buf = Vec::new();
         buf.extend_from_slice(b"{\"k\":\"");
diff --git a/src/scan/mod.rs b/src/scan/mod.rs
index 85f9874..84598d7 100644
--- a/src/scan/mod.rs
+++ b/src/scan/mod.rs
@@ -36,9 +36,11 @@ pub(crate) fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {
 
 /// Walk a sequence of already-emitted structural offsets and verify that
 /// `{`/`}` and `[`/`]` are properly paired. String quotes toggle an
-/// `in_string` flag and are otherwise skipped — well-formed emit paths
-/// never push structural chars from inside strings, but the check is
-/// defensive.
+/// `in_string` flag and are otherwise skipped. This pass trusts the emit
+/// phase: a forged quote in the index list would flip `in_string` and
+/// mask subsequent bracket mismatches, so the function is correctness-
+/// coupled with the scanner that produced `indices`, not defensive
+/// against arbitrary inputs.
 ///
 /// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched
 /// openers at end of input, returns `Err(buf.len())`.
diff --git a/tests/scanner_crosscheck.rs b/tests/scanner_crosscheck.rs
index 66ee338..209ac5a 100644
--- a/tests/scanner_crosscheck.rs
+++ b/tests/scanner_crosscheck.rs
@@ -17,13 +17,16 @@ proptest! {
         let mut a = Vec::new();
         let mut b = Vec::new();
         let ra = ScalarScanner::scan(input.as_bytes(), &mut a);
-        let _rb = Avx2Scanner::scan(input.as_bytes(), &mut b);
-        // Only compare positions when scalar says the input is valid.
-        // AVX2 does not validate bracket matching (only structural positions),
-        // so we cannot assert error agreement for structurally invalid inputs.
-        if ra.is_ok() {
-            prop_assert_eq!(a, b, "mismatch on {:?}", input);
-        }
+        let rb = Avx2Scanner::scan(input.as_bytes(), &mut b);
+        // Both paths run the same scan_emit_resume + validate_brackets
+        // pipeline, so Result equality is required: same Ok/Err verdict
+        // AND same error offset when Err.
+        prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input);
+        // Indices are produced entirely by scan_emit_resume (which walks
+        // through end-of-buffer before any Err) and are not modified by
+        // validate_brackets, so both `a` and `b` reflect the full emit
+        // regardless of whether the final result was Ok or Err.
+        prop_assert_eq!(&a, &b, "indices differ for {:?}", input);
     }
 }