Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ LUA_ENV := LD_LIBRARY_PATH=$(LIB_DIR) LUA_CPATH='$(LUA_CPATH)'
.PHONY: help build test lint bench clean

help: ## Show this help
@awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z_-]+:.*## / {printf " \033[36m%-10s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
@# FS uses [^#]* (not .*) so a description containing `##` isn't truncated.
@# Consequence: targets whose prerequisite list contains `#` won't render — none today.
@awk 'BEGIN {FS = ":[^#]*## "} /^[a-zA-Z_-]+:[^#]*## / {printf " \033[36m%-10s\033[0m — %s\n", $$1, $$2}' $(MAKEFILE_LIST)

build: ## Build the release cdylib (target/release/libquickdecode.so)
cargo build --release
Expand Down
39 changes: 32 additions & 7 deletions benches/lua_bench.lua
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,43 @@ end
-- Shape: a multimodal chat-completion request with one ~1.5K text question
-- and N base64-encoded image parts (each 50-500 KB) until the payload reaches
-- target_bytes. Mirrors the production case the bench is meant to reflect.
--
-- Image sizes are drawn from a deterministic Park-Miller LCG rather than
-- math.random, whose generator is implementation-defined (PUC-Rio Lua 5.1
-- delegates to libc rand(); LuaJIT uses its own built-in PRNG), so the same
-- target_bytes produces byte-identical output on any LuaJIT 2.1 host.
--
-- Size accuracy: the normal-branch upper is `min(500K, remaining)` so the
-- loop cannot overshoot during steady state. When fewer than 50 KB remain
-- the final image falls through to `math.max(1024, remaining)` — undershoot
-- is at most a few hundred bytes; worst-case overshoot is ~1 KB (only when
-- `remaining < 1024`, which the seed=42 walk does not hit for our ladder).
local function make_payload(target_bytes)
math.randomseed(42)
local rng_state = 42
local function rng_range(lo, hi)
    -- Advance the Park-Miller LCG (multiplier 48271, modulus 2^31 - 1).
    -- The state stays below 2^31, so the product stays below 2^47 and is
    -- represented exactly by a double.
    local span = hi - lo + 1
    local advanced = (rng_state * 48271) % 2147483647
    rng_state = advanced
    -- Fold the new state into the inclusive range [lo, hi].
    return lo + advanced % span
end

local text = string.rep("Q", 1500)
local text_part = '{"type":"text","text":"' .. text .. '"}'
local parts = { text_part }
local current = 200 + #text_part -- approx outer envelope overhead

while current < target_bytes do
local remaining = target_bytes - current
local upper = math.min(500 * 1024, math.max(50 * 1024, remaining + 50 * 1024))
local lower = math.min(50 * 1024, upper)
local img_size = math.random(lower, upper)
local img_size
if remaining < 50 * 1024 then
-- Final image: shrink below the 50 KB floor so the label matches
-- the actual payload size. Bench iters all see the same payload
-- regardless, so the smaller tail blob doesn't change what's
-- being measured.
img_size = math.max(1024, remaining)
else
local upper = math.min(500 * 1024, remaining)
img_size = rng_range(50 * 1024, upper)
end
local b64 = string.rep("A", img_size)
local img_part = '{"type":"image_url","image_url":{"url":"data:image/jpeg;base64,'
.. b64 .. '"}}'
Expand Down Expand Up @@ -57,9 +82,9 @@ local scenarios = {
{name = "200k", iters = 50, payload = make_payload(200 * 1024)},
{name = "500k", iters = 20, payload = make_payload(500 * 1024)},
{name = "1m", iters = 15, payload = make_payload(1024 * 1024)},
{name = "2m", iters = 10, payload = make_payload(2 * 1024 * 1024)},
{name = "5m", iters = 10, payload = make_payload(5 * 1024 * 1024)},
{name = "10m", iters = 10, payload = make_payload(10 * 1024 * 1024)},
{name = "2m", iters = 20, payload = make_payload(2 * 1024 * 1024)},
{name = "5m", iters = 20, payload = make_payload(5 * 1024 * 1024)},
{name = "10m", iters = 20, payload = make_payload(10 * 1024 * 1024)},
}

for _, s in ipairs(scenarios) do
Expand Down
22 changes: 13 additions & 9 deletions src/scan/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,21 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {
// skip it (treat as an escaped data byte, not a structural). Outside
// a string backslashes are plain characters and bs_carry has no effect.
if i < buf.len() {
// Invariant: scalar_start ∈ {i, i+1} and i < buf.len(), so
// scalar_start <= buf.len(). The boundary case scalar_start ==
// buf.len() only fires when i == buf.len()-1 AND in_string != 0
// AND bs_carry != 0; scan_emit_resume handles it by entering with
// an empty loop body and returning Err(buf.len()) from its
// post-loop `if in_str` check.
let scalar_start = if in_string != 0 && bs_carry != 0 {
i + 1
} else {
i
};
if scalar_start <= buf.len() {
super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?;
} else if in_string != 0 {
return Err(buf.len());
}
super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?;
} else if in_string != 0 {
// 64-aligned input that ended mid-string: tail handler never runs,
// so flag the unterminated string here.
return Err(buf.len());
}

Expand Down Expand Up @@ -281,11 +285,11 @@ mod tests {
parity(&buf);
}

/// String contains escaped quotes — the fast path must NOT fire when
/// `real_quote != 0` even though we may still be inside a string at
/// the chunk boundary.
/// String contains escaped quotes — the parity output must still
/// match scalar. (We cannot directly observe whether the fast path
/// took the branch; parity asserts equivalence either way.)
#[test]
fn escaped_quotes_do_not_trip_fastpath() {
fn escaped_quotes_remain_correct_with_fastpath() {
if !host_supports_avx2() { return; }
let mut buf = Vec::new();
buf.extend_from_slice(b"{\"k\":\"");
Expand Down
8 changes: 5 additions & 3 deletions src/scan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ pub(crate) fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {

/// Walk a sequence of already-emitted structural offsets and verify that
/// `{`/`}` and `[`/`]` are properly paired. String quotes toggle an
/// `in_string` flag and are otherwise skipped — well-formed emit paths
/// never push structural chars from inside strings, but the check is
/// defensive.
/// `in_string` flag and are otherwise skipped. This pass trusts the emit
/// phase: a forged quote in the index list would flip `in_string` and
/// mask subsequent bracket mismatches, so the function is correctness-
/// coupled with the scanner that produced `indices`, not defensive
/// against arbitrary inputs.
///
/// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched
/// openers at end of input, returns `Err(buf.len())`.
Expand Down
17 changes: 10 additions & 7 deletions tests/scanner_crosscheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ proptest! {
let mut a = Vec::new();
let mut b = Vec::new();
let ra = ScalarScanner::scan(input.as_bytes(), &mut a);
let _rb = Avx2Scanner::scan(input.as_bytes(), &mut b);
// Only compare positions when scalar says the input is valid.
// AVX2 does not validate bracket matching (only structural positions),
// so we cannot assert error agreement for structurally invalid inputs.
if ra.is_ok() {
prop_assert_eq!(a, b, "mismatch on {:?}", input);
}
let rb = Avx2Scanner::scan(input.as_bytes(), &mut b);
// Both paths run the same scan_emit_resume + validate_brackets
// pipeline, so Result equality is required: same Ok/Err verdict
// AND same error offset when Err.
prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input);
// Indices are produced entirely by scan_emit_resume (which walks
// through end-of-buffer before any Err) and are not modified by
// validate_brackets, so both `a` and `b` reflect the full emit
// regardless of whether the final result was Ok or Err.
prop_assert_eq!(&a, &b, "indices differ for {:?}", input);
}
}

Expand Down
Loading