diff --git a/src/scan/avx2.rs b/src/scan/avx2.rs index 2161f7d..977c162 100644 --- a/src/scan/avx2.rs +++ b/src/scan/avx2.rs @@ -21,6 +21,7 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { let mut i: usize = 0; let mut bs_carry: u64 = 0; let mut in_string: u64 = 0; + let mut stack: Vec = Vec::with_capacity(32); while i + 64 <= buf.len() { let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); @@ -33,6 +34,8 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { // ~10-op scalar `find_escape_mask_with_carry`. bs_carry must be // 0 leaving this chunk (no backslashes in chunk → no trailing // run); in_string stays 1 (no real quote → no polarity flip). + // The depth stack is correctly left untouched: any bracket bytes + // in a pure string-interior chunk are string data, not structurals. if in_string != 0 { let interesting = quote_or_backslash_mask(chunk_lo, chunk_hi); if interesting == 0 { @@ -54,19 +57,20 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { // Exclude structural chars inside strings; re-add real quotes. let final_mask = (struct_mask & !inside) | real_quote; - super::emit_bits(final_mask, i as u32, out); + super::emit_bits_validate(buf, final_mask, i as u32, &mut stack, out)?; i += 64; } // Tail (<64 bytes): continue emit-only via scalar, carrying the // in_string / bs_carry state from the last AVX2 chunk. Bracket pairing - // is checked once at the end on the merged indices. + // for the tail-emitted indices is folded in after via validate_tail_indices. // // If bs_carry == 1 the byte at position `i` is escape-targeted by the // trailing backslash run of the prior chunk; inside a string we must // skip it (treat as an escaped data byte, not a structural). Outside // a string backslashes are plain characters and bs_carry has no effect. + let tail_start = out.len(); if i < buf.len() { // Invariant: scalar_start ∈ {i, i+1} and i < buf.len(), so // scalar_start <= buf.len(). 
The boundary case scalar_start == @@ -86,7 +90,11 @@ unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { return Err(buf.len()); } - super::validate_brackets(buf, out) + super::validate_tail_indices(buf, &out[tail_start..], &mut stack)?; + if !stack.is_empty() { + return Err(buf.len()); + } + Ok(()) } #[inline(always)] diff --git a/src/scan/mod.rs b/src/scan/mod.rs index 84b9867..ebac54e 100644 --- a/src/scan/mod.rs +++ b/src/scan/mod.rs @@ -79,52 +79,59 @@ pub(crate) fn find_escape_mask_with_carry(bs: u64, prev_carry: &mut u64) -> u64 escaped } -/// Emit all set-bit positions in `mask` (relative to `base`) into `out`. +/// Emit all set-bit positions in `mask` (relative to `base`) into `out`, while +/// fusing bracket-pair validation inline. The SIMD scanners guarantee that any +/// emitted offset corresponds to a byte that is either a real (unescaped) quote +/// or a structural char (at any depth) outside of strings — so `"`, `:`, `,` are +/// no-ops here and `{` `[` `}` `]` are validated against `stack`. +/// +/// Returns `Err(pos)` on the first bracket mismatch. On success, `stack` is +/// left in its final state for the caller (further tail emits and end-of-input +/// `stack.is_empty()` check). 
#[inline(always)] -pub(crate) fn emit_bits(mut mask: u64, base: u32, out: &mut Vec) { +pub(crate) fn emit_bits_validate( + buf: &[u8], + mut mask: u64, + base: u32, + stack: &mut Vec, + out: &mut Vec, +) -> Result<(), usize> { while mask != 0 { - let tz = mask.trailing_zeros(); - out.push(base + tz); + let tz = mask.trailing_zeros(); + let pos = base + tz; + out.push(pos); + match buf[pos as usize] { + c @ (b'{' | b'[') => stack.push(c), + b'}' => if stack.pop() != Some(b'{') { return Err(pos as usize); }, + b']' => if stack.pop() != Some(b'[') { return Err(pos as usize); }, + _ => {} // `"` `:` `,` — no validation + } mask &= mask - 1; } + Ok(()) } -/// Walk a sequence of already-emitted structural offsets and verify that -/// `{`/`}` and `[`/`]` are properly paired. String quotes toggle an -/// `in_string` flag and are otherwise skipped. This pass trusts the emit -/// phase: a forged quote in the index list would flip `in_string` and -/// mask subsequent bracket mismatches, so the function is correctness- -/// coupled with the scanner that produced `indices`, not defensive -/// against arbitrary inputs. +/// Walk already-emitted indices (from the scalar tail handler) and continue +/// bracket-pair validation using the SIMD-loop's stack. Same per-index logic +/// as `emit_bits_validate`; does not push to `out` (the tail handler already +/// did). Used after `scan_emit_resume` to fold the tail into the same pass. /// -/// On the first mismatch, returns `Err(offset_in_buf)`. On unmatched -/// openers at end of input, returns `Err(buf.len())`. -pub(crate) fn validate_brackets(buf: &[u8], indices: &[u32]) -> Result<(), usize> { - let mut stack: Vec = Vec::with_capacity(32); - let mut in_string = false; - +/// Like `emit_bits_validate`, this relies on the invariant that no in-string +/// bracket / colon / comma is ever emitted: `"`, `:`, `,` are no-ops. 
+#[inline] +pub(crate) fn validate_tail_indices( + buf: &[u8], + indices: &[u32], + stack: &mut Vec, +) -> Result<(), usize> { for &idx in indices { let pos = idx as usize; - let b = buf[pos]; - - if b == b'"' { - in_string = !in_string; - continue; + match buf[pos] { + c @ (b'{' | b'[') => stack.push(c), + b'}' => if stack.pop() != Some(b'{') { return Err(pos); }, + b']' => if stack.pop() != Some(b'[') { return Err(pos); }, + _ => {} // `"` `:` `,` — no validation } - if in_string { - continue; - } - - match b { - b'{' | b'[' => stack.push(b), - b'}' if stack.pop() != Some(b'{') => return Err(pos), - b']' if stack.pop() != Some(b'[') => return Err(pos), - _ => {} - } - } - - if !stack.is_empty() { - return Err(buf.len()); } Ok(()) } diff --git a/src/scan/neon.rs b/src/scan/neon.rs index 568f0f0..8b785f3 100644 --- a/src/scan/neon.rs +++ b/src/scan/neon.rs @@ -88,6 +88,7 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { let mut i = 0usize; let mut bs_carry: u64 = 0; let mut in_string: u64 = 0; + let mut stack: Vec = Vec::with_capacity(32); while i + 64 <= buf.len() { let c0 = vld1q_u8(buf.as_ptr().add(i)); @@ -100,6 +101,8 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { // In-string fast probe: skip the escape/prefix-XOR path entirely when // we are already inside a string and there are no quotes or backslashes. + // Bracket bytes in a pure string-interior chunk are string data, so the + // depth stack is correctly left untouched. 
if in_string != 0 && (backslash | quote) == 0 { bs_carry = 0; i += 64; @@ -113,11 +116,12 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { let struct_mask = structural_mask64(c0, c1, c2, c3); let final_mask = (struct_mask & !inside) | real_quote; - super::emit_bits(final_mask, i as u32, out); + super::emit_bits_validate(buf, final_mask, i as u32, &mut stack, out)?; i += 64; } // Tail (<64 bytes): hand off to scalar emit, carrying in_string / bs_carry state. + let tail_start = out.len(); if i < buf.len() { let scalar_start = if in_string != 0 && bs_carry != 0 { i + 1 } else { i }; super::scalar::scan_emit_resume(buf, scalar_start, in_string != 0, out)?; @@ -125,7 +129,11 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { return Err(buf.len()); } - super::validate_brackets(buf, out) + super::validate_tail_indices(buf, &out[tail_start..], &mut stack)?; + if !stack.is_empty() { + return Err(buf.len()); + } + Ok(()) } #[cfg(test)] diff --git a/src/scan/scalar.rs b/src/scan/scalar.rs index 634a3f0..e29c4b3 100644 --- a/src/scan/scalar.rs +++ b/src/scan/scalar.rs @@ -9,7 +9,7 @@ impl Scanner for ScalarScanner { } /// Single-pass: emit structural offsets AND validate bracket pairing inline. -/// Replaces the two-pass `scan_emit_resume` + `validate_brackets` sequence. +/// Replaces the two-pass `scan_emit_resume` + bracket-walk sequence. pub(crate) fn scan_and_validate(buf: &[u8], out: &mut Vec) -> Result<(), usize> { out.reserve(buf.len() / 6); let mut i = 0usize; @@ -40,11 +40,12 @@ pub(crate) fn scan_and_validate(buf: &[u8], out: &mut Vec) -> Result<(), us /// Emit structural-character offsets for `buf[start..]`, continuing from a /// given in-string state. Does NOT validate bracket pairing; the caller is -/// responsible for running `validate_brackets` over the emitted offsets. +/// responsible for running `validate_tail_indices` over the emitted offsets +/// if validation is required. 
/// -/// Used by `ScalarScanner::scan` (with start=0, in_str_init=false) and as -/// the unaligned-tail handler by `Avx2Scanner::scan` (with the carried -/// in-string state from the last AVX2 chunk). +/// Used as the unaligned-tail handler by both `Avx2Scanner::scan` and +/// `NeonScanner::scan` (with the carried in-string state from the last +/// SIMD chunk). pub(crate) fn scan_emit_resume( buf: &[u8], start: usize, diff --git a/tests/scanner_crosscheck.rs b/tests/scanner_crosscheck.rs index f27b737..24a9591 100644 --- a/tests/scanner_crosscheck.rs +++ b/tests/scanner_crosscheck.rs @@ -21,9 +21,9 @@ proptest! { // Both scanners must agree on Ok vs Err (and on the error offset). prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input); // On success, indices must be identical. On error, the partial - // emit may differ: the fused scalar (scan_and_validate) aborts at - // the first bracket mismatch, while AVX2 emits all structural - // chars before validate_brackets runs. Only compare on Ok. + // emit may differ: scalar aborts at the failing byte, while AVX2 + // emits the rest of its current 64-byte chunk before its fused + // emit_bits_validate detects the mismatch. Only compare on Ok. if ra.is_ok() { prop_assert_eq!(&a, &b, "indices differ for {:?}", input); } @@ -81,9 +81,9 @@ proptest! { // Both scanners must agree on Ok vs Err (and on the error offset). prop_assert_eq!(&ra, &rb, "scan results differ for {:?}", input); // On success, indices must be identical. On error, the partial - // emit may differ between fused-scalar and two-pass NEON because - // the fused path stops at the first bracket error while NEON emits - // all structural chars before validating; only check on Ok. + // emit may differ: scalar aborts at the failing byte, while NEON + // emits the rest of its current 64-byte chunk before its fused + // emit_bits_validate detects the mismatch. Only compare on Ok. if ra.is_ok() { prop_assert_eq!(&a, &b, "indices differ for {:?}", input); }