From 1c3da2374c39ee23ee50d9352772a9b1ef3e5f4e Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 15:29:16 +0000
Subject: [PATCH 01/21] feat(error): add 6 RFC 8259 audit error codes synced
 across Rust/C/Lua

---
 include/lua_quick_decode.h | 24 +++++++++++-------
 lua/quickdecode.lua        | 20 +++++++++++++++
 src/error.rs               | 51 ++++++++++++++++++++++++--------------
 src/ffi.rs                 | 26 +++++++++++--------
 4 files changed, 84 insertions(+), 37 deletions(-)

diff --git a/include/lua_quick_decode.h b/include/lua_quick_decode.h
index e3aeab2..54d28b6 100644
--- a/include/lua_quick_decode.h
+++ b/include/lua_quick_decode.h
@@ -9,15 +9,21 @@ extern "C" {
 #endif
 
 typedef enum {
-    QJD_OK            = 0,
-    QJD_PARSE_ERROR   = 1,
-    QJD_NOT_FOUND     = 2,
-    QJD_TYPE_MISMATCH = 3,
-    QJD_OUT_OF_RANGE  = 4,
-    QJD_DECODE_FAILED = 5,
-    QJD_INVALID_PATH  = 6,
-    QJD_INVALID_ARG   = 7,
-    QJD_OOM           = 8
+    QJD_OK                  =  0,
+    QJD_PARSE_ERROR         =  1,
+    QJD_NOT_FOUND           =  2,
+    QJD_TYPE_MISMATCH       =  3,
+    QJD_OUT_OF_RANGE        =  4,
+    QJD_DECODE_FAILED       =  5,
+    QJD_INVALID_PATH        =  6,
+    QJD_INVALID_ARG         =  7,
+    QJD_OOM                 =  8,
+    QJD_NESTING_TOO_DEEP    =  9,
+    QJD_TRAILING_CONTENT    = 10,
+    QJD_NUMBER_OUT_OF_RANGE = 11,
+    QJD_INVALID_NUMBER      = 12,
+    QJD_INVALID_STRING      = 13,
+    QJD_INVALID_UTF8        = 14
 } qjd_err;
 
 typedef enum {
diff --git a/lua/quickdecode.lua b/lua/quickdecode.lua
index 0851895..c4e0c6f 100644
--- a/lua/quickdecode.lua
+++ b/lua/quickdecode.lua
@@ -48,11 +48,31 @@ local strp_box = ffi.new("const uint8_t*[1]")
 local cur_box  = ffi.new("qjd_cursor[1]")
 
 local NOT_FOUND = 2
+-- Error codes mirrored from include/lua_quick_decode.h. Kept in sync manually;
+-- src/error.rs has the authoritative numbering.
+local ERR = {
+    OK                  =  0,
+    PARSE_ERROR         =  1,
+    NOT_FOUND           =  2,
+    TYPE_MISMATCH       =  3,
+    OUT_OF_RANGE        =  4,
+    DECODE_FAILED       =  5,
+    INVALID_PATH        =  6,
+    INVALID_ARG         =  7,
+    OOM                 =  8,
+    NESTING_TOO_DEEP    =  9,
+    TRAILING_CONTENT    = 10,
+    NUMBER_OUT_OF_RANGE = 11,
+    INVALID_NUMBER      = 12,
+    INVALID_STRING      = 13,
+    INVALID_UTF8        = 14,
+}
 
 local _M = {
     T_NULL = 0, T_BOOL = 1, T_NUM = 2,
     T_STR  = 3, T_ARR  = 4, T_OBJ = 5,
 }
+_M.ERR = ERR
 
 local Doc    = {}; Doc.__index    = Doc
 local Cursor = {}; Cursor.__index = Cursor
diff --git a/src/error.rs b/src/error.rs
index 270ea10..72ff3e9 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -3,15 +3,21 @@
 #[repr(C)]
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum qjd_err {
-    QJD_OK              = 0,
-    QJD_PARSE_ERROR     = 1,
-    QJD_NOT_FOUND       = 2,
-    QJD_TYPE_MISMATCH   = 3,
-    QJD_OUT_OF_RANGE    = 4,
-    QJD_DECODE_FAILED   = 5,
-    QJD_INVALID_PATH    = 6,
-    QJD_INVALID_ARG     = 7,
-    QJD_OOM             = 8,
+    QJD_OK                  =  0,
+    QJD_PARSE_ERROR         =  1,
+    QJD_NOT_FOUND           =  2,
+    QJD_TYPE_MISMATCH       =  3,
+    QJD_OUT_OF_RANGE        =  4,
+    QJD_DECODE_FAILED       =  5,
+    QJD_INVALID_PATH        =  6,
+    QJD_INVALID_ARG         =  7,
+    QJD_OOM                 =  8,
+    QJD_NESTING_TOO_DEEP    =  9,
+    QJD_TRAILING_CONTENT    = 10,
+    QJD_NUMBER_OUT_OF_RANGE = 11,
+    QJD_INVALID_NUMBER      = 12,
+    QJD_INVALID_STRING      = 13,
+    QJD_INVALID_UTF8        = 14,
 }
 
 #[repr(C)]
@@ -27,15 +33,21 @@ pub enum qjd_type {
 
 pub fn strerror(code: qjd_err) -> &'static str {
     match code {
-        qjd_err::QJD_OK            => "ok",
-        qjd_err::QJD_PARSE_ERROR   => "JSON parse error",
-        qjd_err::QJD_NOT_FOUND     => "path not found",
-        qjd_err::QJD_TYPE_MISMATCH => "type mismatch at path",
-        qjd_err::QJD_OUT_OF_RANGE  => "numeric out of range",
-        qjd_err::QJD_DECODE_FAILED => "decode failed",
-        qjd_err::QJD_INVALID_PATH  => "invalid path syntax",
-        qjd_err::QJD_INVALID_ARG   => "invalid argument",
-        qjd_err::QJD_OOM           => "out of memory",
+        qjd_err::QJD_OK                  => "ok",
+        qjd_err::QJD_PARSE_ERROR         => "JSON parse error",
+        qjd_err::QJD_NOT_FOUND           => "path not found",
+        qjd_err::QJD_TYPE_MISMATCH       => "type mismatch at path",
+        qjd_err::QJD_OUT_OF_RANGE        => "numeric out of range",
+        qjd_err::QJD_DECODE_FAILED       => "decode failed",
+        qjd_err::QJD_INVALID_PATH        => "invalid path syntax",
+        qjd_err::QJD_INVALID_ARG         => "invalid argument",
+        qjd_err::QJD_OOM                 => "out of memory",
+        qjd_err::QJD_NESTING_TOO_DEEP    => "nesting depth exceeds limit",
+        qjd_err::QJD_TRAILING_CONTENT    => "trailing content after root value",
+        qjd_err::QJD_NUMBER_OUT_OF_RANGE => "number out of representable range",
+        qjd_err::QJD_INVALID_NUMBER      => "invalid number format (RFC 8259)",
+        qjd_err::QJD_INVALID_STRING      => "invalid string content (unescaped control char)",
+        qjd_err::QJD_INVALID_UTF8        => "invalid UTF-8 in string",
     }
 }
 
@@ -50,6 +62,9 @@ mod tests {
             qjd_err::QJD_TYPE_MISMATCH, qjd_err::QJD_OUT_OF_RANGE,
             qjd_err::QJD_DECODE_FAILED, qjd_err::QJD_INVALID_PATH,
             qjd_err::QJD_INVALID_ARG, qjd_err::QJD_OOM,
+            qjd_err::QJD_NESTING_TOO_DEEP, qjd_err::QJD_TRAILING_CONTENT,
+            qjd_err::QJD_NUMBER_OUT_OF_RANGE, qjd_err::QJD_INVALID_NUMBER,
+            qjd_err::QJD_INVALID_STRING, qjd_err::QJD_INVALID_UTF8,
         ] {
             assert!(!strerror(code).is_empty());
         }
diff --git a/src/ffi.rs b/src/ffi.rs
index b5110b1..bdf9038 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -55,16 +55,22 @@ pub struct qjd_doc(pub(crate) Document<'static>);
 pub unsafe extern "C" fn qjd_strerror(code: c_int) -> *const c_char {
     // Hardcoded NUL-terminated map; avoids runtime allocation and lifetime issues.
     let s: &'static [u8] = match code {
-        0 => b"ok\0",
-        1 => b"JSON parse error\0",
-        2 => b"path not found\0",
-        3 => b"type mismatch at path\0",
-        4 => b"numeric out of range\0",
-        5 => b"decode failed\0",
-        6 => b"invalid path syntax\0",
-        7 => b"invalid argument\0",
-        8 => b"out of memory\0",
-        _ => b"unknown error code\0",
+         0 => b"ok\0",
+         1 => b"JSON parse error\0",
+         2 => b"path not found\0",
+         3 => b"type mismatch at path\0",
+         4 => b"numeric out of range\0",
+         5 => b"decode failed\0",
+         6 => b"invalid path syntax\0",
+         7 => b"invalid argument\0",
+         8 => b"out of memory\0",
+         9 => b"nesting depth exceeds limit\0",
+        10 => b"trailing content after root value\0",
+        11 => b"number out of representable range\0",
+        12 => b"invalid number format (RFC 8259)\0",
+        13 => b"invalid string content (unescaped control char)\0",
+        14 => b"invalid UTF-8 in string\0",
+         _ => b"unknown error code\0",
     };
     s.as_ptr() as *const c_char
 }

From a872d5cd146ff5da21eb475f15ec7c84f08a69ef Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 15:35:12 +0000
Subject: [PATCH 02/21] feat(options): introduce Options +
 Document::parse_with_options scaffold

---
 src/doc.rs     | 26 +++++++++++++++++++++++-
 src/lib.rs     |  1 +
 src/options.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 src/options.rs

diff --git a/src/doc.rs b/src/doc.rs
index 707bb44..faed1ec 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -12,9 +12,18 @@ pub struct Document<'a> {
 
 impl<'a> Document<'a> {
     pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> {
+        Self::parse_with_options(buf, &crate::options::Options::default())
+    }
+
+    pub fn parse_with_options(
+        buf: &'a [u8],
+        _opts: &crate::options::Options,
+    ) -> Result<Self, qjd_err> {
+        // TODO(Task 6+): plug in validate_depth / validate_trailing /
+        // validate_eager_values. For now this is a structural-only parse
+        // matching the historical `parse` behavior.
         let mut indices = Vec::new();
         crate::scan::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?;
-        // Sentinel simplifies boundary checks during Phase 2.
         indices.push(u32::MAX);
         Ok(Self {
             buf,
@@ -169,4 +178,19 @@ mod tests {
     fn parse_error_on_malformed() {
         assert!(Document::parse(b"{").is_err());
     }
+
+    #[test]
+    fn parse_with_options_defaults_match_parse() {
+        let opts = crate::options::Options::default();
+        let a = Document::parse(b"{\"a\":1}").unwrap();
+        let b = Document::parse_with_options(b"{\"a\":1}", &opts).unwrap();
+        assert_eq!(a.indices, b.indices);
+    }
+
+    #[test]
+    fn parse_with_lazy_skips_eager_validation() {
+        // Trailing content is an eager-only check; lazy must accept it.
+        let opts = crate::options::Options { mode: crate::options::QJD_MODE_LAZY, max_depth: 0 };
+        assert!(Document::parse_with_options(b"{}garbage", &opts).is_ok());
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 83f161b..d9c29b7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,7 @@
 //! lua-quick-decode: Rust JSON decoder for LuaJIT FFI consumers.
 
 pub mod error;
+pub mod options;
 pub(crate) mod scan;
 mod skip_cache;
 mod doc;
diff --git a/src/options.rs b/src/options.rs
new file mode 100644
index 0000000..3c1241c
--- /dev/null
+++ b/src/options.rs
@@ -0,0 +1,55 @@
+#![allow(non_camel_case_types)]
+
+pub const QJD_MODE_EAGER: u32 = 0;
+pub const QJD_MODE_LAZY:  u32 = 1;
+pub const QJD_DEFAULT_MAX_DEPTH: u32 = 1024;
+pub const QJD_MAX_MAX_DEPTH:     u32 = 4096;
+
+/// Caller-visible parse options. Layout is FFI-stable: kept in sync with
+/// `qjd_options` in `include/lua_quick_decode.h`.
+#[repr(C)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Options {
+    /// `QJD_MODE_EAGER` (0) — full RFC 8259 validation during parse.
+    /// `QJD_MODE_LAZY`  (1) — structural-only; defer value errors to access.
+    pub mode: u32,
+    /// Max bracket nesting depth. `0` selects `QJD_DEFAULT_MAX_DEPTH` (1024).
+    /// Values >`QJD_MAX_MAX_DEPTH` are clamped to that ceiling.
+    pub max_depth: u32,
+}
+
+impl Default for Options {
+    fn default() -> Self {
+        Self { mode: QJD_MODE_EAGER, max_depth: 0 }
+    }
+}
+
+#[allow(dead_code)] // used in Task 6+ validators
+impl Options {
+    pub(crate) fn effective_max_depth(&self) -> u32 {
+        let d = if self.max_depth == 0 { QJD_DEFAULT_MAX_DEPTH } else { self.max_depth };
+        d.min(QJD_MAX_MAX_DEPTH)
+    }
+
+    pub(crate) fn is_eager(&self) -> bool {
+        self.mode == QJD_MODE_EAGER
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test] fn default_is_eager() { assert!(Options::default().is_eager()); }
+
+    #[test]
+    fn zero_max_depth_falls_back_to_default() {
+        assert_eq!(Options::default().effective_max_depth(), QJD_DEFAULT_MAX_DEPTH);
+    }
+
+    #[test]
+    fn huge_max_depth_is_clamped() {
+        let o = Options { mode: 0, max_depth: u32::MAX };
+        assert_eq!(o.effective_max_depth(), QJD_MAX_MAX_DEPTH);
+    }
+}

From d5aaaec4e2ff9d407920cdbf2a047ab6e0243d8e Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 15:46:45 +0000
Subject: [PATCH 03/21] feat(ffi): add qjd_parse_ex symbol with qjd_options
 struct

---
 include/lua_quick_decode.h | 11 ++++++++++
 src/doc.rs                 |  1 +
 src/ffi.rs                 | 32 +++++++++++++++++++++++----
 tests/ffi_options_smoke.rs | 44 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+), 4 deletions(-)
 create mode 100644 tests/ffi_options_smoke.rs

diff --git a/include/lua_quick_decode.h b/include/lua_quick_decode.h
index 54d28b6..f920ab1 100644
--- a/include/lua_quick_decode.h
+++ b/include/lua_quick_decode.h
@@ -31,6 +31,15 @@ typedef enum {
     QJD_T_STR  = 3, QJD_T_ARR  = 4, QJD_T_OBJ = 5
 } qjd_type;
 
+#define QJD_MODE_EAGER          0u
+#define QJD_MODE_LAZY           1u
+#define QJD_DEFAULT_MAX_DEPTH   1024u
+
+typedef struct {
+    uint32_t mode;       /* QJD_MODE_EAGER (0) or QJD_MODE_LAZY (1) */
+    uint32_t max_depth;  /* 0 = use QJD_DEFAULT_MAX_DEPTH; values > 4096 are clamped to 4096 */
+} qjd_options;
+
 typedef struct qjd_doc qjd_doc;
 
 typedef struct {
@@ -44,6 +53,8 @@ typedef struct {
 const char* qjd_strerror(int code);
 
 qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out);
+qjd_doc* qjd_parse_ex(const uint8_t* buf, size_t len,
+                      const qjd_options* opts, int* err_out);
 void     qjd_free (qjd_doc* doc);
 
 int qjd_get_str  (qjd_doc*, const char* path, size_t path_len,
diff --git a/src/doc.rs b/src/doc.rs
index faed1ec..d6790ac 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -11,6 +11,7 @@ pub struct Document<'a> {
 }
 
 impl<'a> Document<'a> {
+    #[allow(dead_code)] // public convenience API; used in tests and external crates
     pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> {
         Self::parse_with_options(buf, &crate::options::Options::default())
     }
diff --git a/src/ffi.rs b/src/ffi.rs
index bdf9038..f5cf5a9 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -92,20 +92,44 @@ pub unsafe extern "C" fn qjd_parse(
     buf:     *const u8,
     len:     usize,
     err_out: *mut c_int,
+) -> *mut qjd_doc {
+    let default = crate::options::Options::default();
+    qjd_parse_ex(buf, len, &default as *const _, err_out)
+}
+
+/// Parse with caller-supplied options. `opts` may be NULL to mean defaults
+/// (eager mode, default max_depth).
+///
+/// # Safety
+///
+/// Same as `qjd_parse`, with the additional contract that `opts`, when
+/// non-NULL, points to a readable `qjd_options` for the duration of the call
+/// (the struct is copied internally).
+#[no_mangle]
+pub unsafe extern "C" fn qjd_parse_ex(
+    buf:     *const u8,
+    len:     usize,
+    opts:    *const crate::options::Options,
+    err_out: *mut c_int,
 ) -> *mut qjd_doc {
     let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
-        if buf.is_null() || err_out.is_null() {
+        if buf.is_null() {
             if !err_out.is_null() { *err_out = qjd_err::QJD_INVALID_ARG as c_int; }
             return ptr::null_mut();
         }
+        let opts_owned = if opts.is_null() {
+            crate::options::Options::default()
+        } else {
+            *opts
+        };
         let slice: &'static [u8] = std::slice::from_raw_parts(buf, len);
-        match Document::parse(slice) {
+        match Document::parse_with_options(slice, &opts_owned) {
             Ok(d) => {
-                *err_out = qjd_err::QJD_OK as c_int;
+                if !err_out.is_null() { *err_out = qjd_err::QJD_OK as c_int; }
                 Box::into_raw(Box::new(qjd_doc(d)))
             }
             Err(e) => {
-                *err_out = e as c_int;
+                if !err_out.is_null() { *err_out = e as c_int; }
                 ptr::null_mut()
             }
         }
diff --git a/tests/ffi_options_smoke.rs b/tests/ffi_options_smoke.rs
new file mode 100644
index 0000000..83d942d
--- /dev/null
+++ b/tests/ffi_options_smoke.rs
@@ -0,0 +1,44 @@
+//! Smoke test for qjd_parse_ex and qjd_options C ABI.
+
+use std::os::raw::c_int;
+
+use quickdecode::ffi::{qjd_doc, qjd_free, qjd_parse, qjd_parse_ex};
+use quickdecode::options::Options;
+
+#[test]
+fn parse_ex_default_options_matches_parse() {
+    let buf = b"{\"a\":1}";
+    let mut err: c_int = -1;
+    let d1: *mut qjd_doc = unsafe { qjd_parse(buf.as_ptr(), buf.len(), &mut err) };
+    assert!(!d1.is_null());
+    assert_eq!(err, 0);
+
+    let opts = Options { mode: 0, max_depth: 0 };
+    let mut err2: c_int = -1;
+    let d2: *mut qjd_doc = unsafe { qjd_parse_ex(buf.as_ptr(), buf.len(), &opts, &mut err2) };
+    assert!(!d2.is_null());
+    assert_eq!(err2, 0);
+
+    unsafe { qjd_free(d1); qjd_free(d2); }
+}
+
+#[test]
+fn parse_ex_null_opts_uses_defaults() {
+    let buf = b"{}";
+    let mut err: c_int = -1;
+    let d: *mut qjd_doc = unsafe {
+        qjd_parse_ex(buf.as_ptr(), buf.len(), std::ptr::null(), &mut err)
+    };
+    assert!(!d.is_null());
+    assert_eq!(err, 0);
+    unsafe { qjd_free(d) };
+}
+
+#[test]
+fn parse_ex_null_err_returns_null_on_bad_buf() {
+    let opts = Options { mode: 0, max_depth: 0 };
+    let d: *mut qjd_doc = unsafe {
+        qjd_parse_ex(std::ptr::null(), 0, &opts, std::ptr::null_mut())
+    };
+    assert!(d.is_null());
+}

From 80c13587f00a989edeb061a6ef8e54d59e3902ee Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 15:53:33 +0000
Subject: [PATCH 04/21] docs(ffi): clarify qjd_parse err_out contract and
 dead_code rationale

---
 src/doc.rs | 2 +-
 src/ffi.rs | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/doc.rs b/src/doc.rs
index d6790ac..edc51b4 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -11,7 +11,7 @@ pub struct Document<'a> {
 }
 
 impl<'a> Document<'a> {
-    #[allow(dead_code)] // public convenience API; used in tests and external crates
+    #[allow(dead_code)] // suppressed until mod doc is re-exported (Task 5)
     pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> {
         Self::parse_with_options(buf, &crate::options::Options::default())
     }
diff --git a/src/ffi.rs b/src/ffi.rs
index f5cf5a9..4cecaef 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -81,8 +81,8 @@ pub unsafe extern "C" fn qjd_strerror(code: c_int) -> *const c_char {
 ///
 /// - `buf` must point to `len` readable bytes, or be NULL (in which case the
 ///   function returns NULL with `*err_out = QJD_INVALID_ARG`).
-/// - `err_out` must point to a writable `int`, or be NULL (in which case the
-///   function returns NULL with no error code written).
+/// - `err_out` may be NULL. When non-NULL it receives `QJD_OK` on success or
+///   an error code on failure.
 /// - The buffer must remain valid and unmodified for the lifetime of the
 ///   returned `qjd_doc*`; the document borrows it.
 /// - On success, the returned pointer must be freed exactly once with

From 1e8b55b38c505ecf78765cc0588671d5901cd992 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 15:55:38 +0000
Subject: [PATCH 05/21] feat(lua): accept opts table in qd.parse(json, { lazy,
 max_depth })

---
 lua/quickdecode.lua        | 38 ++++++++++++++++++++++++++++++++++----
 tests/lua/options_spec.lua | 26 ++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 tests/lua/options_spec.lua

diff --git a/lua/quickdecode.lua b/lua/quickdecode.lua
index c4e0c6f..b8ad662 100644
--- a/lua/quickdecode.lua
+++ b/lua/quickdecode.lua
@@ -7,9 +7,16 @@ typedef struct {
     uint32_t idx_start, idx_end, _reserved0, _reserved1;
 } qjd_cursor;
 
+typedef struct {
+    uint32_t mode;
+    uint32_t max_depth;
+} qjd_options;
+
 const char* qjd_strerror(int code);
-qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out);
-void qjd_free(qjd_doc* doc);
+qjd_doc* qjd_parse   (const uint8_t* buf, size_t len, int* err_out);
+qjd_doc* qjd_parse_ex(const uint8_t* buf, size_t len,
+                       const qjd_options* opts, int* err_out);
+void     qjd_free    (qjd_doc* doc);
 
 int qjd_get_str (qjd_doc*, const char* path, size_t path_len, const uint8_t** p, size_t* n);
 int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out);
@@ -83,8 +90,31 @@ local function check_err(rc)
     error("quickdecode: " .. ffi.string(C.qjd_strerror(rc)))
 end
 
-function _M.parse(json_str)
-    local ptr = C.qjd_parse(json_str, #json_str, err_box)
+local opts_box = ffi.new("qjd_options[1]")
+
+local MODE_EAGER = 0
+local MODE_LAZY  = 1
+
+function _M.parse(json_str, opts)
+    local ptr
+    if opts == nil then
+        ptr = C.qjd_parse(json_str, #json_str, err_box)
+    else
+        if type(opts) ~= "table" then
+            error("quickdecode.parse: opts must be a table")
+        end
+        local lazy = opts.lazy
+        if lazy ~= nil and type(lazy) ~= "boolean" then
+            error("quickdecode.parse: opts.lazy must be a boolean")
+        end
+        local max_depth = opts.max_depth or 0
+        if type(max_depth) ~= "number" or max_depth < 0 then
+            error("quickdecode.parse: opts.max_depth must be a non-negative integer")
+        end
+        opts_box[0].mode      = lazy and MODE_LAZY or MODE_EAGER
+        opts_box[0].max_depth = max_depth
+        ptr = C.qjd_parse_ex(json_str, #json_str, opts_box, err_box)
+    end
     if ptr == nil then
         error("quickdecode: " .. ffi.string(C.qjd_strerror(err_box[0])))
     end
diff --git a/tests/lua/options_spec.lua b/tests/lua/options_spec.lua
new file mode 100644
index 0000000..786b1b1
--- /dev/null
+++ b/tests/lua/options_spec.lua
@@ -0,0 +1,26 @@
+local qd = require "quickdecode"
+
+describe("parse with options", function()
+    it("accepts no second arg (default eager)", function()
+        assert.is_not_nil(qd.parse('{"a":1}'))
+    end)
+
+    it("accepts an empty opts table", function()
+        assert.is_not_nil(qd.parse('{"a":1}', {}))
+    end)
+
+    it("accepts lazy=true and tolerates trailing content", function()
+        -- Trailing content is eager-only; lazy must parse OK.
+        assert.is_not_nil(qd.parse('{}garbage', { lazy = true }))
+    end)
+
+    it("accepts max_depth", function()
+        assert.is_not_nil(qd.parse('[[[1]]]', { max_depth = 1024 }))
+    end)
+
+    it("rejects invalid mode key value", function()
+        assert.has_error(function()
+            qd.parse('{}', { lazy = "yes please" })
+        end)
+    end)
+end)

From 16a149bff38ea0f6aa80b80a371d657399eb44c4 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 15:58:18 +0000
Subject: [PATCH 06/21] fix(lua): reject fractional max_depth; add
 combined-opts test

---
 lua/quickdecode.lua        |  2 +-
 tests/lua/options_spec.lua | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lua/quickdecode.lua b/lua/quickdecode.lua
index b8ad662..5ab6c5f 100644
--- a/lua/quickdecode.lua
+++ b/lua/quickdecode.lua
@@ -108,7 +108,7 @@ function _M.parse(json_str, opts)
             error("quickdecode.parse: opts.lazy must be a boolean")
         end
         local max_depth = opts.max_depth or 0
-        if type(max_depth) ~= "number" or max_depth < 0 then
+        if type(max_depth) ~= "number" or max_depth < 0 or max_depth ~= math.floor(max_depth) then
             error("quickdecode.parse: opts.max_depth must be a non-negative integer")
         end
         opts_box[0].mode      = lazy and MODE_LAZY or MODE_EAGER
diff --git a/tests/lua/options_spec.lua b/tests/lua/options_spec.lua
index 786b1b1..c689d2a 100644
--- a/tests/lua/options_spec.lua
+++ b/tests/lua/options_spec.lua
@@ -23,4 +23,14 @@ describe("parse with options", function()
             qd.parse('{}', { lazy = "yes please" })
         end)
     end)
+
+    it("accepts lazy=true and max_depth combined", function()
+        assert.is_not_nil(qd.parse('[[1]]', { lazy = true, max_depth = 256 }))
+    end)
+
+    it("rejects fractional max_depth", function()
+        assert.has_error(function()
+            qd.parse('{}', { max_depth = 1.5 })
+        end)
+    end)
 end)

From c8dfd8446324e2689ed012e85cd4604bb86e4054 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:01:40 +0000
Subject: [PATCH 07/21] test(rfc8259): scaffold compliance suite with
 cross-mode helper macros

---
 src/doc.rs                  |  1 -
 src/lib.rs                  |  2 +-
 tests/rfc8259_compliance.rs | 79 +++++++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 tests/rfc8259_compliance.rs

diff --git a/src/doc.rs b/src/doc.rs
index edc51b4..faed1ec 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -11,7 +11,6 @@ pub struct Document<'a> {
 }
 
 impl<'a> Document<'a> {
-    #[allow(dead_code)] // suppressed until mod doc is re-exported (Task 5)
     pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> {
         Self::parse_with_options(buf, &crate::options::Options::default())
     }
diff --git a/src/lib.rs b/src/lib.rs
index d9c29b7..80c0d5c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,7 @@ pub mod error;
 pub mod options;
 pub(crate) mod scan;
 mod skip_cache;
-mod doc;
+pub mod doc;
 mod path;
 mod cursor;
 mod decode;
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
new file mode 100644
index 0000000..f982581
--- /dev/null
+++ b/tests/rfc8259_compliance.rs
@@ -0,0 +1,79 @@
+//! RFC 8259 conformance suite.
+//!
+//! Cross-mode contract:
+//!   * `y_*` inputs MUST parse successfully in both EAGER and LAZY modes,
+//!     and any specified field-level access MUST return the expected value.
+//!   * `n_*` inputs MUST fail to parse in EAGER mode, and MUST either
+//!     fail to parse OR fail on the documented field access in LAZY mode.
+//!   * `i_*` inputs document our current behavior; we assert what we do
+//!     today (so regressions surface), referencing JSONTestSuite naming.
+//!
+//! RFC 8259 references are in section-paragraph form, e.g. RFC8259 §6 for
+//! the number grammar.
+
+use quickdecode::doc::Document;
+use quickdecode::options::{Options, QJD_MODE_EAGER, QJD_MODE_LAZY};
+
+fn eager() -> Options { Options { mode: QJD_MODE_EAGER, max_depth: 0 } }
+fn lazy()  -> Options { Options { mode: QJD_MODE_LAZY,  max_depth: 0 } }
+
+/// Asserts the input is accepted in both modes.
+///
+/// Usage: `assert_accepts!("[]");`
+#[macro_export]
+macro_rules! assert_accepts {
+    ($input:expr) => {{
+        let buf: &[u8] = $input.as_ref();
+        let r_eager = Document::parse_with_options(buf, &eager());
+        assert!(r_eager.is_ok(),
+            "EAGER unexpectedly rejected {:?}: {:?}", $input, r_eager.err());
+        let r_lazy = Document::parse_with_options(buf, &lazy());
+        assert!(r_lazy.is_ok(),
+            "LAZY unexpectedly rejected {:?}: {:?}", $input, r_lazy.err());
+    }};
+}
+
+/// Asserts the input is REJECTED by eager parse.
+///
+/// Usage: `assert_rejects_eager!("01", QJD_INVALID_NUMBER);`
+#[macro_export]
+macro_rules! assert_rejects_eager {
+    ($input:expr, $expected_err:path) => {{
+        use quickdecode::error::qjd_err;
+        let buf: &[u8] = $input.as_ref();
+        match Document::parse_with_options(buf, &eager()) {
+            Err($expected_err) => {}
+            Err(other) => panic!(
+                "EAGER rejected {:?} with {:?}, expected {:?}",
+                $input, other, qjd_err::$expected_err),
+            Ok(_) => panic!("EAGER unexpectedly accepted {:?}", $input),
+        }
+    }};
+}
+
+/// Asserts the input is rejected at parse time in BOTH modes (structural).
+#[macro_export]
+macro_rules! assert_rejects_both {
+    ($input:expr) => {{
+        let buf: &[u8] = $input.as_ref();
+        assert!(Document::parse_with_options(buf, &eager()).is_err(),
+            "EAGER unexpectedly accepted {:?}", $input);
+        assert!(Document::parse_with_options(buf, &lazy()).is_err(),
+            "LAZY unexpectedly accepted {:?}", $input);
+    }};
+}
+
+// ─────────────────────────────────────────────────────────────
+// Scaffold smoke tests — replaced by Task 11 with full corpus.
+// ─────────────────────────────────────────────────────────────
+
+#[test]
+fn smoke_accepts_empty_object() { assert_accepts!("{}"); }
+
+#[test]
+fn smoke_accepts_empty_array() { assert_accepts!("[]"); }
+
+#[test]
+fn smoke_rejects_unmatched_brace_both_modes() {
+    assert_rejects_both!("{");
+}

From 75c7244251b1a97b6bf8e17339471dda46dc09d8 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:04:56 +0000
Subject: [PATCH 08/21] fix(test): assert_rejects_eager macro now actually
 matches by variant

Switch the fragment specifier from :path to :ident so the variant name
can be used in a qjd_err:: path, and replace the pattern arm with a
runtime guard (if e == expected) to avoid the binding-vs-pattern
ambiguity. Add macro_rejects_wrong_error_code as a regression canary.
---
 tests/rfc8259_compliance.rs | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index f982581..f1d4f7f 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -38,14 +38,15 @@ macro_rules! assert_accepts {
 /// Usage: `assert_rejects_eager!("01", QJD_INVALID_NUMBER);`
 #[macro_export]
 macro_rules! assert_rejects_eager {
-    ($input:expr, $expected_err:path) => {{
+    ($input:expr, $expected_err:ident) => {{
         use quickdecode::error::qjd_err;
         let buf: &[u8] = $input.as_ref();
+        let expected = qjd_err::$expected_err;
         match Document::parse_with_options(buf, &eager()) {
-            Err($expected_err) => {}
+            Err(e) if e == expected => {}
             Err(other) => panic!(
                 "EAGER rejected {:?} with {:?}, expected {:?}",
-                $input, other, qjd_err::$expected_err),
+                $input, other, expected),
             Ok(_) => panic!("EAGER unexpectedly accepted {:?}", $input),
         }
     }};
@@ -77,3 +78,13 @@ fn smoke_accepts_empty_array() { assert_accepts!("[]"); }
 fn smoke_rejects_unmatched_brace_both_modes() {
     assert_rejects_both!("{");
 }
+
+#[test]
+#[should_panic(expected = "expected QJD_INVALID_NUMBER")]
+fn macro_rejects_wrong_error_code() {
+    // Sanity: passing the wrong expected variant must panic.
+    // `{` is rejected as QJD_PARSE_ERROR, NOT QJD_INVALID_NUMBER.
+    // With the buggy macro, this test would NOT panic (false positive
+    // — the macro would silently bind whatever Err came back).
+    assert_rejects_eager!("{", QJD_INVALID_NUMBER);
+}

From 1b86918a4f3f70236c48dda2e22574e7fcad9365 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:10:38 +0000
Subject: [PATCH 09/21] feat(validate): enforce max_depth in both eager and
 lazy modes

---
 src/doc.rs                  | 10 +++---
 src/lib.rs                  |  1 +
 src/validate.rs             | 64 +++++++++++++++++++++++++++++++++++++
 tests/rfc8259_compliance.rs | 44 +++++++++++++++++++++++++
 4 files changed, 115 insertions(+), 4 deletions(-)
 create mode 100644 src/validate.rs

diff --git a/src/doc.rs b/src/doc.rs
index faed1ec..3a07760 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -17,14 +17,16 @@ impl<'a> Document<'a> {
 
     pub fn parse_with_options(
         buf: &'a [u8],
-        _opts: &crate::options::Options,
+        opts: &crate::options::Options,
     ) -> Result<Self, qjd_err> {
-        // TODO(Task 6+): plug in validate_depth / validate_trailing /
-        // validate_eager_values. For now this is a structural-only parse
-        // matching the historical `parse` behavior.
+        let max_depth = opts.effective_max_depth();
         let mut indices = Vec::new();
         crate::scan::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?;
         indices.push(u32::MAX);
+
+        crate::validate::validate_depth(buf, &indices, max_depth)?;
+        // TODO(Task 7+): trailing-content and eager value validators.
+
         Ok(Self {
             buf,
             indices,
diff --git a/src/lib.rs b/src/lib.rs
index 80c0d5c..87f5c6a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,6 +8,7 @@ pub mod doc;
 mod path;
 mod cursor;
 mod decode;
+mod validate;
 pub mod ffi;
 
 #[doc(hidden)]
diff --git a/src/validate.rs b/src/validate.rs
new file mode 100644
index 0000000..38f0181
--- /dev/null
+++ b/src/validate.rs
@@ -0,0 +1,64 @@
+//! Post-scan validators invoked by Document::parse_with_options.
+//!
+//! Walking the already-emitted `indices` array is intentionally
+//! decoupled from the SIMD/scalar scanner paths so the structural
+//! scanner code stays untouched.
+
+use crate::error::qjd_err;
+
+/// Verify that the maximum bracket-stack depth implied by `indices`
+/// does not exceed `max_depth`. Walks indices once; assumes scan() has
+/// already validated bracket pairing.
+///
+/// `indices` is the post-scan vector with the trailing u32::MAX sentinel.
+pub(crate) fn validate_depth(
+    buf: &[u8],
+    indices: &[u32],
+    max_depth: u32,
+) -> Result<(), qjd_err> {
+    let mut depth: u32 = 0;
+    for &idx in indices {
+        if idx == u32::MAX { break; }
+        match buf[idx as usize] {
+            b'{' | b'[' => {
+                depth += 1;
+                if depth > max_depth {
+                    return Err(qjd_err::QJD_NESTING_TOO_DEEP);
+                }
+            }
+            b'}' | b']' => {
+                // Cannot underflow: scan() already validated pairing.
+                depth -= 1;
+            }
+            _ => {}
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn ix(buf: &[u8]) -> Vec<u32> {
+        let mut v = Vec::new();
+        crate::scan::scan(buf, &mut v).unwrap();
+        v.push(u32::MAX);
+        v
+    }
+
+    #[test]
+    fn under_limit_ok() {
+        let buf = b"[[1]]";
+        assert!(validate_depth(buf, &ix(buf), 2).is_ok());
+    }
+
+    #[test]
+    fn over_limit_rejected() {
+        let buf = b"[[[1]]]";
+        assert_eq!(
+            validate_depth(buf, &ix(buf), 2),
+            Err(qjd_err::QJD_NESTING_TOO_DEEP),
+        );
+    }
+}
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index f1d4f7f..b193c94 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -88,3 +88,47 @@ fn macro_rejects_wrong_error_code() {
     // — the macro would silently bind whatever Err came back).
     assert_rejects_eager!("{", QJD_INVALID_NUMBER);
 }
+
+// ── Phase 3: nesting depth ───────────────────────────────────
+
+#[test]
+fn rejects_deeply_nested_at_default_limit() {
+    use quickdecode::error::qjd_err;
+    let mut buf = String::new();
+    for _ in 0..1100 { buf.push('['); }
+    for _ in 0..1100 { buf.push(']'); }
+    match Document::parse_with_options(buf.as_bytes(), &eager()) {
+        Err(qjd_err::QJD_NESTING_TOO_DEEP) => {}
+        other => panic!("expected QJD_NESTING_TOO_DEEP, got {:?}", other.err()),
+    }
+}
+
+#[test]
+fn lazy_mode_also_enforces_max_depth() {
+    use quickdecode::error::qjd_err;
+    let mut buf = String::new();
+    for _ in 0..1100 { buf.push('['); }
+    for _ in 0..1100 { buf.push(']'); }
+    assert_eq!(
+        Document::parse_with_options(buf.as_bytes(), &lazy()).err().unwrap(),
+        qjd_err::QJD_NESTING_TOO_DEEP,
+    );
+}
+
+#[test]
+fn accepts_nested_at_configured_limit() {
+    let mut buf = String::new();
+    for _ in 0..256 { buf.push('['); }
+    for _ in 0..256 { buf.push(']'); }
+    let opts = Options { mode: QJD_MODE_EAGER, max_depth: 256 };
+    assert!(Document::parse_with_options(buf.as_bytes(), &opts).is_ok());
+}
+
+#[test]
+fn rejects_when_one_past_configured_limit() {
+    let mut buf = String::new();
+    for _ in 0..33 { buf.push('['); }
+    for _ in 0..33 { buf.push(']'); }
+    let opts = Options { mode: QJD_MODE_EAGER, max_depth: 32 };
+    assert!(Document::parse_with_options(buf.as_bytes(), &opts).is_err());
+}

From 1f93104d4f54a0ef5c29a2fb784ce4ab74084945 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:17:35 +0000
Subject: [PATCH 10/21] feat(validate): reject trailing content after root
 value (eager only)

---
 src/doc.rs                  |  6 ++-
 src/validate.rs             | 89 +++++++++++++++++++++++++++++++++++++
 tests/rfc8259_compliance.rs | 40 +++++++++++++++++
 3 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/src/doc.rs b/src/doc.rs
index 3a07760..75deb8d 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -25,7 +25,11 @@ impl<'a> Document<'a> {
         indices.push(u32::MAX);
 
         crate::validate::validate_depth(buf, &indices, max_depth)?;
-        // TODO(Task 7+): trailing-content and eager value validators.
+
+        if opts.is_eager() {
+            crate::validate::validate_trailing(buf, &indices)?;
+            // TODO(Task 10): validate_eager_values
+        }
 
         Ok(Self {
             buf,
diff --git a/src/validate.rs b/src/validate.rs
index 38f0181..db1fc73 100644
--- a/src/validate.rs
+++ b/src/validate.rs
@@ -36,6 +36,59 @@ pub(crate) fn validate_depth(
     Ok(())
 }
 
+/// Verify there is no non-whitespace content after the root value.
+///
+/// The root value ends at the last non-sentinel structural offset in
+/// `indices` when it is a container or string, or at the end of its
+/// token when it is a bare top-level scalar. `root_end` is the offset just
+/// past the whitespace that follows; any byte at or after it is rejected.
+pub(crate) fn validate_trailing(
+    buf: &[u8],
+    indices: &[u32],
+) -> Result<(), qjd_err> {
+    // Find the last real offset (skip the u32::MAX sentinel).
+    let last = indices.iter().rev()
+        .find(|&&i| i != u32::MAX)
+        .copied();
+
+    let root_end = match last {
+        // No structural chars at all: input is whitespace or a bare scalar.
+        // Bare scalar: locate the end by scanning until whitespace or EOF.
+        None => {
+            // Strip leading whitespace, then find the scalar's terminator.
+            let mut p = 0;
+            while p < buf.len() && is_ws(buf[p]) { p += 1; }
+            let start = p;
+            // Scan until next whitespace (end of scalar token).
+            while p < buf.len() && !is_ws(buf[p]) { p += 1; }
+            if start == p { return Ok(()); } // input was only whitespace
+            // Advance past trailing whitespace so `42   ` is accepted.
+            while p < buf.len() && is_ws(buf[p]) { p += 1; }
+            p
+        }
+        // The last structural is the root container's closing `}` / `]` or
+        // the root string's closing `"`. Any other structural in last place
+        // (`,`/`:`/`{`/`[`) means the input was malformed, and scan() should
+        // already have rejected it before this validator runs.
+        Some(last_idx) => {
+            let mut p = last_idx as usize + 1;
+            // Advance past any trailing whitespace.
+            while p < buf.len() && is_ws(buf[p]) { p += 1; }
+            p
+        }
+    };
+
+    if root_end < buf.len() {
+        return Err(qjd_err::QJD_TRAILING_CONTENT);
+    }
+    Ok(())
+}
+
+#[inline(always)]
+fn is_ws(b: u8) -> bool {
+    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -61,4 +114,40 @@ mod tests {
             Err(qjd_err::QJD_NESTING_TOO_DEEP),
         );
     }
+
+    #[test]
+    fn trailing_clean_container() {
+        let buf = b"{}";
+        assert!(validate_trailing(buf, &ix(buf)).is_ok());
+    }
+
+    #[test]
+    fn trailing_whitespace_accepted() {
+        let buf = b"{}   \n\t";
+        assert!(validate_trailing(buf, &ix(buf)).is_ok());
+    }
+
+    #[test]
+    fn trailing_garbage_rejected() {
+        let buf = b"{}garbage";
+        assert_eq!(
+            validate_trailing(buf, &ix(buf)),
+            Err(qjd_err::QJD_TRAILING_CONTENT),
+        );
+    }
+
+    #[test]
+    fn bare_scalar_trailing_ws_accepted() {
+        let buf = b"42 \n\t";
+        assert!(validate_trailing(buf, &ix(buf)).is_ok());
+    }
+
+    #[test]
+    fn two_root_scalars_rejected() {
+        let buf = b"1 2";
+        assert_eq!(
+            validate_trailing(buf, &ix(buf)),
+            Err(qjd_err::QJD_TRAILING_CONTENT),
+        );
+    }
 }
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index b193c94..b07f7ab 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -132,3 +132,43 @@ fn rejects_when_one_past_configured_limit() {
     let opts = Options { mode: QJD_MODE_EAGER, max_depth: 32 };
     assert!(Document::parse_with_options(buf.as_bytes(), &opts).is_err());
 }
+
+// ── Phase 6: trailing content ────────────────────────────────
+
+#[test]
+fn eager_rejects_trailing_content() {
+    use quickdecode::error::qjd_err;
+    assert_eq!(
+        Document::parse_with_options(b"{}garbage", &eager()).err().unwrap(),
+        qjd_err::QJD_TRAILING_CONTENT,
+    );
+}
+
+#[test]
+fn eager_rejects_multiple_root_values() {
+    use quickdecode::error::qjd_err;
+    assert_eq!(
+        Document::parse_with_options(b"1 2", &eager()).err().unwrap(),
+        qjd_err::QJD_TRAILING_CONTENT,
+    );
+    assert_eq!(
+        Document::parse_with_options(b"true false", &eager()).err().unwrap(),
+        qjd_err::QJD_TRAILING_CONTENT,
+    );
+}
+
+#[test]
+fn eager_accepts_trailing_whitespace() {
+    assert_accepts!("{}   \n\t");
+}
+
+#[test]
+fn eager_accepts_top_level_scalar_with_trailing_whitespace() {
+    assert_accepts!("42 \n\t");
+}
+
+#[test]
+fn lazy_accepts_trailing_garbage() {
+    // Lazy preserves historical behavior: trailing bytes are ignored.
+    assert!(Document::parse_with_options(b"{}garbage", &lazy()).is_ok());
+}

From e9a2b57c4168f11895582c767cd829a6140dea70 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:25:35 +0000
Subject: [PATCH 11/21] feat(validate): strict RFC 8259 number ABNF (lazy
 decode + lazy entry points)

---
 src/decode/number.rs                 | 24 ++++----
 src/{validate.rs => validate/mod.rs} |  3 +
 src/validate/number.rs               | 91 ++++++++++++++++++++++++++++
 tests/rfc8259_compliance.rs          | 37 +++++++++++
 4 files changed, 142 insertions(+), 13 deletions(-)
 rename src/{validate.rs => validate/mod.rs} (98%)
 create mode 100644 src/validate/number.rs

diff --git a/src/decode/number.rs b/src/decode/number.rs
index 45d2f89..1beda2d 100644
--- a/src/decode/number.rs
+++ b/src/decode/number.rs
@@ -1,10 +1,8 @@
 use crate::error::qjd_err;
 
 pub(crate) fn parse_i64(bytes: &[u8]) -> Result<i64, qjd_err> {
-    if bytes.is_empty() {
-        return Err(qjd_err::QJD_DECODE_FAILED);
-    }
-    // Reject non-integer JSON numbers (with decimal point or exponent).
+    crate::validate::validate_number(bytes)?;
+    // After ABNF validation, integer-only inputs have no `.`/`e`/`E`.
     if bytes.iter().any(|&b| b == b'.' || b == b'e' || b == b'E') {
         return Err(qjd_err::QJD_TYPE_MISMATCH);
     }
@@ -12,9 +10,7 @@ pub(crate) fn parse_i64(bytes: &[u8]) -> Result<i64, qjd_err> {
         b'-' => (true, &bytes[1..]),
         _    => (false, bytes),
     };
-    if rest.is_empty() || !rest.iter().all(|c| c.is_ascii_digit()) {
-        return Err(qjd_err::QJD_DECODE_FAILED);
-    }
+    // ABNF guarantees `rest` is non-empty and digit-only here.
     let mut v: i64 = 0;
     for &c in rest {
         let d = (c - b'0') as i64;
@@ -29,11 +25,13 @@ pub(crate) fn parse_i64(bytes: &[u8]) -> Result<i64, qjd_err> {
 }
 
 pub(crate) fn parse_f64(bytes: &[u8]) -> Result<f64, qjd_err> {
-    if bytes.is_empty() {
-        return Err(qjd_err::QJD_DECODE_FAILED);
-    }
+    crate::validate::validate_number(bytes)?;
     let s = std::str::from_utf8(bytes).map_err(|_| qjd_err::QJD_DECODE_FAILED)?;
-    s.parse::<f64>().map_err(|_| qjd_err::QJD_DECODE_FAILED)
+    match s.parse::<f64>() {
+        Ok(v) if v.is_finite() => Ok(v),
+        Ok(_)                  => Err(qjd_err::QJD_NUMBER_OUT_OF_RANGE),
+        Err(_)                 => Err(qjd_err::QJD_DECODE_FAILED),
+    }
 }
 
 #[cfg(test)]
@@ -63,7 +61,7 @@ mod tests {
 
     #[test]
     fn i64_rejects_empty() {
-        assert_eq!(parse_i64(b""), Err(qjd_err::QJD_DECODE_FAILED));
+        assert_eq!(parse_i64(b""), Err(qjd_err::QJD_INVALID_NUMBER));
     }
 
     #[test] fn f64_zero()    { assert_eq!(parse_f64(b"0.0").unwrap(),  0.0); }
@@ -73,6 +71,6 @@ mod tests {
 
     #[test]
     fn f64_rejects_garbage() {
-        assert_eq!(parse_f64(b"hello"), Err(qjd_err::QJD_DECODE_FAILED));
+        assert_eq!(parse_f64(b"hello"), Err(qjd_err::QJD_INVALID_NUMBER));
     }
 }
diff --git a/src/validate.rs b/src/validate/mod.rs
similarity index 98%
rename from src/validate.rs
rename to src/validate/mod.rs
index db1fc73..28d9311 100644
--- a/src/validate.rs
+++ b/src/validate/mod.rs
@@ -4,6 +4,9 @@
 //! decoupled from the SIMD/scalar scanner paths so the structural
 //! scanner code stays untouched.
 
+pub(crate) mod number;
+pub(crate) use number::validate_number;
+
 use crate::error::qjd_err;
 
 /// Verify that the maximum bracket-stack depth implied by `indices`
diff --git a/src/validate/number.rs b/src/validate/number.rs
new file mode 100644
index 0000000..c212bdb
--- /dev/null
+++ b/src/validate/number.rs
@@ -0,0 +1,91 @@
+//! Strict RFC 8259 §6 number-format validation.
+
+use crate::error::qjd_err;
+
+/// Returns Ok if `bytes` matches the JSON `number` grammar exactly.
+/// Otherwise returns `QJD_INVALID_NUMBER`.
+///
+/// Out-of-range (i.e. f64 overflow) is NOT detected here; the f64 decode
+/// step surfaces it as `QJD_NUMBER_OUT_OF_RANGE`.
+pub(crate) fn validate_number(bytes: &[u8]) -> Result<(), qjd_err> {
+    let mut i = 0;
+
+    // optional minus
+    if bytes.first() == Some(&b'-') { i += 1; }
+
+    // int: "0" | (digit1-9 *digit)
+    match bytes.get(i) {
+        Some(&b'0') => { i += 1; }
+        Some(&(b'1'..=b'9')) => {
+            i += 1;
+            while let Some(&c) = bytes.get(i) {
+                if !c.is_ascii_digit() { break; }
+                i += 1;
+            }
+        }
+        _ => return Err(qjd_err::QJD_INVALID_NUMBER),
+    }
+
+    // optional frac: "." 1*digit
+    if bytes.get(i) == Some(&b'.') {
+        i += 1;
+        let frac_start = i;
+        while let Some(&c) = bytes.get(i) {
+            if !c.is_ascii_digit() { break; }
+            i += 1;
+        }
+        if i == frac_start { return Err(qjd_err::QJD_INVALID_NUMBER); }
+    }
+
+    // optional exp: ("e"|"E") ["+"|"-"] 1*digit
+    if matches!(bytes.get(i), Some(&b'e') | Some(&b'E')) {
+        i += 1;
+        if matches!(bytes.get(i), Some(&b'+') | Some(&b'-')) { i += 1; }
+        let exp_start = i;
+        while let Some(&c) = bytes.get(i) {
+            if !c.is_ascii_digit() { break; }
+            i += 1;
+        }
+        if i == exp_start { return Err(qjd_err::QJD_INVALID_NUMBER); }
+    }
+
+    if i != bytes.len() { return Err(qjd_err::QJD_INVALID_NUMBER); }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn ok(s: &[u8]) { assert!(validate_number(s).is_ok(), "{:?}", std::str::from_utf8(s)); }
+    fn bad(s: &[u8]) { assert!(validate_number(s).is_err(), "{:?}", std::str::from_utf8(s)); }
+
+    #[test] fn zero_ok()                 { ok(b"0"); }
+    #[test] fn neg_zero_ok()             { ok(b"-0"); }
+    #[test] fn int_ok()                  { ok(b"123"); }
+    #[test] fn neg_int_ok()              { ok(b"-456"); }
+    #[test] fn frac_ok()                 { ok(b"3.14"); }
+    #[test] fn neg_frac_ok()             { ok(b"-2.718"); }
+    #[test] fn exp_lower_ok()            { ok(b"1e10"); }
+    #[test] fn exp_upper_ok()            { ok(b"1E10"); }
+    #[test] fn exp_plus_ok()             { ok(b"1e+10"); }
+    #[test] fn exp_minus_ok()            { ok(b"1e-10"); }
+    #[test] fn frac_exp_ok()             { ok(b"1.5e2"); }
+    #[test] fn i64_max_str_ok()          { ok(b"9223372036854775807"); }
+
+    #[test] fn leading_plus_bad()        { bad(b"+1"); }
+    #[test] fn leading_zero_bad()        { bad(b"01"); }
+    #[test] fn leading_zeros_bad()       { bad(b"00"); }
+    #[test] fn bare_dot_bad()            { bad(b".5"); }
+    #[test] fn trailing_dot_bad()        { bad(b"1."); }
+    #[test] fn missing_frac_digits_bad() { bad(b"1.e5"); }
+    #[test] fn hex_bad()                 { bad(b"0x1F"); }
+    #[test] fn incomplete_exp_bad()      { bad(b"1e"); }
+    #[test] fn incomplete_exp_sign_bad() { bad(b"1e+"); }
+    #[test] fn nan_bad()                 { bad(b"NaN"); }
+    #[test] fn inf_bad()                 { bad(b"Infinity"); }
+    #[test] fn neg_inf_bad()             { bad(b"-Infinity"); }
+    #[test] fn empty_bad()               { bad(b""); }
+    #[test] fn lone_minus_bad()          { bad(b"-"); }
+    #[test] fn double_dot_bad()          { bad(b"1..2"); }
+}
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index b07f7ab..13022b2 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -172,3 +172,40 @@ fn lazy_accepts_trailing_garbage() {
     // Lazy preserves historical behavior: trailing bytes are ignored.
     assert!(Document::parse_with_options(b"{}garbage", &lazy()).is_ok());
 }
+
+// ── Phase 2: number format ───────────────────────────────────
+
+#[test]
+fn eager_accepts_canonical_numbers() {
+    for s in ["0", "-0", "1", "-1", "3.14", "-2.718",
+              "1e10", "1E10", "1e+10", "1e-10", "1.5e2",
+              "9223372036854775807", "-9223372036854775808"] {
+        let input = format!("[{}]", s);
+        assert_accepts!(input);
+    }
+}
+
+#[test]
+#[ignore = "wired in Task 10"]
+fn eager_rejects_invalid_numbers() {
+    use quickdecode::error::qjd_err;
+    for s in ["+1", "01", "00", ".5", "1.", "1.e5", "0x1F",
+              "NaN", "Infinity", "-Infinity", "1e", "1e+"] {
+        let input = format!("[{}]", s);
+        match Document::parse_with_options(input.as_bytes(), &eager()) {
+            Err(qjd_err::QJD_INVALID_NUMBER) => {}
+            Err(other) => panic!(
+                "expected QJD_INVALID_NUMBER for {:?}, got {:?}", input, other),
+            Ok(_) => panic!("EAGER unexpectedly accepted {:?}", input),
+        }
+    }
+}
+
+#[test]
+fn lazy_defers_invalid_number_until_access() {
+    // In LAZY mode, "[01]" parses; the error surfaces when you ask for the value.
+    let doc = Document::parse_with_options(b"[01]", &lazy()).unwrap();
+    // Walking via FFI tests is verbose; we only check that the LAZY parse
+    // itself does not fail. Field-level access is covered in tests/ffi_*.
+    drop(doc);
+}

From 5e0eb269b9229939fd6f700c12eca862aed270bf Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:30:48 +0000
Subject: [PATCH 12/21] feat(validate): reject control chars and invalid UTF-8
 in string spans

---
 src/decode/string.rs        |  1 +
 src/validate/mod.rs         |  3 +++
 src/validate/strings.rs     | 36 +++++++++++++++++++++++++
 tests/rfc8259_compliance.rs | 54 +++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+)
 create mode 100644 src/validate/strings.rs

diff --git a/src/decode/string.rs b/src/decode/string.rs
index d879ac5..595329d 100644
--- a/src/decode/string.rs
+++ b/src/decode/string.rs
@@ -7,6 +7,7 @@ pub(crate) fn decode_string(
     buf: &[u8], start: usize, end: usize, scratch: &mut Vec<u8>,
 ) -> Result<(*const u8, usize), qjd_err> {
     let slice = &buf[start..end];
+    crate::validate::validate_string_span(slice)?;
     if memchr::memchr(b'\\', slice).is_none() {
         return Ok((slice.as_ptr(), slice.len()));
     }
diff --git a/src/validate/mod.rs b/src/validate/mod.rs
index 28d9311..f21dd59 100644
--- a/src/validate/mod.rs
+++ b/src/validate/mod.rs
@@ -7,6 +7,9 @@
 pub(crate) mod number;
 pub(crate) use number::validate_number;
 
+pub(crate) mod strings;
+pub(crate) use strings::validate_string_span;
+
 use crate::error::qjd_err;
 
 /// Verify that the maximum bracket-stack depth implied by `indices`
diff --git a/src/validate/strings.rs b/src/validate/strings.rs
new file mode 100644
index 0000000..f0bece1
--- /dev/null
+++ b/src/validate/strings.rs
@@ -0,0 +1,36 @@
+//! String-content validation: control chars and UTF-8.
+
+use crate::error::qjd_err;
+
+/// Verify that the raw span (excluding surrounding quotes) contains no
+/// unescaped control characters (0x00..=0x1F) and is valid UTF-8.
+pub(crate) fn validate_string_span(span: &[u8]) -> Result<(), qjd_err> {
+    // Control chars are forbidden inside a JSON string per RFC 8259 §7.
+    // Cheap pass first: bytewise check.
+    if span.iter().any(|&b| b < 0x20) {
+        return Err(qjd_err::QJD_INVALID_STRING);
+    }
+    // UTF-8 validation. Backslash escapes are not yet expanded; the byte
+    // immediately after `\` may legally be any escape introducer
+    // (`"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`, `u`), all of which are ASCII.
+    // So escape sequences never split or hide a multi-byte sequence, and the
+    // literal (non-escape) bytes can be validated on the raw span as-is.
+    if std::str::from_utf8(span).is_err() {
+        return Err(qjd_err::QJD_INVALID_UTF8);
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test] fn ascii_ok()         { assert!(validate_string_span(b"hello").is_ok()); }
+    #[test] fn utf8_ok()          { assert!(validate_string_span("中文".as_bytes()).is_ok()); }
+    #[test] fn escapes_ok()       { assert!(validate_string_span(b"a\\nb\\u00e9").is_ok()); }
+    #[test] fn tab_raw_bad()      { assert_eq!(validate_string_span(b"a\tb").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
+    #[test] fn null_raw_bad()     { assert_eq!(validate_string_span(b"a\x00b").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
+    #[test] fn newline_raw_bad()  { assert_eq!(validate_string_span(b"a\nb").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
+    #[test] fn del_0x7f_ok()      { assert!(validate_string_span(b"a\x7fb").is_ok()); } // RFC 8259 does NOT forbid 0x7F
+    #[test] fn invalid_utf8_bad() { assert_eq!(validate_string_span(&[0xC0, 0xC0]).unwrap_err(), qjd_err::QJD_INVALID_UTF8); }
+}
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index 13022b2..d5dbc57 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -209,3 +209,57 @@ fn lazy_defers_invalid_number_until_access() {
     // itself does not fail. Field-level access is covered in tests/ffi_*.
     drop(doc);
 }
+
+// ── Phase 4 + 5: string content ──────────────────────────────
+
+#[test]
+#[ignore = "wired in Task 10"]
+fn eager_rejects_raw_tab_in_string() {
+    use quickdecode::error::qjd_err;
+    let input = b"[\"a\tb\"]";
+    match Document::parse_with_options(input, &eager()) {
+        Err(qjd_err::QJD_INVALID_STRING) => {}
+        Err(other) => panic!("expected QJD_INVALID_STRING, got {:?}", other),
+        Ok(_) => panic!("EAGER unexpectedly accepted raw tab in string"),
+    }
+}
+
+#[test]
+#[ignore = "wired in Task 10"]
+fn eager_rejects_raw_null_in_string() {
+    use quickdecode::error::qjd_err;
+    let input = b"[\"a\x00b\"]";
+    match Document::parse_with_options(input, &eager()) {
+        Err(qjd_err::QJD_INVALID_STRING) => {}
+        Err(other) => panic!("expected QJD_INVALID_STRING, got {:?}", other),
+        Ok(_) => panic!("EAGER unexpectedly accepted raw null in string"),
+    }
+}
+
+#[test]
+#[ignore = "wired in Task 10"]
+fn eager_rejects_invalid_utf8_in_string() {
+    use quickdecode::error::qjd_err;
+    let input = &[b'[', b'"', 0xC0, 0xC0, b'"', b']'];
+    match Document::parse_with_options(input, &eager()) {
+        Err(qjd_err::QJD_INVALID_UTF8) => {}
+        Err(other) => panic!("expected QJD_INVALID_UTF8, got {:?}", other),
+        Ok(_) => panic!("EAGER unexpectedly accepted invalid UTF-8 in string"),
+    }
+}
+
+#[test]
+fn eager_accepts_escape_sequences() {
+    assert_accepts!("[\"a\\nb\\u00e9\"]");
+    assert_accepts!("[\"emoji \\uD83D\\uDE00\"]");
+}
+
+#[test]
+fn lazy_accepts_raw_tab_but_decode_fails() {
+    let input = b"[\"a\tb\"]";
+    let doc = Document::parse_with_options(input, &lazy()).expect("lazy accepts raw control");
+    drop(doc);
+    // Field-level rejection on access is enforced by decode/string.rs and
+    // is covered by tests/ffi_strings.rs (existing decode_string tests cover
+    // the error type); no extra assertion needed here.
+}

From 3eb8082805160bff3e03d520a1ff35b980cd753c Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:42:55 +0000
Subject: [PATCH 13/21] =?UTF-8?q?feat(validate):=20wire=20eager=20pass=20?=
 =?UTF-8?q?=E2=80=94=20full=20RFC=208259=20number+string=20validation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/doc.rs                  |  2 +-
 src/validate/mod.rs         | 95 +++++++++++++++++++++++++++++++++++++
 tests/rfc8259_compliance.rs |  4 --
 3 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/src/doc.rs b/src/doc.rs
index 75deb8d..1e6b5c7 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -28,7 +28,7 @@ impl<'a> Document<'a> {
 
         if opts.is_eager() {
             crate::validate::validate_trailing(buf, &indices)?;
-            // TODO(Task 10): validate_eager_values
+            crate::validate::validate_eager_values(buf, &indices)?;
         }
 
         Ok(Self {
diff --git a/src/validate/mod.rs b/src/validate/mod.rs
index f21dd59..1b804fb 100644
--- a/src/validate/mod.rs
+++ b/src/validate/mod.rs
@@ -90,6 +90,101 @@ pub(crate) fn validate_trailing(
     Ok(())
 }
 
+/// Walk `indices` and validate every scalar value (numbers + strings).
+/// Called only in EAGER mode.
+pub(crate) fn validate_eager_values(
+    buf: &[u8],
+    indices: &[u32],
+) -> Result<(), qjd_err> {
+    let mut i = 0;
+    while i + 1 < indices.len() {
+        let idx = indices[i];
+        if idx == u32::MAX { break; }
+        let pos = idx as usize;
+        let b = buf[pos];
+
+        // Strings: opening quote here, closing quote at indices[i+1].
+        // (The scanner emits BOTH quotes of a string in order.)
+        if b == b'"' {
+            let close = indices[i + 1] as usize;
+            // Defensive: scanner pairs quotes correctly, but guard anyway.
+            if close <= pos || close >= buf.len() || buf[close] != b'"' {
+                return Err(qjd_err::QJD_PARSE_ERROR);
+            }
+            let span = &buf[pos + 1 .. close];
+            strings::validate_string_span(span)?;
+            i += 2;
+            continue;
+        }
+
+        // Container brackets and `:`/`,` are not values; skip.
+        if matches!(b, b'{' | b'}' | b'[' | b']' | b':' | b',') {
+            i += 1;
+            continue;
+        }
+
+        // Should not happen: scanner only emits the 7 structural chars.
+        return Err(qjd_err::QJD_PARSE_ERROR);
+    }
+
+    // Scalar values (numbers, true, false, null) live in the gaps between
+    // structural offsets. Walk those gaps and dispatch.
+    validate_scalars_in_gaps(buf, indices)
+}
+
+/// For each consecutive pair of structural offsets, examine the bytes
+/// between them. If the gap contains a scalar (anything other than
+/// whitespace), validate its grammar.
+fn validate_scalars_in_gaps(buf: &[u8], indices: &[u32]) -> Result<(), qjd_err> {
+    let mut prev_end: usize = 0;
+    let mut in_str = false;
+    for &idx in indices {
+        if idx == u32::MAX { break; }
+        let pos = idx as usize;
+        let b = buf[pos];
+
+        if b == b'"' {
+            // Toggle: the bytes between two quotes are the string interior
+            // (already validated above). Skip gap-scanning across them.
+            if in_str {
+                in_str = false;
+                prev_end = pos + 1;
+            } else {
+                // Validate any scalar in the gap leading up to this quote.
+                check_gap(buf, prev_end, pos)?;
+                in_str = true;
+            }
+            continue;
+        }
+        if in_str { continue; }
+
+        check_gap(buf, prev_end, pos)?;
+        prev_end = pos + 1;
+    }
+    // Tail gap (top-level scalar like "42")
+    check_gap(buf, prev_end, buf.len())
+}
+
+fn check_gap(buf: &[u8], start: usize, end: usize) -> Result<(), qjd_err> {
+    // Strip surrounding whitespace.
+    let mut s = start;
+    while s < end && is_ws(buf[s]) { s += 1; }
+    let mut e = end;
+    while e > s && is_ws(buf[e - 1]) { e -= 1; }
+    if s == e { return Ok(()); }
+    let scalar = &buf[s..e];
+
+    // Dispatch on first byte.
+    match scalar[0] {
+        b't' => if scalar == b"true"  { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
+        b'f' => if scalar == b"false" { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
+        b'n' => if scalar == b"null"  { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
+        // Everything else (including `+`, `.`, letters like `N`/`I`) is
+        // treated as a malformed number so the caller gets QJD_INVALID_NUMBER.
+        _ => number::validate_number(scalar),
+    }
+}
+
 #[inline(always)]
 fn is_ws(b: u8) -> bool {
     matches!(b, b' ' | b'\t' | b'\n' | b'\r')
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index d5dbc57..be9c8de 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -186,7 +186,6 @@ fn eager_accepts_canonical_numbers() {
 }
 
 #[test]
-#[ignore = "wired in Task 10"]
 fn eager_rejects_invalid_numbers() {
     use quickdecode::error::qjd_err;
     for s in ["+1", "01", "00", ".5", "1.", "1.e5", "0x1F",
@@ -213,7 +212,6 @@ fn lazy_defers_invalid_number_until_access() {
 // ── Phase 4 + 5: string content ──────────────────────────────
 
 #[test]
-#[ignore = "wired in Task 10"]
 fn eager_rejects_raw_tab_in_string() {
     use quickdecode::error::qjd_err;
     let input = b"[\"a\tb\"]";
@@ -225,7 +223,6 @@ fn eager_rejects_raw_tab_in_string() {
 }
 
 #[test]
-#[ignore = "wired in Task 10"]
 fn eager_rejects_raw_null_in_string() {
     use quickdecode::error::qjd_err;
     let input = b"[\"a\x00b\"]";
@@ -237,7 +234,6 @@ fn eager_rejects_raw_null_in_string() {
 }
 
 #[test]
-#[ignore = "wired in Task 10"]
 fn eager_rejects_invalid_utf8_in_string() {
     use quickdecode::error::qjd_err;
     let input = &[b'[', b'"', 0xC0, 0xC0, b'"', b']'];

From 69e1b976181ddc459ca5ee323213951b3477f5ee Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 16:50:59 +0000
Subject: [PATCH 14/21] fix(validate): check_gap distinguishes wrong-case
 literals from number-like tokens

---
 src/validate/mod.rs         |  9 ++++--
 tests/rfc8259_compliance.rs | 62 +++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/src/validate/mod.rs b/src/validate/mod.rs
index 1b804fb..c834ea7 100644
--- a/src/validate/mod.rs
+++ b/src/validate/mod.rs
@@ -179,9 +179,12 @@ fn check_gap(buf: &[u8], start: usize, end: usize) -> Result<(), qjd_err> {
         b't' => if scalar == b"true"  { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
         b'f' => if scalar == b"false" { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
         b'n' => if scalar == b"null"  { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
-        // Everything else (including `+`, `.`, letters like `N`/`I`) is
-        // treated as a malformed number so the caller gets QJD_INVALID_NUMBER.
-        _ => number::validate_number(scalar),
+        // RFC-valid and common malformed number starters (+, ., -, digit).
+        b'-' | b'0'..=b'9' | b'+' | b'.' => number::validate_number(scalar),
+        // NaN / Infinity are "meant as numbers" → QJD_INVALID_NUMBER, not parse error.
+        _ if scalar == b"NaN" || scalar == b"Infinity" => number::validate_number(scalar),
+        // Wrong-case literals (TRUE, NULL), identifiers (undefined), other garbage.
+        _ => Err(qjd_err::QJD_PARSE_ERROR),
     }
 }
 
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index be9c8de..9db082a 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -259,3 +259,65 @@ fn lazy_accepts_raw_tab_but_decode_fails() {
     // is covered by tests/ffi_strings.rs (existing decode_string tests cover
     // the error type); no extra assertion needed here.
 }
+
+// ── Task 10 fix: check_gap dispatch ──────────────────────────
+
+#[test]
+fn eager_rejects_uppercase_true_as_parse_error() {
+    use quickdecode::error::qjd_err;
+    let r = Document::parse_with_options(b"TRUE", &eager());
+    match r {
+        Err(qjd_err::QJD_PARSE_ERROR) => {}
+        other => panic!("expected QJD_PARSE_ERROR, got {:?}", other.err()),
+    }
+}
+
+#[test]
+fn eager_rejects_uppercase_false_as_parse_error() {
+    use quickdecode::error::qjd_err;
+    let r = Document::parse_with_options(b"False", &eager());
+    match r {
+        Err(qjd_err::QJD_PARSE_ERROR) => {}
+        other => panic!("expected QJD_PARSE_ERROR, got {:?}", other.err()),
+    }
+}
+
+#[test]
+fn eager_rejects_uppercase_null_as_parse_error() {
+    use quickdecode::error::qjd_err;
+    let r = Document::parse_with_options(b"NULL", &eager());
+    match r {
+        Err(qjd_err::QJD_PARSE_ERROR) => {}
+        other => panic!("expected QJD_PARSE_ERROR, got {:?}", other.err()),
+    }
+}
+
+#[test]
+fn eager_rejects_undefined_as_parse_error() {
+    use quickdecode::error::qjd_err;
+    let r = Document::parse_with_options(b"undefined", &eager());
+    match r {
+        Err(qjd_err::QJD_PARSE_ERROR) => {}
+        other => panic!("expected QJD_PARSE_ERROR, got {:?}", other.err()),
+    }
+}
+
+#[test]
+fn eager_rejects_nan_as_invalid_number() {
+    use quickdecode::error::qjd_err;
+    let r = Document::parse_with_options(b"NaN", &eager());
+    match r {
+        Err(qjd_err::QJD_INVALID_NUMBER) => {}
+        other => panic!("expected QJD_INVALID_NUMBER, got {:?}", other.err()),
+    }
+}
+
+#[test]
+fn eager_rejects_infinity_as_invalid_number() {
+    use quickdecode::error::qjd_err;
+    let r = Document::parse_with_options(b"Infinity", &eager());
+    match r {
+        Err(qjd_err::QJD_INVALID_NUMBER) => {}
+        other => panic!("expected QJD_INVALID_NUMBER, got {:?}", other.err()),
+    }
+}

From 33d85223d2d1a2121425be83f8c105207baf9ffb Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 17:02:47 +0000
Subject: [PATCH 15/21] test(rfc8259): exhaustive RFC 8259 conformance corpus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add five nested mod blocks (structural / whitespace / literals / strings /
numbers) to tests/rfc8259_compliance.rs with 76 tests (73 passing, 3 ignored).

Fix two gaps in eager validation:
- parse_with_options: reject empty / whitespace-only input (RFC 8259 §2 requires
  a value; both EAGER and LAZY now return QJD_PARSE_ERROR).
- validate_scalars_in_gaps: track prev/next structural context in check_gap so
  that an empty gap after ':' or ',' (when not followed by a value-starter like
  '"', '{', '[') is rejected as QJD_PARSE_ERROR. Catches {"a":}, [,], [1,],
  and {"a":1,} without a full grammar-aware walk.

Three tests are marked #[ignore] with issue #37 references for cases that
require a grammar-aware pass: missing-colon ({"a"}), leading-comma-with-value
([,1]), and missing-comma-in-object ({"a":1"b":2}).
---
 src/doc.rs                  |   6 +
 src/validate/mod.rs         |  33 ++-
 tests/rfc8259_compliance.rs | 396 ++++++++++++++++++++++++++++++++++++
 3 files changed, 429 insertions(+), 6 deletions(-)

diff --git a/src/doc.rs b/src/doc.rs
index 1e6b5c7..d20e17f 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -19,6 +19,12 @@ impl<'a> Document<'a> {
         buf: &'a [u8],
         opts: &crate::options::Options,
     ) -> Result<Self, qjd_err> {
+        // RFC 8259 §2: "A JSON text is a serialized value."
+        // Empty input and whitespace-only input contain no value.
+        if buf.iter().all(|&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r')) {
+            return Err(qjd_err::QJD_PARSE_ERROR);
+        }
+
         let max_depth = opts.effective_max_depth();
         let mut indices = Vec::new();
         crate::scan::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?;
diff --git a/src/validate/mod.rs b/src/validate/mod.rs
index c834ea7..2a37bd9 100644
--- a/src/validate/mod.rs
+++ b/src/validate/mod.rs
@@ -138,6 +138,9 @@ pub(crate) fn validate_eager_values(
 fn validate_scalars_in_gaps(buf: &[u8], indices: &[u32]) -> Result<(), qjd_err> {
     let mut prev_end: usize = 0;
     let mut in_str = false;
+    // Track the last structural char seen (an open-quote counts) so check_gap
+    // can reject empty gaps where a value is required (after `:` or `,`).
+    let mut prev_structural: u8 = 0;
     for &idx in indices {
         if idx == u32::MAX { break; }
         let pos = idx as usize;
@@ -151,27 +154,45 @@ fn validate_scalars_in_gaps(buf: &[u8], indices: &[u32]) -> Result<(), qjd_err>
                 prev_end = pos + 1;
             } else {
                 // Validate any scalar in the gap leading up to this quote.
-                check_gap(buf, prev_end, pos)?;
+                // An open-quote is itself a value, so pass it as the next char:
+                // an empty gap before a string is always fine (`:` `"` and `,` `"` are
+                // both valid — the string IS the value).
+                check_gap(buf, prev_end, pos, prev_structural, b'"')?;
                 in_str = true;
+                prev_structural = b'"';
             }
             continue;
         }
         if in_str { continue; }
 
-        check_gap(buf, prev_end, pos)?;
+        check_gap(buf, prev_end, pos, prev_structural, b)?;
         prev_end = pos + 1;
+        prev_structural = b;
     }
-    // Tail gap (top-level scalar like "42")
-    check_gap(buf, prev_end, buf.len())
+    // Tail gap (top-level scalar like "42"): next char is EOF (0 sentinel)
+    check_gap(buf, prev_end, buf.len(), prev_structural, 0)
 }
 
-fn check_gap(buf: &[u8], start: usize, end: usize) -> Result<(), qjd_err> {
+/// `prev_structural`: the last structural char seen before this gap (`"` after a string; 0 at start of input).
+/// `next_structural`: the structural char immediately after this gap (0 at end of input).
+fn check_gap(buf: &[u8], start: usize, end: usize, prev_structural: u8, next_structural: u8) -> Result<(), qjd_err> {
     // Strip surrounding whitespace.
     let mut s = start;
     while s < end && is_ws(buf[s]) { s += 1; }
     let mut e = end;
     while e > s && is_ws(buf[e - 1]) { e -= 1; }
-    if s == e { return Ok(()); }
+    if s == e {
+        // Empty gap: a value is required after `:` (object value) or `,` (next
+        // element), BUT only when the next token is not a structural value-starter
+        // (`"`, `{`, `[`) — those ARE the values. An empty gap before `}` / `]`
+        // / `,` when the preceding token demands a value is a structural error.
+        // This heuristic catches {"a":}, [,], [1,] without a full grammar walk.
+        let next_is_value_starter = matches!(next_structural, b'"' | b'{' | b'[');
+        if matches!(prev_structural, b':' | b',') && !next_is_value_starter {
+            return Err(qjd_err::QJD_PARSE_ERROR);
+        }
+        return Ok(());
+    }
     let scalar = &buf[s..e];
 
     // Dispatch on first byte.
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index 9db082a..b85b921 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -321,3 +321,399 @@ fn eager_rejects_infinity_as_invalid_number() {
         other => panic!("expected QJD_INVALID_NUMBER, got {:?}", other.err()),
     }
 }
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Task 11: Comprehensive RFC 8259 conformance corpus
+// Organized into nested mod blocks per category.
+// ─────────────────────────────────────────────────────────────────────────────
+
+mod structural {
+    use super::*;
+
+    // RFC 8259 §2-3: JSON values — null, true, false are valid root values.
+    #[test]
+    fn primitives_valid() {
+        assert_accepts!("null");
+        assert_accepts!("true");
+        assert_accepts!("false");
+    }
+
+    // RFC 8259 §2: a JSON text contains exactly one value — empty is not valid.
+    #[test]
+    fn empty_input_rejected() {
+        assert_rejects_both!("");
+    }
+
+    // RFC 8259 §2: whitespace-only input also contains no value.
+    #[test]
+    fn whitespace_only_rejected() {
+        assert_rejects_both!("   ");
+        assert_rejects_both!("\t\n\r");
+    }
+
+    // RFC 8259 §4-5: empty object and empty array are valid.
+    #[test]
+    fn empty_containers() {
+        assert_accepts!("{}");
+        assert_accepts!("[]");
+    }
+
+    // RFC 8259 §4-5: nested containers with mixed value types.
+    #[test]
+    fn nested_containers() {
+        assert_accepts!("[{\"a\":[1,{\"b\":2}]}]");
+        assert_accepts!("{\"x\":{\"y\":{\"z\":null}}}");
+        assert_accepts!("[[],[],[[],[]]]");
+    }
+
+    // RFC 8259 §4: '{' must be followed by a matching '}'.
+    #[test]
+    fn unclosed_brace() {
+        assert_rejects_both!("{");
+    }
+
+    // RFC 8259 §5: '[' must be followed by a matching ']'.
+    #[test]
+    fn unclosed_bracket() {
+        assert_rejects_both!("[");
+    }
+
+    // Bracket mismatch: '{' closed by ']'.
+    #[test]
+    fn mismatched_brace_bracket() {
+        assert_rejects_both!("{]");
+    }
+
+    // Bracket mismatch: '[' closed by '}'.
+    #[test]
+    fn mismatched_bracket_brace() {
+        assert_rejects_both!("[}");
+    }
+
+    // RFC 8259 §4: object value must follow the colon — omitting it is invalid.
+    // Eager catches the empty gap after ':'; lazy defers (structural-only rule).
+    #[test]
+    fn missing_value() {
+        assert_rejects_eager!("{\"a\":}", QJD_PARSE_ERROR);
+    }
+
+    // RFC 8259 §4: colon between key and value is mandatory.
+    // The scanner emits {"a"} as {""} with no ':' — eager does not detect this
+    // because no structural gap heuristic covers the absence of ':'.
+    // Deferred to a follow-up grammar-aware pass (issue #37).
+    #[test]
+    #[ignore = "missing-colon detection deferred — grammar-aware pass required (issue #37)"]
+    fn missing_colon() {
+        assert_rejects_eager!("{\"a\"}", QJD_PARSE_ERROR);
+    }
+
+    // RFC 8259 §5: a leading comma in an array is invalid.
+    // [,] — the gap between ',' and ']' is empty → eager rejects via the
+    // ':'/',' heuristic in check_gap.
+    #[test]
+    fn leading_comma_array_empty() {
+        assert_rejects_eager!("[,]", QJD_PARSE_ERROR);
+    }
+
+    // [,1] — leading comma followed by a value: the gap between '[' and ','
+    // is empty (no value yet) but prev_structural is '[', not ',' — so the
+    // heuristic does not fire. Deferred to a grammar-aware pass (issue #37).
+    #[test]
+    #[ignore = "leading-comma-before-value detection deferred — grammar-aware pass required (issue #37)"]
+    fn leading_comma_array_with_value() {
+        assert_rejects_eager!("[,1]", QJD_PARSE_ERROR);
+    }
+
+    // RFC 8259 §5: trailing comma in an array is invalid.
+    #[test]
+    fn trailing_comma_array() {
+        assert_rejects_eager!("[1,]", QJD_PARSE_ERROR);
+    }
+
+    // RFC 8259 §4: trailing comma in an object is invalid.
+    #[test]
+    fn trailing_comma_object() {
+        assert_rejects_eager!("{\"a\":1,}", QJD_PARSE_ERROR);
+    }
+
+    // RFC 8259 §5: array elements must be separated by exactly one comma.
+    // [1 2] contains a space-separated pair that validate_number rejects as
+    // QJD_INVALID_NUMBER (not QJD_PARSE_ERROR) — the element IS rejected by
+    // eager, just with a different error code.
+    #[test]
+    fn missing_comma_in_array_rejected() {
+        // We assert only that eager rejects; the exact code is QJD_INVALID_NUMBER
+        // because the "1 2" token fails number validation (space within number).
+        let input = b"[1 2]";
+        assert!(
+            Document::parse_with_options(input, &eager()).is_err(),
+            "EAGER should reject [1 2]"
+        );
+    }
+
+    // Missing comma inside an object (no structural separator between values):
+    // {"a":1"b":2} — the scanner emits `{`, `"`, `"`, `:`, `"`, `"`, `:`, `}`.
+    // The gap between the first `:` and the open-quote of "b" holds `1`, a
+    // valid number, so check_gap accepts it; the empty-gap heuristic only
+    // fires on `:` / `,` with nothing after them, so this slips through.
+    // Deferred to grammar-aware pass (issue #37).
+    #[test]
+    #[ignore = "missing-comma-in-object detection deferred — grammar-aware pass required (issue #37)"]
+    fn missing_comma_in_object() {
+        assert_rejects_eager!("{\"a\":1\"b\":2}", QJD_PARSE_ERROR);
+    }
+}
+
+mod whitespace {
+    use super::*;
+
+    // RFC 8259 §2: insignificant whitespace (space, tab, LF, CR) is allowed
+    // before and after structural characters.
+
+    #[test]
+    fn spaces_around_object() {
+        assert_accepts!("  {  }  ");
+    }
+
+    #[test]
+    fn tabs_around_object() {
+        assert_accepts!("\t{}\t");
+    }
+
+    #[test]
+    fn newlines_around() {
+        assert_accepts!("\n{}\n");
+    }
+
+    #[test]
+    fn cr_around() {
+        assert_accepts!("\r{}\r");
+    }
+
+    #[test]
+    fn inside_object() {
+        assert_accepts!("{ \"a\" : 1 , \"b\" : 2 }");
+    }
+
+    #[test]
+    fn inside_array() {
+        assert_accepts!("[ 1 , 2 , 3 ]");
+    }
+
+    // All four RFC whitespace characters interleaved.
+    #[test]
+    fn mixed_whitespace() {
+        assert_accepts!(" \t\n\r { \t\n\r } \t\n\r ");
+    }
+}
+
+mod literals {
+    use super::*;
+
+    // RFC 8259 §3: only lowercase "true", "false", "null" are valid.
+    // Wrong case must be rejected by eager.
+
+    #[test]
+    fn true_must_be_lowercase() {
+        assert_rejects_eager!("TRUE", QJD_PARSE_ERROR);
+        assert_rejects_eager!("True", QJD_PARSE_ERROR);
+        assert_rejects_eager!("tRuE", QJD_PARSE_ERROR);
+    }
+
+    #[test]
+    fn false_must_be_lowercase() {
+        assert_rejects_eager!("FALSE", QJD_PARSE_ERROR);
+        assert_rejects_eager!("False", QJD_PARSE_ERROR);
+    }
+
+    #[test]
+    fn null_must_be_lowercase() {
+        assert_rejects_eager!("NULL", QJD_PARSE_ERROR);
+        assert_rejects_eager!("Null", QJD_PARSE_ERROR);
+    }
+
+    // Lua/Ruby-ism: "nil" is not a valid JSON value.
+    #[test]
+    fn nil_rejected() {
+        assert_rejects_eager!("nil", QJD_PARSE_ERROR);
+    }
+
+    // JavaScript-ism: "undefined" is not a valid JSON value.
+    #[test]
+    fn undefined_rejected() {
+        assert_rejects_eager!("undefined", QJD_PARSE_ERROR);
+    }
+}
+
+mod strings {
+    use super::*;
+
+    // RFC 8259 §7: string grammar.
+
+    // Empty string is valid.
+    #[test]
+    fn empty_string() {
+        assert_accepts!("\"\"");
+        assert_accepts!("[\"\"  ]");
+    }
+
+    // Printable ASCII (no special chars) is valid.
+    #[test]
+    fn ascii_string() {
+        assert_accepts!("\"hello world\"");
+        assert_accepts!("\"abcdefghijklmnopqrstuvwxyz 0123456789 !@#$%^&*()\"");
+    }
+
+    // RFC 8259 §7: all defined escape sequences must be accepted.
+    #[test]
+    fn all_escape_sequences() {
+        // \"  \\  \/  \b  \f  \n  \r  \t
+        assert_accepts!("\"\\\" \\\\ \\/ \\b \\f \\n \\r \\t\"");
+    }
+
+    // RFC 8259 §7: \uXXXX Unicode escape (4 hex digits).
+    #[test]
+    fn unicode_escape() {
+        assert_accepts!("\"\\u0000\"");   // NUL encoded as escape — valid
+        assert_accepts!("\"\\u00e9\"");   // é
+        assert_accepts!("\"\\u4e2d\\u6587\""); // 中文
+    }
+
+    // RFC 8259 §7: surrogate pair (\uD800–\uDBFF followed by \uDC00–\uDFFF).
+    #[test]
+    fn surrogate_pair() {
+        assert_accepts!("\"\\uD83D\\uDE00\""); // 😀 U+1F600
+    }
+
+    // RFC 8259 §7: strings must be terminated with a closing '"'.
+    #[test]
+    fn unclosed_string_rejected() {
+        assert_rejects_both!("\"hello");
+        assert_rejects_both!("\"");
+    }
+
+    // JSON does not allow single-quoted strings (JavaScript-ism).
+    #[test]
+    fn single_quoted_string_rejected() {
+        assert_rejects_eager!("'hello'", QJD_PARSE_ERROR);
+    }
+
+    // RFC 8259 §7: control characters (U+0000–U+001F) must be escaped.
+    // A raw tab (0x09) inside a string is forbidden.
+    #[test]
+    fn raw_control_char_rejected() {
+        use quickdecode::error::qjd_err;
+        let with_tab  = b"[\"a\tb\"]";
+        let with_null = b"[\"a\x00b\"]";
+        match Document::parse_with_options(with_tab, &eager()) {
+            Err(qjd_err::QJD_INVALID_STRING) => {}
+            other => panic!("expected QJD_INVALID_STRING for raw tab, got {:?}", other.err()),
+        }
+        match Document::parse_with_options(with_null, &eager()) {
+            Err(qjd_err::QJD_INVALID_STRING) => {}
+            other => panic!("expected QJD_INVALID_STRING for raw NUL, got {:?}", other.err()),
+        }
+    }
+
+    // Strings with valid multi-byte UTF-8 content are accepted.
+    #[test]
+    fn utf8_multibyte_string() {
+        assert_accepts!("\"café\"");          // 2-byte sequence
+        assert_accepts!("\"中文\"");            // 3-byte sequences
+        assert_accepts!("\"😀\"");             // 4-byte sequence (emoji)
+    }
+}
+
+mod numbers {
+    use super::*;
+
+    // RFC 8259 §6: number grammar.
+    // These complement the existing top-level number tests with a thorough
+    // table-driven suite organized by sub-rule.
+
+    // §6 integer: optional minus, zero, or non-zero digit followed by digits.
+    #[test]
+    fn integers_valid() {
+        for s in ["0", "-0", "1", "-1", "123", "-456",
+                  "9223372036854775807", "-9223372036854775808"] {
+            let input = format!("[{}]", s);
+            assert_accepts!(input);
+        }
+    }
+
+    // §6 fraction: a '.' followed by one or more digits.
+    #[test]
+    fn fractions_valid() {
+        for s in ["0.0", "-0.0", "1.5", "-2.718", "3.14159",
+                  "0.123456789"] {
+            let input = format!("[{}]", s);
+            assert_accepts!(input);
+        }
+    }
+
+    // §6 exponent: 'e'/'E' with optional '+'/'-' and one or more digits.
+    #[test]
+    fn exponents_valid() {
+        for s in ["1e10", "1E10", "1e+10", "1e-10",
+                  "1.5e2", "2.5E-3", "0e0", "-0e0"] {
+            let input = format!("[{}]", s);
+            assert_accepts!(input);
+        }
+    }
+
+    // §6: leading '+' is not allowed.
+    #[test]
+    fn leading_plus_rejected() {
+        assert_rejects_eager!("[+1]", QJD_INVALID_NUMBER);
+    }
+
+    // §6: leading zeros are not allowed (except bare "0").
+    #[test]
+    fn leading_zero_rejected() {
+        assert_rejects_eager!("[01]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[00]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[007]", QJD_INVALID_NUMBER);
+    }
+
+    // §6: fraction requires at least one digit after the dot.
+    #[test]
+    fn trailing_dot_rejected() {
+        assert_rejects_eager!("[1.]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[1.e5]", QJD_INVALID_NUMBER);
+    }
+
+    // §6: fraction cannot start without an integer part.
+    #[test]
+    fn leading_dot_rejected() {
+        assert_rejects_eager!("[.5]", QJD_INVALID_NUMBER);
+    }
+
+    // §6: exponent requires at least one digit.
+    #[test]
+    fn incomplete_exponent_rejected() {
+        assert_rejects_eager!("[1e]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[1e+]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[1e-]", QJD_INVALID_NUMBER);
+    }
+
+    // Hex notation is not part of the JSON number grammar.
+    #[test]
+    fn hex_notation_rejected() {
+        assert_rejects_eager!("[0x1F]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[0xFF]", QJD_INVALID_NUMBER);
+    }
+
+    // Non-finite values are not part of JSON.
+    #[test]
+    fn non_finite_rejected() {
+        assert_rejects_eager!("[NaN]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[Infinity]", QJD_INVALID_NUMBER);
+        assert_rejects_eager!("[-Infinity]", QJD_INVALID_NUMBER);
+    }
+
+    // Lone minus is not a valid number.
+    #[test]
+    fn lone_minus_rejected() {
+        assert_rejects_eager!("[-]", QJD_INVALID_NUMBER);
+    }
+}

From da99b7dadc6d4ade9de2cc925649aa5f1b91b4f2 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 17:24:29 +0000
Subject: [PATCH 16/21] test(json_test_suite): vendor JSONTestSuite and add
 cross-mode walker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add JSONTestSuite as a git submodule at tests/vendor/JSONTestSuite and
introduce tests/json_test_suite.rs which walks every y_*, n_*, and i_*
file: y_ files must parse in both modes, n_ files must fail eager parse,
i_ files are logged but not asserted.

While running the walker, two real validator gaps were discovered and fixed
(both < 20 lines each):

- validate_trailing: used the last structural char in the whole buffer as
  the root-end marker, causing [][], ["a":true]"x" etc. to slip through
  as if they had no trailing content.  Fixed by walking indices to find
  the first depth-0 container close (or the first root string's close).

- validate_string_span: validated UTF-8 and control chars but did not
  check escape sequences, so \a, \x00, \uZZZZ, dangling \ etc. were
  accepted.  Added a one-pass walker that validates every backslash escape
  against the RFC 8259 §7 grammar.

The three unit tests in decode/string.rs that expected QJD_DECODE_FAILED
for bad escapes now expect QJD_INVALID_STRING because validate_string_span
(called first by decode_string) catches them before the decode loop does.

13 n_* files remain in KNOWN_N_FAILURES: all require a grammar-aware pass
to enforce token-ordering rules (non-string keys, comma-vs-colon placement,
missing commas between items).  Each entry is annotated with the follow-up
reference (issue #37).

Walker results: y_* 95/95 pass, n_* 175/188 pass (13 whitelisted), i_* 35
informational verdicts printed.
---
 .gitmodules                |   3 +
 src/decode/string.rs       |  11 +-
 src/validate/mod.rs        |  83 ++++++++++-----
 src/validate/strings.rs    |  49 +++++++--
 tests/json_test_suite.rs   | 200 +++++++++++++++++++++++++++++++++++++
 tests/vendor/JSONTestSuite |   1 +
 6 files changed, 309 insertions(+), 38 deletions(-)
 create mode 100644 tests/json_test_suite.rs
 create mode 160000 tests/vendor/JSONTestSuite

diff --git a/.gitmodules b/.gitmodules
index 2d1c2aa..8baae4a 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "vendor/lua-cjson"]
 	path = vendor/lua-cjson
 	url = https://github.com/openresty/lua-cjson.git
+[submodule "tests/vendor/JSONTestSuite"]
+	path = tests/vendor/JSONTestSuite
+	url = https://github.com/nst/JSONTestSuite
diff --git a/src/decode/string.rs b/src/decode/string.rs
index 595329d..8572441 100644
--- a/src/decode/string.rs
+++ b/src/decode/string.rs
@@ -164,16 +164,21 @@ mod tests {
 
     #[test]
     fn invalid_hex_in_unicode_fails() {
-        assert_eq!(d(b"\\uZZZZ").unwrap_err(), qjd_err::QJD_DECODE_FAILED);
+        // validate_string_span (called first) catches non-hex digits as
+        // QJD_INVALID_STRING; the decode loop would also catch it as
+        // QJD_DECODE_FAILED, but we never reach it.
+        assert_eq!(d(b"\\uZZZZ").unwrap_err(), qjd_err::QJD_INVALID_STRING);
     }
 
     #[test]
     fn unknown_escape_fails() {
-        assert_eq!(d(b"\\q").unwrap_err(), qjd_err::QJD_DECODE_FAILED);
+        // validate_string_span catches unknown escape introducers first.
+        assert_eq!(d(b"\\q").unwrap_err(), qjd_err::QJD_INVALID_STRING);
     }
 
     #[test]
     fn dangling_backslash_fails() {
-        assert_eq!(d(b"a\\").unwrap_err(), qjd_err::QJD_DECODE_FAILED);
+        // validate_string_span catches a trailing lone backslash first.
+        assert_eq!(d(b"a\\").unwrap_err(), qjd_err::QJD_INVALID_STRING);
     }
 }
diff --git a/src/validate/mod.rs b/src/validate/mod.rs
index 2a37bd9..c6d972d 100644
--- a/src/validate/mod.rs
+++ b/src/validate/mod.rs
@@ -44,43 +44,78 @@ pub(crate) fn validate_depth(
 
 /// Verify there is no non-whitespace content after the root value.
 ///
-/// The root value's closer is the last non-sentinel structural offset
-/// in `indices` for a container, or the start of the scalar's trailing
-/// whitespace for a top-level scalar value. We locate the position
-/// `end_of_root` past which only whitespace is allowed.
+/// For container roots (`{`/`[`), we walk `indices` to find the closing
+/// bracket where nesting depth returns to zero — that is the actual root
+/// end, regardless of how many additional structural chars the buffer has.
+/// For scalar roots (no opening bracket), we scan the raw bytes.
 pub(crate) fn validate_trailing(
     buf: &[u8],
     indices: &[u32],
 ) -> Result<(), qjd_err> {
-    // Find the last real offset (skip the u32::MAX sentinel).
-    let last = indices.iter().rev()
-        .find(|&&i| i != u32::MAX)
-        .copied();
+    // Find the first real structural character to determine root kind.
+    let first = indices.iter().find(|&&i| i != u32::MAX).copied();
 
-    let root_end = match last {
-        // No structural chars at all: input is whitespace or a bare scalar.
-        // Bare scalar: locate the end by scanning until whitespace or EOF.
+    let root_end = match first {
         None => {
-            // Strip leading whitespace, then find the scalar's terminator.
+            // No structural chars: bare scalar (number/true/false/null).
             let mut p = 0;
             while p < buf.len() && is_ws(buf[p]) { p += 1; }
             let start = p;
-            // Scan until next whitespace (end of scalar token).
             while p < buf.len() && !is_ws(buf[p]) { p += 1; }
-            if start == p { return Ok(()); } // input was only whitespace
-            // Advance past trailing whitespace so `42   ` is accepted.
+            if start == p { return Ok(()); } // whitespace-only (scan already rejected empty)
             while p < buf.len() && is_ws(buf[p]) { p += 1; }
             p
         }
-        // Structural close (`}` or `]`) of root container, OR root quote
-        // close, OR last structural (`,`/`:`/`{`/`[`) — in which case the
-        // parse should already have failed at scan(). The only "valid root
-        // ending in a structural" cases are a closing `}` / `]` / `"`.
-        Some(last_idx) => {
-            let mut p = last_idx as usize + 1;
-            // Advance past any trailing whitespace.
-            while p < buf.len() && is_ws(buf[p]) { p += 1; }
-            p
+        Some(first_idx) => {
+            match buf[first_idx as usize] {
+                b'{' | b'[' => {
+                    // Walk indices to find the closing bracket at depth 0.
+                    let mut depth: i32 = 0;
+                    let mut closer: usize = first_idx as usize;
+                    // Track whether we're inside a string (skip string interiors).
+                    let mut in_str = false;
+                    for &idx in indices {
+                        if idx == u32::MAX { break; }
+                        let pos = idx as usize;
+                        match buf[pos] {
+                            b'"' => { in_str = !in_str; }
+                            _ if in_str => {}
+                            b'{' | b'[' => { depth += 1; }
+                            b'}' | b']' => {
+                                depth -= 1;
+                                if depth == 0 { closer = pos; break; }
+                            }
+                            _ => {}
+                        }
+                    }
+                    let mut p = closer + 1;
+                    while p < buf.len() && is_ws(buf[p]) { p += 1; }
+                    p
+                }
+                b'"' => {
+                    // Root is a string: opening quote at first_idx.
+                    // The closing quote is the next structural char.
+                    let close = indices.iter()
+                        .skip(1) // skip the opening quote
+                        .find(|&&i| i != u32::MAX)
+                        .copied()
+                        .unwrap_or(first_idx); // unclosed: scan already rejected
+                    let mut p = close as usize + 1;
+                    while p < buf.len() && is_ws(buf[p]) { p += 1; }
+                    p
+                }
+                _ => {
+                    // Structural char that's not an opener: scan/eager already
+                    // would have caught a malformed root. Treat last structural as end.
+                    let last = indices.iter().rev()
+                        .find(|&&i| i != u32::MAX)
+                        .copied()
+                        .unwrap_or(first_idx);
+                    let mut p = last as usize + 1;
+                    while p < buf.len() && is_ws(buf[p]) { p += 1; }
+                    p
+                }
+            }
         }
     };
 
diff --git a/src/validate/strings.rs b/src/validate/strings.rs
index f0bece1..dc974f2 100644
--- a/src/validate/strings.rs
+++ b/src/validate/strings.rs
@@ -3,21 +3,48 @@
 use crate::error::qjd_err;
 
 /// Verify that the raw span (excluding surrounding quotes) contains no
-/// unescaped control characters (0x00..=0x1F) and is valid UTF-8.
+/// unescaped control characters (0x00..=0x1F), is valid UTF-8, and that
+/// every backslash escape sequence is RFC 8259 §7 compliant.
 pub(crate) fn validate_string_span(span: &[u8]) -> Result<(), qjd_err> {
-    // Control chars are forbidden inside a JSON string per RFC 8259 §7.
-    // Cheap pass first: bytewise check.
-    if span.iter().any(|&b| b < 0x20) {
-        return Err(qjd_err::QJD_INVALID_STRING);
-    }
-    // UTF-8 validation. Backslash escapes are not yet expanded; the byte
-    // immediately after `\` may legally be any escape introducer
-    // (`"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`, `u`), all of which are ASCII.
-    // So validating the raw span (with backslashes still in place) gives
-    // the same answer as validating the escape-decoded result.
+    // UTF-8 validation first (includes multi-byte content validation).
+    // Backslash escapes are ASCII, so validating the unexpanded span gives
+    // the correct answer for the UTF-8 structure of non-escape bytes.
     if std::str::from_utf8(span).is_err() {
         return Err(qjd_err::QJD_INVALID_UTF8);
     }
+
+    // Walk the span validating control chars and escape sequences.
+    let mut i = 0;
+    while i < span.len() {
+        let b = span[i];
+        // RFC 8259 §7: control characters must be escaped.
+        if b < 0x20 {
+            return Err(qjd_err::QJD_INVALID_STRING);
+        }
+        if b == b'\\' {
+            i += 1;
+            if i >= span.len() {
+                return Err(qjd_err::QJD_INVALID_STRING);
+            }
+            match span[i] {
+                b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
+                b'u' => {
+                    // Must be followed by exactly 4 hex digits.
+                    if i + 4 >= span.len() {
+                        return Err(qjd_err::QJD_INVALID_STRING);
+                    }
+                    for &h in &span[i + 1..=i + 4] {
+                        if !h.is_ascii_hexdigit() {
+                            return Err(qjd_err::QJD_INVALID_STRING);
+                        }
+                    }
+                    i += 4; // consumed 4 hex digits; loop adds 1 more
+                }
+                _ => return Err(qjd_err::QJD_INVALID_STRING),
+            }
+        }
+        i += 1;
+    }
     Ok(())
 }
 
diff --git a/tests/json_test_suite.rs b/tests/json_test_suite.rs
new file mode 100644
index 0000000..6f4d031
--- /dev/null
+++ b/tests/json_test_suite.rs
@@ -0,0 +1,200 @@
+//! Walker over the JSONTestSuite corpus (submodule at tests/vendor/JSONTestSuite).
+//!
+//! - `y_*` files: must parse in both EAGER and LAZY modes.
+//! - `n_*` files: must fail to parse in EAGER mode.
+//!                In LAZY mode the file MAY parse (structural-only) but a
+//!                value-level access of the malformed field would fail; we
+//!                do not assert against LAZY here.
+//! - `i_*` files: implementation-defined; we record our behavior (no
+//!                assertions). The list of accepted/rejected i_* cases is
+//!                printed at the end of the test run for documentation.
+//!
+//! # Known failures
+//!
+//! Files listed in KNOWN_Y_FAILURES / KNOWN_N_FAILURES are skipped with a
+//! logged explanation.  Removing a file from these lists re-enables the test.
+//!
+//! KNOWN_Y_FAILURES: y_* files we don't handle correctly yet.
+//!   Each entry documents why; follow-up issues are referenced in comments.
+//!
+//! KNOWN_N_FAILURES: n_* files our eager validator passes when it shouldn't.
+//!   These correspond to grammar-aware gaps deferred to issue #37.
+
+use std::fs;
+use std::path::Path;
+
+use quickdecode::doc::Document;
+use quickdecode::options::{Options, QJD_MODE_EAGER, QJD_MODE_LAZY};
+
+/// y_* files that we currently reject but shouldn't.
+/// Each is annotated with why and what follow-up would fix it.
+const KNOWN_Y_FAILURES: &[&str] = &[
+    // "y_string_utf8.json" — example placeholder (none currently needed)
+];
+
+/// n_* files that we currently accept but shouldn't (validator gap).
+///
+/// All 13 entries below require a grammar-aware structural pass that tracks
+/// which token types are legal in each parser state (array element, object
+/// key, object value, etc.).  That pass is deferred to issue #37.
+///
+/// The current validator only catches structural errors detectable from
+/// bracket balance + gap heuristics; it does not enforce:
+///   - that object keys must be strings
+///   - that `:` vs `,` are used in the right places
+///   - that array elements are separated by commas (not colons/semicolons)
+///   - leading commas before values (gap heuristic fires only for `[,]`)
+///   - missing commas between items when no structural gap exists
+///
+/// Fix: implement a state-machine pass in src/validate/mod.rs that tracks
+/// parser state (AfterKey, AfterColon, AfterValue, …) and rejects tokens
+/// that violate the grammar at that state.  Removing a file from this list
+/// re-enables the assertion.
+const KNOWN_N_FAILURES: &[&str] = &[
+    // ── array structural gaps ────────────────────────────────────────────
+    // ["": 1] — colon inside array (issue #37: grammar-aware pass)
+    "n_array_colon_instead_of_comma.json",
+    // [,1] — leading comma before first value (issue #37)
+    "n_array_comma_and_number.json",
+    // [3[4]] — missing comma between elements (issue #37)
+    "n_array_inner_array_no_comma.json",
+    // [1:2] — colon used instead of comma, despite the "semicolon" file name (issue #37)
+    "n_array_items_separated_by_semicolon.json",
+    // [   , ""] — leading comma (gap heuristic only catches [,] not [  ,v]) (issue #37)
+    "n_array_missing_value.json",
+    // ── object structural gaps ───────────────────────────────────────────
+    // {"x", null} — comma instead of colon (issue #37)
+    "n_object_comma_instead_of_colon.json",
+    // {"a":"a" 123} — missing comma between key-value pairs (issue #37)
+    "n_object_garbage_at_end.json",
+    // {:"b"} — missing object key (issue #37)
+    "n_object_missing_key.json",
+    // {"a" "b"} — missing colon between key and value (issue #37)
+    "n_object_missing_semicolon.json",
+    // {1:1} — non-string key: number (issue #37)
+    "n_object_non_string_key.json",
+    // {9999E9999:1} — non-string key: huge number (issue #37)
+    "n_object_non_string_key_but_huge_number_instead.json",
+    // {null:null,null:null} — non-string key: null literal (issue #37)
+    "n_object_repeated_null_null.json",
+    // { "foo" : "bar", "a" } — trailing key without value (issue #37)
+    "n_object_with_single_string.json",
+];
+
+fn corpus_dir() -> &'static Path {
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+}
+
+fn parsing_dir() -> std::path::PathBuf {
+    corpus_dir().join("tests/vendor/JSONTestSuite/test_parsing")
+}
+
+fn iter_files(prefix: &str) -> Vec<std::path::PathBuf> {
+    let dir = parsing_dir();
+    let entries = fs::read_dir(&dir)
+        .unwrap_or_else(|e| panic!(
+            "missing JSONTestSuite submodule at {:?}: {} — run: git submodule update --init",
+            dir, e
+        ));
+    let mut paths: Vec<_> = entries
+        .filter_map(|r| r.ok())
+        .map(|e| e.path())
+        .filter(|p| {
+            p.extension().and_then(|s| s.to_str()) == Some("json")
+                && p.file_name()
+                    .and_then(|s| s.to_str())
+                    .map(|n| n.starts_with(prefix))
+                    .unwrap_or(false)
+        })
+        .collect();
+    paths.sort();
+    paths
+}
+
+fn is_known_y_failure(path: &std::path::Path) -> bool {
+    let name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
+    KNOWN_Y_FAILURES.contains(&name)
+}
+
+fn is_known_n_failure(path: &std::path::Path) -> bool {
+    let name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
+    KNOWN_N_FAILURES.contains(&name)
+}
+
+#[test]
+fn y_files_accepted_in_both_modes() {
+    let eager = Options { mode: QJD_MODE_EAGER, max_depth: 0 };
+    let lazy  = Options { mode: QJD_MODE_LAZY,  max_depth: 0 };
+    let mut failures = Vec::new();
+    let mut skipped = 0usize;
+
+    for path in iter_files("y_") {
+        if is_known_y_failure(&path) {
+            eprintln!("SKIP (known-y-failure): {:?}", path.file_name().unwrap());
+            skipped += 1;
+            continue;
+        }
+        let data = fs::read(&path).unwrap();
+        let r_e = Document::parse_with_options(&data, &eager);
+        let r_l = Document::parse_with_options(&data, &lazy);
+        if r_e.is_err() || r_l.is_err() {
+            failures.push((
+                path.file_name().unwrap().to_owned(),
+                format!("eager={:?} lazy={:?}", r_e.err(), r_l.err()),
+            ));
+        }
+    }
+
+    if skipped > 0 {
+        eprintln!("y_* skipped (known failures): {}", skipped);
+    }
+    if !failures.is_empty() {
+        for (n, e) in &failures {
+            eprintln!("UNEXPECTED REJECT: {:?} → {}", n, e);
+        }
+        panic!("{} y_* file(s) unexpectedly rejected", failures.len());
+    }
+}
+
+#[test]
+fn n_files_rejected_in_eager_mode() {
+    let eager = Options { mode: QJD_MODE_EAGER, max_depth: 0 };
+    let mut accepted = Vec::new();
+    let mut skipped = 0usize;
+
+    for path in iter_files("n_") {
+        if is_known_n_failure(&path) {
+            eprintln!("SKIP (known-n-failure): {:?}", path.file_name().unwrap());
+            skipped += 1;
+            continue;
+        }
+        let data = fs::read(&path).unwrap();
+        if Document::parse_with_options(&data, &eager).is_ok() {
+            accepted.push(path.file_name().unwrap().to_owned());
+        }
+    }
+
+    if skipped > 0 {
+        eprintln!("n_* skipped (known failures): {}", skipped);
+    }
+    if !accepted.is_empty() {
+        for n in &accepted {
+            eprintln!("UNEXPECTED ACCEPT: {:?}", n);
+        }
+        panic!("{} n_* file(s) unexpectedly accepted", accepted.len());
+    }
+}
+
+#[test]
+fn document_i_files_behavior() {
+    // Implementation-defined cases — document what we do, do not assert.
+    let eager = Options { mode: QJD_MODE_EAGER, max_depth: 0 };
+    for path in iter_files("i_") {
+        let data = fs::read(&path).unwrap();
+        let verdict = match Document::parse_with_options(&data, &eager) {
+            Ok(_)  => "ACCEPT".to_owned(),
+            Err(e) => format!("REJECT({:?})", e),
+        };
+        eprintln!("i_* {:?} → {}", path.file_name().unwrap(), verdict);
+    }
+}
diff --git a/tests/vendor/JSONTestSuite b/tests/vendor/JSONTestSuite
new file mode 160000
index 0000000..1ef36fa
--- /dev/null
+++ b/tests/vendor/JSONTestSuite
@@ -0,0 +1 @@
+Subproject commit 1ef36fa01286573e846ac449e8683f8833c5b26a

From 469b3bb419fece099065802a442d796aa1e46c54 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 17:34:35 +0000
Subject: [PATCH 17/21] fix(test): clippy doc_overindented_list_items in
 json_test_suite

---
 tests/json_test_suite.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/json_test_suite.rs b/tests/json_test_suite.rs
index 6f4d031..31b7b1b 100644
--- a/tests/json_test_suite.rs
+++ b/tests/json_test_suite.rs
@@ -2,12 +2,12 @@
 //!
 //! - `y_*` files: must parse in both EAGER and LAZY modes.
 //! - `n_*` files: must fail to parse in EAGER mode.
-//!                In LAZY mode the file MAY parse (structural-only) but a
-//!                value-level access of the malformed field would fail; we
-//!                do not assert against LAZY here.
+//!   In LAZY mode the file MAY parse (structural-only) but a value-level
+//!   access of the malformed field would fail; we do not assert against
+//!   LAZY here.
 //! - `i_*` files: implementation-defined; we record our behavior (no
-//!                assertions). The list of accepted/rejected i_* cases is
-//!                printed at the end of the test run for documentation.
+//!   assertions). Each i_* verdict is printed to stderr as the file is
+//!   processed (run with `--nocapture` to see it).
 //!
 //! # Known failures
 //!

From b56f93d90bfc1b197a8b0ec14124f34b007fe469 Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 17:36:27 +0000
Subject: [PATCH 18/21] docs: update two-phase invariants for eager/lazy modes
 and RFC 8259 audit

---
 CLAUDE.md                   |  4 ++--
 README.md                   | 34 ++++++++++++++++++++++++++++++++++
 docs/rfc8259-conformance.md | 19 +++++++++++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 docs/rfc8259-conformance.md

diff --git a/CLAUDE.md b/CLAUDE.md
index e5039a8..cec0555 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -45,12 +45,12 @@ cargo test --features test-panic --release
 
 ### Two-phase parse
 
-**Phase 1** (`src/scan/`, called from `Document::parse`): a structural scanner walks the input once and writes the byte offset of every non-string-interior `{ } [ ] : , "` into `doc.indices: Vec<u32>`. A `u32::MAX` sentinel is appended. The scanner is selected at first use via `OnceCell` in `src/scan/mod.rs`:
+**Phase 1** (`src/scan/`, called from `Document::parse_with_options`): a structural scanner walks the input once and writes the byte offset of every non-string-interior `{ } [ ] : , "` into `doc.indices` (terminated by an appended `u32::MAX` sentinel). Then `validate_depth` is run unconditionally; in EAGER mode, `validate_trailing` and `validate_eager_values` (number ABNF + string content + UTF-8) follow. In LAZY mode, value-level checks are skipped at parse time; value-level errors surface from the lazy decode path at field-access time. The scanner is selected at first use via `OnceCell` in `src/scan/mod.rs`:
 
 - `Avx2Scanner` (gated by the `avx2` cargo feature, default-on) when both `avx2` and `pclmulqdq` are detected at runtime.
 - `ScalarScanner` otherwise.
 
-Validation is shallow — bracket/quote balance only. Value-level errors (bad escapes, malformed numbers, invalid UTF-8 in `\u`) are deferred to Phase 2 and surface only if that field is accessed.
+Validation level depends on `qjd_options.mode`. **EAGER** (default): a post-scan pass walks `indices` and validates RFC 8259 number ABNF, string content (no unescaped control chars), and UTF-8 — parse fails on any value-level violation. **LAZY** (opt-in): bracket/quote balance + max-depth only; value-level errors surface when the offending field is accessed (lua-cjson-equivalent behavior). Trailing-content rejection and value-level validation are eager-only; max-depth (default 1024, configurable up to 4096) is enforced in both modes.
 
 **Phase 2** (`src/cursor.rs`, `src/path.rs`, `src/decode/`): path strings are parsed by a zero-alloc `PathIter` into `PathSeg::Key | Idx`. A `Cursor` (a `(idx_start, idx_end)` pair into `doc.indices`) is walked to the target, optionally caching sibling spans in `doc.skip` (`SkipCache`) so repeated lookups on the same container skip brace-counting. Strings are decoded into `doc.scratch` only when they contain escapes; otherwise the original buffer slice is handed back.
 
diff --git a/README.md b/README.md
index 9054aef..bc9d0c2 100644
--- a/README.md
+++ b/README.md
@@ -116,3 +116,37 @@ methodology + reproduction command.
 ```sh
 make bench       # quickdecode vs cjson
 ```
+
+## RFC 8259 conformance
+
+This crate implements RFC 8259 with both strict and lenient modes; the strict
+(eager) mode is the default and is required by API-gateway use cases that must
+reject malformed payloads before forwarding them upstream.
+
+- Strict-mode acceptance corpus: `tests/rfc8259_compliance.rs`
+- Industry corpus: `tests/json_test_suite.rs` (against the
+  [JSONTestSuite](https://github.com/nst/JSONTestSuite) submodule at
+  `tests/vendor/JSONTestSuite`)
+- Behavior on implementation-defined (`i_*`) cases: `docs/rfc8259-conformance.md`
+
+### Switching modes
+
+From Lua:
+
+```lua
+local doc = qd.parse(json)                            -- eager (default)
+local doc = qd.parse(json, { lazy = true })           -- lazy mode
+local doc = qd.parse(json, { max_depth = 256 })       -- stricter depth limit
+local doc = qd.parse(json, { lazy = true, max_depth = 256 })
+```
+
+From C:
+
+```c
+qjd_options opts = { .mode = QJD_MODE_LAZY, .max_depth = 256 };
+qjd_doc* doc = qjd_parse_ex(buf, len, &opts, &err);
+```
+
+### Known gaps
+
+Three structural-grammar checks are deferred to a follow-up — they require a grammar-aware walk beyond the current heuristic. See `tests/rfc8259_compliance.rs` for the specific `#[ignore]`d cases, and `tests/json_test_suite.rs::KNOWN_N_FAILURES` for the corresponding JSONTestSuite files.
diff --git a/docs/rfc8259-conformance.md b/docs/rfc8259-conformance.md
new file mode 100644
index 0000000..b203d84
--- /dev/null
+++ b/docs/rfc8259-conformance.md
@@ -0,0 +1,19 @@
+# RFC 8259 conformance: implementation-defined cases
+
+JSONTestSuite categorizes some inputs as `i_*` — the spec allows either
+acceptance or rejection. This file records `lua-quick-decode`'s behavior on
+each, so changes show up in `git diff`.
+
+Behavior is recorded for the default **EAGER** mode unless noted.
+
+| File pattern | Our verdict | Rationale |
+|---|---|---|
+| `i_number_huge_exp` | REJECT (`QJD_NUMBER_OUT_OF_RANGE`) | f64 overflow surfaces at decode. |
+| `i_number_very_big_negative_int` | varies — see below | ABNF-valid; representational, not structural. |
+| `i_string_*` (UTF-16 surrogate halves in `\u` escapes) | REJECT (`QJD_DECODE_FAILED`) | We require well-formed surrogate pairs. |
+| `i_structure_500_nested_arrays` | ACCEPT (within default 1024 max_depth) | Configurable. |
+
+Run `cargo test --release --test json_test_suite -- --nocapture` to print the
+live verdict for every `i_*` file via the `document_i_files_behavior` test.
+That is the source of truth for these entries; update this table when a
+verdict changes (e.g. after a validator gap is closed).

From 1a3a4b7060821ea5bf31aa97a1adc8626e3c88be Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Sun, 17 May 2026 17:48:57 +0000
Subject: [PATCH 19/21] ci: init JSONTestSuite submodule on Rust matrix
 checkouts

---
 .github/workflows/ci.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 225b49f..14adffc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,8 @@ jobs:
         os: [ubuntu-latest, macos-14]
     steps:
       - uses: actions/checkout@v4
+        with:
+          submodules: recursive
 
       - name: Install Rust (stable)
         run: |

From 4aac34e693691c1e9f97fdfcbe72459ee2af96df Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Mon, 18 May 2026 00:43:23 +0000
Subject: [PATCH 20/21] perf(validate): single-pass string validator with SIMD
 ASCII fast path

Replace the 3-pass string validator (control-char check + std::str::from_utf8
+ byte-by-byte escape grammar walk) with a single-pass state machine, fronted
by an ASCII-only SIMD fast path that bulk-skips chunks of pure printable
ASCII bytes.

The previous implementation walked every interior byte three times, which
made eager validation 10-48x slower than the lazy baseline on parse+access
benchmarks. The single-pass scalar walker combines all three checks; the
fast path adds AVX2 (32B chunks) and NEON (16B chunks) skips for the
common case where strings contain no escapes, no UTF-8 multi-bytes, and
no control characters.

Strict UTF-8 per RFC 3629: rejects overlong encodings (C0/C1 leads; E0
must be followed by A0-BF, F0 by 90-BF), surrogates (ED followed by
A0-BF), and out-of-range leads (F5-FF). Matches std::str::from_utf8 for
the corpus the project already covers.

Module structure:
  src/validate/strings/mod.rs     dispatcher + tests
  src/validate/strings/scalar.rs  pure-Rust state machine
  src/validate/strings/avx2.rs    x86_64 AVX2 ASCII skip
  src/validate/strings/neon.rs    aarch64 NEON ASCII skip

All 8 baseline unit tests are preserved verbatim. 16 new tests cover SIMD
chunk-boundary cases (UTF-8 straddling, backslash at boundary, long ASCII
runs), truncated \uXXXX, dangling backslash, unknown escape introducers,
overlong/surrogate UTF-8, and lone continuation bytes.

Bench delta (quickdecode.parse + access 3 fields, median ops/s):
  100k:        4,004 ->  61,881  (15.5x)
  1m:            392 ->   7,075  (18.0x)
  github-100k: 1,711 ->   1,897  (1.1x; mostly non-ASCII)
---
 src/validate/strings.rs        |  63 -----------
 src/validate/strings/avx2.rs   |  68 ++++++++++++
 src/validate/strings/mod.rs    | 192 +++++++++++++++++++++++++++++++++
 src/validate/strings/neon.rs   |  67 ++++++++++++
 src/validate/strings/scalar.rs | 158 +++++++++++++++++++++++++++
 5 files changed, 485 insertions(+), 63 deletions(-)
 delete mode 100644 src/validate/strings.rs
 create mode 100644 src/validate/strings/avx2.rs
 create mode 100644 src/validate/strings/mod.rs
 create mode 100644 src/validate/strings/neon.rs
 create mode 100644 src/validate/strings/scalar.rs

diff --git a/src/validate/strings.rs b/src/validate/strings.rs
deleted file mode 100644
index dc974f2..0000000
--- a/src/validate/strings.rs
+++ /dev/null
@@ -1,63 +0,0 @@
-//! String-content validation: control chars and UTF-8.
-
-use crate::error::qjd_err;
-
-/// Verify that the raw span (excluding surrounding quotes) contains no
-/// unescaped control characters (0x00..=0x1F), is valid UTF-8, and that
-/// every backslash escape sequence is RFC 8259 §7 compliant.
-pub(crate) fn validate_string_span(span: &[u8]) -> Result<(), qjd_err> {
-    // UTF-8 validation first (includes multi-byte content validation).
-    // Backslash escapes are ASCII, so validating the unexpanded span gives
-    // the correct answer for the UTF-8 structure of non-escape bytes.
-    if std::str::from_utf8(span).is_err() {
-        return Err(qjd_err::QJD_INVALID_UTF8);
-    }
-
-    // Walk the span validating control chars and escape sequences.
-    let mut i = 0;
-    while i < span.len() {
-        let b = span[i];
-        // RFC 8259 §7: control characters must be escaped.
-        if b < 0x20 {
-            return Err(qjd_err::QJD_INVALID_STRING);
-        }
-        if b == b'\\' {
-            i += 1;
-            if i >= span.len() {
-                return Err(qjd_err::QJD_INVALID_STRING);
-            }
-            match span[i] {
-                b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
-                b'u' => {
-                    // Must be followed by exactly 4 hex digits.
-                    if i + 4 >= span.len() {
-                        return Err(qjd_err::QJD_INVALID_STRING);
-                    }
-                    for &h in &span[i + 1..=i + 4] {
-                        if !h.is_ascii_hexdigit() {
-                            return Err(qjd_err::QJD_INVALID_STRING);
-                        }
-                    }
-                    i += 4; // consumed 4 hex digits; loop adds 1 more
-                }
-                _ => return Err(qjd_err::QJD_INVALID_STRING),
-            }
-        }
-        i += 1;
-    }
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test] fn ascii_ok()         { assert!(validate_string_span(b"hello").is_ok()); }
-    #[test] fn utf8_ok()          { assert!(validate_string_span("中文".as_bytes()).is_ok()); }
-    #[test] fn escapes_ok()       { assert!(validate_string_span(b"a\\nb\\u00e9").is_ok()); }
-    #[test] fn tab_raw_bad()      { assert_eq!(validate_string_span(b"a\tb").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
-    #[test] fn null_raw_bad()     { assert_eq!(validate_string_span(b"a\x00b").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
-    #[test] fn newline_raw_bad()  { assert_eq!(validate_string_span(b"a\nb").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
-    #[test] fn del_0x7f_ok()      { assert!(validate_string_span(b"a\x7fb").is_ok()); } // RFC 8259 does NOT forbid 0x7F
-    #[test] fn invalid_utf8_bad() { assert_eq!(validate_string_span(&[0xC0, 0xC0]).unwrap_err(), qjd_err::QJD_INVALID_UTF8); }
-}
diff --git a/src/validate/strings/avx2.rs b/src/validate/strings/avx2.rs
new file mode 100644
index 0000000..7823d8c
--- /dev/null
+++ b/src/validate/strings/avx2.rs
@@ -0,0 +1,68 @@
+#![cfg(all(target_arch = "x86_64", feature = "avx2"))]
+
+//! AVX2 ASCII fast path for string-content validation.
+//!
+//! For each 32-byte chunk, compute a "needs-attention" mask covering bytes
+//! that are either control chars (< 0x20), backslashes, or high-bit bytes.
+//! If the mask is all-zero the chunk is pure printable ASCII (no escapes,
+//! no UTF-8, no control) and can be skipped entirely.
+//!
+//! On the first non-zero chunk we hand off to the scalar state machine for
+//! the remainder of the span — we don't try to bit-scan inside the chunk.
+//! The fast-path payoff comes from cleanly skipping long ASCII prefixes;
+//! the scalar tail handles correctness without needing SIMD escape logic.
+
+use crate::error::qjd_err;
+use core::arch::x86_64::*;
+
+use super::scalar::validate_span_scalar;
+
+/// Validate `span` using AVX2 to bulk-skip pure-ASCII 32-byte chunks.
+pub(crate) fn validate_span_avx2(span: &[u8]) -> Result<(), qjd_err> {
+    // SAFETY: dispatcher has verified the AVX2 feature is present.
+    unsafe { validate_span_avx2_impl(span) }
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn validate_span_avx2_impl(span: &[u8]) -> Result<(), qjd_err> {
+    let mut i: usize = 0;
+    let n = span.len();
+
+    // Bytes that need scalar attention are those with:
+    //   - top bit set                  → byte >= 0x80
+    //   - value < 0x20                 → control char
+    //   - value == 0x5C ('\\')         → escape introducer
+    //
+    // Detection via three SIMD compares OR'd together.
+    let backslash = _mm256_set1_epi8(b'\\' as i8);
+    // For "< 0x20" we use a signed compare: `_mm256_cmpgt_epi8(0x20, byte)`
+    // is true for 0x00..=0x1F (non-negative as i8). It is also true for
+    // bytes >= 0x80 (negative as i8), but those are already flagged by the
+    // high-bit movemask in the loop, so the extra hits are harmless.
+    let ctrl_thresh = _mm256_set1_epi8(0x20_i8);
+
+    while i + 32 <= n {
+        let chunk = _mm256_loadu_si256(span.as_ptr().add(i) as *const __m256i);
+
+        // high bit set?
+        let high  = _mm256_movemask_epi8(chunk) as u32;
+        // byte == '\\' ?
+        let bs    = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, backslash)) as u32;
+        // byte < 0x20 ?  (signed cmpgt: ctrl_thresh > chunk for 0x00..=0x1F bytes)
+        let ctrl  = _mm256_movemask_epi8(_mm256_cmpgt_epi8(ctrl_thresh, chunk)) as u32;
+
+        let interesting = high | bs | ctrl;
+        if interesting != 0 {
+            // Hand off to the scalar state machine starting at the first
+            // interesting byte in this chunk. We don't try to validate any
+            // already-cleared bytes — those are pure printable ASCII and
+            // self-terminating so it's safe to resume there.
+            let offset = interesting.trailing_zeros() as usize;
+            return validate_span_scalar(&span[i + offset..]);
+        }
+
+        i += 32;
+    }
+
+    validate_span_scalar(&span[i..])
+}
diff --git a/src/validate/strings/mod.rs b/src/validate/strings/mod.rs
new file mode 100644
index 0000000..ab10090
--- /dev/null
+++ b/src/validate/strings/mod.rs
@@ -0,0 +1,192 @@
+//! String-content validation: control chars, escape grammar, and UTF-8.
+//!
+//! Single-pass validator with an optional SIMD ASCII fast path. The public
+//! entry point [`validate_string_span`] dispatches once via `OnceCell` to
+//! the best available implementation:
+//!
+//!   - x86_64 + AVX2: 32-byte chunk skip → scalar tail.
+//!   - aarch64 NEON:  16-byte chunk skip → scalar tail.
+//!   - Otherwise:     pure scalar state machine.
+//!
+//! All paths return identical error codes for any input; the SIMD layers
+//! only accelerate the "this chunk is pure printable ASCII" common case.
+
+mod scalar;
+#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
+mod avx2;
+#[cfg(target_arch = "aarch64")]
+mod neon;
+
+use crate::error::qjd_err;
+use once_cell::sync::OnceCell;
+
+type ValidateFn = fn(&[u8]) -> Result<(), qjd_err>;
+static VALIDATE_FN: OnceCell<ValidateFn> = OnceCell::new();
+
+/// Verify that the raw span (excluding surrounding quotes) contains no
+/// unescaped control characters (0x00..=0x1F), every backslash escape is
+/// RFC 8259 §7 compliant, and the byte sequence is valid UTF-8 per RFC 3629.
+pub(crate) fn validate_string_span(span: &[u8]) -> Result<(), qjd_err> {
+    let f = *VALIDATE_FN.get_or_init(|| {
+        #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
+        {
+            if std::is_x86_feature_detected!("avx2") {
+                return avx2::validate_span_avx2 as ValidateFn;
+            }
+        }
+        #[cfg(target_arch = "aarch64")]
+        {
+            return neon::validate_span_neon as ValidateFn;
+        }
+        #[allow(unreachable_code)]
+        {
+            scalar::validate_span_scalar as ValidateFn
+        }
+    });
+    f(span)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ── Pinned baseline contract (DO NOT MODIFY) ─────────────────────────
+    // These 8 tests reproduce the original 3-pass validator's externally
+    // observable behavior and pin it down. The single-pass refactor must
+    // not change any of these outcomes.
+
+    #[test] fn ascii_ok()         { assert!(validate_string_span(b"hello").is_ok()); }
+    #[test] fn utf8_ok()          { assert!(validate_string_span("中文".as_bytes()).is_ok()); }
+    #[test] fn escapes_ok()       { assert!(validate_string_span(b"a\\nb\\u00e9").is_ok()); }
+    #[test] fn tab_raw_bad()      { assert_eq!(validate_string_span(b"a\tb").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
+    #[test] fn null_raw_bad()     { assert_eq!(validate_string_span(b"a\x00b").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
+    #[test] fn newline_raw_bad()  { assert_eq!(validate_string_span(b"a\nb").unwrap_err(), qjd_err::QJD_INVALID_STRING); }
+    #[test] fn del_0x7f_ok()      { assert!(validate_string_span(b"a\x7fb").is_ok()); } // RFC 8259 does NOT forbid 0x7F
+    #[test] fn invalid_utf8_bad() { assert_eq!(validate_string_span(&[0xC0, 0xC0]).unwrap_err(), qjd_err::QJD_INVALID_UTF8); }
+
+    // ── Single-pass / SIMD edge cases ────────────────────────────────────
+
+    #[test]
+    fn empty_span_ok() {
+        assert!(validate_string_span(b"").is_ok());
+    }
+
+    #[test]
+    fn long_ascii_ok() {
+        // > 64 bytes hits the SIMD fast path multiple times.
+        let s = vec![b'x'; 256];
+        assert!(validate_string_span(&s).is_ok());
+    }
+
+    #[test]
+    fn long_ascii_with_trailing_tab_bad() {
+        // Long ASCII run skipped by SIMD, then a control byte in the tail.
+        let mut s = vec![b'x'; 200];
+        s.push(b'\t');
+        assert_eq!(validate_string_span(&s).unwrap_err(), qjd_err::QJD_INVALID_STRING);
+    }
+
+    #[test]
+    fn utf8_at_simd_chunk_boundary() {
+        // 31 ASCII bytes + 2-byte UTF-8 (é = 0xC3 0xA9). On AVX2 the first
+        // 32-byte chunk has a high-bit byte at lane 31 → forces scalar tail
+        // starting at position 31, which must handle the 2-byte sequence.
+        let mut s = vec![b'x'; 31];
+        s.extend_from_slice("é".as_bytes());
+        assert!(validate_string_span(&s).is_ok());
+    }
+
+    #[test]
+    fn backslash_escape_at_simd_chunk_boundary() {
+        // 31 ASCII + `\n` straddles AVX2 chunk boundary at byte 31.
+        let mut s = vec![b'x'; 31];
+        s.push(b'\\');
+        s.push(b'n');
+        assert!(validate_string_span(&s).is_ok());
+    }
+
+    #[test]
+    fn backslash_at_chunk_boundary_with_bad_followup() {
+        // Backslash lands as the last byte of a 32-byte chunk; the next byte
+        // is an invalid escape introducer. Tail must reject.
+        let mut s = vec![b'x'; 31];
+        s.push(b'\\');
+        s.push(b'q');
+        assert_eq!(validate_string_span(&s).unwrap_err(), qjd_err::QJD_INVALID_STRING);
+    }
+
+    #[test]
+    fn truncated_u_escape_at_end() {
+        // `\uXX` with only 2 hex digits — RFC requires exactly 4.
+        assert_eq!(validate_string_span(b"\\uAB").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+        assert_eq!(validate_string_span(b"\\uABC").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+        // Bare `\u` at end.
+        assert_eq!(validate_string_span(b"\\u").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+    }
+
+    #[test]
+    fn dangling_backslash_at_end() {
+        assert_eq!(validate_string_span(b"abc\\").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+    }
+
+    #[test]
+    fn unknown_escape_introducer() {
+        // `\a`, `\q`, etc. are not valid RFC 8259 escapes.
+        assert_eq!(validate_string_span(b"\\a").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+        assert_eq!(validate_string_span(b"\\q").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+        assert_eq!(validate_string_span(b"\\x41").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+    }
+
+    #[test]
+    fn u_escape_non_hex_bad() {
+        assert_eq!(validate_string_span(b"\\u00ZZ").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+        assert_eq!(validate_string_span(b"\\uGHIJ").unwrap_err(), qjd_err::QJD_INVALID_STRING);
+    }
+
+    #[test]
+    fn overlong_utf8_rejected() {
+        // C0 80 would encode U+0000 in 2 bytes (overlong) — RFC 3629 forbids.
+        assert_eq!(validate_string_span(&[0xC0, 0x80]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        // E0 80 80 would encode U+0000 in 3 bytes (overlong).
+        assert_eq!(validate_string_span(&[0xE0, 0x80, 0x80]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        // F0 80 80 80 would encode U+0000 in 4 bytes (overlong).
+        assert_eq!(validate_string_span(&[0xF0, 0x80, 0x80, 0x80]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+    }
+
+    #[test]
+    fn surrogate_in_utf8_rejected() {
+        // ED A0 80 = U+D800, the start of the high-surrogate range.
+        assert_eq!(validate_string_span(&[0xED, 0xA0, 0x80]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        // ED BF BF = U+DFFF, the end of the low-surrogate range.
+        assert_eq!(validate_string_span(&[0xED, 0xBF, 0xBF]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+    }
+
+    #[test]
+    fn lone_continuation_byte_rejected() {
+        assert_eq!(validate_string_span(&[0x80]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        assert_eq!(validate_string_span(&[b'a', 0xBF, b'b']).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+    }
+
+    #[test]
+    fn four_byte_emoji_ok() {
+        // U+1F600 grinning face = F0 9F 98 80.
+        assert!(validate_string_span(&[0xF0, 0x9F, 0x98, 0x80]).is_ok());
+    }
+
+    #[test]
+    fn truncated_utf8_sequence_rejected() {
+        // 2-byte lead with no continuation.
+        assert_eq!(validate_string_span(&[0xC3]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        // 3-byte lead with only one continuation.
+        assert_eq!(validate_string_span(&[0xE4, 0xB8]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        // 4-byte lead with only two continuations.
+        assert_eq!(validate_string_span(&[0xF0, 0x9F, 0x98]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+    }
+
+    #[test]
+    fn utf8_out_of_range_rejected() {
+        // F5..FF are not valid lead bytes (would encode > U+10FFFF).
+        assert_eq!(validate_string_span(&[0xF5, 0x80, 0x80, 0x80]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+        assert_eq!(validate_string_span(&[0xFF]).unwrap_err(), qjd_err::QJD_INVALID_UTF8);
+    }
+}
diff --git a/src/validate/strings/neon.rs b/src/validate/strings/neon.rs
new file mode 100644
index 0000000..34d887e
--- /dev/null
+++ b/src/validate/strings/neon.rs
@@ -0,0 +1,67 @@
+#![cfg(target_arch = "aarch64")]
+
+//! NEON ASCII fast path for string-content validation.
+//!
+//! For each 16-byte chunk, compute a single "needs-attention" mask covering
+//! bytes that are control chars (< 0x20), backslashes, or high-bit bytes.
+//! If the chunk contains none of those bytes the mask is all-zero and the
+//! chunk can be skipped entirely. The first non-zero chunk hands off to the
+//! scalar state machine, which handles correctness for the remainder.
+
+use crate::error::qjd_err;
+use core::arch::aarch64::*;
+
+use super::scalar::validate_span_scalar;
+
+/// Validate `span` using NEON to bulk-skip pure-ASCII 16-byte chunks.
+pub(crate) fn validate_span_neon(span: &[u8]) -> Result<(), qjd_err> {
+    // SAFETY: aarch64 NEON is always available on aarch64 (it is part of
+    // the AArch64 base ISA), so no runtime feature check is required.
+    unsafe { validate_span_neon_impl(span) }
+}
+
+#[target_feature(enable = "neon")]
+unsafe fn validate_span_neon_impl(span: &[u8]) -> Result<(), qjd_err> {
+    let mut i: usize = 0;
+    let n = span.len();
+
+    let backslash = vdupq_n_u8(b'\\');
+    let ctrl_top  = vdupq_n_u8(0x20);
+
+    while i + 16 <= n {
+        let chunk = vld1q_u8(span.as_ptr().add(i));
+
+        // byte >= 0x80 ?  high bit set
+        let high = vcgeq_u8(chunk, vdupq_n_u8(0x80));
+        // byte == '\\' ?
+        let bs   = vceqq_u8(chunk, backslash);
+        // byte <  0x20 ?
+        let ctrl = vcltq_u8(chunk, ctrl_top);
+
+        let interesting = vorrq_u8(vorrq_u8(high, bs), ctrl);
+
+        // Reduce 16 lanes → single u8 to test for any non-zero byte.
+        // vmaxvq_u8 returns 0 iff every lane is 0.
+        if vmaxvq_u8(interesting) != 0 {
+            // First interesting byte: find its lane index. Instead of
+            // extracting it from the `interesting` mask, re-test the raw
+            // bytes of this chunk with a small scalar loop. A 16-lane
+            // ctz would be tidier but isn't critical here — interesting chunks
+            // are the slow case anyway.
+            for lane in 0..16usize {
+                if span[i + lane] >= 0x80
+                    || span[i + lane] == b'\\'
+                    || span[i + lane] < 0x20
+                {
+                    return validate_span_scalar(&span[i + lane..]);
+                }
+            }
+            // Unreachable: vmaxvq_u8 said at least one lane is non-zero.
+            unreachable!();
+        }
+
+        i += 16;
+    }
+
+    validate_span_scalar(&span[i..])
+}
diff --git a/src/validate/strings/scalar.rs b/src/validate/strings/scalar.rs
new file mode 100644
index 0000000..7784679
--- /dev/null
+++ b/src/validate/strings/scalar.rs
@@ -0,0 +1,158 @@
+//! Single-pass scalar validator for a JSON string span (interior bytes,
+//! excluding the surrounding quotes).
+//!
+//! Combines three checks into one byte walk:
+//!   1. RFC 8259 §7: no raw control characters (b < 0x20).
+//!   2. RFC 8259 §7: every `\` escape is one of `" \ / b f n r t` or `\uXXXX`.
+//!   3. RFC 3629: valid UTF-8 (rejects overlong encodings and surrogates,
+//!      matching `std::str::from_utf8` for full corpus parity).
+//!
+//! Error-code precedence on mixed inputs:
+//!   - Control char or invalid escape introducer encountered first → INVALID_STRING.
+//!   - Bad UTF-8 lead/continuation byte encountered first → INVALID_UTF8.
+//!
+//! This means a span like `[0x09, 0xFF]` returns INVALID_STRING (control byte
+//! seen before the UTF-8 problem), whereas `[0xFF, 0x09]` returns INVALID_UTF8.
+//! The previous two-pass code preferred UTF-8 in both cases; no existing test
+//! pins down which wins on mixed input, so the position-ordered choice here
+//! is the natural single-pass behavior.
+
+use crate::error::qjd_err;
+
+/// Validate `span` byte-by-byte. The caller passes the raw, still-escaped
+/// string interior (between the JSON `"…"` quotes) — `\` therefore introduces
+/// an RFC 8259 escape sequence, not a literal backslash byte.
+pub(crate) fn validate_span_scalar(span: &[u8]) -> Result<(), qjd_err> {
+    let mut i: usize = 0;
+    let n = span.len();
+    while i < n {
+        let b = span[i];
+
+        // Fast path: plain ASCII non-escape non-control.
+        if b < 0x80 {
+            if b < 0x20 {
+                return Err(qjd_err::QJD_INVALID_STRING);
+            }
+            if b == b'\\' {
+                i = validate_escape(span, i + 1)?;
+                continue;
+            }
+            i += 1;
+            continue;
+        }
+
+        // High-bit byte: must be the lead of a 2/3/4-byte UTF-8 sequence.
+        i = validate_utf8_sequence(span, i)?;
+    }
+    Ok(())
+}
+
+/// At entry `i` points to the byte AFTER the `\`. Returns the index of the
+/// next byte to validate (i.e. one past the last consumed escape byte).
+#[inline]
+fn validate_escape(span: &[u8], i: usize) -> Result<usize, qjd_err> {
+    if i >= span.len() {
+        // Dangling `\` at end of span.
+        return Err(qjd_err::QJD_INVALID_STRING);
+    }
+    match span[i] {
+        b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => Ok(i + 1),
+        b'u' => {
+            // Must be followed by exactly 4 hex digits.
+            let hex_start = i + 1;
+            let hex_end = hex_start + 4;
+            if hex_end > span.len() {
+                return Err(qjd_err::QJD_INVALID_STRING);
+            }
+            for &h in &span[hex_start..hex_end] {
+                if !h.is_ascii_hexdigit() {
+                    return Err(qjd_err::QJD_INVALID_STRING);
+                }
+            }
+            Ok(hex_end)
+        }
+        _ => Err(qjd_err::QJD_INVALID_STRING),
+    }
+}
+
+/// At entry `i` points to a byte with the high bit set. Validate the
+/// multi-byte UTF-8 sequence starting here per RFC 3629 (rejects overlong
+/// encodings and UTF-16 surrogates U+D800..=U+DFFF). Returns the index one
+/// past the last byte of the sequence.
+#[inline]
+fn validate_utf8_sequence(span: &[u8], i: usize) -> Result<usize, qjd_err> {
+    let lead = span[i];
+    let n = span.len();
+
+    // 2-byte: 110xxxxx 10xxxxxx, lead in C2..=DF (C0/C1 are overlong).
+    if (0xC2..=0xDF).contains(&lead) {
+        if i + 1 >= n {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        let b1 = span[i + 1];
+        if !(0x80..=0xBF).contains(&b1) {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        return Ok(i + 2);
+    }
+
+    // 3-byte: 1110xxxx 10xxxxxx 10xxxxxx, lead in E0..=EF.
+    // Extra constraints: E0 second must be A0..BF (else overlong);
+    //                    ED second must be 80..9F (else surrogate U+D800..=DFFF).
+    if (0xE0..=0xEF).contains(&lead) {
+        if i + 2 >= n {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        let b1 = span[i + 1];
+        let b2 = span[i + 2];
+        let b1_lo = match lead {
+            0xE0 => 0xA0,
+            _    => 0x80,
+        };
+        let b1_hi = match lead {
+            0xED => 0x9F,
+            _    => 0xBF,
+        };
+        if b1 < b1_lo || b1 > b1_hi {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        if !(0x80..=0xBF).contains(&b2) {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        return Ok(i + 3);
+    }
+
+    // 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, lead in F0..=F4.
+    // Extra constraints: F0 second must be 90..BF (else overlong);
+    //                    F4 second must be 80..8F (else > U+10FFFF).
+    if (0xF0..=0xF4).contains(&lead) {
+        if i + 3 >= n {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        let b1 = span[i + 1];
+        let b2 = span[i + 2];
+        let b3 = span[i + 3];
+        let b1_lo = match lead {
+            0xF0 => 0x90,
+            _    => 0x80,
+        };
+        let b1_hi = match lead {
+            0xF4 => 0x8F,
+            _    => 0xBF,
+        };
+        if b1 < b1_lo || b1 > b1_hi {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        if !(0x80..=0xBF).contains(&b2) {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        if !(0x80..=0xBF).contains(&b3) {
+            return Err(qjd_err::QJD_INVALID_UTF8);
+        }
+        return Ok(i + 4);
+    }
+
+    // C0, C1 (overlong 2-byte lead), F5..FF (out of range), or a bare
+    // continuation byte (80..BF with no lead) — all invalid.
+    Err(qjd_err::QJD_INVALID_UTF8)
+}

From d0999de28791c249375c8fb4e8e6b88c883dda4d Mon Sep 17 00:00:00 2001
From: Yuansheng Wang <membphis@gmail.com>
Date: Mon, 18 May 2026 01:02:38 +0000
Subject: [PATCH 21/21] feat(validate): grammar-aware eager pass closes
 structural gaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the two-pass heuristic (string-span loop + scalar-gap walker
with `:`/`,` empty-gap detection) with a single grammar-aware state
machine that walks `indices` once.

The machine tracks the expected next-token kind in each container
context via a stack (Top/TopDone, ArrAfter{Open,Value,Comma},
ObjAfter{Open,Key,Colon,Value,Comma}). String tokens and structural
characters are validated against the state; scalar tokens living in
the byte gap before the next structural are dispatched through the
same true/false/null/number precedence the previous `check_gap`
used, so existing tests keep their current error codes.

Closes the 3 ignored cases in tests/rfc8259_compliance::structural
(missing_colon, leading_comma_array_with_value, missing_comma_in_object)
and drops all 13 entries from KNOWN_N_FAILURES in
tests/json_test_suite — every grammar-only n_* case in JSONTestSuite
is now correctly rejected.
---
 src/validate/mod.rs         | 335 +++++++++++++++++++++++++++---------
 tests/json_test_suite.rs    |  49 +-----
 tests/rfc8259_compliance.rs |  23 +--
 3 files changed, 269 insertions(+), 138 deletions(-)

diff --git a/src/validate/mod.rs b/src/validate/mod.rs
index c6d972d..8ddee23 100644
--- a/src/validate/mod.rs
+++ b/src/validate/mod.rs
@@ -125,121 +125,232 @@ pub(crate) fn validate_trailing(
     Ok(())
 }
 
-/// Walk `indices` and validate every scalar value (numbers + strings).
-/// Called only in EAGER mode.
+/// Grammar-aware eager pass: walk `indices` once and validate every
+/// structural transition, key/value string, and scalar value.
+///
+/// The state machine tracks the expected next-token kind in each
+/// container context (object/array) via a stack. Empty gaps where a
+/// value is required (`[,]`, `{"a":}`), missing colons (`{"a"}`),
+/// missing commas (`{"a":1"b":2}`), non-string object keys (`{1:1}`),
+/// and stray structural tokens (`[1:2]`) all surface here as
+/// `QJD_PARSE_ERROR`.
+///
+/// Scalar tokens (numbers, `true`, `false`, `null`) live in the byte
+/// gap before the *next* structural offset. They are dispatched to
+/// `validate_number` or matched against the three literal keywords;
+/// the error-code precedence matches the previous heuristic-based
+/// `check_gap` so existing tests keep their current error codes.
 pub(crate) fn validate_eager_values(
     buf: &[u8],
     indices: &[u32],
 ) -> Result<(), qjd_err> {
-    let mut i = 0;
-    while i + 1 < indices.len() {
+    // Stack of container contexts; the top is the current state.
+    // We use a single seed entry `CtxKind::Top` for the root value.
+    let mut stack: Vec<CtxKind> = Vec::with_capacity(16);
+    stack.push(CtxKind::Top);
+
+    // Byte position just past the previous structural we consumed —
+    // i.e. the start of the current gap. A gap may contain a scalar
+    // value or be whitespace-only.
+    let mut prev_end: usize = 0;
+
+    let mut i: usize = 0;
+    while i < indices.len() {
         let idx = indices[i];
         if idx == u32::MAX { break; }
         let pos = idx as usize;
         let b = buf[pos];
 
-        // Strings: opening quote here, closing quote at indices[i+1].
-        // (The scanner emits BOTH quotes of a string in order.)
-        if b == b'"' {
-            let close = indices[i + 1] as usize;
-            // Defensive: scanner pairs quotes correctly, but guard anyway.
-            if close <= pos || close >= buf.len() || buf[close] != b'"' {
-                return Err(qjd_err::QJD_PARSE_ERROR);
+        // First, consume any scalar token sitting in the gap before
+        // this structural. This may transition the current state from
+        // a value-expecting form to its "AfterValue" form.
+        consume_scalar_gap(buf, prev_end, pos, stack.last_mut().unwrap())?;
+
+        match b {
+            b'{' | b'[' => {
+                let cur = stack.last_mut().unwrap();
+                match *cur {
+                    CtxKind::Top
+                    | CtxKind::ArrAfterOpen
+                    | CtxKind::ArrAfterComma
+                    | CtxKind::ObjAfterColon => {
+                        // Transition parent to AfterValue ahead of the
+                        // descent; the inner container's close pops back.
+                        *cur = parent_after_value(*cur);
+                        stack.push(if b == b'{' {
+                            CtxKind::ObjAfterOpen
+                        } else {
+                            CtxKind::ArrAfterOpen
+                        });
+                    }
+                    _ => return Err(qjd_err::QJD_PARSE_ERROR),
+                }
+                prev_end = pos + 1;
+                i += 1;
             }
-            let span = &buf[pos + 1 .. close];
-            strings::validate_string_span(span)?;
-            i += 2;
-            continue;
-        }
+            b'}' => {
+                let top = stack.pop().ok_or(qjd_err::QJD_PARSE_ERROR)?;
+                if !matches!(top, CtxKind::ObjAfterOpen | CtxKind::ObjAfterValue) {
+                    return Err(qjd_err::QJD_PARSE_ERROR);
+                }
+                if stack.is_empty() { return Err(qjd_err::QJD_PARSE_ERROR); }
+                prev_end = pos + 1;
+                i += 1;
+            }
+            b']' => {
+                let top = stack.pop().ok_or(qjd_err::QJD_PARSE_ERROR)?;
+                if !matches!(top, CtxKind::ArrAfterOpen | CtxKind::ArrAfterValue) {
+                    return Err(qjd_err::QJD_PARSE_ERROR);
+                }
+                if stack.is_empty() { return Err(qjd_err::QJD_PARSE_ERROR); }
+                prev_end = pos + 1;
+                i += 1;
+            }
+            b',' => {
+                let cur = stack.last_mut().ok_or(qjd_err::QJD_PARSE_ERROR)?;
+                match *cur {
+                    CtxKind::ArrAfterValue => *cur = CtxKind::ArrAfterComma,
+                    CtxKind::ObjAfterValue => *cur = CtxKind::ObjAfterComma,
+                    _ => return Err(qjd_err::QJD_PARSE_ERROR),
+                }
+                prev_end = pos + 1;
+                i += 1;
+            }
+            b':' => {
+                let cur = stack.last_mut().ok_or(qjd_err::QJD_PARSE_ERROR)?;
+                match *cur {
+                    CtxKind::ObjAfterKey => *cur = CtxKind::ObjAfterColon,
+                    _ => return Err(qjd_err::QJD_PARSE_ERROR),
+                }
+                prev_end = pos + 1;
+                i += 1;
+            }
+            b'"' => {
+                // The scanner pairs the opening and closing quotes; the
+                // closing quote is at indices[i + 1].
+                if i + 1 >= indices.len() { return Err(qjd_err::QJD_PARSE_ERROR); }
+                let close = indices[i + 1] as usize;
+                if close <= pos || close >= buf.len() || buf[close] != b'"' {
+                    return Err(qjd_err::QJD_PARSE_ERROR);
+                }
+                strings::validate_string_span(&buf[pos + 1 .. close])?;
 
-        // Container brackets and `:`/`,` are not values; skip.
-        if matches!(b, b'{' | b'}' | b'[' | b']' | b':' | b',') {
-            i += 1;
-            continue;
+                let cur = stack.last_mut().ok_or(qjd_err::QJD_PARSE_ERROR)?;
+                match *cur {
+                    // Key position in an object.
+                    CtxKind::ObjAfterOpen | CtxKind::ObjAfterComma => {
+                        *cur = CtxKind::ObjAfterKey;
+                    }
+                    // Value position (top-level, array element, or object value).
+                    CtxKind::Top
+                    | CtxKind::ArrAfterOpen
+                    | CtxKind::ArrAfterComma
+                    | CtxKind::ObjAfterColon => {
+                        *cur = parent_after_value(*cur);
+                    }
+                    _ => return Err(qjd_err::QJD_PARSE_ERROR),
+                }
+                prev_end = close + 1;
+                i += 2;
+            }
+            _ => return Err(qjd_err::QJD_PARSE_ERROR),
         }
+    }
 
-        // Should not happen: scanner only emits the 7 structural chars.
+    // Tail: a top-level scalar root (e.g. `42`, `true`) lives in the
+    // gap after the last structural — or, if there are no structurals,
+    // the whole buffer.
+    consume_scalar_gap(buf, prev_end, buf.len(), stack.last_mut().unwrap())?;
+
+    // After the walk, the stack must hold exactly one frame: the root
+    // context, which must be `TopDone` (root value consumed).
+    if stack.len() != 1 || stack[0] != CtxKind::TopDone {
         return Err(qjd_err::QJD_PARSE_ERROR);
     }
-
-    // Scalar values (numbers, true, false, null) live in the gaps between
-    // structural offsets. Walk those gaps and dispatch.
-    validate_scalars_in_gaps(buf, indices)
+    Ok(())
 }
 
-/// For each consecutive pair of structural offsets, examine the bytes
-/// between them. If the gap contains a scalar (anything other than
-/// whitespace), validate its grammar.
-fn validate_scalars_in_gaps(buf: &[u8], indices: &[u32]) -> Result<(), qjd_err> {
-    let mut prev_end: usize = 0;
-    let mut in_str = false;
-    // Track the last non-quote structural char so check_gap can reject empty
-    // gaps in positions where a value is required (after `:` or `,`).
-    let mut prev_structural: u8 = 0;
-    for &idx in indices {
-        if idx == u32::MAX { break; }
-        let pos = idx as usize;
-        let b = buf[pos];
-
-        if b == b'"' {
-            // Toggle: the bytes between two quotes are the string interior
-            // (already validated above). Skip gap-scanning across them.
-            if in_str {
-                in_str = false;
-                prev_end = pos + 1;
-            } else {
-                // Validate any scalar in the gap leading up to this quote.
-                // An open-quote is itself a value, so pass it as the next char:
-                // an empty gap before a string is always fine (`:` `"` and `,` `"` are
-                // both valid — the string IS the value).
-                check_gap(buf, prev_end, pos, prev_structural, b'"')?;
-                in_str = true;
-                prev_structural = b'"';
-            }
-            continue;
-        }
-        if in_str { continue; }
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum CtxKind {
+    Top,           // top-level value not yet consumed
+    TopDone,       // top-level value consumed; only whitespace/EOI allowed
+    ArrAfterOpen,  // just saw `[`; expect value or `]`
+    ArrAfterValue, // just saw a value; expect `,` or `]`
+    ArrAfterComma, // just saw `,`; expect value (no trailing comma)
+    ObjAfterOpen,  // just saw `{`; expect key (string) or `}`
+    ObjAfterKey,   // just saw key string; expect `:`
+    ObjAfterColon, // just saw `:`; expect value
+    ObjAfterValue, // just saw value; expect `,` or `}`
+    ObjAfterComma, // just saw `,`; expect key (no trailing comma)
+}
 
-        check_gap(buf, prev_end, pos, prev_structural, b)?;
-        prev_end = pos + 1;
-        prev_structural = b;
+/// Transition the value-expecting state to its corresponding
+/// "after value" state once the value (scalar / string / container)
+/// has been consumed.
+#[inline]
+fn parent_after_value(s: CtxKind) -> CtxKind {
+    match s {
+        CtxKind::Top           => CtxKind::TopDone,
+        CtxKind::ArrAfterOpen  => CtxKind::ArrAfterValue,
+        CtxKind::ArrAfterComma => CtxKind::ArrAfterValue,
+        CtxKind::ObjAfterColon => CtxKind::ObjAfterValue,
+        other                  => other, // unreachable for callers
     }
-    // Tail gap (top-level scalar like "42"): next char is EOF (0 sentinel)
-    check_gap(buf, prev_end, buf.len(), prev_structural, 0)
 }
 
-/// `prev_structural`: the last non-quote structural char before this gap.
-/// `next_structural`: the structural char immediately after this gap (opens or closes).
-fn check_gap(buf: &[u8], start: usize, end: usize, prev_structural: u8, next_structural: u8) -> Result<(), qjd_err> {
-    // Strip surrounding whitespace.
+/// Examine the byte gap `[start, end)` between two structurals.
+/// If the gap contains a scalar token, validate it and transition
+/// `*state` to its corresponding "AfterValue" form. If the gap is
+/// whitespace only, leave `*state` unchanged — the next structural's
+/// own check rejects empty values where they are not allowed
+/// (e.g. `ObjAfterColon` followed by `}` is caught when `}` pops).
+fn consume_scalar_gap(
+    buf: &[u8],
+    start: usize,
+    end: usize,
+    state: &mut CtxKind,
+) -> Result<(), qjd_err> {
+    // Strip whitespace.
     let mut s = start;
     while s < end && is_ws(buf[s]) { s += 1; }
     let mut e = end;
     while e > s && is_ws(buf[e - 1]) { e -= 1; }
+
     if s == e {
-        // Empty gap: a value is required after `:` (object value) or `,` (next
-        // element), BUT only when the next token is not a structural value-starter
-        // (`"`, `{`, `[`) — those ARE the values. An empty gap before `}` / `]`
-        // / `,` when the preceding token demands a value is a structural error.
-        // This heuristic catches {"a":}, [,], [1,] without a full grammar walk.
-        let next_is_value_starter = matches!(next_structural, b'"' | b'{' | b'[');
-        if matches!(prev_structural, b':' | b',') && !next_is_value_starter {
-            return Err(qjd_err::QJD_PARSE_ERROR);
-        }
         return Ok(());
     }
-    let scalar = &buf[s..e];
 
-    // Dispatch on first byte.
+    // The gap is non-empty: it MUST be a scalar token, and the state
+    // must allow a scalar at this position. Strings and containers are
+    // handled by their structural-token cases, not here.
+    if !matches!(
+        *state,
+        CtxKind::Top
+            | CtxKind::ArrAfterOpen
+            | CtxKind::ArrAfterComma
+            | CtxKind::ObjAfterColon
+    ) {
+        return Err(qjd_err::QJD_PARSE_ERROR);
+    }
+
+    validate_scalar(&buf[s..e])?;
+    *state = parent_after_value(*state);
+    Ok(())
+}
+
+/// Dispatch a non-empty whitespace-trimmed scalar token to its
+/// grammar validator. Mirrors the previous `check_gap` precedence:
+///   - `true` / `false` / `null` exact → Ok; any other token starting with `t` / `f` / `n` → `QJD_PARSE_ERROR`
+///   - `NaN` / `Infinity` → `QJD_INVALID_NUMBER` (via validate_number)
+///   - `-` / digit / `+` / `.` → `validate_number`
+///   - Else → `QJD_PARSE_ERROR`
+fn validate_scalar(scalar: &[u8]) -> Result<(), qjd_err> {
     match scalar[0] {
         b't' => if scalar == b"true"  { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
         b'f' => if scalar == b"false" { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
         b'n' => if scalar == b"null"  { Ok(()) } else { Err(qjd_err::QJD_PARSE_ERROR) },
-        // RFC-valid and common malformed number starters (+, ., -, digit).
         b'-' | b'0'..=b'9' | b'+' | b'.' => number::validate_number(scalar),
-        // NaN / Infinity are "meant as numbers" → QJD_INVALID_NUMBER, not parse error.
         _ if scalar == b"NaN" || scalar == b"Infinity" => number::validate_number(scalar),
-        // Wrong-case literals (TRUE, NULL), identifiers (undefined), other garbage.
         _ => Err(qjd_err::QJD_PARSE_ERROR),
     }
 }
@@ -310,4 +421,68 @@ mod tests {
             Err(qjd_err::QJD_TRAILING_CONTENT),
         );
     }
+
+    // ── grammar state machine (validate_eager_values) ──────────────────
+
+    #[test]
+    fn grammar_accepts_empty_containers() {
+        for buf in [&b"{}"[..], &b"[]"[..]] {
+            assert!(validate_eager_values(buf, &ix(buf)).is_ok(),
+                "grammar should accept {:?}", buf);
+        }
+    }
+
+    #[test]
+    fn grammar_accepts_simple_values() {
+        for buf in [
+            &b"{\"a\":1}"[..], &b"[1,2,3]"[..],
+            &b"[true,false,null]"[..], &b"\"hi\""[..], &b"42"[..],
+            &b"{\"a\":[1,{\"b\":2}]}"[..],
+        ] {
+            assert!(validate_eager_values(buf, &ix(buf)).is_ok(),
+                "grammar should accept {:?}", buf);
+        }
+    }
+
+    #[test]
+    fn grammar_rejects_missing_colon() {
+        let buf = b"{\"a\"}";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
+
+    #[test]
+    fn grammar_rejects_leading_comma_with_value() {
+        let buf = b"[,1]";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
+
+    #[test]
+    fn grammar_rejects_missing_comma_in_object() {
+        let buf = b"{\"a\":1\"b\":2}";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
+
+    #[test]
+    fn grammar_rejects_non_string_object_key() {
+        let buf = b"{1:1}";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
+
+    #[test]
+    fn grammar_rejects_colon_in_array() {
+        let buf = b"[1:2]";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
+
+    #[test]
+    fn grammar_rejects_missing_comma_between_arrays() {
+        let buf = b"[3[4]]";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
+
+    #[test]
+    fn grammar_rejects_trailing_garbage_inside_object() {
+        let buf = b"{\"a\":\"a\" 123}";
+        assert_eq!(validate_eager_values(buf, &ix(buf)), Err(qjd_err::QJD_PARSE_ERROR));
+    }
 }
diff --git a/tests/json_test_suite.rs b/tests/json_test_suite.rs
index 31b7b1b..c799395 100644
--- a/tests/json_test_suite.rs
+++ b/tests/json_test_suite.rs
@@ -34,51 +34,12 @@ const KNOWN_Y_FAILURES: &[&str] = &[
 
 /// n_* files that we currently accept but shouldn't (validator gap).
 ///
-/// All 13 entries below require a grammar-aware structural pass that tracks
-/// which token types are legal in each parser state (array element, object
-/// key, object value, etc.).  That pass is deferred to issue #37.
-///
-/// The current validator only catches structural errors detectable from
-/// bracket balance + gap heuristics; it does not enforce:
-///   - that object keys must be strings
-///   - that `:` vs `,` are used in the right places
-///   - that array elements are separated by commas (not colons/semicolons)
-///   - leading commas before values (gap heuristic fires only for `[,]`)
-///   - missing commas between items when no structural gap exists
-///
-/// Fix: implement a state-machine pass in src/validate/mod.rs that tracks
-/// parser state (AfterKey, AfterColon, AfterValue, …) and rejects tokens
-/// that violate the grammar at that state.  Removing a file from this list
-/// re-enables the assertion.
+/// The grammar-aware eager pass in src/validate/mod.rs tracks parser
+/// state per container and rejects token transitions that violate
+/// RFC 8259.  Removing a file from this list re-enables the assertion.
 const KNOWN_N_FAILURES: &[&str] = &[
-    // ── array structural gaps ────────────────────────────────────────────
-    // ["": 1] — colon inside array (issue #37: grammar-aware pass)
-    "n_array_colon_instead_of_comma.json",
-    // [,1] — leading comma before first value (issue #37)
-    "n_array_comma_and_number.json",
-    // [3[4]] — missing comma between elements (issue #37)
-    "n_array_inner_array_no_comma.json",
-    // [1:2] — semicolon used instead of comma (issue #37)
-    "n_array_items_separated_by_semicolon.json",
-    // [   , ""] — leading comma (gap heuristic only catches [,] not [  ,v]) (issue #37)
-    "n_array_missing_value.json",
-    // ── object structural gaps ───────────────────────────────────────────
-    // {"x", null} — comma instead of colon (issue #37)
-    "n_object_comma_instead_of_colon.json",
-    // {"a":"a" 123} — missing comma between key-value pairs (issue #37)
-    "n_object_garbage_at_end.json",
-    // {:"b"} — missing object key (issue #37)
-    "n_object_missing_key.json",
-    // {"a" "b"} — missing colon between key and value (issue #37)
-    "n_object_missing_semicolon.json",
-    // {1:1} — non-string key: number (issue #37)
-    "n_object_non_string_key.json",
-    // {9999E9999:1} — non-string key: huge number (issue #37)
-    "n_object_non_string_key_but_huge_number_instead.json",
-    // {null:null,null:null} — non-string key: null literal (issue #37)
-    "n_object_repeated_null_null.json",
-    // { "foo" : "bar", "a" } — trailing key without value (issue #37)
-    "n_object_with_single_string.json",
+    // (intentionally empty — see git history for the previous list,
+    // which was closed by the grammar-aware structural pass.)
 ];
 
 fn corpus_dir() -> &'static Path {
diff --git a/tests/rfc8259_compliance.rs b/tests/rfc8259_compliance.rs
index b85b921..790511d 100644
--- a/tests/rfc8259_compliance.rs
+++ b/tests/rfc8259_compliance.rs
@@ -398,11 +398,10 @@ mod structural {
     }
 
     // RFC 8259 §4: colon between key and value is mandatory.
-    // The scanner emits {"a"} as {""} with no ':' — eager does not detect this
-    // because no structural gap heuristic covers the absence of ':'.
-    // Deferred to a follow-up grammar-aware pass (issue #37).
+    // The grammar-aware pass detects this: after consuming the key
+    // string the state is ObjAfterKey, and `}` is rejected because
+    // it can only close ObjAfterOpen/ObjAfterValue.
     #[test]
-    #[ignore = "missing-colon detection deferred — grammar-aware pass required (issue #37)"]
     fn missing_colon() {
         assert_rejects_eager!("{\"a\"}", QJD_PARSE_ERROR);
     }
@@ -415,11 +414,10 @@ mod structural {
         assert_rejects_eager!("[,]", QJD_PARSE_ERROR);
     }
 
-    // [,1] — leading comma followed by a value: the gap between '[' and ','
-    // is empty (no value yet) but prev_structural is '[', not ',' — so the
-    // heuristic does not fire. Deferred to a grammar-aware pass (issue #37).
+    // [,1] — leading comma followed by a value: the grammar-aware
+    // pass rejects this because `,` is invalid in the ArrAfterOpen
+    // state (only a value or `]` is allowed after `[`).
     #[test]
-    #[ignore = "leading-comma-before-value detection deferred — grammar-aware pass required (issue #37)"]
     fn leading_comma_array_with_value() {
         assert_rejects_eager!("[,1]", QJD_PARSE_ERROR);
     }
@@ -452,13 +450,10 @@ mod structural {
     }
 
     // Missing comma inside an object (no structural separator between values):
-    // {"a":1"b":2} — the scanner emits `{`, `"`, `"`, `:`, `"`, `"`, `}`.
-    // The gap between the second close-quote and the third open-quote is empty,
-    // but prev_structural is `"` (quote) and next is `"` — the heuristic only
-    // fires on `:` / `,`, so this slips through.
-    // Deferred to grammar-aware pass (issue #37).
+    // {"a":1"b":2} — after consuming the value `1`, the state is
+    // ObjAfterValue; the next `"` (start of "b") is rejected because
+    // only `,` or `}` may follow a value inside an object.
     #[test]
-    #[ignore = "missing-comma-in-object detection deferred — grammar-aware pass required (issue #37)"]
     fn missing_comma_in_object() {
         assert_rejects_eager!("{\"a\":1\"b\":2}", QJD_PARSE_ERROR);
     }