Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ env:

jobs:
rust:
name: Rust tests
runs-on: ubuntu-latest
name: Rust tests (${{ matrix.os }})
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-14]
steps:
- uses: actions/checkout@v4

Expand All @@ -27,17 +30,17 @@ jobs:
~/.cargo/registry
~/.cargo/git
target
key: cargo-${{ runner.os }}-${{ hashFiles('Cargo.toml') }}
key: cargo-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('Cargo.toml') }}
restore-keys: |
cargo-${{ runner.os }}-
cargo-${{ runner.os }}-${{ runner.arch }}-

- name: Build (release)
run: cargo build --release

- name: Test (release)
run: cargo test --release

- name: Test scalar-only (no AVX2 feature)
- name: Test scalar-only (no AVX2/NEON feature)
run: cargo test --release --no-default-features

- name: Test with test-panic feature
Expand Down
34 changes: 12 additions & 22 deletions src/scan/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,6 @@ unsafe fn tag_mask16(tag: uint8x16_t, bits: u8) -> u16 {
movemask16(vtstq_u8(tag, vdupq_n_u8(bits)))
}

/// Builds a 16-bit mask describing which bytes of `bytes` equal `needle`.
///
/// `needle` is broadcast to every lane, compared lane-wise against `bytes`,
/// and the comparison vector is handed to `movemask16` to be reduced to a
/// `u16`. NOTE(review): `movemask16` is defined outside this view; it
/// presumably packs one bit per lane — confirm bit ordering there.
///
/// # Safety
///
/// Invokes NEON intrinsics; callers must uphold the same requirements as the
/// other `unsafe` helpers in this file.
#[inline(always)]
unsafe fn byte_mask16(bytes: uint8x16_t, needle: u8) -> u16 {
    let splat = vdupq_n_u8(needle);
    let matches = vceqq_u8(bytes, splat);
    movemask16(matches)
}

/// Builds a 64-bit mask for four 16-byte vectors by combining the
/// `byte_mask16` result of each one.
///
/// The mask for `c0` occupies bits 0..16, `c1` bits 16..32, `c2` bits 32..48
/// and `c3` bits 48..64.
///
/// # Safety
///
/// Forwards to `byte_mask16`, which invokes NEON intrinsics; the same caller
/// obligations apply.
#[inline(always)]
unsafe fn byte_mask64(
    c0: uint8x16_t,
    c1: uint8x16_t,
    c2: uint8x16_t,
    c3: uint8x16_t,
    needle: u8,
) -> u64 {
    // Widen each 16-bit mask losslessly before shifting it into its slot.
    let part0 = u64::from(byte_mask16(c0, needle));
    let part1 = u64::from(byte_mask16(c1, needle));
    let part2 = u64::from(byte_mask16(c2, needle));
    let part3 = u64::from(byte_mask16(c3, needle));
    part0 | (part1 << 16) | (part2 << 32) | (part3 << 48)
}

#[inline(always)]
unsafe fn classify_tags64(
c0: uint8x16_t,
Expand Down Expand Up @@ -170,10 +151,19 @@ unsafe fn scan_neon_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> {

// In-string fast probe: while already in a string, avoid the full
// nibble-LUT classification unless this block contains quote/backslash.
// Uses vmaxvq_u8 on OR'd comparison results instead of byte_mask64 to
// avoid the expensive movemask16 pairwise-add chain (~3x faster probe).
if in_string != 0 {
let quote_probe = byte_mask64(c0, c1, c2, c3, b'"');
let backslash_probe = byte_mask64(c0, c1, c2, c3, b'\\');
if (quote_probe | backslash_probe) == 0 {
let quote = vdupq_n_u8(b'"');
let backslash = vdupq_n_u8(b'\\');
let m0 = vorrq_u8(vceqq_u8(c0, quote), vceqq_u8(c0, backslash));
let m1 = vorrq_u8(vceqq_u8(c1, quote), vceqq_u8(c1, backslash));
let m2 = vorrq_u8(vceqq_u8(c2, quote), vceqq_u8(c2, backslash));
let m3 = vorrq_u8(vceqq_u8(c3, quote), vceqq_u8(c3, backslash));
let m01 = vorrq_u8(m0, m1);
let m23 = vorrq_u8(m2, m3);
let m = vorrq_u8(m01, m23);
if vmaxvq_u8(m) == 0 {
bs_carry = 0;
i += 64;
// Cross-chunk jump: with no quote/backslash in the chunk we just
Expand Down
Loading