Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions sjsonnet/src-js/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ object CharSWAR {
val len = s.length
while (i < len) {
val c = s.charAt(i)
if (c < 32 || c == '"' || c == '\\') return true
if (c < 32 || c == '"' || c == '\\' || (c >= 0x7f && c <= 0x9f)) return true
i += 1
}
false
Expand All @@ -17,7 +17,7 @@ object CharSWAR {
var i = from
while (i < to) {
val c = arr(i)
if (c < 32 || c == '"' || c == '\\') return true
if (c < 32 || c == '"' || c == '\\' || (c >= 0x7f && c <= 0x9f)) return true
i += 1
}
false
Expand All @@ -29,7 +29,7 @@ object CharSWAR {
var i = from
while (i < to) {
val c = s.charAt(i)
if (c < 32 || c == '"' || c == '\\' || c >= 128) return false
if (c < 32 || c == '"' || c == '\\' || c >= 0x7f) return false
i += 1
}
true
Expand All @@ -40,7 +40,7 @@ object CharSWAR {
var i = from
while (i < to) {
val b = arr(i) & 0xff
if (b < 32 || b == '"' || b == '\\') return true
if (b < 32 || b == '"' || b == '\\' || b == 0x7f) return true
i += 1
}
false
Expand All @@ -50,7 +50,7 @@ object CharSWAR {
var i = from
while (i < to) {
val b = arr(i) & 0xff
if (b < 32 || b == '"' || b == '\\') return i
if (b < 32 || b == '"' || b == '\\' || b == 0x7f) return i
i += 1
}
-1
Expand Down
58 changes: 27 additions & 31 deletions sjsonnet/src-jvm/sjsonnet/CharSWAR.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;

/**
* SWAR (SIMD Within A Register) escape-char scanner for JSON string rendering.
*
* <p>Detects characters requiring JSON escaping: control chars ({@code < 32}),
* double-quote ({@code '"'}), and backslash ({@code '\\'}).
* double-quote ({@code '"'}), backslash ({@code '\\'}), DEL ({@code 0x7F}),
* and C1 control characters ({@code 0x80–0x9F}).
*
* <p>For strings above a threshold length, converts to ISO-8859-1 bytes and
* processes 8 bytes at a time using {@link VarHandle} bulk reads + Hacker's
* Delight zero-detection formula. For shorter strings, uses a scalar charAt loop.
* <p>String scans use char-level semantics. Byte-array scans process 8 bytes at a time using
* {@link VarHandle} bulk reads + Hacker's Delight zero-detection formula.
*
* <p>Based on the SWAR technique from Hacker's Delight Ch. 6, as used by
* <a href="https://github.com/netty/netty/blob/4.2/common/src/main/java/io/netty/util/internal/SWARUtil.java">
Expand Down Expand Up @@ -51,9 +50,6 @@ private CharSWAR() {}
/** Mask for bits 5-7 of each byte; zero result means byte < 32. */
private static final long CTRL = 0xE0E0_E0E0_E0E0_E0E0L;

/** Below this length, scalar charAt is faster than SWAR + byte[] conversion. */
private static final int SWAR_THRESHOLD = 128;

private static final long U16_HOLE = 0x7FFF_7FFF_7FFF_7FFFL;
private static final long U16_QUOTE = 0x0022_0022_0022_0022L;
private static final long U16_BSLAS = 0x005C_005C_005C_005CL;
Expand All @@ -66,21 +62,14 @@ private CharSWAR() {}
*/
static boolean hasEscapeChar(String str) {
int len = str.length();
if (len < SWAR_THRESHOLD) {
return hasEscapeCharScalar(str, len);
}
// ISO-8859-1 encoding is a JVM intrinsic for LATIN1 compact strings —
// essentially a memcpy of the internal byte[]. Chars > 255 map to '?'
// (0x3F), which is safe (not a control char, not '"', not '\\').
byte[] bytes = str.getBytes(StandardCharsets.ISO_8859_1);
return hasEscapeCharSWAR(bytes, 0, bytes.length);
return hasEscapeCharScalar(str, len);
}

/**
* Check if any byte in {@code arr[from..to)} needs JSON string escaping.
* Used by ByteRenderer for in-place SWAR scan on byte[] buffers.
* UTF-8 multi-byte sequences never produce bytes matching '"', '\\', or &lt; 0x20,
* so this is safe for scanning UTF-8 encoded data.
* Used by ByteRenderer for in-place SWAR scan on UTF-8 byte[] buffers. UTF-8 multi-byte
* sequences can contain high-bit bytes, but those are data bytes, not JSON escapes; callers that
* need C1 detection must scan the original chars before encoding.
*/
static boolean hasEscapeChar(byte[] arr, int from, int to) {
return hasEscapeCharSWAR(arr, from, to);
Expand Down Expand Up @@ -116,7 +105,7 @@ static boolean isAsciiJsonSafe(String str, int from, int to) {
}
while (i < to) {
char c = str.charAt(i);
if (c < 32 || c == '"' || c == '\\' || c >= 128) return false;
if (c < 32 || c == '"' || c == '\\' || c >= 0x7F) return false;
i++;
}
return true;
Expand All @@ -128,7 +117,7 @@ static boolean isAsciiJsonSafe(String str, int from, int to) {
static boolean hasEscapeChar(char[] arr, int from, int to) {
for (int i = from; i < to; i++) {
char c = arr[i];
if (c < 32 || c == '"' || c == '\\') return true;
if (c < 32 || c == '"' || c == '\\' || (c >= 0x7F && c <= 0x9F)) return true;
}
return false;
}
Expand All @@ -150,7 +139,7 @@ static int findFirstEscapeChar(byte[] arr, int from, int to) {
}
while (i < to) {
int b = arr[i] & 0xFF;
if (b < 32 || b == '"' || b == '\\') return i;
if (b < 32 || b == '"' || b == '\\' || b == 0x7F) return i;
i++;
}
return -1;
Expand All @@ -167,18 +156,18 @@ private static boolean hasEscapeCharSWAR(byte[] arr, int from, int to) {
// Tail: remaining 0-7 bytes
while (i < to) {
int b = arr[i] & 0xFF;
if (b < 32 || b == '"' || b == '\\') return true;
if (b < 32 || b == '"' || b == '\\' || b == 0x7F) return true;
i++;
}
return false;
}

/**
* 8-bit SWAR: returns true if any byte lane in {@code word}
* contains '"' (0x22), '\\' (0x5C), or a control char (&lt; 0x20).
* 8-bit SWAR mask for bytes requiring JSON escaping: control chars ({@code < 0x20}),
* double-quote ({@code '"'}), backslash ({@code '\\'}), or DEL ({@code 0x7F}).
*
* <p>Uses Netty/Pekko pattern: XOR to produce zero lanes, then
* Hacker's Delight formula to detect zero bytes.
* <p>Uses Netty/Pekko pattern: XOR to produce zero lanes, then Hacker's Delight formula to
* detect zero bytes.
*/
private static long swarMatchMask(long word) {
// 1. Detect '"' via XOR + zero-detection (Netty SWARUtil.applyPattern)
Expand All @@ -193,7 +182,11 @@ private static long swarMatchMask(long word) {
long c = word & CTRL;
long cz = ~((c & HOLE) + HOLE | c | HOLE);

return qz | bz | cz;
// 4. Detect DEL (0x7F) via XOR + zero-detection (HOLE == DEL broadcast pattern)
long d = word ^ HOLE;
long dz = ~((d & HOLE) + HOLE | d | HOLE);

return qz | bz | cz | dz;
}

private static int firstMatchedByte(long mask) {
Expand All @@ -202,13 +195,16 @@ private static int firstMatchedByte(long mask) {
: Long.numberOfLeadingZeros(mask)) >>> 3;
}

private static final long U16_DEL = 0x007F_007F_007F_007FL;

private static boolean swarHasUnsafeAsciiChar(long word) {
if ((word & U16_ASCII) != 0L) return true;

long qz = zero16(word ^ U16_QUOTE);
long bz = zero16(word ^ U16_BSLAS);
long cz = zero16(word & U16_CTRL);
return (qz | bz | cz) != 0L;
long dz = zero16(word ^ U16_DEL);
return (qz | bz | cz | dz) != 0L;
}

private static long zero16(long word) {
Expand All @@ -219,15 +215,15 @@ private static long zero16(long word) {
private static boolean hasEscapeCharScalar(String s, int len) {
for (int i = 0; i < len; i++) {
char c = s.charAt(i);
if (c < 32 || c == '"' || c == '\\') return true;
if (c < 32 || c == '"' || c == '\\' || (c >= 0x7F && c <= 0x9F)) return true;
}
return false;
}

private static boolean isAsciiJsonSafeScalar(String s, int from, int to) {
for (int i = from; i < to; i++) {
char c = s.charAt(i);
if (c < 32 || c == '"' || c == '\\' || c >= 128) return false;
if (c < 32 || c == '"' || c == '\\' || c >= 0x7F) return false;
}
return true;
}
Expand Down
46 changes: 24 additions & 22 deletions sjsonnet/src-native/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import scala.scalanative.runtime.{ByteArray, Intrinsics}
* Uses Scala Native's `Intrinsics.loadLong` + `ByteArray.atRawUnsafe` for zero-overhead 8-byte bulk
* reads directly from Array[Byte] memory, matching the JVM VarHandle SWAR performance.
*
* For String scanning, uses `getBytes(UTF-8)` + byte[] SWAR. On Scala Native compact strings are
* UTF-16, so converting to bytes first is necessary.
* String scans use char-level semantics. Byte-array scans process 8 bytes at a time using
* `Intrinsics.loadLong`.
*
* Inspired by netty's SWARUtil (io.netty.util.SWARUtil) and Hacker's Delight Ch. 6 zero-detection
* formula.
Expand All @@ -28,10 +28,11 @@ object CharSWAR {
private final val U16_BSLAS = 0x005c005c005c005cL
private final val U16_CTRL = 0xffe0ffe0ffe0ffe0L
private final val U16_ASCII = 0xff80ff80ff80ff80L
private final val U16_DEL = 0x007f007f007f007fL

/**
* SWAR: returns a mask for byte lanes in `word` containing '"' (0x22), '\\' (0x5C), or a control
* char (< 0x20).
* SWAR: returns a mask for byte lanes in `word` containing '"' (0x22), '\\' (0x5C), a control
* char (< 0x20), or DEL (0x7F).
*/
@inline private def swarMatchMask(word: Long): Long = {
// 1. Detect '"' via XOR + zero-detection
Expand All @@ -46,7 +47,11 @@ object CharSWAR {
val c = word & CTRL
val cz = ~((c & HOLE) + HOLE | c | HOLE)

qz | bz | cz
// 4. Detect DEL (0x7F) via XOR + zero-detection (HOLE == DEL broadcast pattern)
val d = word ^ HOLE
val dz = ~((d & HOLE) + HOLE | d | HOLE)

qz | bz | cz | dz
}

@inline private def firstMatchedByte(mask: Long): Int =
Expand All @@ -55,19 +60,14 @@ object CharSWAR {

def hasEscapeChar(s: String): Boolean = {
val len = s.length
if (len < 128) {
hasEscapeCharScalar(s, len)
} else {
val bytes = s.getBytes(java.nio.charset.StandardCharsets.UTF_8)
hasEscapeChar(bytes, 0, bytes.length)
}
hasEscapeCharScalar(s, len)
}

def hasEscapeChar(arr: Array[Char], from: Int, to: Int): Boolean = {
var i = from
while (i < to) {
val c = arr(i)
if (c < 32 || c == '"' || c == '\\') return true
if (c < 32 || c == '"' || c == '\\' || (c >= 0x7f && c <= 0x9f)) return true
i += 1
}
false
Expand All @@ -92,16 +92,17 @@ object CharSWAR {
}
while (i < to) {
val c = s.charAt(i)
if (c < 32 || c == '"' || c == '\\' || c >= 128) return false
if (c < 32 || c == '"' || c == '\\' || c >= 0x7f) return false
i += 1
}
true
}

/**
* SWAR scan for byte[] using Intrinsics.loadLong for zero-overhead bulk reads. Processes 8 bytes
* per iteration — same throughput as the JVM VarHandle path. UTF-8 multi-byte sequences never
* produce bytes matching '"', '\', or < 0x20.
* per iteration — same throughput as the JVM VarHandle path. UTF-8 multi-byte sequences can
* contain high-bit bytes, but those are data bytes, not JSON escapes; callers that need C1
* detection must scan the original chars before encoding.
*/
def hasEscapeChar(arr: Array[Byte], from: Int, to: Int): Boolean = {
val len = to - from
Expand All @@ -119,7 +120,7 @@ object CharSWAR {
// Tail: remaining 0-7 bytes
while (i < to) {
val b = arr(i) & 0xff
if (b < 32 || b == '"' || b == '\\') return true
if (b < 32 || b == '"' || b == '\\' || b == 0x7f) return true
i += 1
}
false
Expand All @@ -141,7 +142,7 @@ object CharSWAR {
}
while (i < to) {
val b = arr(i) & 0xff
if (b < 32 || b == '"' || b == '\\') return i
if (b < 32 || b == '"' || b == '\\' || b == 0x7f) return i
i += 1
}
-1
Expand All @@ -151,7 +152,7 @@ object CharSWAR {
var i = 0
while (i < len) {
val c = s.charAt(i)
if (c < 32 || c == '"' || c == '\\') return true
if (c < 32 || c == '"' || c == '\\' || (c >= 0x7f && c <= 0x9f)) return true
i += 1
}
false
Expand All @@ -162,7 +163,8 @@ object CharSWAR {
val qz = zero16(word ^ U16_QUOTE)
val bz = zero16(word ^ U16_BSLAS)
val cz = zero16(word & U16_CTRL)
(qz | bz | cz) != 0L
val dz = zero16(word ^ U16_DEL)
(qz | bz | cz | dz) != 0L
}

@inline private def zero16(word: Long): Long =
Expand All @@ -172,7 +174,7 @@ object CharSWAR {
var i = from
while (i < to) {
val c = s.charAt(i)
if (c < 32 || c == '"' || c == '\\' || c >= 128) return false
if (c < 32 || c == '"' || c == '\\' || c >= 0x7f) return false
i += 1
}
true
Expand All @@ -182,7 +184,7 @@ object CharSWAR {
var i = from
while (i < to) {
val b = arr(i) & 0xff
if (b < 32 || b == '"' || b == '\\') return true
if (b < 32 || b == '"' || b == '\\' || b == 0x7f) return true
i += 1
}
false
Expand All @@ -192,7 +194,7 @@ object CharSWAR {
var i = from
while (i < to) {
val b = arr(i) & 0xff
if (b < 32 || b == '"' || b == '\\') return i
if (b < 32 || b == '"' || b == '\\' || b == 0x7f) return i
i += 1
}
-1
Expand Down
Loading
Loading