From f64c9c3e7b7fa88bd324124dee7f0eff26f8b0db Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Wed, 13 May 2026 15:23:32 -0700 Subject: [PATCH 1/9] perf: unroll VarInt parsing loops Signed-off-by: Anthony Petrov --- .../runtime/io/ReadableSequentialData.java | 57 +++++- .../io/buffer/ByteArrayBufferedData.java | 173 ++++++++++++++---- .../hedera/pbj/runtime/io/buffer/Bytes.java | 70 ++++++- .../runtime/io/buffer/DirectBufferedData.java | 165 ++++++++++++++--- .../runtime/io/buffer/RandomAccessData.java | 60 +++++- .../varint/read/VarIntByteArrayReadBench.java | 5 +- 6 files changed, 438 insertions(+), 92 deletions(-) diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java index e9c858536..39e60e348 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java @@ -473,15 +473,58 @@ default int readVarInt(final boolean zigZag) throws BufferUnderflowException, Un * @throws DataEncodingException if the variable long cannot be decoded */ default long readVarLong(final boolean zigZag) throws BufferUnderflowException, UncheckedIOException { - long value = 0; + byte b; + long v = (b = readByte()) & 0x7F; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } - for (int i = 0; i < 10; i++) { - final byte b = readByte(); - value |= (long) (b & 0x7F) << (i * 7); - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + v |= ((b = readByte()) & 0x7F) << 7; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = readByte()) & 0x7F) << 14; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = readByte()) & 0x7F) << 21; + if ((b & 0x80) == 0) { + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = readByte()) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = readByte()) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = readByte()) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = readByte()) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + + v |= ((b = readByte()) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + b = readByte(); + if ((b & 0x80) == 0) { + v |= (long) b << 63; + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + throw new DataEncodingException("Malformed var int"); } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java index ab2b6b281..f81d71edf 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java @@ -2,7 +2,6 @@ package com.hedera.pbj.runtime.io.buffer; import com.hedera.pbj.runtime.io.DataEncodingException; -import com.hedera.pbj.runtime.io.UnsafeUtils; import edu.umd.cs.findbugs.annotations.NonNull; import java.io.IOException; import java.io.InputStream; @@ -142,27 +141,75 @@ public long getVarLong(final long offset, final boolean zigZag) { return getVar(Math.toIntExact(offset), zigZag); } - private long getVar(final int offset, final boolean zigZag) { + private long getVar(int offset, final boolean zigZag) { checkOffset(offset, buffer.limit()); + offset += arrayOffset; - final int readOffset = arrayOffset + offset; - int rem = buffer.limit() - offset; - if (rem > 10) { - rem = 10; + final int limit = Math.min(arrayOffset + (int) length(), offset + 10); + 
if (offset >= limit) throw new BufferUnderflowException(); + + byte b; + long v = (b = array[offset++]) & 0x7F; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + if (offset >= limit) throw new BufferUnderflowException(); - long value = 0; + v |= ((b = array[offset++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getArrayByteNoChecks(array, readOffset + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + v |= ((b = array[offset++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } - throw (i == 10) ? 
new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + if (offset >= limit) throw new BufferUnderflowException(); + + b = array[offset++]; + if ((b & 0x80) == 0) { + v |= (long) b << 63; + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + throw new DataEncodingException("Malformed var int"); } /** @@ -250,25 +297,83 @@ public long readVarLong(final boolean zigZag) { private long readVar(final boolean zigZag) { final int pos = buffer.position(); - final int offset = arrayOffset + pos; - int rem = buffer.remaining(); - if (rem > 10) { - rem = 10; - } - - long value = 0; - - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getArrayByteNoChecks(array, offset + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - buffer.position(pos + i); - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + int offset = arrayOffset + pos; + + final int limit = Math.min(offset + buffer.remaining(), offset + 10); + if (offset >= limit) throw new BufferUnderflowException(); + + byte b; + long v = (b = array[offset++]) & 0x7F; + if ((b & 0x80) == 0) { + buffer.position(pos + 1); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + buffer.position(pos + 2); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + buffer.position(pos + 3); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + buffer.position(pos + 4); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + buffer.position(pos + 5); + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + buffer.position(pos + 6); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + buffer.position(pos + 7); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + buffer.position(pos + 8); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } - throw (i == 10) ? new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = array[offset++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + buffer.position(pos + 9); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + b = array[offset++]; + if ((b & 0x80) == 0) { + buffer.position(pos + 10); + v |= (long) b << 63; + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + + throw new DataEncodingException("Malformed var int"); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java index 5d0c03c36..5fe28a69e 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java @@ -865,20 +865,70 @@ private long getVar(int offset, final boolean zigZag) { } offset += start; - int rem = (start + length) - offset; - if (rem > 10) { - rem = 10; + final int limit = Math.min(start + length, offset + 10); + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + byte b; + long v = (b = buffer[offset++]) & 0x7F; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); - long value = 0; + v |= ((b = buffer[offset++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); - for (int i = 0; i != rem; i++) { - final byte b = UnsafeUtils.getArrayByteNoChecks(buffer, offset + i); - value |= (long) (b & 0x7F) << (i * 7); - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + v |= ((b = buffer[offset++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + v |= ((b = buffer[offset++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + v |= ((b = buffer[offset++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + v |= ((b = buffer[offset++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + v |= ((b = buffer[offset++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + v |= ((b = buffer[offset++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + v |= ((b = buffer[offset++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new DataEncodingException("Malformed var int"); + + b = buffer[offset++]; + if ((b & 0x80) == 0) { + v |= (long) b << 63; + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + throw new DataEncodingException("Malformed var int"); } } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java index 39672e81d..ff4ef63d5 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java @@ -87,27 +87,74 @@ public long getVarLong(final long offset, final boolean zigZag) { return getVar(Math.toIntExact(offset), zigZag); } - private long getVar(final int offset, final boolean zigZag) { + private long getVar(int offset, final boolean zigZag) { checkOffset(offset, length()); - int rem = Math.toIntExact(buffer.limit() - offset); - if (rem > 10) { - rem = 10; + final int limit = Math.min(buffer.limit(), offset + 10); + if (offset >= limit) throw new 
BufferUnderflowException(); + + byte b; + long v = (b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + if (offset >= limit) throw new BufferUnderflowException(); - long value = 0; + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 7; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getDirectBufferByte(buffer, offset + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - buffer.position(offset + i); - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 14; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 21; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } - throw (i == 10) ? new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++); + if ((b & 0x80) == 0) { + v |= (long) b << 63; + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + throw new DataEncodingException("Malformed var int"); } /** @@ -185,25 +232,83 @@ public long readVarLong(final boolean zigZag) { } private long readVar(final boolean zigZag) { - final int pos = buffer.position(); - int rem = buffer.remaining(); - if (rem > 10) { - rem = 10; + int offset = buffer.position(); + + final int limit = Math.min(offset + buffer.remaining(), offset + 10); + if (offset >= limit) throw new BufferUnderflowException(); + + byte b; + long v = (b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + if (offset >= limit) throw new BufferUnderflowException(); - long value = 0; + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 7; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, pos + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - buffer.position(pos + i); - return zigZag ? 
(value >>> 1) ^ -(value & 1) : value; - } + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 14; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 21; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } - throw (i == 10) ? new DataEncodingException("") : new BufferUnderflowException(); + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + buffer.position(offset); + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + if (offset >= limit) throw new BufferUnderflowException(); + + b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++); + if ((b & 0x80) == 0) { + buffer.position(offset); + v |= (long) b << 63; + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + throw new DataEncodingException("Malformed var int"); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java index dd9cf8183..deefafbfe 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java @@ -429,15 +429,59 @@ default int getVarInt(final long offset, final boolean zigZag) { * or the end of the buffer is encountered before the last segment of the varlong * @throws DataEncodingException if the var long is malformed */ - default long getVarLong(final long offset, final boolean zigZag) { - long value = 0; - for (int i = 0; i < 10; i++) { - final byte b = getByte(offset + i); - value |= (long) (b & 0x7F) << (i * 7); - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + default long getVarLong(long offset, final boolean zigZag) { + byte b; + long v = (b = getByte(offset++)) & 0x7F; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7F) << 7; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7F) << 14; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7F) << 21; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + v |= ((b = getByte(offset++)) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; } + + v |= ((b = getByte(offset++)) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + + b = getByte(offset++); + if ((b & 0x80) == 0) { + v |= (long) b << 63; + return zigZag ? (v >>> 1) ^ -(v & 1) : v; + } + throw new DataEncodingException("Malformed var int"); } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 98f7453ca..5b35fc717 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -186,8 +186,7 @@ public void pbj_doWhileLoop(final BenchState state, final Blackhole blackhole) { /// A slightly modified copy of Google implementation in CodecInputStream. /// PBJ used to use a very similar algorithm, just before https://github.com/hashgraph/pbj/pull/144 /// where we switched to LEB128. - @SuppressWarnings("lossy-conversions") // the impl is able to support longs, but we ignore that here and use ints. 
- // @Benchmark // disabled because it performs worse than the standard pbj implementation + @Benchmark @OperationsPerInvocation(INVOCATIONS) public void google(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -202,7 +201,7 @@ public void google(final BenchState state, final Blackhole blackhole) { break fastpath; } - int x; + long x; int y; if ((y = state.array[tempPos++]) >= 0) { pos = tempPos; From 87c833fbd3a7b9c120cd893311dee236c228f1bf Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Wed, 13 May 2026 17:06:07 -0700 Subject: [PATCH 2/9] fix google bench Signed-off-by: Anthony Petrov --- .../varint/read/VarIntByteArrayReadBench.java | 90 ++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 5b35fc717..6eca2e3c6 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -186,12 +186,12 @@ public void pbj_doWhileLoop(final BenchState state, final Blackhole blackhole) { /// A slightly modified copy of Google implementation in CodecInputStream. /// PBJ used to use a very similar algorithm, just before https://github.com/hashgraph/pbj/pull/144 /// where we switched to LEB128. 
- @Benchmark + // @Benchmark // disabled because PBJ requires zigZag handling and stricter limit checks @OperationsPerInvocation(INVOCATIONS) public void google(final BenchState state, final Blackhole blackhole) { state.sum = 0; for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { - final int limit = pos + 5; + final int limit = Math.min(state.array.length, pos + 10); fastpath: { @@ -266,6 +266,92 @@ public void google(final BenchState state, final Blackhole blackhole) { blackhole.consume(state.sum); } + /// A modified version of the Google implementation adapted to PBJ. Bytes are widened to long before shifts >= 28. + /// Specifically: + /// * zigZag is handled. Google's original readRawVarint64() doesn't handle zigZag directly. + /// * limit checks are added. Google's original version relies on IOOBE. But PBJ can wrap array slices and still + /// must respect the length of the slice. So in PBJ we cannot rely on the IOOBE. + @Benchmark + @OperationsPerInvocation(INVOCATIONS) + public void google_zigZagAndLimit(final BenchState state, final Blackhole blackhole) { + state.sum = 0; + for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + final int limit = Math.min(state.array.length, pos + 10); + + fastpath: + { + int tempPos = pos; + + if (limit == tempPos) { + break fastpath; + } + + long x; + int y; + if ((y = state.array[tempPos++]) >= 0) { + pos = tempPos; + state.sum += state.zigZag ? 
(y >>> 1) ^ -(y & 1) : y; + continue; + } else if (limit - tempPos < 9) { + break fastpath; + } else if ((y ^= (state.array[tempPos++] << 7)) < 0) { + x = y ^ (~0 << 7); + } else if ((y ^= (state.array[tempPos++] << 14)) >= 0) { + x = y ^ ((~0 << 7) ^ (~0 << 14)); + } else if ((y ^= (state.array[tempPos++] << 21)) < 0) { + x = y ^ ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + } else if ((x = y ^ ((long) state.array[tempPos++] << 28)) >= 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28); + } else if ((x ^= ((long) state.array[tempPos++] << 35)) < 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35); + } else if ((x ^= ((long) state.array[tempPos++] << 42)) >= 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42); + } else if ((x ^= ((long) state.array[tempPos++] << 49)) < 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49); + } else if ((x ^= ((long) state.array[tempPos++] << 56)) >= 0L) { + x ^= (~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56); + } else if ((x ^= ((long) state.array[tempPos++] << 63)) >= 0L) { + x ^= (~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63); + } else { + break fastpath; // Will throw malformedVarint() + } + pos = tempPos; + state.sum += state.zigZag ? (x >>> 1) ^ -(x & 1) : x; + continue; + } + + slowpath: + { + long result = 0; // long: shift reaches 63 + for (int shift = 0; pos < limit && shift < 64; shift += 7) { + final byte b = state.array[pos++]; + result |= (b & 0x7FL) << shift; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (result >>> 1) ^ -(result & 1) : result; + break slowpath; + } + } + throw new DataEncodingException("Malformed var int"); + } + } + blackhole.consume(state.sum); + } + /// A LEB128 with fully unrolled loop. 
// @Benchmark // disabled because the algorithm is missing limit checks @OperationsPerInvocation(INVOCATIONS) From d1c6b1ea6e31b29dec4cd5f279ff1c761c6dfbb6 Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Thu, 14 May 2026 15:34:34 -0700 Subject: [PATCH 3/9] optimize more Signed-off-by: Anthony Petrov --- .../varint/read/VarIntByteArrayReadBench.java | 334 +++++++++++++++++- .../integration/test/VectorVarIntTest.java | 169 +++++++++ 2 files changed, 502 insertions(+), 1 deletion(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 6eca2e3c6..86d6fd1ad 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -445,7 +445,7 @@ public void loopLess_withLimitChecks(final BenchState state, final Blackhole bla } /// A vectorized LEB128, similar to the loopLess above, with some minor tweaks and supporting long varints. 
- @Benchmark + // @Benchmark // disabled because of improvements in versions below @OperationsPerInvocation(INVOCATIONS) public void vector_zigZag(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -529,6 +529,338 @@ public void vector_zigZag(final BenchState state, final Blackhole blackhole) { blackhole.consume(state.sum); } + /// A vectorized LEB128 version that: + /// * avoids limit checks if we have enough bytes ahead (or if we relied on an EOFException in case of streams) + /// * uses byte for 1 byte varint, int for a few bytes, and finally long for many bytes varint + @Benchmark + @OperationsPerInvocation(INVOCATIONS) + public void vector_smartLimitByteIntLong(final BenchState state, final Blackhole blackhole) { + state.sum = 0; + for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + final int limit = Math.min(state.array.length, pos + 10); + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + byte b = state.array[pos++]; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (b >>> 1) ^ -(b & 1) : b; + continue; + } + + int vi = b & 0x7F; + + // Fast path w/o any limit checks if we have all 10 bytes, or if it was a stream b/c we'd get an EOF + if (pos + 9 == limit) { + vi |= ((b = state.array[pos++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + vi |= ((b = state.array[pos++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + vi |= ((b = state.array[pos++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + long vl = vi | ((b = state.array[pos++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + b = state.array[pos++]; + if ((b & 0x80) == 0) { + vl |= (long) b << 63; + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + throw new DataEncodingException("Malformed var int"); + } + + // Slower path because this is an array/buffer, and we have less than 9 bytes ahead + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vi |= ((b = state.array[pos++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vi |= ((b = state.array[pos++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vi |= ((b = state.array[pos++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + long vl = vi | ((b = state.array[pos++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + b = state.array[pos++]; + if ((b & 0x80) == 0) { + vl |= (long) b << 63; + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + throw new DataEncodingException("Malformed var int"); + } + blackhole.consume(state.sum); + } + + /// A vectorized LEB128 similar to vector_smartLimitByteIntLong that also uses an XOR trick from Google impl. + @Benchmark + @OperationsPerInvocation(INVOCATIONS) + public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { + state.sum = 0; + for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + final int limit = Math.min(state.array.length, pos + 10); + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + int vi = state.array[pos++]; + if (vi >= 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + // Fast path w/o any limit checks if we have all 10 bytes, or if it was a stream b/c we'd get an EOF + if (pos + 9 == limit) { + if ((vi ^= state.array[pos++] << 7) < 0) { + vi ^= (~0 << 7); + state.sum += state.zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + long vl = vi; + if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + throw new DataEncodingException("Malformed var int"); + } + + // Slower path because this is an array/buffer, and we have less than 9 bytes ahead + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vi ^= state.array[pos++] << 7) < 0) { + vi ^= (~0 << 7); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vi ^= state.array[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vi ^= state.array[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + long vl = vi; + if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) state.array[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) state.array[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + throw new DataEncodingException("Malformed var int"); + } + blackhole.consume(state.sum); + } + public static void main(String[] args) throws Exception { Options opt = new OptionsBuilder() .include(VarIntByteArrayReadBench.class.getSimpleName()) diff --git a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java index 830bc0132..df5acc3af 100644 --- a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java +++ b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java @@ -99,4 +99,173 @@ public void testVectorVarInt(boolean zigZag) { bd.reset(); } } + + /// A refactored copy from VarIntByteArrayReadBench.vector_fastXOR. 
+ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { + final int limit = Math.min(bytes.length, pos + 10); + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + int vi = bytes[pos++]; + if (vi >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + // Fast path w/o any limit checks if we have all 10 bytes, or if it was a stream b/c we'd get an EOF + if (pos + 9 == limit) { + if ((vi ^= bytes[pos++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + long vl = vi; + if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + throw new DataEncodingException("Malformed var int"); + } + + // Slower path because this is an array/buffer, and we have less than 9 bytes ahead + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vi ^= bytes[pos++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vi ^= bytes[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vi ^= bytes[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + long vl = vi; + if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) bytes[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) bytes[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + throw new DataEncodingException("Malformed var int"); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testVectorVarInt_fastXOR(boolean zigZag) { + final byte[] bytes = new byte[64]; + final BufferedData bd = BufferedData.wrap(bytes); + final Random random = new Random(457639854); + + for (int i = 0; i < 10 * 1024 * 1024; i++) { + final int val = random.nextInt(); + Arrays.fill(bytes, (byte) 0); + bd.writeVarInt(val, zigZag); + + assertEquals(val, readVarInt_fastXOR(bytes, 0, zigZag)); + + bd.reset(); + } + } } From a94cbbc85ccafd8d323ff276deabafd8be7aaadf Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Thu, 14 May 2026 17:19:25 -0700 Subject: [PATCH 4/9] minor tweaks Signed-off-by: Anthony Petrov --- .../jmh/varint/read/VarIntByteArrayReadBench.java | 12 +++++++----- .../pbj/integration/test/VectorVarIntTest.java | 10 ++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git 
a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 86d6fd1ad..c168bf5fb 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -532,7 +532,7 @@ public void vector_zigZag(final BenchState state, final Blackhole blackhole) { /// A vectorized LEB128 version that: /// * avoids limit checks if we have enough bytes ahead (or if we relied on an EOFException in case of streams) /// * uses byte for 1 byte varint, int for a few bytes, and finally long for many bytes varint - @Benchmark + // @Benchmark // disabled because there's a faster version below @OperationsPerInvocation(INVOCATIONS) public void vector_smartLimitByteIntLong(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -685,11 +685,13 @@ public void vector_smartLimitByteIntLong(final BenchState state, final Blackhole public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum = 0; for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { - final int limit = Math.min(state.array.length, pos + 10); - if (pos >= limit) throw new DataEncodingException("Malformed var int"); + final int limit; + if (pos >= (limit = Math.min(state.array.length, pos + 10))) { + throw new DataEncodingException("Malformed var int"); + } - int vi = state.array[pos++]; - if (vi >= 0) { + int vi; + if ((vi = state.array[pos++]) >= 0) { state.sum += state.zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; continue; } diff --git a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java index df5acc3af..d32ae2c02 100644 --- a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java +++ b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java @@ -102,11 +102,13 @@ public void testVectorVarInt(boolean zigZag) { /// A refactored copy from VarIntByteArrayReadBench.vector_fastXOR. private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { - final int limit = Math.min(bytes.length, pos + 10); - if (pos >= limit) throw new DataEncodingException("Malformed var int"); + final int limit; + if (pos >= (limit = Math.min(bytes.length, pos + 10))) { + throw new DataEncodingException("Malformed var int"); + } - int vi = bytes[pos++]; - if (vi >= 0) { + int vi; + if ((vi = bytes[pos++]) >= 0) { return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; } From 72813f2a04e8daf7f05b0f757bbcde2890967377 Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Fri, 15 May 2026 11:22:38 -0700 Subject: [PATCH 5/9] more optimizations Signed-off-by: Anthony Petrov --- .../varint/read/VarIntByteArrayReadBench.java | 208 +++++++++--------- .../integration/test/VectorVarIntTest.java | 176 ++++++++------- 2 files changed, 199 insertions(+), 185 deletions(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index c168bf5fb..5cdbd2b36 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -99,7 +99,7 @@ public void tearDown() {} /// We use this algorithm everywhere in PBJ - in ReadableSequentialData , DirectBufferedData, RandomAccessData, /// ByteArrayBufferedData, and Bytes. It's known as "getVarLongRichard". The proper academic name is LEB128. /// It's also the Google slow-path algorithm as well. - @Benchmark + // temp @Benchmark @OperationsPerInvocation(INVOCATIONS) public void pbj(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -271,7 +271,7 @@ public void google(final BenchState state, final Blackhole blackhole) { /// * zigZag is handled. Google's original readRawVarint64() doesn't handle zigZag directly. /// * limit checks are added. Google's original version relies on IOOBE. But PBJ can wrap array slices and still /// must respect the length of the slice. So in PBJ we cannot rely on the IOOBE. 
- @Benchmark + // temp @Benchmark @OperationsPerInvocation(INVOCATIONS) public void google_zigZagAndLimit(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -685,36 +685,124 @@ public void vector_smartLimitByteIntLong(final BenchState state, final Blackhole public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum = 0; for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { - final int limit; - if (pos >= (limit = Math.min(state.array.length, pos + 10))) { - throw new DataEncodingException("Malformed var int"); - } + final int limit = Math.min(state.array.length, pos + 10); - int vi; - if ((vi = state.array[pos++]) >= 0) { - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; + fastpath: + { + if (pos < limit) { + int vi; + if ((vi = state.array[pos++]) >= 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } else if (pos + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= state.array[pos++] << 7) < 0) { + vi ^= (~0 << 7); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + long vl = vi; + if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + } + } } - // Fast path w/o any limit checks if we have all 10 bytes, or if it was a stream b/c we'd get an EOF - if (pos + 9 == limit) { + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead + if (pos >= limit) break slowpath; + + // Since the above check is false, the pos was incremented in the fastpath above. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + int vi = state.array[pos - 1]; if ((vi ^= state.array[pos++] << 7) < 0) { vi ^= (~0 << 7); state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; continue; } + if (pos >= limit) break slowpath; if ((vi ^= state.array[pos++] << 14) >= 0) { vi ^= ((~0 << 7) ^ (~0 << 14)); state.sum += state.zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; continue; } + if (pos >= limit) break slowpath; if ((vi ^= state.array[pos++] << 21) < 0) { vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; continue; } + if (pos >= limit) break slowpath; long vl = vi; if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { @@ -722,30 +810,28 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } + if (pos >= limit) break slowpath; if ((vl ^= (long) state.array[pos++] << 35) < 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } + if (pos >= limit) break slowpath; if ((vl ^= (long) state.array[pos++] << 49) < 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49)); + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } + if (pos >= limit) break slowpath; if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { vl ^= ((~0L << 7) @@ -759,6 +845,7 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } + if (pos >= limit) break slowpath; if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { vl ^= ((~0L << 7) @@ -773,89 +860,6 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; continue; } - - throw new DataEncodingException("Malformed var int"); - } - - // Slower path because this is an array/buffer, and we have less than 9 bytes ahead - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vi ^= state.array[pos++] << 7) < 0) { - vi ^= (~0 << 7); - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vi ^= state.array[pos++] << 14) >= 0) { - vi ^= ((~0 << 7) ^ (~0 << 14)); - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vi ^= state.array[pos++] << 21) < 0) { - vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - long vl = vi; - if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) state.array[pos++] << 35) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); - state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) state.array[pos++] << 49) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56) - ^ (~0L << 63)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; } throw new DataEncodingException("Malformed var int"); diff --git a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java index d32ae2c02..89d8b69cd 100644 --- a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java +++ b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java @@ -102,53 +102,136 @@ public void testVectorVarInt(boolean zigZag) { /// A refactored copy from VarIntByteArrayReadBench.vector_fastXOR. private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { - final int limit; - if (pos >= (limit = Math.min(bytes.length, pos + 10))) { - throw new DataEncodingException("Malformed var int"); - } + final int limit = Math.min(bytes.length, pos + 10); - int vi; - if ((vi = bytes[pos++]) >= 0) { - return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + fastpath: + { + if (pos < limit) { + int vi; + if ((vi = bytes[pos++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (pos + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= bytes[pos++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + long vl = vi; + if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + } + } } - // Fast path w/o any limit checks if we have all 10 bytes, or if it was a stream b/c we'd get an EOF - if (pos + 9 == limit) { + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead + if (pos >= limit) break slowpath; + + // Since the above check is false, the pos was incremented in the fastpath above. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + int vi = bytes[pos - 1]; if ((vi ^= bytes[pos++] << 7) < 0) { vi ^= (~0 << 7); return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } + if (pos >= limit) break slowpath; if ((vi ^= bytes[pos++] << 14) >= 0) { vi ^= ((~0 << 7) ^ (~0 << 14)); return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } + if (pos >= limit) break slowpath; if ((vi ^= bytes[pos++] << 21) < 0) { vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } + if (pos >= limit) break slowpath; long vl = vi; if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } + if (pos >= limit) break slowpath; if ((vl ^= (long) bytes[pos++] << 35) < 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } + if (pos >= limit) break slowpath; if ((vl ^= (long) bytes[pos++] << 49) < 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; } + if (pos >= limit) break slowpath; if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { vl ^= ((~0L << 7) @@ -161,6 +244,7 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { ^ (~0L << 56)); return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } + if (pos >= limit) break slowpath; if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { vl ^= ((~0L << 7) @@ -174,80 +258,6 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { ^ (~0L << 63)); return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - - throw new DataEncodingException("Malformed var int"); - } - - // Slower path because this is an array/buffer, and we have less than 9 bytes ahead - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vi ^= bytes[pos++] << 7) < 0) { - vi ^= (~0 << 7); - return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vi ^= bytes[pos++] << 14) >= 0) { - vi ^= ((~0 << 7) ^ (~0 << 14)); - return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vi ^= bytes[pos++] << 21) < 0) { - vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); - return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - long vl = vi; - if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) bytes[pos++] << 35) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); - return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) bytes[pos++] << 49) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); - - if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56) - ^ (~0L << 63)); - return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; } throw new DataEncodingException("Malformed var int"); From 973c8a366e758809e93041c060b310ef0d77e09e Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Fri, 15 May 2026 12:25:12 -0700 Subject: [PATCH 6/9] moar Signed-off-by: Anthony Petrov --- .../varint/read/VarIntByteArrayReadBench.java | 176 +++++++++--------- .../integration/test/VectorVarIntTest.java | 150 +++++++-------- 2 files changed, 168 insertions(+), 158 deletions(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 5cdbd2b36..5646f6137 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -685,92 +685,93 @@ public void vector_smartLimitByteIntLong(final BenchState state, final Blackhole public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum = 0; for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + int vi; + long vl; final int limit = Math.min(state.array.length, pos + 10); fastpath: { - if (pos < limit) { - int vi; - if ((vi = state.array[pos++]) >= 0) { + if (pos == limit) break fastpath; + + if ((vi = state.array[pos++]) >= 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } else if (pos + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= state.array[pos++] << 7) < 0) { + vi ^= (~0 << 7); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + state.sum += state.zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; continue; - } else if (pos + 9 == limit) { - // Fast path w/o any limit checks if we have 9 more bytes - if ((vi ^= state.array[pos++] << 7) < 0) { - vi ^= (~0 << 7); - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; - } - - if ((vi ^= state.array[pos++] << 14) >= 0) { - vi ^= ((~0 << 7) ^ (~0 << 14)); - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; - } - - if ((vi ^= state.array[pos++] << 21) < 0) { - vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); - state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - continue; - } - - long vl = vi; - if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - - if ((vl ^= (long) state.array[pos++] << 35) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - - if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - - if ((vl ^= (long) state.array[pos++] << 49) < 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - - if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56)); - state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; - continue; - } - - if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56) - ^ (~0L << 63)); - state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - continue; - } + } + + vl = vi; + if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + continue; } } } @@ -780,9 +781,10 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead if (pos >= limit) break slowpath; - // Since the above check is false, the pos was incremented in the fastpath above. + // Since the above check is false, the pos was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. - int vi = state.array[pos - 1]; + vi = state.array[pos - 1]; if ((vi ^= state.array[pos++] << 7) < 0) { vi ^= (~0 << 7); state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; @@ -804,7 +806,7 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { } if (pos >= limit) break slowpath; - long vl = vi; + vl = vi; if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; @@ -827,7 +829,13 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { if (pos >= limit) break slowpath; if ((vl ^= (long) state.array[pos++] << 49) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); state.sum += state.zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; continue; } diff --git a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java index 89d8b69cd..06c8d8765 100644 --- a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java +++ b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java @@ -102,82 +102,83 @@ public void testVectorVarInt(boolean zigZag) { /// A refactored copy from VarIntByteArrayReadBench.vector_fastXOR. private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { + int vi; + long vl; final int limit = Math.min(bytes.length, pos + 10); fastpath: { - if (pos < limit) { - int vi; - if ((vi = bytes[pos++]) >= 0) { + if (pos == limit) break fastpath; + + if ((vi = bytes[pos++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (pos + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= bytes[pos++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } else if (pos + 9 == limit) { - // Fast path w/o any limit checks if we have 9 more bytes - if ((vi ^= bytes[pos++] << 7) < 0) { - vi ^= (~0 << 7); - return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } - - if ((vi ^= bytes[pos++] << 14) >= 0) { - vi ^= ((~0 << 7) ^ (~0 << 14)); - return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } - - if ((vi ^= bytes[pos++] << 21) < 0) { - vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); - return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; - } - - long vl = vi; - if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); - return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; - } - - if ((vl ^= (long) bytes[pos++] << 35) < 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - - if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { - vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - - if ((vl ^= (long) bytes[pos++] << 49) < 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - - if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } - - if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { - vl ^= ((~0L << 7) - ^ (~0L << 14) - ^ (~0L << 21) - ^ (~0L << 28) - ^ (~0L << 35) - ^ (~0L << 42) - ^ (~0L << 49) - ^ (~0L << 56) - ^ (~0L << 63)); - return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; - } + } + + if ((vi ^= bytes[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } } } @@ -187,9 +188,10 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead if (pos >= limit) break slowpath; - // Since the above check is false, the pos was incremented in the fastpath above. + // Since the above check is false, the pos was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. - int vi = bytes[pos - 1]; + vi = bytes[pos - 1]; if ((vi ^= bytes[pos++] << 7) < 0) { vi ^= (~0 << 7); return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; @@ -208,7 +210,7 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { } if (pos >= limit) break slowpath; - long vl = vi; + vl = vi; if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; From 6692ee08c171e16995fe4e447116f65ef00d5824 Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Fri, 15 May 2026 13:02:59 -0700 Subject: [PATCH 7/9] reenable benches Signed-off-by: Anthony Petrov --- .../integration/jmh/varint/read/VarIntByteArrayReadBench.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 5646f6137..3bb57180f 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -99,7 +99,7 @@ public void tearDown() {} /// We use this algorithm everywhere in PBJ - in ReadableSequentialData , DirectBufferedData, RandomAccessData, /// ByteArrayBufferedData, and Bytes. It's known as "getVarLongRichard". The proper academic name is LEB128. /// It's also the Google slow-path algorithm as well. - // temp @Benchmark + @Benchmark @OperationsPerInvocation(INVOCATIONS) public void pbj(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -271,7 +271,7 @@ public void google(final BenchState state, final Blackhole blackhole) { /// * zigZag is handled. Google's original readRawVarint64() doesn't handle zigZag directly. /// * limit checks are added. Google's original version relies on IOOBE. But PBJ can wrap array slices and still /// must respect the length of the slice. So in PBJ we cannot rely on the IOOBE. 
- // temp @Benchmark + @Benchmark @OperationsPerInvocation(INVOCATIONS) public void google_zigZagAndLimit(final BenchState state, final Blackhole blackhole) { state.sum = 0; From cc42544005c87d8e54a23fd485e94a62f131e2a5 Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Fri, 15 May 2026 14:36:23 -0700 Subject: [PATCH 8/9] bug fix Signed-off-by: Anthony Petrov --- .../jmh/varint/read/VarIntByteArrayReadBench.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 3bb57180f..66ae87b63 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -99,7 +99,7 @@ public void tearDown() {} /// We use this algorithm everywhere in PBJ - in ReadableSequentialData , DirectBufferedData, RandomAccessData, /// ByteArrayBufferedData, and Bytes. It's known as "getVarLongRichard". The proper academic name is LEB128. /// It's also the Google slow-path algorithm as well. - @Benchmark + // temp @Benchmark @OperationsPerInvocation(INVOCATIONS) public void pbj(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -271,7 +271,7 @@ public void google(final BenchState state, final Blackhole blackhole) { /// * zigZag is handled. Google's original readRawVarint64() doesn't handle zigZag directly. /// * limit checks are added. Google's original version relies on IOOBE. But PBJ can wrap array slices and still /// must respect the length of the slice. So in PBJ we cannot rely on the IOOBE. 
- @Benchmark + // temp @Benchmark @OperationsPerInvocation(INVOCATIONS) public void google_zigZagAndLimit(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -760,7 +760,8 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { continue; } - if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { + if (state.array[pos++] < 0) break fastpath; + if ((vl ^= (long) state.array[pos - 1] << 63) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) @@ -853,9 +854,9 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } - if (pos >= limit) break slowpath; + if (pos >= limit || state.array[pos++] < 0) break slowpath; - if ((vl ^= (long) state.array[pos++] << 63) >= 0L) { + if ((vl ^= (long) state.array[pos - 1] << 63) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) From d6156cfaa4f547a0df5ace2840bd455b18267892 Mon Sep 17 00:00:00 2001 From: Anthony Petrov Date: Fri, 15 May 2026 15:24:47 -0700 Subject: [PATCH 9/9] final touches Signed-off-by: Anthony Petrov --- .../runtime/io/ReadableSequentialData.java | 84 ++-- .../io/buffer/ByteArrayBufferedData.java | 424 ++++++++++++----- .../hedera/pbj/runtime/io/buffer/Bytes.java | 197 ++++++-- .../runtime/io/buffer/DirectBufferedData.java | 437 +++++++++++++----- .../runtime/io/buffer/RandomAccessData.java | 84 ++-- .../varint/read/VarIntByteArrayReadBench.java | 6 +- .../integration/test/VectorVarIntTest.java | 8 +- 7 files changed, 896 insertions(+), 344 deletions(-) diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java index 39e60e348..3d461aeea 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java @@ -473,56 
+473,76 @@ default int readVarInt(final boolean zigZag) throws BufferUnderflowException, Un * @throws DataEncodingException if the variable long cannot be decoded */ default long readVarLong(final boolean zigZag) throws BufferUnderflowException, UncheckedIOException { - byte b; - long v = (b = readByte()) & 0x7F; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + final byte b; + int vi; + long vl; + + if ((vi = readByte()) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = readByte()) & 0x7F) << 7; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vi ^= readByte() << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = readByte()) & 0x7F) << 14; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vi ^= readByte() << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = readByte()) & 0x7F) << 21; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vi ^= readByte() << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = readByte()) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + vl = vi; + if ((vl ^= (long) readByte() << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = readByte()) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) readByte() << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = readByte()) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) readByte() << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = readByte()) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) readByte() << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = readByte()) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) readByte() << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - b = readByte(); - if ((b & 0x80) == 0) { - v |= (long) b << 63; - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((b = readByte()) < 0) { + throw new DataEncodingException("Malformed var int"); + } + if ((vl ^= (long) b << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; } throw new DataEncodingException("Malformed var int"); diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java index f81d71edf..597cef681 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java @@ -145,68 +145,165 @@ private long getVar(int offset, final boolean zigZag) { checkOffset(offset, buffer.limit()); offset += arrayOffset; + int vi; + long vl; final int limit = Math.min(arrayOffset + (int) length(), offset + 10); - if (offset >= limit) throw new BufferUnderflowException(); - byte b; - long v = (b = array[offset++]) & 0x7F; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = array[offset++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = array[offset++]) & 0x7F) << 7; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = array[offset++]) & 0x7F) << 14; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = array[offset++]) & 0x7F) << 21; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if (array[offset++] < 0) break fastpath; + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - if (offset >= limit) throw new BufferUnderflowException(); - b = array[offset++]; - if ((b & 0x80) == 0) { - v |= (long) b << 63; - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throws an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = array[offset - 1]; + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit || array[offset++] < 0) break slowpath; + + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } } throw new DataEncodingException("Malformed var int"); @@ -299,81 +396,188 @@ private long readVar(final boolean zigZag) { final int pos = buffer.position(); int offset = arrayOffset + pos; + int vi; + long vl; final int limit = Math.min(offset + buffer.remaining(), offset + 10); - if (offset >= limit) throw new BufferUnderflowException(); - byte b; - long v = (b = array[offset++]) & 0x7F; - if ((b & 0x80) == 0) { - buffer.position(pos + 1); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = array[offset++]) >= 0) { + buffer.position(pos + 1); + return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(pos + 2); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = array[offset++]) & 0x7F) << 7; - if ((b & 0x80) == 0) { - buffer.position(pos + 2); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(pos + 3); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = array[offset++]) & 0x7F) << 14; - if ((b & 0x80) == 0) { - buffer.position(pos + 3); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(pos + 4); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = array[offset++]) & 0x7F) << 21; - if ((b & 0x80) == 0) { - buffer.position(pos + 4); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(pos + 5); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - buffer.position(pos + 5); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(pos + 6); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - buffer.position(pos + 6); - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(pos + 7); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - buffer.position(pos + 7); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + buffer.position(pos + 8); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - buffer.position(pos + 8); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(pos + 9); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = array[offset++]) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - buffer.position(pos + 9); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + buffer.position(pos + 10); + if (array[offset++] < 0) throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + } } - if (offset >= limit) throw new BufferUnderflowException(); - b = array[offset++]; - if ((b & 0x80) == 0) { + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throws an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = array[offset - 1]; + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(pos + 2); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(pos + 3); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(pos + 4); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(pos + 5); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(pos + 6); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(pos + 7); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + buffer.position(pos + 8); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(pos + 9); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; buffer.position(pos + 10); - v |= (long) b << 63; - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if (array[offset++] < 0) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } } - throw new DataEncodingException("Malformed var int"); + throw new BufferUnderflowException(); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java index 5fe28a69e..94c85974a 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java @@ -865,68 +865,165 @@ private long getVar(int offset, final boolean zigZag) { } offset += start; + int vi; + long vl; final int limit = Math.min(start + length, offset + 10); - if (offset >= limit) throw new DataEncodingException("Malformed var int"); - byte b; - long v = (b = buffer[offset++]) & 0x7F; - if ((b & 0x80) == 0) { - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = buffer[offset++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more buffer + if ((vi ^= buffer[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = buffer[offset++]) & 0x7F) << 7; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + if ((vi ^= buffer[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = buffer[offset++]) & 0x7F) << 14; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + if ((vi ^= buffer[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } - v |= ((b = buffer[offset++]) & 0x7F) << 21; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + vl = vi; + if ((vl ^= (long) buffer[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = buffer[offset++]) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) buffer[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = buffer[offset++]) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) buffer[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = buffer[offset++]) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) buffer[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = buffer[offset++]) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) buffer[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - v |= ((b = buffer[offset++]) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if (buffer[offset++] < 0) break fastpath; + if ((vl ^= (long) buffer[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - if (offset >= limit) throw new DataEncodingException("Malformed var int"); - b = buffer[offset++]; - if ((b & 0x80) == 0) { - v |= (long) b << 63; - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) buffer ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = buffer[offset - 1]; + if ((vi ^= buffer[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= buffer[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= buffer[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) buffer[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit || buffer[offset++] < 0) break slowpath; + + if ((vl ^= (long) buffer[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } } throw new DataEncodingException("Malformed var int"); diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java index ff4ef63d5..185df3c89 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java @@ -90,71 +90,171 @@ public long getVarLong(final long offset, final boolean zigZag) { private long getVar(int offset, final boolean zigZag) { checkOffset(offset, length()); + int vi; + long vl; final int limit = Math.min(buffer.limit(), offset + 10); - if (offset >= limit) throw new BufferUnderflowException(); - byte b; - long v = (b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more array + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - if (offset >= limit) throw new BufferUnderflowException(); - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 7; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + slowpath: + { + // Slower path because this is an array/array, and we have less than 9 (or even 10) array ahead + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 14; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); - - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 21; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++); - if ((b & 0x80) == 0) { - v |= (long) b << 63; - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } } - throw new DataEncodingException("Malformed var int"); + throw new BufferUnderflowException(); } /** @@ -234,81 +334,190 @@ public long readVarLong(final boolean zigZag) { private long readVar(final boolean zigZag) { int offset = buffer.position(); + int vi; + long vl; final int limit = Math.min(offset + buffer.remaining(), offset + 10); - if (offset >= limit) throw new BufferUnderflowException(); - byte b; - long v = (b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) >= 0) { + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more array + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(offset); + return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - if (offset >= limit) throw new BufferUnderflowException(); - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 7; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + slowpath: + { + // Slower path because this is an array/array, and we have less than 9 (or even 10) array ahead + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 14; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); - - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7F) << 21; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(offset); + return zigZag ? 
(vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - v |= ((b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - buffer.position(offset); - return zigZag ? (v >>> 1) ^ -(v & 1) : v; - } - if (offset >= limit) throw new BufferUnderflowException(); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++); - if ((b & 0x80) == 0) { - buffer.position(offset); - v |= (long) b << 63; - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + buffer.position(offset + 1); + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } } - throw new DataEncodingException("Malformed var int"); + throw new BufferUnderflowException(); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java index deefafbfe..fb9e198ef 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java @@ -430,56 +430,76 @@ default int getVarInt(final long offset, final boolean zigZag) { * @throws DataEncodingException if the var long is malformed */ default long getVarLong(long offset, final boolean zigZag) { - byte b; - long v = (b = getByte(offset++)) & 0x7F; - if ((b & 0x80) == 0) { - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + final byte b; + int vi; + long vl; + + if ((vi = getByte(offset++)) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = getByte(offset++)) & 0x7F) << 7; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vi ^= getByte(offset++) << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = getByte(offset++)) & 0x7F) << 14; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vi ^= getByte(offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = getByte(offset++)) & 0x7F) << 21; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vi ^= getByte(offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; } - v |= ((b = getByte(offset++)) & 0x7FL) << 28; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + vl = vi; + if ((vl ^= (long) getByte(offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = getByte(offset++)) & 0x7FL) << 35; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) getByte(offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = getByte(offset++)) & 0x7FL) << 42; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) getByte(offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = getByte(offset++)) & 0x7FL) << 49; - if ((b & 0x80) == 0) { - return zigZag ? 
(v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) getByte(offset++) << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - v |= ((b = getByte(offset++)) & 0x7FL) << 56; - if ((b & 0x80) == 0) { - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((vl ^= (long) getByte(offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - b = getByte(offset++); - if ((b & 0x80) == 0) { - v |= (long) b << 63; - return zigZag ? (v >>> 1) ^ -(v & 1) : v; + if ((b = getByte(offset++)) < 0) { + throw new DataEncodingException("Malformed var int"); + } + if ((vl ^= (long) b << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } throw new DataEncodingException("Malformed var int"); diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 66ae87b63..fc004dd58 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -99,7 +99,7 @@ public void tearDown() {} /// We use this algorithm everywhere in PBJ - in ReadableSequentialData , DirectBufferedData, RandomAccessData, /// ByteArrayBufferedData, and Bytes. It's known as "getVarLongRichard". The proper academic name is LEB128. /// It's also the Google slow-path algorithm as well. 
- // temp @Benchmark + @Benchmark @OperationsPerInvocation(INVOCATIONS) public void pbj(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -271,7 +271,7 @@ public void google(final BenchState state, final Blackhole blackhole) { /// * zigZag is handled. Google's original readRawVarint64() doesn't handle zigZag directly. /// * limit checks are added. Google's original version relies on IOOBE. But PBJ can wrap array slices and still /// must respect the length of the slice. So in PBJ we cannot rely on the IOOBE. - // temp @Benchmark + @Benchmark @OperationsPerInvocation(INVOCATIONS) public void google_zigZagAndLimit(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -820,7 +820,7 @@ public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; continue; } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); + if (pos >= limit) break slowpath; if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); diff --git a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java index 06c8d8765..db3e14105 100644 --- a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java +++ b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java @@ -168,7 +168,8 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { return zigZag ? 
(vl >>> 1) ^ -(vl & 1) : vl; } - if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { + if (bytes[pos++] < 0) break fastpath; + if ((vl ^= (long) bytes[pos - 1] << 63) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) @@ -221,7 +222,7 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } - if (pos >= limit) throw new DataEncodingException("Malformed var int"); + if (pos >= limit) break slowpath; if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); @@ -248,7 +249,8 @@ private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { } if (pos >= limit) break slowpath; - if ((vl ^= (long) bytes[pos++] << 63) >= 0L) { + if (bytes[pos++] < 0) break slowpath; + if ((vl ^= (long) bytes[pos - 1] << 63) >= 0L) { vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21)