diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java index e9c85853..3d461aee 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/ReadableSequentialData.java @@ -473,15 +473,78 @@ default int readVarInt(final boolean zigZag) throws BufferUnderflowException, Un * @throws DataEncodingException if the variable long cannot be decoded */ default long readVarLong(final boolean zigZag) throws BufferUnderflowException, UncheckedIOException { - long value = 0; + final byte b; + int vi; + long vl; - for (int i = 0; i < 10; i++) { - final byte b = readByte(); - value |= (long) (b & 0x7F) << (i * 7); - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + if ((vi = readByte()) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= readByte() << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= readByte() << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= readByte() << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) readByte() << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) readByte() << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) readByte() << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } + + if ((vl ^= (long) readByte() << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) readByte() << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((b = readByte()) < 0) { + throw new DataEncodingException("Malformed var int"); + } + if ((vl ^= (long) b << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + throw new DataEncodingException("Malformed var int"); } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java index ab2b6b28..597cef68 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java @@ -2,7 +2,6 @@ package com.hedera.pbj.runtime.io.buffer; import com.hedera.pbj.runtime.io.DataEncodingException; -import com.hedera.pbj.runtime.io.UnsafeUtils; import edu.umd.cs.findbugs.annotations.NonNull; import java.io.IOException; import java.io.InputStream; @@ -142,27 +141,172 @@ public long getVarLong(final long offset, final boolean zigZag) { return getVar(Math.toIntExact(offset), zigZag); } - private long getVar(final int offset, final boolean zigZag) { + private long getVar(int offset, final boolean zigZag) { checkOffset(offset, buffer.limit()); + offset += arrayOffset; + + int vi; + long vl; + final int limit = Math.min(arrayOffset + (int) length(), offset + 10); + + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = array[offset++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more array + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } - final int readOffset = arrayOffset + offset; - int rem = buffer.limit() - offset; - if (rem > 10) { - rem = 10; + if (array[offset++] < 0) break fastpath; + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - long value = 0; + slowpath: + { + // Slower path because this is an array/array, and we have less than 9 (or even 10) array ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = array[offset - 1]; + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getArrayByteNoChecks(array, readOffset + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit || array[offset++] < 0) break slowpath; + + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } } - throw (i == 10) ? new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + + throw new DataEncodingException("Malformed var int"); } /** @@ -250,25 +394,190 @@ public long readVarLong(final boolean zigZag) { private long readVar(final boolean zigZag) { final int pos = buffer.position(); - final int offset = arrayOffset + pos; - int rem = buffer.remaining(); - if (rem > 10) { - rem = 10; + int offset = arrayOffset + pos; + + int vi; + long vl; + final int limit = Math.min(offset + buffer.remaining(), offset + 10); + + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = array[offset++]) >= 0) { + buffer.position(pos + 1); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more array + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(pos + 2); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(pos + 3); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(pos + 4); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(pos + 5); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(pos + 6); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(pos + 7); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + buffer.position(pos + 8); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(pos + 9); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + buffer.position(pos + 10); + if (array[offset++] < 0) throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - long value = 0; + slowpath: + { + // Slower path because this is an array/array, and we have less than 9 (or even 10) array ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = array[offset - 1]; + if ((vi ^= array[offset++] << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(pos + 2); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(pos + 3); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= array[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(pos + 4); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) array[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(pos + 5); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getArrayByteNoChecks(array, offset + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - buffer.position(pos + i); - return zigZag ? (value >>> 1) ^ -(value & 1) : value; + if ((vl ^= (long) array[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(pos + 6); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(pos + 7); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + buffer.position(pos + 8); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) array[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(pos + 9); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + buffer.position(pos + 10); + if (array[offset++] < 0) throw new DataEncodingException("Malformed var int"); + + if ((vl ^= (long) array[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } } - throw (i == 10) ? new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + + throw new BufferUnderflowException(); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java index 5d0c03c3..94c85974 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java @@ -865,20 +865,167 @@ private long getVar(int offset, final boolean zigZag) { } offset += start; - int rem = (start + length) - offset; - if (rem > 10) { - rem = 10; + int vi; + long vl; + final int limit = Math.min(start + length, offset + 10); + + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = buffer[offset++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more buffer + if ((vi ^= buffer[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= buffer[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= buffer[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) buffer[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) buffer[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) buffer[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) buffer[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) buffer[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if (buffer[offset++] < 0) break fastpath; + if ((vl ^= (long) buffer[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - long value = 0; + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) buffer ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = buffer[offset - 1]; + if ((vi ^= buffer[offset++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= buffer[offset++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= buffer[offset++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) buffer[offset++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - for (int i = 0; i != rem; i++) { - final byte b = UnsafeUtils.getArrayByteNoChecks(buffer, offset + i); - value |= (long) (b & 0x7F) << (i * 7); - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; + if ((vl ^= (long) buffer[offset++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) buffer[offset++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit || buffer[offset++] < 0) break slowpath; + + if ((vl ^= (long) buffer[offset - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } } + throw new DataEncodingException("Malformed var int"); } } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java index 39672e81..185df3c8 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/DirectBufferedData.java @@ -87,27 +87,174 @@ public long getVarLong(final long offset, final boolean zigZag) { return getVar(Math.toIntExact(offset), zigZag); } - private long getVar(final int offset, final boolean zigZag) { + private long getVar(int offset, final boolean zigZag) { checkOffset(offset, length()); - int rem = Math.toIntExact(buffer.limit() - offset); - if (rem > 10) { - rem = 10; + int vi; + long vl; + final int limit = Math.min(buffer.limit(), offset + 10); + + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more array + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - long value = 0; + slowpath: + { + // Slower path because this is an array/array, and we have less than 9 (or even 10) array ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getDirectBufferByte(buffer, offset + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - buffer.position(offset + i); - return zigZag ? (value >>> 1) ^ -(value & 1) : value; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } } - throw (i == 10) ? new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + + throw new BufferUnderflowException(); } /** @@ -185,25 +332,192 @@ public long readVarLong(final boolean zigZag) { } private long readVar(final boolean zigZag) { - final int pos = buffer.position(); - int rem = buffer.remaining(); - if (rem > 10) { - rem = 10; + int offset = buffer.position(); + + int vi; + long vl; + final int limit = Math.min(offset + buffer.remaining(), offset + 10); + + fastpath: + { + if (offset == limit) break fastpath; + + if ((vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++)) >= 0) { + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (offset + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more array + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } } - long value = 0; + slowpath: + { + // Slower path because this is an array/array, and we have less than 9 (or even 10) array ahead + if (offset >= limit) break slowpath; + + // Since the above check is false, the offset was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1); + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 7) < 0) { + vi ^= (~0 << 7); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + if ((vi ^= UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + buffer.position(offset); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (offset >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; - int i = 0; - while (i != rem) { - final byte b = UnsafeUtils.getDirectBufferByteNoChecks(buffer, pos + i); - value |= (long) (b & 0x7F) << (i * 7); - i++; - if (b >= 0) { - buffer.position(pos + i); - return zigZag ? (value >>> 1) ^ -(value & 1) : value; + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + buffer.position(offset); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (offset >= limit) break slowpath; + + buffer.position(offset + 1); + if (UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset++) < 0) + throw new DataEncodingException("Malformed var int"); + if ((vl ^= (long) UnsafeUtils.getDirectBufferByteNoChecks(buffer, offset - 1) << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } } - throw (i == 10) ? new DataEncodingException("") : new BufferUnderflowException(); + + throw new BufferUnderflowException(); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java index dd9cf818..fb9e198e 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/RandomAccessData.java @@ -429,15 +429,79 @@ default int getVarInt(final long offset, final boolean zigZag) { * or the end of the buffer is encountered before the last segment of the varlong * @throws DataEncodingException if the var long is malformed */ - default long getVarLong(final long offset, final boolean zigZag) { - long value = 0; - for (int i = 0; i < 10; i++) { - final byte b = getByte(offset + i); - value |= (long) (b & 0x7F) << (i * 7); - if (b >= 0) { - return zigZag ? (value >>> 1) ^ -(value & 1) : value; - } + default long getVarLong(long offset, final boolean zigZag) { + final byte b; + int vi; + long vl; + + if ((vi = getByte(offset++)) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= getByte(offset++) << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= getByte(offset++) << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= getByte(offset++) << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) getByte(offset++) << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) getByte(offset++) << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) getByte(offset++) << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; } + + if ((vl ^= (long) getByte(offset++) << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) getByte(offset++) << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((b = getByte(offset++)) < 0) { + throw new DataEncodingException("Malformed var int"); + } + if ((vl ^= (long) b << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + throw new DataEncodingException("Malformed var int"); } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java index 98f7453c..fc004dd5 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/read/VarIntByteArrayReadBench.java @@ -186,13 +186,12 @@ public void pbj_doWhileLoop(final BenchState state, final Blackhole blackhole) { /// A slightly modified copy of Google implementation in CodecInputStream. /// PBJ used to use a very similar algorithm, just before https://github.com/hashgraph/pbj/pull/144 /// where we switched to LEB128. - @SuppressWarnings("lossy-conversions") // the impl is able to support longs, but we ignore that here and use ints. - // @Benchmark // disabled because it performs worse than the standard pbj implementation + // @Benchmark // disabled because PBJ requires zigZag handling and stricter limit checks @OperationsPerInvocation(INVOCATIONS) public void google(final BenchState state, final Blackhole blackhole) { state.sum = 0; for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { - final int limit = pos + 5; + final int limit = Math.min(state.array.length, pos + 10); fastpath: { @@ -202,7 +201,7 @@ public void google(final BenchState state, final Blackhole blackhole) { break fastpath; } - int x; + long x; int y; if ((y = state.array[tempPos++]) >= 0) { pos = tempPos; @@ -267,6 +266,92 @@ public void google(final BenchState state, final Blackhole blackhole) { blackhole.consume(state.sum); } + /// A modified version of the Google implementation adapted to PBJ. + /// Specifically: + /// * zigZag is handled. Google's original readRawVarint64() doesn't handle zigZag directly. + /// * limit checks are added. Google's original version relies on IOOBE. But PBJ can wrap array slices and still + /// must respect the length of the slice. So in PBJ we cannot rely on the IOOBE. + @Benchmark + @OperationsPerInvocation(INVOCATIONS) + public void google_zigZagAndLimit(final BenchState state, final Blackhole blackhole) { + state.sum = 0; + for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + final int limit = Math.min(state.array.length, pos + 10); + + fastpath: + { + int tempPos = pos; + + if (limit == tempPos) { + break fastpath; + } + + long x; + int y; + if ((y = state.array[tempPos++]) >= 0) { + pos = tempPos; + state.sum += state.zigZag ? (y >>> 1) ^ -(y & 1) : y; + continue; + } else if (limit - tempPos < 9) { + break fastpath; + } else if ((y ^= (state.array[tempPos++] << 7)) < 0) { + x = y ^ (~0 << 7); + } else if ((y ^= (state.array[tempPos++] << 14)) >= 0) { + x = y ^ ((~0 << 7) ^ (~0 << 14)); + } else if ((y ^= (state.array[tempPos++] << 21)) < 0) { + x = y ^ ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + } else if ((x = y ^ (state.array[tempPos++] << 28)) >= 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28); + } else if ((x ^= (state.array[tempPos++] << 35)) < 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35); + } else if ((x ^= (state.array[tempPos++] << 42)) >= 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42); + } else if ((x ^= (state.array[tempPos++] << 49)) < 0L) { + x ^= (~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49); + } else if ((x ^= (state.array[tempPos++] << 56)) >= 0L) { + x ^= (~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56); + } else if ((x ^= (state.array[tempPos++] << 63)) >= 0L) { + x ^= (~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63); + } else { + break fastpath; // Will throw malformedVarint() + } + pos = tempPos; + state.sum += state.zigZag ? (x >>> 1) ^ -(x & 1) : x; + continue; + } + + slowpath: + { + int result = 0; + for (int shift = 0; pos < limit && shift < 64; shift += 7) { + final byte b = state.array[pos++]; + result |= (b & 0x7F) << shift; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (result >>> 1) ^ -(result & 1) : result; + break slowpath; + } + } + throw new DataEncodingException("Malformed var int"); + } + } + blackhole.consume(state.sum); + } + /// A LEB128 with fully unrolled loop. // @Benchmark // disabled because the algorithm is missing limit checks @OperationsPerInvocation(INVOCATIONS) @@ -360,7 +445,7 @@ public void loopLess_withLimitChecks(final BenchState state, final Blackhole bla } /// A vectorized LEB128, similar to the loopLess above, with some minor tweaks and supporting long varints. - @Benchmark + // @Benchmark // disabled because of improvements in versions below @OperationsPerInvocation(INVOCATIONS) public void vector_zigZag(final BenchState state, final Blackhole blackhole) { state.sum = 0; @@ -444,6 +529,353 @@ public void vector_zigZag(final BenchState state, final Blackhole blackhole) { blackhole.consume(state.sum); } + /// A vectorized LEB128 version that: + /// * avoids limit checks if we have enough bytes ahead (or if we relied on an EOFException in case of streams) + /// * uses byte for 1 byte varint, int for a few bytes, and finally long for many bytes varint + // @Benchmark // disabled because there's a faster version below + @OperationsPerInvocation(INVOCATIONS) + public void vector_smartLimitByteIntLong(final BenchState state, final Blackhole blackhole) { + state.sum = 0; + for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + final int limit = Math.min(state.array.length, pos + 10); + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + byte b = state.array[pos++]; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (b >>> 1) ^ -(b & 1) : b; + continue; + } + + int vi = b & 0x7F; + + // Fast path w/o any limit checks if we have all 10 bytes, or if it was a stream b/c we'd get an EOF + if (pos + 9 == limit) { + vi |= ((b = state.array[pos++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + vi |= ((b = state.array[pos++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + vi |= ((b = state.array[pos++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + long vl = vi | ((b = state.array[pos++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + vl |= ((b = state.array[pos++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + b = state.array[pos++]; + if ((b & 0x80) == 0) { + vl |= (long) b << 63; + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + throw new DataEncodingException("Malformed var int"); + } + + // Slower path because this is an array/buffer, and we have less than 9 bytes ahead + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vi |= ((b = state.array[pos++]) & 0x7F) << 7; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vi |= ((b = state.array[pos++]) & 0x7F) << 14; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vi |= ((b = state.array[pos++]) & 0x7F) << 21; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + long vl = vi | ((b = state.array[pos++]) & 0x7FL) << 28; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 35; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 42; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 49; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + vl |= ((b = state.array[pos++]) & 0x7FL) << 56; + if ((b & 0x80) == 0) { + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) throw new DataEncodingException("Malformed var int"); + + b = state.array[pos++]; + if ((b & 0x80) == 0) { + vl |= (long) b << 63; + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + throw new DataEncodingException("Malformed var int"); + } + blackhole.consume(state.sum); + } + + /// A vectorized LEB128 similar to vector_smartLimitByteIntLong that also uses an XOR trick from Google impl. + @Benchmark + @OperationsPerInvocation(INVOCATIONS) + public void vector_fastXOR(final BenchState state, final Blackhole blackhole) { + state.sum = 0; + for (int invocation = 0, pos = 0; invocation < INVOCATIONS; invocation++) { + int vi; + long vl; + final int limit = Math.min(state.array.length, pos + 10); + + fastpath: + { + if (pos == limit) break fastpath; + + if ((vi = state.array[pos++]) >= 0) { + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } else if (pos + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= state.array[pos++] << 7) < 0) { + vi ^= (~0 << 7); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + if ((vi ^= state.array[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + + vl = vi; + if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + + if (state.array[pos++] < 0) break fastpath; + if ((vl ^= (long) state.array[pos - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + } + } + + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead + if (pos >= limit) break slowpath; + + // Since the above check is false, the pos was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = state.array[pos - 1]; + if ((vi ^= state.array[pos++] << 7) < 0) { + vi ^= (~0 << 7); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) break slowpath; + + if ((vi ^= state.array[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) break slowpath; + + if ((vi ^= state.array[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + state.sum += state.zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + continue; + } + if (pos >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) state.array[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) state.array[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) state.array[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) state.array[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) state.array[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + if (pos >= limit || state.array[pos++] < 0) break slowpath; + + if ((vl ^= (long) state.array[pos - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + state.sum += state.zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + continue; + } + } + + throw new DataEncodingException("Malformed var int"); + } + blackhole.consume(state.sum); + } + public static void main(String[] args) throws Exception { Options opt = new OptionsBuilder() .include(VarIntByteArrayReadBench.class.getSimpleName()) diff --git a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java index 830bc013..db3e1410 100644 --- a/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java +++ b/pbj-integration-tests/src/test/java/com/hedera/pbj/integration/test/VectorVarIntTest.java @@ -99,4 +99,189 @@ public void testVectorVarInt(boolean zigZag) { bd.reset(); } } + + /// A refactored copy from VarIntByteArrayReadBench.vector_fastXOR. + private long readVarInt_fastXOR(byte[] bytes, int pos, boolean zigZag) { + int vi; + long vl; + final int limit = Math.min(bytes.length, pos + 10); + + fastpath: + { + if (pos == limit) break fastpath; + + if ((vi = bytes[pos++]) >= 0) { + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } else if (pos + 9 == limit) { + // Fast path w/o any limit checks if we have 9 more bytes + if ((vi ^= bytes[pos++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + if ((vi ^= bytes[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + + vl = vi; + if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + + if (bytes[pos++] < 0) break fastpath; + if ((vl ^= (long) bytes[pos - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } + } + + slowpath: + { + // Slower path because this is an array/buffer, and we have less than 9 (or even 10) bytes ahead + if (pos >= limit) break slowpath; + + // Since the above check is false, the pos was incremented in the fastpath above, and vi is actually + // assigned there. However, javac is unable to see this and throw an error. So we re-initialize it. + // This byte is in CPU L1 cache, so this should be fast. Also, this is a slowpath anyway. + vi = bytes[pos - 1]; + if ((vi ^= bytes[pos++] << 7) < 0) { + vi ^= (~0 << 7); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (pos >= limit) break slowpath; + + if ((vi ^= bytes[pos++] << 14) >= 0) { + vi ^= ((~0 << 7) ^ (~0 << 14)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (pos >= limit) break slowpath; + + if ((vi ^= bytes[pos++] << 21) < 0) { + vi ^= ((~0 << 7) ^ (~0 << 14) ^ (~0 << 21)); + return zigZag ? (vi >>> 1) ^ -(vi & 1) : vi; + } + if (pos >= limit) break slowpath; + + vl = vi; + if ((vl ^= (long) bytes[pos++] << 28) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) bytes[pos++] << 35) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) bytes[pos++] << 42) >= 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) bytes[pos++] << 49) < 0L) { + vl ^= ((~0L << 7) ^ (~0L << 14) ^ (~0L << 21) ^ (~0L << 28) ^ (~0L << 35) ^ (~0L << 42) ^ (~0L << 49)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) break slowpath; + + if ((vl ^= (long) bytes[pos++] << 56) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + if (pos >= limit) break slowpath; + + if (bytes[pos++] < 0) break slowpath; + if ((vl ^= (long) bytes[pos - 1] << 63) >= 0L) { + vl ^= ((~0L << 7) + ^ (~0L << 14) + ^ (~0L << 21) + ^ (~0L << 28) + ^ (~0L << 35) + ^ (~0L << 42) + ^ (~0L << 49) + ^ (~0L << 56) + ^ (~0L << 63)); + return zigZag ? (vl >>> 1) ^ -(vl & 1) : vl; + } + } + + throw new DataEncodingException("Malformed var int"); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testVectorVarInt_fastXOR(boolean zigZag) { + final byte[] bytes = new byte[64]; + final BufferedData bd = BufferedData.wrap(bytes); + final Random random = new Random(457639854); + + for (int i = 0; i < 10 * 1024 * 1024; i++) { + final int val = random.nextInt(); + Arrays.fill(bytes, (byte) 0); + bd.writeVarInt(val, zigZag); + + assertEquals(val, readVarInt_fastXOR(bytes, 0, zigZag)); + + bd.reset(); + } + } }