From 943d4bd2c8ca488b0875fd576009e121b345b3a5 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 16 Jun 2026 07:35:01 +0000 Subject: [PATCH 1/7] Use bulk writes instead of per-element writes in vector serialization Replace inefficient element-by-element write loops with bulk write operations in MemorySegmentVectorProvider: - writeFloatVector: Extract underlying float array and use writeFloats() instead of looping with writeFloat() - writeByteSequence: Extract underlying byte array and use write() instead of looping with writeByte() --- .../jvector/vector/MemorySegmentVectorProvider.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java b/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java index 1ce0d81b2..b1e12ca19 100644 --- a/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java +++ b/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java @@ -62,8 +62,8 @@ public void readFloatVector(RandomAccessReader r, int count, VectorFloat vect @Override public void writeFloatVector(IndexWriter out, VectorFloat vector) throws IOException { - for (int i = 0; i < vector.length(); i++) - out.writeFloat(vector.get(i)); + float[] data = (float[]) ((MemorySegmentVectorFloat) vector).get().heapBase().get(); + out.writeFloats(data, 0, vector.length()); } @Override @@ -98,7 +98,7 @@ public void readByteSequence(RandomAccessReader r, ByteSequence sequence) thr @Override public void writeByteSequence(IndexWriter out, ByteSequence sequence) throws IOException { - for (int i = 0; i < sequence.length(); i++) - out.writeByte(sequence.get(i)); + byte[] data = (byte[]) ((MemorySegmentByteSequence) sequence).get().heapBase().get(); + out.write(data, 0, sequence.length()); } } From 13bde4a70c6b332a1bb8a2eb2262e20940f66d2a Mon Sep 17 00:00:00 
2001 From: Ted Willke Date: Wed, 17 Jun 2026 19:16:17 +0000 Subject: [PATCH 2/7] Adding essential tests. --- .../MemorySegmentVectorProviderTest.java | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 jvector-tests/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java b/jvector-tests/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java new file mode 100644 index 000000000..13b97d3a8 --- /dev/null +++ b/jvector-tests/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java @@ -0,0 +1,90 @@ +package io.github.jbellis.jvector.vector; + +import io.github.jbellis.jvector.disk.IndexWriter; +import io.github.jbellis.jvector.vector.types.ByteSequence; + +import java.io.IOException; +import org.junit.jupiter.api.Test; + +public class MemorySegmentVectorProviderTest { + + @Test + void testWriteByteSequenceSlice() throws IOException { + MemorySegmentVectorProvider provider = new MemorySegmentVectorProvider(); + + byte[] originalBytes = {10, 20, 30, 40, 50}; + ByteSequence original = provider.createByteSequence(originalBytes); + + ByteSequence slice = original.slice(2, 3); + + MockIndexWriter dummyWriter = new MockIndexWriter(); + + // Proves sliced writes function perfectly + provider.writeByteSequence(dummyWriter, slice); + + byte[] expected = {30, 40, 50}; + org.junit.jupiter.api.Assertions.assertArrayEquals(expected, dummyWriter.toByteArray()); + } + + @Test + void testWriteByteSequenceFull() throws IOException { + MemorySegmentVectorProvider provider = new MemorySegmentVectorProvider(); + + byte[] expectedBytes = {1, 2, 3, 4, 5}; + ByteSequence sequence = provider.createByteSequence(expectedBytes); + + MockIndexWriter dummyWriter = new MockIndexWriter(); + + // Proves standard, non-sliced writes function perfectly + 
provider.writeByteSequence(dummyWriter, sequence); + + org.junit.jupiter.api.Assertions.assertArrayEquals(expectedBytes, dummyWriter.toByteArray()); + } + + @Test + void testWriteByteSequenceZeroLength() throws IOException { + MemorySegmentVectorProvider provider = new MemorySegmentVectorProvider(); + + byte[] originalBytes = {10, 20, 30}; + ByteSequence original = provider.createByteSequence(originalBytes); + + // Create a logical empty slice + ByteSequence emptySlice = original.slice(1, 0); + + MockIndexWriter dummyWriter = new MockIndexWriter(); + + // Proves edge cases don't throw IndexOutOfBoundsException + provider.writeByteSequence(dummyWriter, emptySlice); + + org.junit.jupiter.api.Assertions.assertArrayEquals(new byte[0], dummyWriter.toByteArray()); + } + + /** + * A lightweight mock to capture IndexWriter output without boilerplate. + */ + private static class MockIndexWriter implements IndexWriter { + private final java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + private final java.io.DataOutputStream out = new java.io.DataOutputStream(bos); + + public byte[] toByteArray() { return bos.toByteArray(); } + + @Override public long position() { return bos.size(); } + @Override public void close() throws IOException { out.close(); } + + // DataOutput delegation + @Override public void write(int b) throws IOException { out.write(b); } + @Override public void write(byte[] b) throws IOException { out.write(b); } + @Override public void write(byte[] b, int off, int len) throws IOException { out.write(b, off, len); } + @Override public void writeBoolean(boolean v) throws IOException { out.writeBoolean(v); } + @Override public void writeByte(int v) throws IOException { out.writeByte(v); } + @Override public void writeShort(int v) throws IOException { out.writeShort(v); } + @Override public void writeChar(int v) throws IOException { out.writeChar(v); } + @Override public void writeInt(int v) throws IOException { out.writeInt(v); } + 
+ @Override public void writeLong(long v) throws IOException { out.writeLong(v); } + @Override public void writeFloat(float v) throws IOException { out.writeFloat(v); } + @Override public void writeDouble(double v) throws IOException { out.writeDouble(v); } + @Override public void writeBytes(String s) throws IOException { out.writeBytes(s); } + @Override public void writeChars(String s) throws IOException { out.writeChars(s); } + @Override public void writeUTF(String s) throws IOException { out.writeUTF(s); } + } +} From df2aed370642c69fa972c504831ccf3c45178194 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 18 Jun 2026 04:27:23 +0000 Subject: [PATCH 3/7] Use .asByteBuffer() instead of .heapBase().get() for MemorySegmentByteSequence .heapBase().get() ignores slicing and returns the base of the original ByteArray. --- .../jbellis/jvector/vector/MemorySegmentVectorProvider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java b/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java index b1e12ca19..a201e48c8 100644 --- a/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java +++ b/jvector-native/src/main/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProvider.java @@ -98,7 +98,7 @@ public void readByteSequence(RandomAccessReader r, ByteSequence sequence) thr @Override public void writeByteSequence(IndexWriter out, ByteSequence sequence) throws IOException { - byte[] data = (byte[]) ((MemorySegmentByteSequence) sequence).get().heapBase().get(); - out.write(data, 0, sequence.length()); + java.nio.ByteBuffer bb = ((MemorySegmentByteSequence) sequence).get().asByteBuffer(); + out.write(bb.array(), bb.arrayOffset(), bb.remaining()); } } From cd46c3ae659e1e61283aacf53a49fa9c7cd9281d Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 18 Jun 2026 
04:37:47 +0000 Subject: [PATCH 4/7] Move MemorySegmentVectorProviderTest to jvector-native module --- .../jbellis/jvector/disk}/MemorySegmentVectorProviderTest.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {jvector-tests/src/test/java/io/github/jbellis/jvector/vector => jvector-native/src/test/java/io/github/jbellis/jvector/disk}/MemorySegmentVectorProviderTest.java (100%) diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java b/jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java similarity index 100% rename from jvector-tests/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java rename to jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java From 3c0057b02dd14453bae8cb5662102b559f956190 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 18 Jun 2026 04:51:48 +0000 Subject: [PATCH 5/7] Add license --- .../disk/MemorySegmentVectorProviderTest.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java b/jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java index 13b97d3a8..29b415a86 100644 --- a/jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java +++ b/jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java @@ -1,3 +1,19 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.github.jbellis.jvector.vector; import io.github.jbellis.jvector.disk.IndexWriter; From 29e3ef96564f900b33d9dd6b3ba499d48ff3cd65 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 18 Jun 2026 07:14:02 +0000 Subject: [PATCH 6/7] Add microbenchmark to measure MemorySegmentVectorProvider's writeFloatVector and writeByteSequence --- .../MemorySegmentVectorProviderBenchmark.java | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/MemorySegmentVectorProviderBenchmark.java diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/MemorySegmentVectorProviderBenchmark.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/MemorySegmentVectorProviderBenchmark.java new file mode 100644 index 000000000..b8d354b0c --- /dev/null +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/MemorySegmentVectorProviderBenchmark.java @@ -0,0 +1,102 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.bench; + +import io.github.jbellis.jvector.vector.MemorySegmentVectorProvider; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.ByteSequence; +import io.github.jbellis.jvector.disk.IndexWriter; +import org.openjdk.jmh.annotations.*; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Fork(2) +@State(Scope.Benchmark) +public class MemorySegmentVectorProviderBenchmark { + + @Param({"512","1024","1536"}) + public int length; + + private MemorySegmentVectorProvider provider; + private VectorFloat fvector; + private ByteSequence bvector; + + @Setup(Level.Trial) + public void setup() { + provider = new MemorySegmentVectorProvider(); + float[] fdata = new float[length]; + byte[] bdata = new byte[length]; + for (int i = 0; i < length; i++) { + fdata[i] = (float) i; + bdata[i] = (byte) i; + } + fvector = provider.createFloatVector(fdata); + bvector = provider.createByteSequence(bdata); + } + + @Benchmark + public void writeFloatVector() throws IOException { + try (MemoryIndexWriter w = new MemoryIndexWriter(length * 4)) { + provider.writeFloatVector(w, fvector); + } + } + + @Benchmark + public void writeByteVector() throws IOException { + try (MemoryIndexWriter w = new MemoryIndexWriter(length * 4)) { + provider.writeByteSequence(w, bvector); + } + } + + static final class MemoryIndexWriter implements IndexWriter { + private final java.io.ByteArrayOutputStream bos; + private final java.io.DataOutputStream out; + + MemoryIndexWriter(int capacity) { + this.bos = new java.io.ByteArrayOutputStream(capacity); + this.out = new java.io.DataOutputStream(bos); + } + 
+ byte[] toByteArray() { return bos.toByteArray(); } + + @Override public long position() { return bos.size(); } + @Override public void close() throws IOException { out.close(); } + + @Override public void write(int b) throws IOException { out.write(b); } + @Override public void write(byte[] b) throws IOException { out.write(b); } + @Override public void write(byte[] b, int off, int len) throws IOException { out.write(b, off, len); } + @Override public void writeFloat(float v) throws IOException { out.writeFloat(v); } + + @Override public void writeBoolean(boolean v) throws IOException { out.writeBoolean(v); } + @Override public void writeByte(int v) throws IOException { out.writeByte(v); } + @Override public void writeShort(int v) throws IOException { out.writeShort(v); } + @Override public void writeChar(int v) throws IOException { out.writeChar(v); } + @Override public void writeInt(int v) throws IOException { out.writeInt(v); } + @Override public void writeLong(long v) throws IOException { out.writeLong(v); } + @Override public void writeDouble(double v) throws IOException { out.writeDouble(v); } + @Override public void writeBytes(String s) throws IOException { out.writeBytes(s); } + @Override public void writeChars(String s) throws IOException { out.writeChars(s); } + @Override public void writeUTF(String s) throws IOException { out.writeUTF(s); } + } +} From c8ee4bb5f483517ec4ce63cd0c9795cef44214f3 Mon Sep 17 00:00:00 2001 From: Ted Willke Date: Thu, 18 Jun 2026 17:37:00 +0000 Subject: [PATCH 7/7] Moved test to the vector package. 
--- .../jvector/{disk => vector}/MemorySegmentVectorProviderTest.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename jvector-native/src/test/java/io/github/jbellis/jvector/{disk => vector}/MemorySegmentVectorProviderTest.java (100%) diff --git a/jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java b/jvector-native/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java similarity index 100% rename from jvector-native/src/test/java/io/github/jbellis/jvector/disk/MemorySegmentVectorProviderTest.java rename to jvector-native/src/test/java/io/github/jbellis/jvector/vector/MemorySegmentVectorProviderTest.java