From b2456b7b104b92007e6d937851973ca7c95f1479 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Fri, 19 Jun 2026 15:06:46 +0200 Subject: [PATCH 1/2] GH-3558: Properly close buffers for vectored IO --- .../parquet/hadoop/ParquetFileReader.java | 29 +++++++++++++++++-- pom.xml | 2 +- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java index e0b0d76e0e..95914bcb2a 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java @@ -65,6 +65,7 @@ import org.apache.parquet.HadoopReadOptions; import org.apache.parquet.ParquetReadOptions; import org.apache.parquet.Preconditions; +import org.apache.parquet.bytes.ByteBufferAllocator; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.bytes.ByteBufferReleaser; import org.apache.parquet.bytes.BytesInput; @@ -1361,8 +1362,32 @@ private void readVectored(List allParts, ChunkListBuilder b totalSize += len; } LOG.debug("Reading {} bytes of data with vectored IO in {} ranges", totalSize, ranges.size()); - // Request a vectored read; - f.readVectored(ranges, options.getAllocator()); + // Wrap the allocator to track buffers allocated during the vectored read. + // Hadoop's vectored IO may merge ranges and return slices of merged buffers, + // so the buffers returned via CompletableFuture may differ from those originally + // allocated. We track the originals here to ensure they are properly released. + ByteBufferAllocator baseAllocator = options.getAllocator(); + List allocatedBuffers = new ArrayList<>(); + ByteBufferAllocator trackingAllocator = new ByteBufferAllocator() { + @Override + public ByteBuffer allocate(int size) { + ByteBuffer buf = baseAllocator.allocate(size); + allocatedBuffers.add(buf); + return buf; + } + + @Override + public void release(ByteBuffer b) { + baseAllocator.release(b); + } + + @Override + public boolean isDirect() { + return baseAllocator.isDirect(); + } + }; + f.readVectored(ranges, trackingAllocator); + builder.addBuffersToRelease(allocatedBuffers); int k = 0; for (ConsecutivePartList consecutivePart : allParts) { ParquetFileRange currRange = ranges.get(k++); diff --git a/pom.xml b/pom.xml index a0a6678163..84b61fe81e 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,7 @@ 2.46.1 shaded.parquet - 3.3.0 + 3.5.0 2.13.0 1.17.0 thrift From 676f1be06e436a80b1386f76d30d9e1ebc0f1363 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Fri, 19 Jun 2026 15:18:05 +0200 Subject: [PATCH 2/2] Update pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 84b61fe81e..45ad9118e4 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,7 @@ 2.46.1 shaded.parquet - 3.5.0 + 3.4.3 2.13.0 1.17.0 thrift