From 1a0c1985ccb834bf0c812dcc9f64ced6ec9c1ef0 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 6 Apr 2026 15:55:09 +0900 Subject: [PATCH 1/2] Core: Extend partition statistics support to unpartitioned tables --- .../apache/iceberg/PartitionStatistics.java | 1 - .../apache/iceberg/PartitionStatsHandler.java | 2 - .../PartitionStatsHandlerTestBase.java | 45 ++++++++++++++++--- format/spec.md | 2 +- .../orc/TestOrcPartitionStatsHandler.java | 7 +++ 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/PartitionStatistics.java b/api/src/main/java/org/apache/iceberg/PartitionStatistics.java index b965f32161da..df1ab04f73bd 100644 --- a/api/src/main/java/org/apache/iceberg/PartitionStatistics.java +++ b/api/src/main/java/org/apache/iceberg/PartitionStatistics.java @@ -57,7 +57,6 @@ public interface PartitionStatistics extends StructLike { .build(); static Schema schema(Types.StructType unifiedPartitionType, int formatVersion) { - Preconditions.checkState(!unifiedPartitionType.fields().isEmpty(), "Table must be partitioned"); Preconditions.checkState(formatVersion > 0, "Invalid format version: %d", formatVersion); if (formatVersion <= 2) { diff --git a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java index 9420095f94a3..edf031aeefdc 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java +++ b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java @@ -183,7 +183,6 @@ private PartitionStatsHandler() {} */ @Deprecated public static Schema schema(StructType unifiedPartitionType, int formatVersion) { - Preconditions.checkState(!unifiedPartitionType.fields().isEmpty(), "Table must be partitioned"); Preconditions.checkState( formatVersion > 0 && formatVersion <= TableMetadata.SUPPORTED_TABLE_FORMAT_VERSION, "Invalid format version: %d", @@ -278,7 +277,6 @@ public static PartitionStatisticsFile computeAndWriteStatsFile(Table table) thro public static PartitionStatisticsFile computeAndWriteStatsFile(Table table, long snapshotId) throws IOException { Preconditions.checkArgument(table != null, "Invalid table: null"); - Preconditions.checkArgument(Partitioning.isPartitioned(table), "Table must be partitioned"); Snapshot snapshot = table.snapshot(snapshotId); Preconditions.checkArgument(snapshot != null, "Snapshot not found: %s", snapshotId); diff --git a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java index 7b7e85bfffce..bc256305e4d1 100644 --- a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java +++ b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java @@ -34,6 +34,7 @@ import java.util.Map; import java.util.Objects; import java.util.UUID; +import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -89,23 +90,53 @@ public void testPartitionStatsOnInvalidSnapshot() throws Exception { .hasMessage("Snapshot not found: 42"); } - @Test + @TestTemplate public void testPartitionStatsOnUnPartitionedTable() throws Exception { Table testTable = TestTables.create( - tempDir("unpartitioned_table"), - "unpartitioned_table", + tempDir("unpartitioned_table_" + formatVersion), + "unpartitioned_table_" + formatVersion, SCHEMA, PartitionSpec.unpartitioned(), - 2, + formatVersion, fileFormatProperty); + // Add a first data file and update partition stats DataFile dataFile = FileGenerationUtil.generateDataFile(testTable, TestHelpers.Row.of()); testTable.newAppend().appendFile(dataFile).commit(); - assertThatThrownBy(() -> PartitionStatsHandler.computeAndWriteStatsFile(testTable)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Table must be partitioned"); + testTable + .updatePartitionStatistics() + .setPartitionStatistics(PartitionStatsHandler.computeAndWriteStatsFile(testTable)) + .commit(); + + try (CloseableIterable recordIterator = + new BasePartitionStatisticsScan(testTable).scan()) { + List partitionStats = Lists.newArrayList(recordIterator); + assertThat(partitionStats).hasSize(1); + PartitionStatistics stats = partitionStats.get(0); + assertThat(stats.partition()).isEqualTo(GenericRecord.create(Types.StructType.of())); + assertThat(stats.dataRecordCount()).isEqualTo(dataFile.recordCount()); + } + + // Add a second data file and update partition stats + DataFile dataFile2 = FileGenerationUtil.generateDataFile(testTable, TestHelpers.Row.of()); + testTable.newAppend().appendFile(dataFile2).commit(); + + testTable + .updatePartitionStatistics() + .setPartitionStatistics(PartitionStatsHandler.computeAndWriteStatsFile(testTable)) + .commit(); + + try (CloseableIterable recordIterator = + new BasePartitionStatisticsScan(testTable).scan()) { + List partitionStats = Lists.newArrayList(recordIterator); + assertThat(partitionStats).hasSize(1); + PartitionStatistics stats = partitionStats.get(0); + assertThat(stats.partition()).isEqualTo(GenericRecord.create(Types.StructType.of())); + assertThat(stats.dataRecordCount()) + .isEqualTo(dataFile.recordCount() + dataFile2.recordCount()); + } } @TestTemplate diff --git a/format/spec.md b/format/spec.md index 0d3c79762c6c..ef289d844f5d 100644 --- a/format/spec.md +++ b/format/spec.md @@ -1007,7 +1007,7 @@ The schema of the partition statistics file is as follows: | v1 | v2 | v3 | Field id, name | Type | Description | |----|----|----|----------------|------|-------------| -| _required_ | _required_ | _required_ | **`1 partition`** | `struct<..>` | Partition data tuple, schema based on the unified partition type considering all specs in a table | +| _required_ | _required_ | _required_ | **`1 partition`** | `struct<..>` | Partition data tuple, schema based on the unified partition type considering all specs in a table, empty for unpartitioned tables | | _required_ | _required_ | _required_ | **`2 spec_id`** | `int` | Partition spec id | | _required_ | _required_ | _required_ | **`3 data_record_count`** | `long` | Count of records in data files | | _required_ | _required_ | _required_ | **`4 data_file_count`** | `int` | Count of data files | diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java b/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java index 117293629f58..4d904c886767 100644 --- a/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java +++ b/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java @@ -36,6 +36,13 @@ public void testAllDatatypePartitionWriting() throws Exception { .hasMessage("Cannot write using unregistered internal data format: ORC"); } + @Override + public void testPartitionStatsOnUnPartitionedTable() { + assertThatThrownBy(super::testPartitionStatsOnUnPartitionedTable) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Cannot write using unregistered internal data format: ORC"); + } + @Override public void testOptionalFieldsWriting() throws Exception { assertThatThrownBy(super::testOptionalFieldsWriting) From 638ab9fa8bcc59fb9fd2a206dd66fa9e3d9dfeea Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 6 Apr 2026 18:54:16 +0900 Subject: [PATCH 2/2] fixup! Core: Extend partition statistics support to unpartitioned tables --- .../apache/iceberg/PartitionStatistics.java | 73 ++++++---- .../iceberg/BasePartitionStatistics.java | 18 ++- .../apache/iceberg/PartitionStatsHandler.java | 137 ++++++++++++++---- .../PartitionStatsHandlerTestBase.java | 5 +- format/spec.md | 2 +- 5 files changed, 171 insertions(+), 64 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/PartitionStatistics.java b/api/src/main/java/org/apache/iceberg/PartitionStatistics.java index df1ab04f73bd..61ccafde4f87 100644 --- a/api/src/main/java/org/apache/iceberg/PartitionStatistics.java +++ b/api/src/main/java/org/apache/iceberg/PartitionStatistics.java @@ -20,6 +20,7 @@ import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; /** Interface for partition statistics returned from a {@link PartitionStatisticsScan}. */ @@ -67,50 +68,64 @@ static Schema schema(Types.StructType unifiedPartitionType, int formatVersion) { } private static Schema v2Schema(Types.StructType unifiedPartitionType) { - return new Schema( - Types.NestedField.required( - EMPTY_PARTITION_FIELD.fieldId(), EMPTY_PARTITION_FIELD.name(), unifiedPartitionType), - SPEC_ID, - DATA_RECORD_COUNT, - DATA_FILE_COUNT, - TOTAL_DATA_FILE_SIZE_IN_BYTES, - POSITION_DELETE_RECORD_COUNT, - POSITION_DELETE_FILE_COUNT, - EQUALITY_DELETE_RECORD_COUNT, - EQUALITY_DELETE_FILE_COUNT, - TOTAL_RECORD_COUNT, - LAST_UPDATED_AT, - LAST_UPDATED_SNAPSHOT_ID); + ImmutableList.Builder fields = ImmutableList.builder(); + if (!unifiedPartitionType.fields().isEmpty()) { + fields.add( + Types.NestedField.required( + EMPTY_PARTITION_FIELD.fieldId(), EMPTY_PARTITION_FIELD.name(), unifiedPartitionType)); + } + + fields.add(SPEC_ID); + fields.add(DATA_RECORD_COUNT); + fields.add(DATA_FILE_COUNT); + fields.add(TOTAL_DATA_FILE_SIZE_IN_BYTES); + fields.add(POSITION_DELETE_RECORD_COUNT); + fields.add(POSITION_DELETE_FILE_COUNT); + fields.add(EQUALITY_DELETE_RECORD_COUNT); + fields.add(EQUALITY_DELETE_FILE_COUNT); + fields.add(TOTAL_RECORD_COUNT); + fields.add(LAST_UPDATED_AT); + fields.add(LAST_UPDATED_SNAPSHOT_ID); + return new Schema(fields.build()); } private static Schema v3Schema(Types.StructType unifiedPartitionType) { - return new Schema( - Types.NestedField.required( - EMPTY_PARTITION_FIELD.fieldId(), EMPTY_PARTITION_FIELD.name(), unifiedPartitionType), - SPEC_ID, - DATA_RECORD_COUNT, - DATA_FILE_COUNT, - TOTAL_DATA_FILE_SIZE_IN_BYTES, + ImmutableList.Builder fields = ImmutableList.builder(); + if (!unifiedPartitionType.fields().isEmpty()) { + fields.add( + Types.NestedField.required( + EMPTY_PARTITION_FIELD.fieldId(), EMPTY_PARTITION_FIELD.name(), unifiedPartitionType)); + } + + fields.add(SPEC_ID); + fields.add(DATA_RECORD_COUNT); + fields.add(DATA_FILE_COUNT); + fields.add(TOTAL_DATA_FILE_SIZE_IN_BYTES); + fields.add( Types.NestedField.required( POSITION_DELETE_RECORD_COUNT.fieldId(), POSITION_DELETE_RECORD_COUNT.name(), - Types.LongType.get()), + Types.LongType.get())); + fields.add( Types.NestedField.required( POSITION_DELETE_FILE_COUNT.fieldId(), POSITION_DELETE_FILE_COUNT.name(), - Types.IntegerType.get()), + Types.IntegerType.get())); + fields.add( Types.NestedField.required( EQUALITY_DELETE_RECORD_COUNT.fieldId(), EQUALITY_DELETE_RECORD_COUNT.name(), - Types.LongType.get()), + Types.LongType.get())); + fields.add( Types.NestedField.required( EQUALITY_DELETE_FILE_COUNT.fieldId(), EQUALITY_DELETE_FILE_COUNT.name(), - Types.IntegerType.get()), - TOTAL_RECORD_COUNT, - LAST_UPDATED_AT, - LAST_UPDATED_SNAPSHOT_ID, - DV_COUNT); + Types.IntegerType.get())); + fields.add(TOTAL_RECORD_COUNT); + fields.add(LAST_UPDATED_AT); + fields.add(LAST_UPDATED_SNAPSHOT_ID); + fields.add(DV_COUNT); + return new Schema(fields.build()); } /* The positions of each statistics within the full schema of partition statistics. */ diff --git a/core/src/main/java/org/apache/iceberg/BasePartitionStatistics.java b/core/src/main/java/org/apache/iceberg/BasePartitionStatistics.java index 4b1a3a6dba93..5bd88e0f2b26 100644 --- a/core/src/main/java/org/apache/iceberg/BasePartitionStatistics.java +++ b/core/src/main/java/org/apache/iceberg/BasePartitionStatistics.java @@ -40,6 +40,22 @@ public class BasePartitionStatistics extends SupportsIndexProjection private static final int STATS_COUNT = 13; + private static final Types.StructType BASE_TYPE = + Types.StructType.of( + EMPTY_PARTITION_FIELD, + SPEC_ID, + DATA_RECORD_COUNT, + DATA_FILE_COUNT, + TOTAL_DATA_FILE_SIZE_IN_BYTES, + POSITION_DELETE_RECORD_COUNT, + POSITION_DELETE_FILE_COUNT, + EQUALITY_DELETE_RECORD_COUNT, + EQUALITY_DELETE_FILE_COUNT, + TOTAL_RECORD_COUNT, + LAST_UPDATED_AT, + LAST_UPDATED_SNAPSHOT_ID, + DV_COUNT); + BasePartitionStatistics(StructLike partition, int specId) { super(STATS_COUNT); @@ -58,7 +74,7 @@ public class BasePartitionStatistics extends SupportsIndexProjection /** Used by internal readers to instantiate this class with a projection schema. */ BasePartitionStatistics(Types.StructType projection) { - super(STATS_COUNT); + super(BASE_TYPE, projection); } @Override diff --git a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java index edf031aeefdc..d4f09a138b6d 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java +++ b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java @@ -196,48 +196,62 @@ public static Schema schema(StructType unifiedPartitionType, int formatVersion) } private static Schema v2Schema(StructType unifiedPartitionType) { - return new Schema( - NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME, unifiedPartitionType), - SPEC_ID, - DATA_RECORD_COUNT, - DATA_FILE_COUNT, - TOTAL_DATA_FILE_SIZE_IN_BYTES, - POSITION_DELETE_RECORD_COUNT, - POSITION_DELETE_FILE_COUNT, - EQUALITY_DELETE_RECORD_COUNT, - EQUALITY_DELETE_FILE_COUNT, - TOTAL_RECORD_COUNT, - LAST_UPDATED_AT, - LAST_UPDATED_SNAPSHOT_ID); + List fields = Lists.newArrayList(); + if (!unifiedPartitionType.fields().isEmpty()) { + fields.add( + NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME, unifiedPartitionType)); + } + + fields.add(SPEC_ID); + fields.add(DATA_RECORD_COUNT); + fields.add(DATA_FILE_COUNT); + fields.add(TOTAL_DATA_FILE_SIZE_IN_BYTES); + fields.add(POSITION_DELETE_RECORD_COUNT); + fields.add(POSITION_DELETE_FILE_COUNT); + fields.add(EQUALITY_DELETE_RECORD_COUNT); + fields.add(EQUALITY_DELETE_FILE_COUNT); + fields.add(TOTAL_RECORD_COUNT); + fields.add(LAST_UPDATED_AT); + fields.add(LAST_UPDATED_SNAPSHOT_ID); + return new Schema(fields); } private static Schema v3Schema(StructType unifiedPartitionType) { - return new Schema( - NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME, unifiedPartitionType), - SPEC_ID, - DATA_RECORD_COUNT, - DATA_FILE_COUNT, - TOTAL_DATA_FILE_SIZE_IN_BYTES, + List fields = Lists.newArrayList(); + if (!unifiedPartitionType.fields().isEmpty()) { + fields.add( + NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME, unifiedPartitionType)); + } + + fields.add(SPEC_ID); + fields.add(DATA_RECORD_COUNT); + fields.add(DATA_FILE_COUNT); + fields.add(TOTAL_DATA_FILE_SIZE_IN_BYTES); + fields.add( NestedField.required( POSITION_DELETE_RECORD_COUNT.fieldId(), POSITION_DELETE_RECORD_COUNT.name(), - LongType.get()), + LongType.get())); + fields.add( NestedField.required( POSITION_DELETE_FILE_COUNT.fieldId(), POSITION_DELETE_FILE_COUNT.name(), - IntegerType.get()), + IntegerType.get())); + fields.add( NestedField.required( EQUALITY_DELETE_RECORD_COUNT.fieldId(), EQUALITY_DELETE_RECORD_COUNT.name(), - LongType.get()), + LongType.get())); + fields.add( NestedField.required( EQUALITY_DELETE_FILE_COUNT.fieldId(), EQUALITY_DELETE_FILE_COUNT.name(), - IntegerType.get()), - TOTAL_RECORD_COUNT, - LAST_UPDATED_AT, - LAST_UPDATED_SNAPSHOT_ID, - DV_COUNT); + IntegerType.get())); + fields.add(TOTAL_RECORD_COUNT); + fields.add(LAST_UPDATED_AT); + fields.add(LAST_UPDATED_SNAPSHOT_ID); + fields.add(DV_COUNT); + return new Schema(fields); } /** @@ -329,9 +343,14 @@ static PartitionStatisticsFile writePartitionStatsFile( OutputFile outputFile = newPartitionStatsFile(table, fileFormat, snapshotId); + boolean hasPartition = + dataSchema.findField(PartitionStatistics.EMPTY_PARTITION_FIELD.fieldId()) != null; try (FileAppender writer = InternalData.write(fileFormat, outputFile).schema(dataSchema).build()) { - records.iterator().forEachRemaining(writer::add); + records + .iterator() + .forEachRemaining( + record -> writer.add(toGenericRecord(record, dataSchema, hasPartition))); } return ImmutableGenericPartitionStatisticsFile.builder() @@ -341,6 +360,32 @@ static PartitionStatisticsFile writePartitionStatsFile( .build(); } + private static GenericRecord toGenericRecord( + PartitionStatistics stats, Schema dataSchema, boolean hasPartition) { + GenericRecord record = GenericRecord.create(dataSchema.asStruct()); + int pos = 0; + if (hasPartition) { + record.set(pos++, stats.partition()); + } + + record.set(pos++, stats.specId()); + record.set(pos++, stats.dataRecordCount()); + record.set(pos++, stats.dataFileCount()); + record.set(pos++, stats.totalDataFileSizeInBytes()); + record.set(pos++, stats.positionDeleteRecordCount()); + record.set(pos++, stats.positionDeleteFileCount()); + record.set(pos++, stats.equalityDeleteRecordCount()); + record.set(pos++, stats.equalityDeleteFileCount()); + record.set(pos++, stats.totalRecords()); + record.set(pos++, stats.lastUpdatedAt()); + record.set(pos++, stats.lastUpdatedSnapshotId()); + if (dataSchema.findField(PartitionStatistics.DV_COUNT.fieldId()) != null) { + record.set(pos, stats.dvCount()); + } + + return record; + } + /** * Reads partition statistics from the specified {@link InputFile} using given schema. * @@ -400,7 +445,10 @@ private static Collection computeAndMergeStatsIncremental( table.newPartitionStatisticsScan().useSnapshot(lastSnapshotWithStats).scan()) { oldStats.forEach( partitionStats -> - statsMap.put(partitionStats.specId(), partitionStats.partition(), partitionStats)); + statsMap.put( + partitionStats.specId(), + partitionStats.partition(), + copyWithIdentityMapping(partitionStats))); } catch (Exception exception) { throw new InvalidStatsFileException(exception); } @@ -424,6 +472,10 @@ private static Collection computeAndMergeStatsIncremental( } private static GenericRecord partitionDataToRecord(PartitionData data) { + if (data.getPartitionType().fields().isEmpty()) { + return null; + } + GenericRecord record = GenericRecord.create(data.getPartitionType()); for (int index = 0; index < record.size(); index++) { record.set(index, data.get(index)); @@ -510,11 +562,12 @@ private static PartitionMap collectStatsForManifest( PartitionUtil.coercePartition(partitionType, spec, file.partition()); StructLike key = keyTemplate.copyFor(coercedPartition); Snapshot snapshot = table.snapshot(entry.snapshotId()); + StructLike partitionValue = partitionType.fields().isEmpty() ? null : key; PartitionStatistics stats = statsMap.computeIfAbsent( specId, ((PartitionData) file.partition()).copy(), - () -> new BasePartitionStatistics(key, specId)); + () -> new BasePartitionStatistics(partitionValue, specId)); if (entry.isLive()) { // Live can have both added and existing entries. Consider only added entries for // incremental compute as existing entries was already included in previous compute. @@ -740,6 +793,30 @@ private static void updateSnapshotInfo( } } + private static BasePartitionStatistics copyWithIdentityMapping(PartitionStatistics stats) { + BasePartitionStatistics copy = new BasePartitionStatistics(stats.partition(), stats.specId()); + copy.set(PartitionStatistics.DATA_RECORD_COUNT_POSITION, stats.dataRecordCount()); + copy.set(PartitionStatistics.DATA_FILE_COUNT_POSITION, stats.dataFileCount()); + copy.set( + PartitionStatistics.TOTAL_DATA_FILE_SIZE_IN_BYTES_POSITION, + stats.totalDataFileSizeInBytes()); + copy.set( + PartitionStatistics.POSITION_DELETE_RECORD_COUNT_POSITION, + stats.positionDeleteRecordCount()); + copy.set( + PartitionStatistics.POSITION_DELETE_FILE_COUNT_POSITION, stats.positionDeleteFileCount()); + copy.set( + PartitionStatistics.EQUALITY_DELETE_RECORD_COUNT_POSITION, + stats.equalityDeleteRecordCount()); + copy.set( + PartitionStatistics.EQUALITY_DELETE_FILE_COUNT_POSITION, stats.equalityDeleteFileCount()); + copy.set(PartitionStatistics.TOTAL_RECORD_COUNT_POSITION, stats.totalRecords()); + copy.set(PartitionStatistics.LAST_UPDATED_AT_POSITION, stats.lastUpdatedAt()); + copy.set(PartitionStatistics.LAST_UPDATED_SNAPSHOT_ID_POSITION, stats.lastUpdatedSnapshotId()); + copy.set(PartitionStatistics.DV_COUNT_POSITION, stats.dvCount()); + return copy; + } + private static class InvalidStatsFileException extends RuntimeException { InvalidStatsFileException(Throwable cause) { diff --git a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java index bc256305e4d1..e29ae1d63a64 100644 --- a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java +++ b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java @@ -34,7 +34,6 @@ import java.util.Map; import java.util.Objects; import java.util.UUID; -import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -115,7 +114,7 @@ public void testPartitionStatsOnUnPartitionedTable() throws Exception { List partitionStats = Lists.newArrayList(recordIterator); assertThat(partitionStats).hasSize(1); PartitionStatistics stats = partitionStats.get(0); - assertThat(stats.partition()).isEqualTo(GenericRecord.create(Types.StructType.of())); + assertThat(stats.partition()).isNull(); assertThat(stats.dataRecordCount()).isEqualTo(dataFile.recordCount()); } @@ -133,7 +132,7 @@ public void testPartitionStatsOnUnPartitionedTable() throws Exception { List partitionStats = Lists.newArrayList(recordIterator); assertThat(partitionStats).hasSize(1); PartitionStatistics stats = partitionStats.get(0); - assertThat(stats.partition()).isEqualTo(GenericRecord.create(Types.StructType.of())); + assertThat(stats.partition()).isNull(); assertThat(stats.dataRecordCount()) .isEqualTo(dataFile.recordCount() + dataFile2.recordCount()); } diff --git a/format/spec.md b/format/spec.md index ef289d844f5d..119ad1380ef3 100644 --- a/format/spec.md +++ b/format/spec.md @@ -1007,7 +1007,7 @@ The schema of the partition statistics file is as follows: | v1 | v2 | v3 | Field id, name | Type | Description | |----|----|----|----------------|------|-------------| -| _required_ | _required_ | _required_ | **`1 partition`** | `struct<..>` | Partition data tuple, schema based on the unified partition type considering all specs in a table, empty for unpartitioned tables | +| _optional_ | _optional_ | _optional_ | **`1 partition`** | `struct<..>` | Partition data tuple, schema based on the unified partition type considering all specs in a table, `NULL` for unpartitioned tables | | _required_ | _required_ | _required_ | **`2 spec_id`** | `int` | Partition spec id | | _required_ | _required_ | _required_ | **`3 data_record_count`** | `long` | Count of records in data files | | _required_ | _required_ | _required_ | **`4 data_file_count`** | `int` | Count of data files |