From bc61314b47be4303caef1f518226708213a02ce5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jun 2026 08:46:53 -0400 Subject: [PATCH 1/2] Encode parquet-format minor_version in thrift metadata --- src/main/thrift/parquet.thrift | 35 ++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index fe259d61..938fb89b 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -1363,16 +1363,39 @@ union EncryptionAlgorithm { * Description for file metadata */ struct FileMetaData { - /** Version of this file + /** Major Parquet Format Version * - * As of December 2025, there is no agreed upon consensus of what constitutes - * version 2 of the file. For maximum compatibility with readers, writers should - * always populate "1" for version. For maximum compatibility with writers, - * readers should accept "1" and "2" interchangeably. All other versions are - * reserved for potential future use-cases. + * This corresponds to the highest major version of the parquet-format whose + * features the file uses. For example, if a file contains features from parquet-format + * version 2.4, then this field should be set to "2". + * + * Prior to 2026, some readers support features added in version 2.0 and + * greater, but will reject files with the version set to 2.0. It was common + * practice for writers to populate "1" for version even if they used version + * 2.0. + * + * For maximum compatibility with writers, readers should accept "1" and "2" + * interchangeably. All other versions are reserved for potential future + * use-cases. */ 1: required i32 version + /** + * Minor Parquet Format Version + * + * This corresponds to the highest minor version of the parquet-format whose + * features the file uses. For example, if a file contains features from + * parquet-format version 2.4, then this field should be set to "4". 
+ * + * Note that Parquet does not follow semantic versioning and new forward + * incompatible features, such as new encodings, can be added in a minor + * version. See the documentation[1] for more details on the versioning scheme + * and the features added in each version. + * + * [1]: http://parquet.apache.org/docs/file-format/versions + **/ + 10: optional i32 minor_version + /** Parquet schema for this file. This schema contains metadata for all the columns. * The schema is represented as a tree with a single root. The nodes of the tree * are flattened to a list by doing a depth-first traversal. From cf448f1813ab7463056781ed3984ee82c77d7971 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jun 2026 08:55:16 -0400 Subject: [PATCH 2/2] tweaks --- src/main/thrift/parquet.thrift | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index 938fb89b..040ffc15 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -1369,8 +1369,8 @@ struct FileMetaData { * features the file uses. For example, if a file contains features from parquet-format * version 2.4, then this field should be set to "2". * - * Prior to 2026, some readers support features added in version 2.0 and - * greater, but will reject files with the version set to 2.0. It was common + * Prior to 2026, some readers supported features added in version 2.0 and + * greater, but would reject files with the version set to 2. It was common * practice for writers to populate "1" for version even if they used version * 2.0. * @@ -1380,20 +1380,19 @@ struct FileMetaData { */ 1: required i32 version - /** - * Minor Parquet Format Version - * - * This corresponds to the highest minor version of the parquet-format whose - * features the file uses. For example, if a file contains features from - * parquet-format version 2.4, then this field should be set to "4". 
- * - * Note that Parquet does not follow semantic versioning and new forward - * incompatible features, such as new encodings, can be added in a minor - * version. See the documentation[1] for more details on the versioning scheme - * and the features added in each version. - * - * [1]: http://parquet.apache.org/docs/file-format/versions - **/ + /** Minor Parquet Format Version + * + * This corresponds to the highest minor version of the parquet-format whose + * features the file uses. For example, if a file contains features from + * parquet-format version 2.4, then this field should be set to "4". + * + * Note that Parquet does not follow semantic versioning, and new + * forward-incompatible features, such as new encodings, can be added in + * minor versions. See the documentation[1] for more details on the versioning + * scheme and the features added in each version. + * + * [1]: https://parquet.apache.org/docs/file-format/versions + */ 10: optional i32 minor_version /** Parquet schema for this file. This schema contains metadata for all the columns.