diff --git a/README.md b/README.md index 42578c7be..2fb3a046e 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,51 @@ chunks they are interested in. The columns chunks should then be read sequentia ![File Layout](https://raw.github.com/apache/parquet-format/master/doc/images/FileLayout.gif) + ### PAR3 File Footers + + PAR3 file footer footer format designed to better support wider-schemas and more control + over the various footer size vs compute trade-offs. Its format is as follows: + - Serialized Thrift FileMetadata Structure + - (Optional) 4 byte CRC32 of the serialized Thrift FileMetadata. + - 4-byte length in bytes (little endian) of all preceding elements in the footer. + - 4-byte little-endian flag field to indicate features that require special parsing of the footer. + Readers MUST raise an error if there is an unrecognized flag. Current flags: + + * 0x01 - Footer encryption enabled (when set the encryption information is written before + FileMeta structure as in the PAR1 footer). + * 0x02 - CRC32 of FileMetadata Footer. + + - 4-byte magic number "PAR3" + + When parsing the footer implementations SHOULD read at least the last 12 bytes of the footer. Then + read in the entirety of the footer based on the length of all preceding elements. This prevents further + I/O cost for accessing metadata stored in the data pages. PAR3 footers can fully replace PAR1 footers. + If a file is written with only PAR3 footer, implementation MUST write PAR3 as the first four bytes in + they file. PAR3 footers can also be written in a backwards compatible way after PAR1 Metadata + (see next section for details). + + #### Dual Mode PAR1 and PAR3 footers + + The following section defines a layout that allows PAR1 + and PAR3 headers to co-exist in a single logical footer + but allow legacy readers to still read files. + + The laout consists of the following: + - Serialized PAR1 FileMetadata Thrift object + - PAR3 footer as described above + - 4 byte little-endian length in bytes of all + preceding elements. + - 4-byte magic number "PAR1" + + Readers aware of PAR3 can check for the "PAR3" magic number + beginning 12 bytes from the end of the file (This should + be unambiguous because thrift serialization of structs + use 0x00 as a field end delimiter). + (TODO: decide if one of the alternatives of embedding + the footer as a unknown field FileMetadata desirable as discussed in [Alkis's doc](https://docs.google.com/document/d/1PQpY418LkIDHMFYCY8ne_G-CFpThK15LLpzWYbc7rFU/edit)) + + When embedded into a PAR1 file no modification to the magic number at the beginning of the file is mandated. + ## Metadata There are three types of metadata: file metadata, column (chunk) metadata and page header metadata. All thrift structures are serialized using the TCompactProtocol. diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index c928ad66b..415b966b0 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -770,17 +770,31 @@ struct PageEncodingStats { /** * Description for column metadata + * Next-Id: 20 */ struct ColumnMetaData { - /** Type of this column **/ - 1: required Type type + /** Type of this column + * + * Available from schema via efficient lookup with schema_index. + * + * PAR1: Required. + * PAR3: Don't populate. + **/ + 1: optional Type type /** Set of all encodings used for this column. The purpose is to validate - * whether we can decode those pages. **/ - 2: required list encodings + * whether we can decode those pages. + * + * PAR1: Required. + * PAR3: don't populate redundant with column page stats. + **/ + 2: optional list encodings - /** Path in schema **/ - 3: required list path_in_schema + /** Path in schema + * PAR1 Footer: Required. + * PAR3 Footer: Deprecated (don't populate). Can be inferred from schema element. + */ + 3: optional list path_in_schema /** Compression codec **/ 4: required CompressionCodec codec @@ -792,12 +806,23 @@ struct ColumnMetaData { 6: required i64 total_uncompressed_size /** total byte size of all compressed, and potentially encrypted, pages - * in this column chunk (including the headers) **/ + * in this column chunk (including the headers) + * + * Fetching the range of min(dictionary_page_offset, data_page_offset) + * + total_compressed_size should fetch all data in the the given column + * chunk. + */ 7: required i64 total_compressed_size - /** Optional key/value metadata **/ + /** Optional key/value metadata + * PAR1: Optional. + * PAR3: Don't write use key_value_metadata instead. + **/ 8: optional list key_value_metadata + /** See description on FileMetata.key_value_metadata **/ + 19: optional MetadataPage key_value_metadata_page + /** Byte offset from beginning of file to first data page **/ 9: required i64 data_page_offset @@ -812,8 +837,20 @@ struct ColumnMetaData { /** Set of all encodings used for pages in this column chunk. * This information can be used to determine if all data pages are - * dictionary encoded for example **/ + * dictionary encoded for example + * + * PAR1: Optional. May be deprecated in a future release in favor + * serialized_encoding_stats. + * PAR3: Don't populate. Write serialized_page_encoding_stats. + **/ 13: optional list encoding_stats; + /** + * Serialized page encoding stats. + * + * PAR1: Start populating after encoding_stats is deprecated. + * PAR3: Populate instead of encoding_stats. + */ + 17: optional binary serialized_encoding_stats /** Byte offset from beginning of file to Bloom filter data. **/ 14: optional i64 bloom_filter_offset; @@ -831,8 +868,13 @@ struct ColumnMetaData { * representations. The histograms contained in these statistics can * also be useful in some cases for more fine-grained nullability/list length * filter pushdown. + * + * PAR1: Optional. + * PAR3: Populate serialized_size_statistics. */ 16: optional SizeStatistics size_statistics; + /** Thrift serialized SizeStatistics **/ + 18: optional binary serialized_size_statistics; } struct EncryptionWithFooterKey { @@ -854,6 +896,9 @@ union ColumnCryptoMetaData { struct ColumnChunk { /** File where column data is stored. If not set, assumed to be same file as * metadata. This path is relative to the current file. + * + * DEPRECATED. The one know use-case for this is metadata cache files. + * These have been superceded by open source table formats, prefer those. **/ 1: optional string file_path @@ -883,13 +928,42 @@ struct ColumnChunk { /** Encrypted column metadata for this chunk **/ 9: optional binary encrypted_column_metadata + /** + * The column order for this chunk. + * + * If not set readers should check FileMetadata.column_orders + * instead. + * + * Populated in both PAR1 and PAR3 + */ + 10: optional ColumnOrder column_order + /** Set to true if all pages in the column chunk are dictionary + * encoded + */ + 11: optional bool all_pages_dictionary_encoded + /** + * The index to the SchemaElement in FileMetadata for this + * column. + */ + 12: optional i32 schema_index } struct RowGroup { /** Metadata for each column chunk in this row group. * This list must have the same order as the SchemaElement list in FileMetaData. + * + * PAR1: Required + * PAR3: Not populated. Use columns_page. **/ - 1: required list columns + 1: optional list columns + + /** Page has BYTE_ARRAY data where each element is REQUIRED. + * + * Each element is a Thrift Serialized ColumnChunk + * + * PAR1: Don't include + * PAR3: Required **/ + 8: optional MetadataPage columns_page /** Total byte size of all the uncompressed column data in this row group **/ 2: required i64 total_byte_size @@ -1115,6 +1189,34 @@ union EncryptionAlgorithm { 2: AesGcmCtrV1 AES_GCM_CTR_V1 } +/** + * Embedded metadata page. + * + * A metadata page is a data page used to store metadata about + * the data stored in the file. This is a key feature of PAR3 + * footers which allow for deferred decoding of metadata. + * + * For common use cases the current recommendation is to use a + * an encoding that supported random access but implementations may choose + * other configuration parameters if necessary. Implementations + * SHOULD consider allowing configurability per page to allow for end-users + * to optimize size vs compute trade-offs that make sense for their use-case. + * + * Statistics for Metadata pages SHOULD NOT be written. + * + * Structs of this type should never be written in PAR1. + */ +struct MetadataPage { + // A serialized page including metadata thrift header and data. + 1: required binary page + // Optional compression applied to the page. + 2: optional CompressionCodec compression + // Number of elements stored. This is duplicated here to help in + // use-cases where knowing the total number of elements up front for + // computation would be useful. + 3: num_values +} + /** * Description for file metadata */ @@ -1127,18 +1229,48 @@ struct FileMetaData { * are flattened to a list by doing a depth-first traversal. * The column metadata contains the path in the schema for that column which can be * used to map columns to nodes in the schema. - * The first element is the root **/ - 2: required list schema; + * The first element is the root + * + * PAR1: Required + * PAR3: Use schema_page + **/ + 2: optional list schema; + + /** Page has BYTE_ARRAY data where each element is REQUIRED. + * + * Each element is a serialized SchemaElement. The order and content should + * have a one to one correspondence with schema. + */ + 10: optional MetadataPage schema_page; /** Number of rows in this file **/ 3: required i64 num_rows - /** Row groups in this file **/ - 4: required list row_groups + /** Row groups in this file + * + * PAR1: Required + * PAR3: Use row_groups_page + **/ + 4: optional list row_groups + /** Page has BYTE_ARRAY data where each element is REQUIRED. + * + * Each element is a thrift serialized RowGroup. + */ + 10: optional MetadataPage row_groups_page - /** Optional key/value metadata **/ + /** Optional key/value metadata + * + * PAR1: optional + * PAR3: Use key_value_metadata_page + **/ 5: optional list key_value_metadata + /** Page has BYTE_ARRAY data where each element is REQUIRED. + * + * Each element in the page is a serialized KeyValue struct. + */ + 13: optional MetadataPage key_value_metadata_page + /** String for application that wrote this file. This should be in the format * version (build ). * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) @@ -1160,6 +1292,10 @@ struct FileMetaData { * * The obsolete min and max fields in the Statistics object are always sorted * by signed comparison regardless of column_orders. + * + * PAR1: Optional, may be deprecated in the future in favor of + * ColumnChunk.column_order + * PAR3: Not written use ColumnChunk.column_order. */ 7: optional list column_orders;