From f951a6d657af90c82a180c92292d8f8a4c57ad1a Mon Sep 17 00:00:00 2001 From: Alkis Evlogimenos Date: Fri, 12 Dec 2025 08:56:36 +0100 Subject: [PATCH 1/5] Add parquet flatbuf schema --- src/main/flatbuf/parquet3.fbs | 224 ++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 src/main/flatbuf/parquet3.fbs diff --git a/src/main/flatbuf/parquet3.fbs b/src/main/flatbuf/parquet3.fbs new file mode 100644 index 000000000..68d858f50 --- /dev/null +++ b/src/main/flatbuf/parquet3.fbs @@ -0,0 +1,224 @@ +namespace parquet.format3; + +// Optimization notes +// 1. Statistics are stored in integral types if their size is fixed, otherwise prefix + suffix +// 2. ColumnMetaData.encoding_stats are removed, they are replaced with +// ColumnMetaData.is_fully_dict_encoded. +// 3. RowGroups are limited to 2GB in size, so we can use int for sizes. +// 4. ColumnChunk/ColumnMetaData offsets are now relative to the start of the row group, so we can +// use int for offsets. +// 5. Remove ordinal. +// 6. Restrict RowGroups to 2^31-1 rows. +// 7. Remove offset/column indexes, they are small and just their offsets are of similar size. + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Physical types. +/////////////////////////////////////////////////////////////////////////////////////////////////// + +enum Type : byte { + BOOLEAN = 0, + INT32 = 1, + INT64 = 2, + INT96 = 3, + FLOAT = 4, + DOUBLE = 5, + BYTE_ARRAY = 6, + FIXED_LEN_BYTE_ARRAY = 7, +} + +enum FieldRepetitionType : byte { + REQUIRED = 0, + OPTIONAL = 1, + REPEATED = 2, +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Encodings. +/////////////////////////////////////////////////////////////////////////////////////////////////// + +// Note: Match the thrift enum values so that we can cast between them. 
+enum Encoding : byte { + PLAIN = 0, + // GROUP_VAR_INT = 1, + PLAIN_DICTIONARY = 2, + RLE = 3, + // BIT_PACKED = 4, + DELTA_BINARY_PACKED = 5, + DELTA_LENGTH_BYTE_ARRAY = 6, + DELTA_BYTE_ARRAY = 7, + RLE_DICTIONARY = 8, + BYTE_STREAM_SPLIT = 9, +} + +// Note: Match the thrift enum values so that we can cast between them. +enum CompressionCodec : byte { + UNCOMPRESSED = 0, + SNAPPY = 1, + GZIP = 2, + LZO = 3, + BROTLI = 4, + // LZ4 = 5, + ZSTD = 6, + LZ4_RAW = 7, +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Logical types. +/////////////////////////////////////////////////////////////////////////////////////////////////// + +table Empty {} +table DecimalOpts { + precision: int; + scale: int; +} +enum TimeUnit : byte { + MS = 0, + US = 1, + NS = 2, +} +table TimeOpts { + is_adjusted_to_utc: bool; + unit: TimeUnit; +} +table IntOpts { + bit_width: byte = 8; + is_signed: bool; +} +table GeometryType { + crs: string; +} +enum EdgeInterpolationAlgorithm : byte { + SPHERICAL = 0, + VINCENTY = 1, + THOMAS = 2, + ANDOYER = 3, + KARNEY = 4, +} +table GeographyType { + crs: string; + algorithm: EdgeInterpolationAlgorithm; +} +union LogicalType { + StringType:Empty, + MapType:Empty, + ListType:Empty, + EnumType:Empty, + DecimalType:DecimalOpts, + DateType:Empty, + TimeType:TimeOpts, + TimestampType:TimeOpts, + IntType:IntOpts, + NullType:Empty, + JsonType:Empty, + BsonType:Empty, + UUIDType:Empty, + Float16Type:Empty, + VariantType:Empty, + GeometryType:GeometryType, + GeographyType:GeographyType, +} + +table Statistics { + null_count: int = null; + // Store min/max values fixed sized entities depending on the physical type. If len is present + // then the min/max value is present. 
+ // + // - BOOLEAN: none + // - INT32/FLOAT: lo4 (little-endian) + // - INT64/DOUBLE: lo8 (little-endian) + // - INT96: lo4+lo8 (little-endian) + // - FIXED_LEN_BYTE_ARRAY: + // - BYTE_ARRAY: + // prefix: the longest common prefix of min/max + // lo8+hi8 zero padded 16 bytes (big-endian) of the suffix + // len: the length for the suffix of the value after removing the prefix. If > 16 then the + // value is inexact + min_lo4: uint; + min_lo8: ulong; + min_hi8: ulong; + min_len: byte = null; + max_lo4: uint; + max_lo8: ulong; + max_hi8: ulong; + max_len: byte = null; + prefix: string; +} + +union ColumnOrder { + TypeDefinedOrder:Empty, +} + +table SchemaElement { + name: string; + type: Type = null; + repetition_type: FieldRepetitionType; + logical_type: LogicalType; + type_length: int = null; + num_children: int = 0; + field_id: int = null; + column_order: ColumnOrder; // only present for leaf nodes +} + +enum PageType : byte { + DATA_PAGE = 0, + INDEX_PAGE = 1, + DICTIONARY_PAGE = 2, + DATA_PAGE_V2 = 3, +} + +table KV { + key: string; + val: string; +} + +table ColumnMetadata { + codec: CompressionCodec; + num_values: long = null; // only present if not equal to rg.num_rows + total_uncompressed_size: long; + total_compressed_size: long; + key_value_metadata: [KV]; + data_page_offset: long; + index_page_offset: long = null; + dictionary_page_offset: long = null; + statistics: Statistics; + is_fully_dict_encoded: bool; + bloom_filter_offset: long = null; + bloom_filter_length: int = null; +} + +table ColumnChunk { + file_path: string; + meta_data: ColumnMetadata; + // crypto_metadata: ColumnCryptoMetadata; // TODO + // encrypted_column_metadata: [byte]; // TODO +} + +table SortingColumn { + column_idx: int; + descending: bool; + nulls_first: bool; +} + +table RowGroup { + columns: [ColumnChunk]; + total_byte_size: long; + num_rows: long; + sorting_columns: [SortingColumn]; + file_offset: long; + total_compressed_size: long; + ordinal: short = null; +} + +table 
FileMetaData { + version: int; + schema: [SchemaElement]; + num_rows: long; + row_groups: [RowGroup]; + kv: [KV]; + created_by: string; + // column_orders: [ColumnOrder]; // moved to SchemaElement + // encryption_algorithm: [EncryptionAlgorithm]; // TODO + // footer_signing_key_metadata: binary; // TODO +} + +root_type FileMetaData; From a77d2774c99d68953df0de9c1ea14af0fb4e4001 Mon Sep 17 00:00:00 2001 From: Jiayi Wang Date: Tue, 3 Mar 2026 11:53:18 +0000 Subject: [PATCH 2/5] address comments --- src/main/flatbuf/parquet3.fbs | 500 ++++++++++++++++++++++++++++++---- 1 file changed, 440 insertions(+), 60 deletions(-) diff --git a/src/main/flatbuf/parquet3.fbs b/src/main/flatbuf/parquet3.fbs index 68d858f50..8406d5c0a 100644 --- a/src/main/flatbuf/parquet3.fbs +++ b/src/main/flatbuf/parquet3.fbs @@ -1,65 +1,165 @@ -namespace parquet.format3; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ -// Optimization notes -// 1. Statistics are stored in integral types if their size is fixed, otherwise prefix + suffix -// 2. ColumnMetaData.encoding_stats are removed, they are replaced with -// ColumnMetaData.is_fully_dict_encoded. -// 3. RowGroups are limited to 2GB in size, so we can use int for sizes. -// 4. 
ColumnChunk/ColumnMetaData offsets are now relative to the start of the row group, so we can -// use int for offsets. -// 5. Remove ordinal. -// 6. Restrict RowGroups to 2^31-1 rows. -// 7. Remove offset/column indexes, they are small and just their offsets are of similar size. +namespace parquet.format; -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Physical types. -/////////////////////////////////////////////////////////////////////////////////////////////////// +// The FlatBuffers footer preserves the same information as the Thrift Parquet footer, +// while removing duplicated fields, unused details, and inefficient encodings that +// waste space and memory. +// It can currently be attached as a footer extension, and may fully replace the +// Thrift footer in the future. +// +// Optimization notes: +// 1. Statistics use fixed-width integral types when possible; otherwise they are +// encoded as prefix + suffix. +// 2. ColumnChunk file_path and file_offset are removed since they are unused. +// 3. ColumnMetaData.encoding_stats are removed and replaced by +// ColumnMetaData.is_fully_dict_encoded. +// 4. ColumnMetaData.path_in_schema is removed since it can be derived from the schema. +// 5. ConvertedType is fully dropped as it is superseded by LogicalType. +// 6. Offset and column indexes are removed since they are small and their offsets +// alone take comparable space. +/** + * Types supported by Parquet. These types are intended to be used in combination + * with the encodings to control the on disk storage format. + * For example INT16 is not included as a type since a good encoding of INT32 + * would handle this. 
+ */ enum Type : byte { BOOLEAN = 0, INT32 = 1, INT64 = 2, - INT96 = 3, + INT96 = 3, // deprecated, new Parquet writers should not write data in INT96 FLOAT = 4, DOUBLE = 5, BYTE_ARRAY = 6, FIXED_LEN_BYTE_ARRAY = 7, } +/** + * Representation of Schemas + */ enum FieldRepetitionType : byte { + /** This field is required (can not be null) and each row has exactly 1 value. */ REQUIRED = 0, + + /** The field is optional (can be null) and each row has 0 or 1 values. */ OPTIONAL = 1, + + /** The field is repeated and can contain 0 or more values */ REPEATED = 2, } -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Encodings. -/////////////////////////////////////////////////////////////////////////////////////////////////// - -// Note: Match the thrift enum values so that we can cast between them. +/** + * Encodings supported by Parquet. Not all encodings are valid for all types. These + * enums are also used to specify the encoding of definition and repetition levels. + * See the accompanying doc for the details of the more complicated encodings. + * Note: Match the thrift enum values so that we can cast between them. + */ enum Encoding : byte { + /** Default encoding. + * BOOLEAN - 1 bit per value. 0 is false; 1 is true. + * INT32 - 4 bytes per value. Stored as little-endian. + * INT64 - 8 bytes per value. Stored as little-endian. + * FLOAT - 4 bytes per value. IEEE. Stored as little-endian. + * DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. + * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + * FIXED_LEN_BYTE_ARRAY - Just the bytes. + */ PLAIN = 0, + + /** Group VarInt encoding for INT32/INT64. + * This encoding is deprecated. It was never used + */ // GROUP_VAR_INT = 1, + + /** + * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + * plain type. + * in a data page use RLE_DICTIONARY instead. 
+ * in a Dictionary page use PLAIN instead + */ PLAIN_DICTIONARY = 2, + + /** Group packed run length encoding. Usable for definition/repetition levels + * encoding and Booleans (on one bit: 0 is false; 1 is true.) + */ RLE = 3, + + /** Bit packed encoding. This can only be used if the data has a known max + * width. Usable for definition/repetition levels encoding. + * This encoding is deprecated and is replaced by the RLE/bit-packing hybrid encoding. + */ // BIT_PACKED = 4, + + /** Delta encoding for integers. This can be used for int columns and works best + * on sorted data + */ DELTA_BINARY_PACKED = 5, + + /** Encoding for byte arrays to separate the length values and the data. The lengths + * are encoded using DELTA_BINARY_PACKED + */ DELTA_LENGTH_BYTE_ARRAY = 6, + + /** Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. + * Suffixes are stored as delta length byte arrays. + */ DELTA_BYTE_ARRAY = 7, + + /** Dictionary encoding: the ids are encoded using the RLE encoding + */ RLE_DICTIONARY = 8, + + /** Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). + K byte-streams are created where K is the size in bytes of the data type. + The individual bytes of a value are scattered to the corresponding stream and + the streams are concatenated. + This itself does not reduce the size of the data but can lead to better compression + afterwards. + + Added in 2.8 for FLOAT and DOUBLE. + Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. + */ BYTE_STREAM_SPLIT = 9, } -// Note: Match the thrift enum values so that we can cast between them. +/** + * Supported compression algorithms. + * + * Codecs added in format version X.Y can be read by readers based on X.Y and later. + * Codec support may vary between readers based on the format version and + * libraries available at runtime. + * + * See Compression.md for a detailed specification of these algorithms. 
+ * Note: Match the thrift enum values so that we can cast between them. + */ enum CompressionCodec : byte { UNCOMPRESSED = 0, SNAPPY = 1, GZIP = 2, LZO = 3, - BROTLI = 4, - // LZ4 = 5, - ZSTD = 6, - LZ4_RAW = 7, + BROTLI = 4, // Added in 2.4 + LZ4 = 5, // DEPRECATED (Added in 2.4) + ZSTD = 6, // Added in 2.4 + LZ4_RAW = 7, // Added in 2.9 } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -67,26 +167,62 @@ enum CompressionCodec : byte { /////////////////////////////////////////////////////////////////////////////////////////////////// table Empty {} -table DecimalOpts { + +/** + * Decimal logical type annotation + * + * Scale must be zero or a positive integer less than or equal to the precision. + * Precision must be a non-zero positive integer. + * + * To maintain forward-compatibility in v1, implementations using this logical + * type must also set scale and precision on the annotated SchemaElement. + * + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. + */ +table DecimalOptions { precision: int; scale: int; } + +/** Time units for logical types */ enum TimeUnit : byte { - MS = 0, - US = 1, - NS = 2, + MILLIS = 0, + MICROS = 1, + NANOS = 2, } -table TimeOpts { + +/** + * Timestamp logical type annotation + * + * Allowed for physical types: INT64 + */ +table TimeOptions { is_adjusted_to_utc: bool; unit: TimeUnit; } -table IntOpts { + +/** + * Integer logical type annotation + * + * bitWidth must be 8, 16, 32, or 64. + * + * Allowed for physical types: INT32, INT64 + */ +table IntOptions { bit_width: byte = 8; is_signed: bool; } -table GeometryType { - crs: string; + +/** + * Embedded Variant logical type annotation + */ +table VariantType { + // The version of the variant specification that the variant was + // written with. 
+ specification_version: byte = null; } + +/** Edge interpolation algorithm for Geography logical type */ enum EdgeInterpolationAlgorithm : byte { SPHERICAL = 0, VINCENTY = 1, @@ -94,45 +230,97 @@ enum EdgeInterpolationAlgorithm : byte { ANDOYER = 3, KARNEY = 4, } + +/** + * Embedded Geometry logical type annotation + * + * Geospatial features in the Well-Known Binary (WKB) format and edges interpolation + * is always linear/planar. + * + * A custom CRS can be set by the crs field. If unset, it defaults to "OGC:CRS84", + * which means that the geometries must be stored in longitude, latitude based on + * the WGS84 datum. + * + * Allowed for physical type: BYTE_ARRAY. + * + * See Geospatial.md for details. + */ +table GeometryType { + crs: string; +} + +/** + * Embedded Geography logical type annotation + * + * Geospatial features in the WKB format with an explicit (non-linear/non-planar) + * edges interpolation algorithm. + * + * A custom geographic CRS can be set by the crs field, where longitudes are + * bound by [-180, 180] and latitudes are bound by [-90, 90]. If unset, the CRS + * defaults to "OGC:CRS84". + * + * An optional algorithm can be set to correctly interpret edges interpolation + * of the geometries. If unset, the algorithm defaults to SPHERICAL. + * + * Allowed for physical type: BYTE_ARRAY. + * + * See Geospatial.md for details. + */ table GeographyType { crs: string; algorithm: EdgeInterpolationAlgorithm; } + +/** + * LogicalType annotations to replace ConvertedType. 
+ */ union LogicalType { - StringType:Empty, + StringType:Empty, MapType:Empty, ListType:Empty, EnumType:Empty, - DecimalType:DecimalOpts, + DecimalType:DecimalOptions, DateType:Empty, - TimeType:TimeOpts, - TimestampType:TimeOpts, - IntType:IntOpts, + TimeType:TimeOptions, + TimestampType:TimeOptions, + IntType:IntOptions, NullType:Empty, JsonType:Empty, BsonType:Empty, UUIDType:Empty, Float16Type:Empty, - VariantType:Empty, + VariantType:VariantType, GeometryType:GeometryType, GeographyType:GeographyType, } table Statistics { null_count: int = null; - // Store min/max values fixed sized entities depending on the physical type. If len is present - // then the min/max value is present. + // Store min/max values as fixed-width entities depending on the physical type. + // If min_len/max_len is present then the corresponding min/max value is present. // // - BOOLEAN: none - // - INT32/FLOAT: lo4 (little-endian) - // - INT64/DOUBLE: lo8 (little-endian) - // - INT96: lo4+lo8 (little-endian) + // - INT32/FLOAT: min_lo4/max_lo4 (little-endian, 4 bytes) + // - INT64/DOUBLE: min_lo8/max_lo8 (little-endian, 8 bytes) + // - INT96: lo4 contains the low 4 bytes, lo8 contains the high 8 bytes (little-endian, 12 bytes total) // - FIXED_LEN_BYTE_ARRAY: // - BYTE_ARRAY: - // prefix: the longest common prefix of min/max - // lo8+hi8 zero padded 16 bytes (big-endian) of the suffix - // len: the length for the suffix of the value after removing the prefix. If > 16 then the - // value is inexact + // prefix: the longest common prefix of min and max values + // lo8+hi8: zero-padded 16 bytes (big-endian) of the suffix after removing the prefix + // min_len/max_len: the length of the suffix of the original value after removing the prefix. + // If > 16 then the value stored in lo8+hi8 is a truncated approximation (inexact). + // If <= 16 then the value is exact. 
+ // + // Example for BYTE_ARRAY with min="apple" and max="application": + // prefix = "appl" (longest common prefix) + // min suffix = "e" (1 byte), max suffix = "ication" (7 bytes) + // min_lo8 = big-endian encoding of "e" zero-padded to 16 bytes + // min_len = 1 (exact, since 1 <= 16) + // max_lo8 = big-endian encoding of "ication" zero-padded to 16 bytes + // max_len = 7 (exact, since 7 <= 16) + // + // Example for INT32 with min=42: + // min_lo4 = 0x2A000000 (42 in little-endian) min_lo4: uint; min_lo8: ulong; min_hi8: ulong; @@ -144,17 +332,90 @@ table Statistics { prefix: string; } +/** + * Bloom filter metadata for a column chunk. + */ +table BloomFilterInfo { + /** Byte offset from beginning of file to Bloom filter data. **/ + offset: long; + + /** Size of Bloom filter data including the serialized header, in bytes. + * Writers should write this field so readers can read the bloom filter + * in a single I/O. + */ + length: int; +} + +table AesGcmV1 { + /** AAD prefix **/ + aad_prefix: [byte]; + + /** Unique file identifier part of AAD suffix **/ + aad_file_unique: [byte]; + + /** In files encrypted with AAD prefix without storing it, + * readers must supply the prefix **/ + supply_aad_prefix: bool; +} + +table AesGcmCtrV1 { + /** AAD prefix **/ + aad_prefix: [byte]; + + /** Unique file identifier part of AAD suffix **/ + aad_file_unique: [byte]; + + /** In files encrypted with AAD prefix without storing it, + * readers must supply the prefix **/ + supply_aad_prefix: bool; +} + +union EncryptionAlgorithm { + AesGcmV1:AesGcmV1, + AesGcmCtrV1:AesGcmCtrV1, +} + union ColumnOrder { TypeDefinedOrder:Empty, } +/** + * Represents an element inside a schema definition. + * - if it is a group (inner node) then type is undefined and num_children is defined + * - if it is a primitive type (leaf) then type is defined and num_children is undefined + * the nodes are listed in depth first traversal order. 
+ */ table SchemaElement { + /** Name of the field in the schema */ name: string; + + /** Data type for this field. Not set if the current element is a non-leaf node */ type: Type = null; + + /** repetition of the field. The root of the schema does not have a repetition_type. + * All other nodes must have one */ repetition_type: FieldRepetitionType; + + /** The logical type of this SchemaElement */ logical_type: LogicalType; + + /** If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. + * Otherwise, if specified, this is the maximum bit length to store any of the values. + * (e.g. a low cardinality INT col could have this set to 3). Note that this is + * in the schema, and therefore fixed for the entire file. + */ type_length: int = null; + + /** Nested fields. Since thrift does not support nested fields, + * the nesting is flattened to a single list by a depth-first traversal. + * The children count is used to construct the nested relationship. + * This field is not set when the element is a primitive type + */ num_children: int = 0; + + /** When the original schema supports field ids, this will save the + * original field id in the parquet schema + */ field_id: int = null; column_order: ColumnOrder; // only present for leaf nodes } @@ -166,59 +427,178 @@ enum PageType : byte { DATA_PAGE_V2 = 3, } -table KV { +table KeyValue { key: string; val: string; } +/** + * Description for column metadata + */ table ColumnMetadata { + /** Compression codec **/ codec: CompressionCodec; - num_values: long = null; // only present if not equal to rg.num_rows + + /** Number of values in this column, only present if not equal to rg.num_rows **/ + num_values: long = null; + + /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ total_uncompressed_size: long; + + /** total byte size of all compressed, and potentially encrypted, pages + * in this column chunk (including the headers) **/ total_compressed_size: long; - 
key_value_metadata: [KV]; + + /** Optional key/value metadata **/ + key_value_metadata: [KeyValue]; + + /** Byte offset from beginning of file to first data page **/ data_page_offset: long; + + /** Byte offset from beginning of file to root index page **/ index_page_offset: long = null; + + /** Byte offset from the beginning of file to first (only) dictionary page **/ dictionary_page_offset: long = null; + + /** optional statistics for this column chunk */ statistics: Statistics; + + /** Indicates whether the column chunk pages are fully dictionary encoded. */ is_fully_dict_encoded: bool; - bloom_filter_offset: long = null; - bloom_filter_length: int = null; + + /** Optional Bloom filter information for this column chunk */ + bloom_filter: BloomFilterInfo; +} + +union ColumnCryptoMetadata { + EncryptionWithFooterKey:Empty, + EncryptionWithColumnKey:Empty, } table ColumnChunk { - file_path: string; + /** Column metadata for this chunk. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. + **/ meta_data: ColumnMetadata; - // crypto_metadata: ColumnCryptoMetadata; // TODO - // encrypted_column_metadata: [byte]; // TODO + + /** Crypto metadata of encrypted columns **/ + crypto_metadata: ColumnCryptoMetadata; + + /** Encrypted column metadata for this chunk **/ + encrypted_column_metadata: [byte]; } +/** + * Sort order within a RowGroup of a leaf column + */ table SortingColumn { + /** The ordinal position of the column (in this row group) **/ column_idx: int; + + /** If true, indicates this column is sorted in descending order. **/ descending: bool; + + /** If true, nulls will come before non-null values, otherwise, + * nulls go at the end. */ nulls_first: bool; } table RowGroup { + /** Metadata for each column chunk in this row group. + * This list must have the same order as the SchemaElement list in FileMetaData. 
+ **/ columns: [ColumnChunk]; + + /** Total byte size of all the uncompressed column data in this row group **/ total_byte_size: long; + + /** Number of rows in this row group **/ num_rows: long; + + /** If set, specifies a sort ordering of the rows in this RowGroup. + * The sorting columns can be a subset of all the columns. + */ sorting_columns: [SortingColumn]; + + /** Byte offset from beginning of file to first page (data or dictionary) + * in this row group **/ file_offset: long; + + /** Total byte size of all compressed (and potentially encrypted) column data + * in this row group **/ total_compressed_size: long; + + /** Row group ordinal in the file **/ ordinal: short = null; } +/** + * Crypto metadata for files with encrypted footer. + */ +table FileCryptoMetaData { + /** + * Encryption algorithm. This field is only used for files + * with encrypted footer. Files with plaintext footer store algorithm id + * inside footer (FileMetaData structure). + */ + encryption_algorithm: EncryptionAlgorithm; + + /** Retrieval metadata of key used for encryption of footer, + * and (possibly) columns **/ + key_metadata: [byte]; +} + +/** + * Description for file metadata + */ table FileMetaData { + /** Version of this file + * + * As of December 2025, there is no agreed upon consensus of what constitutes + * version 2 of the file. For maximum compatibility with readers, writers should + * always populate "1" for version. For maximum compatibility with writers, + * readers should accept "1" and "2" interchangeably. All other versions are + * reserved for potential future use-cases. + */ version: int; + + /** Parquet schema for this file. This schema contains metadata for all the columns. + * The schema is represented as a tree with a single root. The nodes of the tree + * are flattened to a list by doing a depth-first traversal. + * The column metadata contains the path in the schema for that column which can be + * used to map columns to nodes in the schema. 
+ * The first element is the root **/ schema: [SchemaElement]; + + /** Number of rows in this file **/ num_rows: long; + + /** Row groups in this file **/ row_groups: [RowGroup]; - kv: [KV]; + + /** Optional key/value metadata **/ + kv: [KeyValue]; + + /** String for application that wrote this file. This should be in the format + * <Application> version <App Version> (build <App Build Hash>). + * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) + **/ created_by: string; - // column_orders: [ColumnOrder]; // moved to SchemaElement - // encryption_algorithm: [EncryptionAlgorithm]; // TODO - // footer_signing_key_metadata: binary; // TODO + + /** + * Encryption algorithm. This field is set only in encrypted files + * with plaintext footer. Files with encrypted footer store algorithm id + * in FileCryptoMetaData structure. + */ + encryption_algorithm: EncryptionAlgorithm; + + /** + * Retrieval metadata of key used for signing the footer. + * Used only in encrypted files with plaintext footer. + */ + footer_signing_key_metadata: [byte]; } root_type FileMetaData; From bf0825c1f60f2464c57d04d8a0b0fffc0ef7ddf7 Mon Sep 17 00:00:00 2001 From: Jiayi Wang Date: Wed, 11 Mar 2026 17:07:31 +0000 Subject: [PATCH 3/5] address comments - Add GeospatialStatistics, type change in Statistics --- src/main/flatbuf/parquet3.fbs | 56 ++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/src/main/flatbuf/parquet3.fbs b/src/main/flatbuf/parquet3.fbs index 8406d5c0a..eccc25718 100644 --- a/src/main/flatbuf/parquet3.fbs +++ b/src/main/flatbuf/parquet3.fbs @@ -23,11 +23,13 @@ namespace parquet.format; // while removing duplicated fields, unused details, and inefficient encodings that // waste space and memory. // It can currently be attached as a footer extension, and may fully replace the -// Thrift footer in the future. +// Thrift footer in the future. As of now, the Thrift footer is still required; +// this FlatBuffers footer is supplementary. 
// // Optimization notes: // 1. Statistics use fixed-width integral types when possible; otherwise they are -// encoded as prefix + suffix. +// encoded as prefix + suffix. SizeStatistics and Statistics.distinct_count +// are removed. // 2. ColumnChunk file_path and file_offset are removed since they are unused. // 3. ColumnMetaData.encoding_stats are removed and replaced by // ColumnMetaData.is_fully_dict_encoded. @@ -180,8 +182,8 @@ table Empty {} * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. */ table DecimalOptions { - precision: int; scale: int; + precision: int; } /** Time units for logical types */ @@ -295,7 +297,7 @@ union LogicalType { } table Statistics { - null_count: int = null; + null_count: long = null; // Store min/max values as fixed-width entities depending on the physical type. // If min_len/max_len is present then the corresponding min/max value is present. // @@ -303,7 +305,7 @@ table Statistics { // - INT32/FLOAT: min_lo4/max_lo4 (little-endian, 4 bytes) // - INT64/DOUBLE: min_lo8/max_lo8 (little-endian, 8 bytes) // - INT96: lo4 contains the low 4 bytes, lo8 contains the high 8 bytes (little-endian, 12 bytes total) - // - FIXED_LEN_BYTE_ARRAY: + // - FIXED_LEN_BYTE_ARRAY: Encoded the same way as BYTE_ARRAY below // - BYTE_ARRAY: // prefix: the longest common prefix of min and max values // lo8+hi8: zero-padded 16 bytes (big-endian) of the suffix after removing the prefix @@ -324,12 +326,35 @@ table Statistics { min_lo4: uint; min_lo8: ulong; min_hi8: ulong; - min_len: byte = null; + min_len: int = null; max_lo4: uint; max_lo8: ulong; max_hi8: ulong; - max_len: byte = null; - prefix: string; + max_len: int = null; + prefix: [byte]; +} + +/** + * Bounding box for GEOMETRY or GEOGRAPHY type in the representation of min/max + * value pair of coordinates from each axis. 
+ */ +table BoundingBox { + xmin: double; + xmax: double; + ymin: double; + ymax: double; + zmin: double = null; + zmax: double = null; + mmin: double = null; + mmax: double = null; +} + +/** Statistics specific to Geometry and Geography logical types */ +table GeospatialStatistics { + /** A bounding box of geospatial instances */ + bbox: BoundingBox; + /** Geospatial type codes of all instances, or an empty list if not known */ + geospatial_types: [int]; } /** @@ -394,7 +419,7 @@ table SchemaElement { /** repetition of the field. The root of the schema does not have a repetition_type. * All other nodes must have one */ - repetition_type: FieldRepetitionType; + repetition_type: FieldRepetitionType = null; /** The logical type of this SchemaElement */ logical_type: LogicalType; @@ -469,11 +494,22 @@ table ColumnMetadata { /** Optional Bloom filter information for this column chunk */ bloom_filter: BloomFilterInfo; + + /** Optional statistics specific for Geometry and Geography logical types */ + geospatial_statistics: GeospatialStatistics; +} + +table EncryptionWithColumnKey { + /** Column path in schema **/ + path_in_schema: [string]; + + /** Retrieval metadata of column encryption key **/ + key_metadata: [byte]; } union ColumnCryptoMetadata { EncryptionWithFooterKey:Empty, - EncryptionWithColumnKey:Empty, + EncryptionWithColumnKey:EncryptionWithColumnKey, } table ColumnChunk { From 5a0baf26259f4ba5f28567c43c7c60e4a71dc042 Mon Sep 17 00:00:00 2001 From: Jiayi Wang Date: Wed, 11 Mar 2026 18:58:46 +0000 Subject: [PATCH 4/5] fix Statistics min_len and max_len --- src/main/flatbuf/parquet3.fbs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/flatbuf/parquet3.fbs b/src/main/flatbuf/parquet3.fbs index eccc25718..2d58e11e1 100644 --- a/src/main/flatbuf/parquet3.fbs +++ b/src/main/flatbuf/parquet3.fbs @@ -28,7 +28,7 @@ namespace parquet.format; // // Optimization notes: // 1. 
Statistics use fixed-width integral types when possible; otherwise they are -// encoded as prefix + suffix. SizeStatistics and Statistics.distinct_count +// encoded as prefix + truncated suffix. SizeStatistics and Statistics.distinct_count // are removed. // 2. ColumnChunk file_path and file_offset are removed since they are unused. // 3. ColumnMetaData.encoding_stats are removed and replaced by @@ -309,28 +309,28 @@ table Statistics { // - BYTE_ARRAY: // prefix: the longest common prefix of min and max values // lo8+hi8: zero-padded 16 bytes (big-endian) of the suffix after removing the prefix - // min_len/max_len: the length of the suffix of the original value after removing the prefix. - // If > 16 then the value stored in lo8+hi8 is a truncated approximation (inexact). - // If <= 16 then the value is exact. + // min_len/max_len: the absolute value is the min/max length without prefix if prefix exists. + // If >= 0, the value is exact. If < 0, the value is inexact. // - // Example for BYTE_ARRAY with min="apple" and max="application": - // prefix = "appl" (longest common prefix) - // min suffix = "e" (1 byte), max suffix = "ication" (7 bytes) - // min_lo8 = big-endian encoding of "e" zero-padded to 16 bytes - // min_len = 1 (exact, since 1 <= 16) - // max_lo8 = big-endian encoding of "ication" zero-padded to 16 bytes - // max_len = 7 (exact, since 7 <= 16) + // Example for BYTE_ARRAY with min="apple" and max="application_is_a_very_long_suffix": + // prefix = "appl" (longest common prefix, 4 bytes) + // min suffix = "e" (1 byte), max suffix = "ication_is_a_very_long_suffix" (29 bytes) + // min_lo8 = big-endian encoding of "e" (1 byte) + // min_len = 1 (>= 0, exact) + // max_lo8+max_hi8 = big-endian encoding of "ication_is_a_ves" (truncated to 16 bytes, + // last byte incremented by 1 to ensure it is still an upper bound) + // max_len = -16 (< 0, inexact) // // Example for INT32 with min=42: // min_lo4 = 0x2A000000 (42 in little-endian) min_lo4: uint; min_lo8: 
ulong; min_hi8: ulong; - min_len: int = null; + min_len: byte = null; max_lo4: uint; max_lo8: ulong; max_hi8: ulong; - max_len: int = null; + max_len: byte = null; prefix: [byte]; } From c938fc70f83c4953071d2ed63c2c34c8e60e4acd Mon Sep 17 00:00:00 2001 From: Jiayi Wang Date: Wed, 8 Apr 2026 09:28:52 +0000 Subject: [PATCH 5/5] address review comments from emkornfield and adamreeve - Rename parquet3.fbs to parquet.fbs - Comment out deprecated PLAIN_DICTIONARY encoding (like BIT_PACKED) - Add distinct_count back to Statistics - Remove ConvertedType forward-compat constraint from DecimalOptions - Add backward-compat note for LogicalType Empty union types - Reorder SchemaElement fields to match Thrift ordering - Expand is_fully_dict_encoded documentation - Rename meta_data to metadata in ColumnChunk - Clarify total_byte_size in RowGroup - Remove FileCryptoMetaData (encrypted footer layout not yet specified) --- .../flatbuf/{parquet3.fbs => parquet.fbs} | 71 ++++++++----------- 1 file changed, 30 insertions(+), 41 deletions(-) rename src/main/flatbuf/{parquet3.fbs => parquet.fbs} (93%) diff --git a/src/main/flatbuf/parquet3.fbs b/src/main/flatbuf/parquet.fbs similarity index 93% rename from src/main/flatbuf/parquet3.fbs rename to src/main/flatbuf/parquet.fbs index 2d58e11e1..86a403c9c 100644 --- a/src/main/flatbuf/parquet3.fbs +++ b/src/main/flatbuf/parquet.fbs @@ -35,8 +35,6 @@ namespace parquet.format; // ColumnMetaData.is_fully_dict_encoded. // 4. ColumnMetaData.path_in_schema is removed since it can be derived from the schema. // 5. ConvertedType is fully dropped as it is superseded by LogicalType. -// 6. Offset and column indexes are removed since they are small and their offsets -// alone take comparable space. /** * Types supported by Parquet. These types are intended to be used in combination @@ -95,19 +93,19 @@ enum Encoding : byte { /** * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the * plain type. 
- * in a data page use RLE_DICTIONARY instead. - * in a Dictionary page use PLAIN instead + * In a data page use RLE_DICTIONARY instead. + * In a Dictionary page use PLAIN instead. */ - PLAIN_DICTIONARY = 2, + // PLAIN_DICTIONARY = 2, /** Group packed run length encoding. Usable for definition/repetition levels * encoding and Booleans (on one bit: 0 is false; 1 is true.) */ RLE = 3, - /** Bit packed encoding. This can only be used if the data has a known max + /** Deprecated: Bit packed encoding. This can only be used if the data has a known max * width. Usable for definition/repetition levels encoding. - * This encoding is deprecated and is replaced by the RLE/bit-packing hybrid encoding. + * This encoding is replaced by the RLE/bit-packing hybrid encoding. */ // BIT_PACKED = 4, @@ -176,9 +174,6 @@ table Empty {} * Scale must be zero or a positive integer less than or equal to the precision. * Precision must be a non-zero positive integer. * - * To maintain forward-compatibility in v1, implementations using this logical - * type must also set scale and precision on the annotated SchemaElement. - * * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. */ table DecimalOptions { @@ -275,9 +270,13 @@ table GeographyType { /** * LogicalType annotations to replace ConvertedType. + * + * Types with no parameters use `Empty`. To add parameters later, append a new + * union member (e.g., StringTypeV2:StringOptions); new readers remain + * backward-compatible with old files. */ union LogicalType { - StringType:Empty, + StringType:Empty, MapType:Empty, ListType:Empty, EnumType:Empty, @@ -298,6 +297,8 @@ union LogicalType { table Statistics { null_count: long = null; + /** count of distinct values occurring */ + distinct_count: long = null; // Store min/max values as fixed-width entities depending on the physical type. // If min_len/max_len is present then the corresponding min/max value is present. 
// @@ -411,19 +412,9 @@ union ColumnOrder { * the nodes are listed in depth first traversal order. */ table SchemaElement { - /** Name of the field in the schema */ - name: string; - /** Data type for this field. Not set if the current element is a non-leaf node */ type: Type = null; - /** repetition of the field. The root of the schema does not have a repetition_type. - * All other nodes must have one */ - repetition_type: FieldRepetitionType = null; - - /** The logical type of this SchemaElement */ - logical_type: LogicalType; - /** If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. * Otherwise, if specified, this is the maximum bit length to store any of the values. * (e.g. a low cardinality INT col could have this set to 3). Note that this is @@ -431,6 +422,13 @@ table SchemaElement { */ type_length: int = null; + /** repetition of the field. The root of the schema does not have a repetition_type. + * All other nodes must have one */ + repetition_type: FieldRepetitionType = null; + + /** Name of the field in the schema */ + name: string; + /** Nested fields. Since thrift does not support nested fields, * the nesting is flattened to a single list by a depth-first traversal. * The children count is used to construct the nested relationship. @@ -442,7 +440,12 @@ table SchemaElement { * original field id in the parquet schema */ field_id: int = null; - column_order: ColumnOrder; // only present for leaf nodes + + /** The logical type of this SchemaElement */ + logical_type: LogicalType; + + /** Column ordering for leaf nodes, used to interpret min/max statistics */ + column_order: ColumnOrder; } enum PageType : byte { @@ -489,7 +492,9 @@ table ColumnMetadata { /** optional statistics for this column chunk */ statistics: Statistics; - /** Indicates whether the column chunk pages are fully dictionary encoded. */ + /** True if every data page in this column chunk is dictionary-encoded + * (no fallback). Replaces Thrift encoding_stats. 
+ */ is_fully_dict_encoded: bool; /** Optional Bloom filter information for this column chunk */ @@ -517,7 +522,7 @@ table ColumnChunk { * Note: while marked as optional, this field is in fact required by most major * Parquet implementations. As such, writers MUST populate this field. **/ - meta_data: ColumnMetadata; + metadata: ColumnMetadata; /** Crypto metadata of encrypted columns **/ crypto_metadata: ColumnCryptoMetadata; @@ -547,7 +552,7 @@ table RowGroup { **/ columns: [ColumnChunk]; - /** Total byte size of all the uncompressed column data in this row group **/ + /** Sum of total_uncompressed_size across all columns (uncompressed, encoded) **/ total_byte_size: long; /** Number of rows in this row group **/ @@ -570,22 +575,6 @@ table RowGroup { ordinal: short = null; } -/** - * Crypto metadata for files with encrypted footer. - */ -table FileCryptoMetaData { - /** - * Encryption algorithm. This field is only used for files - * with encrypted footer. Files with plaintext footer store algorithm id - * inside footer (FileMetaData structure). - */ - encryption_algorithm: EncryptionAlgorithm; - - /** Retrieval metadata of key used for encryption of footer, - * and (possibly) columns **/ - key_metadata: [byte]; -} - /** * Description for file metadata */