Skip to content
Draft
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
54bc343
Add alp to the thrift definitions
sdf-jkl Feb 6, 2026
622841b
Add alp to the encoding enum
sdf-jkl Feb 6, 2026
bf754b2
Add alp header and page layout structs
sdf-jkl Feb 12, 2026
8337fcd
Add Interleaved Vectors support
sdf-jkl Feb 15, 2026
5349a9e
Merge branch 'main' into alp
sdf-jkl Feb 15, 2026
148b1d5
Add bit unpacking, reverse FOR and exceptions patching
sdf-jkl Feb 18, 2026
5f0222e
Decode page works
sdf-jkl Feb 18, 2026
027172f
Add more checks and tests
sdf-jkl Feb 19, 2026
8ee1b7e
Make decode page lazy + some tests
sdf-jkl Feb 20, 2026
18f885e
Find vectors using offsets at decode time, not parse time
sdf-jkl Feb 20, 2026
a9fa0c3
get decode fast path
sdf-jkl Feb 20, 2026
7754ebc
Simplify decode_page_values
sdf-jkl Feb 20, 2026
6b7e923
remove `version` from page header
sdf-jkl Mar 12, 2026
a13f244
change `log_vector_size` range consts
sdf-jkl Mar 12, 2026
6e86d6f
redo the integration test using a test file from the c++ pr
sdf-jkl Mar 12, 2026
ac56cc2
cache parsed vectors
sdf-jkl Mar 12, 2026
07ca6f0
use `Bytes` for `packed_values`
sdf-jkl Mar 12, 2026
def5c09
clippy too many args
sdf-jkl Mar 12, 2026
5e5c8d2
cargo fmt
sdf-jkl Mar 12, 2026
6060ca2
Merge branch 'main' of https://github.com/apache/arrow-rs into alp
sdf-jkl Mar 16, 2026
5ed70ae
fix decode to two-step multiplication
sdf-jkl Mar 16, 2026
e328d7a
make vector byte count more strict
sdf-jkl Mar 16, 2026
1807d32
add stricter offset validation
sdf-jkl Mar 16, 2026
11c7905
fmt
sdf-jkl Mar 16, 2026
e9827e5
Merge tag '58.2.0' into alp
alamb May 8, 2026
cacacf4
Fix up compilation
alamb May 8, 2026
578481c
Refactor: Pull common header structures into common module
alamb May 8, 2026
131851a
Merge pull request #3 from alamb/alamb/move_common_structures
sdf-jkl May 9, 2026
f10279c
fmt
sdf-jkl May 9, 2026
fbf4ad2
small fix
sdf-jkl May 9, 2026
f65ff9a
Align structure names with spec
alamb May 9, 2026
c912f4e
Merge pull request #5 from alamb/alamb/alp_names
sdf-jkl May 9, 2026
26c382c
fmt
sdf-jkl May 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion parquet-testing
10 changes: 10 additions & 0 deletions parquet/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,10 @@ enum Encoding {
/// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
/// perform poorly for large values of N.
BYTE_STREAM_SPLIT = 9;
/// Adaptive Lossless floating-Point encoding (ALP).
///
/// Currently specified for FLOAT and DOUBLE.
ALP = 10;
}
);

Expand All @@ -654,6 +658,7 @@ impl FromStr for Encoding {
"DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
"RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
"BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
"ALP" | "alp" => Ok(Encoding::ALP),
_ => Err(general_err!("unknown encoding: {}", s)),
}
}
Expand Down Expand Up @@ -791,6 +796,7 @@ fn i32_to_encoding(val: i32) -> Encoding {
7 => Encoding::DELTA_BYTE_ARRAY,
8 => Encoding::RLE_DICTIONARY,
9 => Encoding::BYTE_STREAM_SPLIT,
10 => Encoding::ALP,
_ => panic!("Impossible encoding {val}"),
}
}
Expand Down Expand Up @@ -2137,6 +2143,7 @@ mod tests {
);
assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
assert_eq!(Encoding::ALP.to_string(), "ALP");
}

#[test]
Expand Down Expand Up @@ -2438,6 +2445,8 @@ mod tests {
assert_eq!(encoding, Encoding::RLE_DICTIONARY);
encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
encoding = "alp".parse().unwrap();
assert_eq!(encoding, Encoding::ALP);

// test lowercase
encoding = "byte_stream_split".parse().unwrap();
Expand Down Expand Up @@ -2573,6 +2582,7 @@ mod tests {
Encoding::PLAIN_DICTIONARY,
Encoding::RLE_DICTIONARY,
Encoding::BYTE_STREAM_SPLIT,
Encoding::ALP,
];
encodings_roundtrip(encodings.into());
}
Expand Down
9 changes: 8 additions & 1 deletion parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ use crate::data_type::*;
use crate::encodings::decoding::byte_stream_split_decoder::{
ByteStreamSplitDecoder, VariableWidthByteStreamSplitDecoder,
};
use crate::encodings::decoding::alp::AlpDecoder;
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::bit_util::{self, BitReader};

mod alp;
mod byte_stream_split_decoder;

pub(crate) mod private {
Expand Down Expand Up @@ -63,7 +65,8 @@ pub(crate) mod private {
Encoding::RLE
| Encoding::DELTA_BINARY_PACKED
| Encoding::DELTA_BYTE_ARRAY
| Encoding::DELTA_LENGTH_BYTE_ARRAY => Err(general_err!(
| Encoding::DELTA_LENGTH_BYTE_ARRAY
| Encoding::ALP => Err(general_err!(
"Encoding {} is not supported for type",
encoding
)),
Expand Down Expand Up @@ -116,6 +119,7 @@ pub(crate) mod private {
) -> Result<Box<dyn Decoder<T>>> {
match encoding {
Encoding::BYTE_STREAM_SPLIT => Ok(Box::new(ByteStreamSplitDecoder::new())),
Encoding::ALP => Ok(Box::new(AlpDecoder::new())),
_ => get_decoder_default(descr, encoding),
}
}
Expand All @@ -127,6 +131,7 @@ pub(crate) mod private {
) -> Result<Box<dyn Decoder<T>>> {
match encoding {
Encoding::BYTE_STREAM_SPLIT => Ok(Box::new(ByteStreamSplitDecoder::new())),
Encoding::ALP => Ok(Box::new(AlpDecoder::new())),
_ => get_decoder_default(descr, encoding),
}
}
Expand Down Expand Up @@ -1135,6 +1140,8 @@ mod tests {
create_and_check_decoder::<ByteArrayType>(Encoding::DELTA_LENGTH_BYTE_ARRAY, None);
create_and_check_decoder::<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY, None);
create_and_check_decoder::<BoolType>(Encoding::RLE, None);
create_and_check_decoder::<FloatType>(Encoding::ALP, None);
create_and_check_decoder::<DoubleType>(Encoding::ALP, None);

// error when initializing
create_and_check_decoder::<Int32Type>(
Expand Down
Loading
Loading