Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
577 changes: 281 additions & 296 deletions Cargo.lock

Large diffs are not rendered by default.

77 changes: 38 additions & 39 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,16 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
# See for more details: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.6"
arrow = { version = "57.3", features = ["prettyprint"] }
arrow-array = { version = "57.3", default-features = false, features = ["chrono-tz"] }
arrow-buffer = "57.3"
arrow-cast = "57.3"
arrow-flight = "57.3"
arrow-ipc = { version = "57.3", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "57.3", features = ["serde"] }
arrow = { version = "58.3", features = ["prettyprint"] }
arrow-array = { version = "58.3", default-features = false, features = ["chrono-tz"] }
arrow-buffer = "58.3"
arrow-cast = "58.3"
arrow-flight = "58.3"
arrow-ipc = { version = "58.3", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "58.3", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
# Remember to update axum-extra, axum-macros when updating axum
arrow_object_store = { package = "object_store", version = "0.13.2" }
axum = "0.8"
axum-extra = "0.10"
axum-macros = "0.5"
Expand All @@ -128,22 +127,22 @@ const_format = "0.2"
criterion = "0.7"
crossbeam-utils = "0.8"
dashmap = "6.1"
datafusion = "=52.1"
datafusion-common = "=52.1"
datafusion-datasource = "=52.1"
datafusion-expr = "=52.1"
datafusion-expr-common = "=52.1"
datafusion-functions = "=52.1"
datafusion-functions-aggregate-common = "=52.1"
datafusion-functions-window-common = "=52.1"
datafusion-optimizer = "=52.1"
datafusion-orc = "0.7"
datafusion-pg-catalog = "0.15.1"
datafusion-physical-expr = "=52.1"
datafusion-physical-plan = "=52.1"
datafusion-sql = "=52.1"
datafusion-substrait = "=52.1"
datafusion_object_store = { package = "object_store", version = "0.12.5" }
datafusion = "=53.1.0"
datafusion-common = "=53.1.0"
datafusion-datasource = "=53.1.0"
datafusion-expr = "=53.1.0"
datafusion-expr-common = "=53.1.0"
datafusion-functions = "=53.1.0"
datafusion-functions-aggregate-common = "=53.1.0"
datafusion-functions-window-common = "=53.1.0"
datafusion-optimizer = "=53.1.0"
datafusion-orc = { git = "https://github.com/datafusion-contrib/datafusion-orc.git", rev = "6c07fa282dc8d62db2aa4ded06ab55485efc811a" }
datafusion-pg-catalog = "0.16"
datafusion-physical-expr = "=53.1.0"
datafusion-physical-plan = "=53.1.0"
datafusion-sql = "=53.1.0"
datafusion-substrait = "=53.1.0"
datafusion_object_store = { package = "object_store", version = "0.13.2" }
deadpool = "0.12"
deadpool-postgres = "0.14"
derive_builder = "0.20"
Expand Down Expand Up @@ -192,7 +191,7 @@ otel-arrow-rust = { git = "https://github.com/GreptimeTeam/otel-arrow", rev = "5
"server",
] }
parking_lot = "0.12"
parquet = { version = "57.3", default-features = false, features = ["arrow", "async", "object_store"] }
parquet = { version = "58.3", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
pretty_assertions = "1.4.0"
Expand Down Expand Up @@ -338,20 +337,20 @@ git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"

[patch.crates-io]
datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-functions-window-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-physical-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "02b82535e0160c4545667f36a03e1ff9d1d2e51f" }
datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-functions-window-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-physical-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "9c1ed8d9242408aad0a6d444c7c339bcb62f9be4" }
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "2aefa08a8d69c96eec2d6d6703598a009bba6e4c" } # on branch v0.61.x

[profile.release]
Expand Down
4 changes: 2 additions & 2 deletions src/common/datasource/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ common-runtime.workspace = true
common-telemetry.workspace = true
datafusion.workspace = true
datafusion-datasource.workspace = true
datafusion-orc.workspace = true
datatypes.workspace = true
futures.workspace = true
lazy_static.workspace = true
object-store.workspace = true
orc-rust = { version = "0.7", default-features = false, features = ["async"] }
orc-rust = { version = "0.8", default-features = false, features = ["async"] }
parquet.workspace = true
paste.workspace = true
regex.workspace = true
Expand All @@ -45,4 +46,3 @@ url.workspace = true

[dev-dependencies]
common-test-util.workspace = true
datafusion-orc.workspace = true
1 change: 1 addition & 0 deletions src/common/datasource/src/file_format/orc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use arrow_schema::Schema;
use async_trait::async_trait;
use bytes::Bytes;
pub use datafusion_orc::OrcSource;
use futures::FutureExt;
use futures::future::BoxFuture;
use object_store::ObjectStore;
Expand Down
9 changes: 6 additions & 3 deletions src/common/datasource/src/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,14 @@ impl AsyncFileReader for LazyParquetFileReader {
.map_err(|e| ParquetError::External(Box::new(e)))?;

let metadata_opts = options.map(|o| o.metadata_options().clone());
let column_index_policy =
options.map_or(PageIndexPolicy::Skip, |o| o.column_index_policy());
let offset_index_policy =
options.map_or(PageIndexPolicy::Skip, |o| o.offset_index_policy());
let metadata_reader = ParquetMetaDataReader::new()
.with_metadata_options(metadata_opts)
.with_page_index_policy(PageIndexPolicy::from(
options.is_some_and(|o| o.page_index()),
))
.with_column_index_policy(column_index_policy)
.with_offset_index_policy(offset_index_policy)
.with_prefetch_hint(self.metadata_size_hint);

let metadata = metadata_reader
Expand Down
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ use datafusion::execution::context::TaskContext;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion::prelude::SessionContext;
use datafusion_orc::OrcSource;
use futures::StreamExt;
use object_store::ObjectStore;

use super::FORMAT_TYPE;
use crate::file_format::orc::OrcSource;
use crate::file_format::parquet::DefaultParquetFileReaderFactory;
use crate::file_format::{FileFormat, Format, OrcFormat};
use crate::test_util::{basic_schema_with_time_format, scan_config, test_basic_schema, test_store};
Expand Down
1 change: 0 additions & 1 deletion src/file-engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ common-test-util = { workspace = true, optional = true }
common-time.workspace = true
datafusion.workspace = true
datafusion-expr.workspace = true
datafusion-orc.workspace = true
datatypes.workspace = true
futures.workspace = true
object-store.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion src/file-engine/src/query/file_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::sync::Arc;

use common_datasource::file_format::Format;
use common_datasource::file_format::csv::CsvFormat;
use common_datasource::file_format::orc::OrcSource;
use common_datasource::file_format::parquet::DefaultParquetFileReaderFactory;
use datafusion::common::ToDFSchema;
use datafusion::config::CsvOptions;
Expand All @@ -34,7 +35,6 @@ use datafusion::physical_plan::{
use datafusion::prelude::SessionContext;
use datafusion_expr::expr::Expr;
use datafusion_expr::utils::conjunction;
use datafusion_orc::OrcSource;
use datatypes::schema::SchemaRef;
use object_store::ObjectStore;
use snafu::ResultExt;
Expand Down
5 changes: 4 additions & 1 deletion src/mito2/src/cache/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ pub(crate) fn assert_parquet_metadata_equal(x: Arc<ParquetMetaData>, y: Arc<Parq
.statistics()
.cloned()
.map(unset_min_max_backwards_compatible_flag);
let mut col_builder = col.into_builder().clear_statistics();
let mut col_builder = col
.into_builder()
.clear_statistics()
.clear_page_encoding_stats();
if let Some(stats) = stats {
col_builder = col_builder.set_statistics(stats);
}
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/memtable/bulk/part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,7 @@ impl BulkPartEncoder {
WriterProperties::builder()
.set_key_value_metadata(Some(vec![key_value_meta]))
.set_write_batch_size(row_group_size)
.set_max_row_group_size(row_group_size)
.set_max_row_group_row_count(Some(row_group_size))
.set_compression(Compression::ZSTD(ZstdLevel::default()))
.set_column_index_truncate_length(None)
.set_statistics_truncate_length(None)
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/memtable/partition_tree/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ impl<'a> DataPartEncoder<'a> {
fn writer_props(self) -> WriterProperties {
let mut builder = WriterProperties::builder();
if let Some(row_group_size) = self.row_group_size {
builder = builder.set_max_row_group_size(row_group_size)
builder = builder.set_max_row_group_row_count(Some(row_group_size))
}

let ts_col = ColumnPath::new(vec![self.timestamp_column_name]);
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/sst/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ mod tests {
.set_key_value_metadata(Some(vec![key_value_meta]))
.set_compression(Compression::ZSTD(ZstdLevel::default()))
.set_encoding(Encoding::PLAIN)
.set_max_row_group_size(write_opts.row_group_size);
.set_max_row_group_row_count(Some(write_opts.row_group_size));

let writer_props = props_builder.build();

Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/sst/parquet/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ where
.set_key_value_metadata(Some(vec![key_value_meta]))
.set_compression(Compression::ZSTD(ZstdLevel::default()))
.set_encoding(Encoding::PLAIN)
.set_max_row_group_size(opts.row_group_size)
.set_max_row_group_row_count(Some(opts.row_group_size))
.set_column_index_truncate_length(None)
.set_statistics_truncate_length(None);

Expand Down
1 change: 0 additions & 1 deletion src/object-store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ uuid.workspace = true

[dev-dependencies]
anyhow = "1.0"
arrow_object_store.workspace = true
common-telemetry.workspace = true
common-test-util.workspace = true
object_store_opendal.workspace = true
Expand Down
Loading
Loading