Skip to content

Commit e7da510

Browse files
test(dataframe): add mixed-schema test for describe with numeric + FixedSizeBinary
Add a test that combines a numeric (Int32) column with a FixedSizeBinary column to exercise the filtering path in describe(): the min/max aggregation skips the FixedSizeBinary column but still computes results for the numeric column. This covers the partial-column filter path rather than the empty-aggregate fallback. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d3e4295 commit e7da510

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

datafusion/core/tests/dataframe/describe.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use datafusion_common::{Result, test_util::parquet_test_data};
2121
use insta::assert_snapshot;
2222
use std::sync::Arc;
2323

24-
use arrow::array::{FixedSizeBinaryArray, RecordBatch};
24+
use arrow::array::{FixedSizeBinaryArray, Int32Array, RecordBatch};
2525
use arrow::datatypes::{DataType, Field, Schema};
2626

2727
#[tokio::test]
@@ -123,6 +123,56 @@ async fn describe_fixed_size_binary() -> Result<()> {
123123
Ok(())
124124
}
125125

126+
#[tokio::test]
127+
async fn describe_mixed_numeric_and_fixed_size_binary() -> Result<()> {
128+
let ctx = SessionContext::new();
129+
let batch = RecordBatch::try_new(
130+
Arc::new(Schema::new(vec![
131+
Field::new("num", DataType::Int32, true),
132+
Field::new("fsb", DataType::FixedSizeBinary(3), true),
133+
])),
134+
vec![
135+
Arc::new(Int32Array::from(vec![Some(10), Some(20), Some(30)])),
136+
Arc::new(FixedSizeBinaryArray::from(vec![
137+
Some(&[1_u8, 2, 3][..]),
138+
None,
139+
Some(&[4_u8, 5, 6][..]),
140+
])),
141+
],
142+
)?;
143+
ctx.register_batch("test_mixed", batch)?;
144+
145+
let result = ctx
146+
.table("test_mixed")
147+
.await?
148+
.describe()
149+
.await?
150+
.collect()
151+
.await?;
152+
153+
// num is numeric so min/max/mean/median/std are computed;
154+
// fsb is FixedSizeBinary so it is filtered out of min/max but still
155+
// appears in count/null_count. This exercises the filter path (partial
156+
// column list in the aggregate) rather than the empty-aggregate fallback.
157+
assert_snapshot!(
158+
batches_to_string(&result),
159+
@r"
160+
+------------+------+------+
161+
| describe | num | fsb |
162+
+------------+------+------+
163+
| count | 3.0 | 2 |
164+
| null_count | 0.0 | 1 |
165+
| mean | 20.0 | null |
166+
| std | 10.0 | null |
167+
| min | 10.0 | null |
168+
| max | 30.0 | null |
169+
| median | 20.0 | null |
170+
+------------+------+------+
171+
"
172+
);
173+
Ok(())
174+
}
175+
126176
#[tokio::test]
127177
async fn describe_null() -> Result<()> {
128178
let ctx = parquet_context().await;

0 commit comments

Comments
 (0)