Skip to content

Commit e7da510

Browse files
test(dataframe): add mixed-schema test for describe with numeric + FixedSizeBinary
Add a test that combines a numeric (Int32) column with a FixedSizeBinary column to exercise the filtering path in describe(): the min/max aggregation skips the FixedSizeBinary column but still computes results for the numeric column. This covers the partial-column filter path rather than the empty-aggregate fallback. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d3e4295 commit e7da510

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

datafusion/core/tests/dataframe/describe.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use datafusion_common::{Result, test_util::parquet_test_data};
2121
use insta::assert_snapshot;
2222
use std::sync::Arc;
2323

24-
use arrow::array::{FixedSizeBinaryArray, RecordBatch};
24+
use arrow::array::{FixedSizeBinaryArray, Int32Array, RecordBatch};
2525
use arrow::datatypes::{DataType, Field, Schema};
2626

2727
#[tokio::test]
@@ -123,6 +123,56 @@ async fn describe_fixed_size_binary() -> Result<()> {
123123
Ok(())
124124
}
125125

126+
#[tokio::test]
127+
async fn describe_mixed_numeric_and_fixed_size_binary() -> Result<()> {
128+
let ctx = SessionContext::new();
129+
let batch = RecordBatch::try_new(
130+
Arc::new(Schema::new(vec![
131+
Field::new("num", DataType::Int32, true),
132+
Field::new("fsb", DataType::FixedSizeBinary(3), true),
133+
])),
134+
vec![
135+
Arc::new(Int32Array::from(vec![Some(10), Some(20), Some(30)])),
136+
Arc::new(FixedSizeBinaryArray::from(vec![
137+
Some(&[1_u8, 2, 3][..]),
138+
None,
139+
Some(&[4_u8, 5, 6][..]),
140+
])),
141+
],
142+
)?;
143+
ctx.register_batch("test_mixed", batch)?;
144+
145+
let result = ctx
146+
.table("test_mixed")
147+
.await?
148+
.describe()
149+
.await?
150+
.collect()
151+
.await?;
152+
153+
// num is numeric so min/max/mean/median/std are computed;
154+
// fsb is FixedSizeBinary so it is filtered out of min/max but still
155+
// appears in count/null_count. This exercises the filter path (partial
156+
// column list in the aggregate) rather than the empty-aggregate fallback.
157+
assert_snapshot!(
158+
batches_to_string(&result),
159+
@r"
160+
+------------+------+------+
161+
| describe | num | fsb |
162+
+------------+------+------+
163+
| count | 3.0 | 2 |
164+
| null_count | 0.0 | 1 |
165+
| mean | 20.0 | null |
166+
| std | 10.0 | null |
167+
| min | 10.0 | null |
168+
| max | 30.0 | null |
169+
| median | 20.0 | null |
170+
+------------+------+------+
171+
"
172+
);
173+
Ok(())
174+
}
175+
126176
#[tokio::test]
127177
async fn describe_null() -> Result<()> {
128178
let ctx = parquet_context().await;

0 commit comments

Comments
 (0)