From b68e39cac0baeddb3932347af9a52abf29f6c93a Mon Sep 17 00:00:00 2001
From: Selena Chen
Date: Fri, 8 May 2026 15:49:45 -0700
Subject: [PATCH] Add tests for spark 3.1 to confirm sort order is respected

---
 build.gradle                                  |  2 +-
 .../catalogtest/CatalogOperationTest.java     | 57 +++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/build.gradle b/build.gradle
index 263bbf04a..aee358d7c 100644
--- a/build.gradle
+++ b/build.gradle
@@ -30,7 +30,7 @@ ext {
   spark_version = "3.1.1"
   ok_http3_version = "4.11.0"
   junit_version = "5.11.0"
-  iceberg_1_2_version = "1.2.0.15"
+  iceberg_1_2_version = "1.2.0.16"
   iceberg_1_5_version = "1.5.2.11"
   otel_agent_version = "2.12.0" // Bundles OTel SDK 1.47.0
   otel_annotations_version = "2.12.0" // Match agent version
diff --git a/integrations/spark/spark-3.1/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/CatalogOperationTest.java b/integrations/spark/spark-3.1/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/CatalogOperationTest.java
index c7d8e0d7b..e47cd855e 100644
--- a/integrations/spark/spark-3.1/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/CatalogOperationTest.java
+++ b/integrations/spark/spark-3.1/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/CatalogOperationTest.java
@@ -13,6 +13,7 @@
 import java.util.stream.Collectors;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DataFiles;
+import org.apache.iceberg.NullOrder;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.SchemaParser;
@@ -403,4 +404,60 @@ public void testAlterTableSortOrderCTAS() throws Exception {
       Assertions.assertEquals(SortOrder.unsorted(), newSqlTable.sortOrder());
     }
   }
+
+  @Test
+  public void testWriteOrderedByPersistsMultiColumnSortOrder() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      Catalog catalog = getOpenHouseCatalog(spark);
+      spark.sql(
+          "CREATE TABLE openhouse.db.write_ordered_multi (id INT, category STRING, data STRING)");
+      spark.sql("ALTER TABLE openhouse.db.write_ordered_multi WRITE ORDERED BY category, id");
+
+      Table table = catalog.loadTable(TableIdentifier.of("db", "write_ordered_multi"));
+      Assertions.assertEquals(
+          SortOrder.builderFor(table.schema()).asc("category").asc("id").build(),
+          table.sortOrder());
+    }
+  }
+
+  @Test
+  public void testWriteOrderedByRespectsDirectionAndNullOrder() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      Catalog catalog = getOpenHouseCatalog(spark);
+      spark.sql("CREATE TABLE openhouse.db.write_ordered_desc (id INT, category STRING)");
+      // DESC defaults to NULLS LAST in Iceberg; override to NULLS FIRST to verify both
+      // direction and null-order are propagated end-to-end.
+      spark.sql(
+          "ALTER TABLE openhouse.db.write_ordered_desc WRITE ORDERED BY category DESC NULLS FIRST");
+
+      Table table = catalog.loadTable(TableIdentifier.of("db", "write_ordered_desc"));
+      Assertions.assertEquals(
+          SortOrder.builderFor(table.schema()).desc("category", NullOrder.NULLS_FIRST).build(),
+          table.sortOrder());
+    }
+  }
+
+  @Test
+  public void testWriteOrderedByRoundTripsThroughInsert() throws Exception {
+    try (SparkSession spark = getSparkSession()) {
+      Catalog catalog = getOpenHouseCatalog(spark);
+      spark.sql("CREATE TABLE openhouse.db.write_ordered_insert (id INT, category STRING)");
+      spark.sql("ALTER TABLE openhouse.db.write_ordered_insert WRITE ORDERED BY id");
+
+      spark.sql(
+          "INSERT INTO openhouse.db.write_ordered_insert VALUES (3, 'C'), (1, 'A'), (2, 'B')");
+
+      Table table = catalog.loadTable(TableIdentifier.of("db", "write_ordered_insert"));
+      // Sort order metadata is preserved across an INSERT (no implicit reset).
+      Assertions.assertEquals(
+          SortOrder.builderFor(table.schema()).asc("id").build(), table.sortOrder());
+
+      List<Row> rows =
+          spark.sql("SELECT id FROM openhouse.db.write_ordered_insert ORDER BY id").collectAsList();
+      Assertions.assertEquals(3, rows.size());
+      Assertions.assertEquals(1, rows.get(0).getInt(0));
+      Assertions.assertEquals(2, rows.get(1).getInt(0));
+      Assertions.assertEquals(3, rows.get(2).getInt(0));
+    }
+  }
 }