From 19522e03be85cc231858f31b7f49cf4c2523a98a Mon Sep 17 00:00:00 2001
From: 1fanwang <1fannnw@gmail.com>
Date: Tue, 12 May 2026 07:01:46 -0700
Subject: [PATCH] Reject varbinary partition columns in Delta Lake at schema validation

Creating a Delta Lake table with a varbinary partition column
previously succeeded, then any subsequent INSERT failed deep in the
write path with 'Unsupported type for partition: varbinary' from
DeltaLakeWriteUtils. The Delta protocol's binary partition-value
encoding is also not implemented on the read side, so Spark-written
varbinary-partitioned tables cannot be read either.

Match the existing array/map/row handling and reject varbinary
partition columns up front in checkPartitionColumns. CREATE TABLE,
CTAS, and the table-layout path all go through validateTableColumns,
so the new check fires before any row is written.

Update
TestDeltaLakeConnectorTest.testCreateTableWithUnsupportedPartitionType
and testCreateTableAsSelectWithUnsupportedPartitionType to assert the
new error, drop the obsolete
testInsertIntoUnsupportedVarbinaryPartitionType that expected the
deep-write failure, and add a fast in-process variant to
TestDeltaLakeBasic.

Fixes #24155 Signed-off-by: 1fanwang <1fannnw@gmail.com> --- .../plugin/deltalake/DeltaLakeMetadata.java | 7 +++++++ .../plugin/deltalake/TestDeltaLakeBasic.java | 14 +++++++++++++- .../deltalake/TestDeltaLakeConnectorTest.java | 18 ++++++------------ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index 23cca367ce24..ad93cadaa627 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -1748,6 +1748,13 @@ private static void checkPartitionColumns(List columns, List partitionColumnNames.contains(column.getName())) + .anyMatch(column -> column.getType().equals(VARBINARY))) { + throw new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Using varbinary type on partitioned columns is unsupported"); + } + if (!invalidPartitionNames.isEmpty()) { throw new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Table property 'partitioned_by' contained column names which do not exist: " + invalidPartitionNames); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 079d1ea8f1c4..6c0df9c87186 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -548,7 +548,7 @@ void testPartitionValuesParsedCheckpoint() ImmutableList.of("0.12", "3.45"), ImmutableList.of(decimal("0.12", createDecimalType(3, 2)), decimal("3.45", createDecimalType(3, 2)))); testPartitionValuesParsedCheckpoint(mode, "varchar", ImmutableList.of("'alice'", "'bob'"), ImmutableList.of("alice", "bob")); - // TODO 
https://github.com/trinodb/trino/issues/24155 Cannot insert varbinary values into partitioned columns + // varbinary partition columns are rejected at schema validation; see testVarbinaryPartitionColumnRejected. testPartitionValuesParsedCheckpoint( mode, "date", @@ -628,6 +628,18 @@ private void testPartitionValuesParsedCheckpoint(ColumnMappingMode columnMapping } } + @Test // https://github.com/trinodb/trino/issues/24155 + void testVarbinaryPartitionColumnRejected() + { + String tableName = "test_varbinary_partition_" + randomNameSuffix(); + assertQueryFails( + "CREATE TABLE " + tableName + "(x INT, part VARBINARY) WITH (partitioned_by = ARRAY['part'])", + "Using varbinary type on partitioned columns is unsupported"); + assertQueryFails( + "CREATE TABLE " + tableName + " WITH (partitioned_by = ARRAY['part']) AS SELECT 1 x, X'01' part", + "Using varbinary type on partitioned columns is unsupported"); + } + /** * @see deltalake.column_mapping_mode_id * @see deltalake.column_mapping_mode_name diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java index 8a01eacc4544..963d14f1c854 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java @@ -458,18 +458,9 @@ public void testCreateTableWithUnsupportedPartitionType() assertQueryFails( "CREATE TABLE " + tableName + "(a INT, part ROW(field INT)) WITH (partitioned_by = ARRAY['part'])", "Using array, map or row type on partitioned columns is unsupported"); - } - - @Test - public void testInsertIntoUnsupportedVarbinaryPartitionType() - { - // TODO https://github.com/trinodb/trino/issues/24155 Cannot insert varbinary values into partitioned columns - // Update TestDeltaLakeBasic.testPartitionValuesParsedCheckpoint() when fixing 
this issue - try (TestTable table = newTrinoTable( - "test_varbinary_partition", - "(x int, part varbinary) WITH (partitioned_by = ARRAY['part'])")) { - assertQueryFails("INSERT INTO " + table.getName() + " VALUES (1, X'01')", "Unsupported type for partition: varbinary"); - } + assertQueryFails( + "CREATE TABLE " + tableName + "(a INT, part VARBINARY) WITH (partitioned_by = ARRAY['part'])", + "Using varbinary type on partitioned columns is unsupported"); } @Test @@ -485,6 +476,9 @@ public void testCreateTableAsSelectWithUnsupportedPartitionType() assertQueryFails( "CREATE TABLE " + tableName + " WITH (partitioned_by = ARRAY['part']) AS SELECT 1 a, row(1) part", "Using array, map or row type on partitioned columns is unsupported"); + assertQueryFails( + "CREATE TABLE " + tableName + " WITH (partitioned_by = ARRAY['part']) AS SELECT 1 a, X'01' part", + "Using varbinary type on partitioned columns is unsupported"); } @Test