Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -540,9 +540,17 @@ public void testTrinoWriteStatsAsJsonEnabled(String type, String inputValue, Dou
testWriteStatsAsJsonEnabled(sql -> onTrino().executeQuery(sql), tableName, "delta.default." + tableName, type, inputValue, dataSize, distinctValues, nullsFraction, statsValue);
}

@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS}, dataProvider = "testDeltaCheckpointWriteStatsAsJson")
@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS}, dataProvider = "testDeltaCheckpointWriteStatsAsJsonNumericTypes")
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testDatabricksWriteStatsAsJsonEnabled(String type, String inputValue, Double nullsFraction, Object statsValue)
public void testDatabricksWriteStatsAsJsonEnabledNumericTypes(String type, String inputValue, Double nullsFraction, Object statsValue)
{
String tableName = "test_dl_checkpoints_write_stats_as_json_enabled_databricks_" + randomNameSuffix();
testWriteStatsAsJsonEnabled(sql -> onDelta().executeQuery(sql), tableName, "default." + tableName, type, inputValue, null, null, nullsFraction, statsValue);
}

@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS}, dataProvider = "testDeltaCheckpointWriteStatsAsJsonNonNumericTypes")
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testDatabricksWriteStatsAsJsonEnabledNonNumericTypes(String type, String inputValue, Double nullsFraction, Object statsValue)
{
String tableName = "test_dl_checkpoints_write_stats_as_json_enabled_databricks_" + randomNameSuffix();
testWriteStatsAsJsonEnabled(sql -> onDelta().executeQuery(sql), tableName, "default." + tableName, type, inputValue, null, null, nullsFraction, statsValue);
Expand All @@ -557,8 +565,8 @@ private void testWriteStatsAsJsonEnabled(Consumer<String> sqlExecutor, String ta
"LOCATION 's3://%s/databricks-compatibility-test-%1$s' " +
"TBLPROPERTIES (" +
" delta.checkpointInterval = 2, " +
" delta.checkpoint.writeStatsAsJson = false, " +
" delta.checkpoint.writeStatsAsStruct = true)",
" delta.checkpoint.writeStatsAsJson = true, " +
" delta.checkpoint.writeStatsAsStruct = false)",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reduce round trips for testDatabricksWriteStatsAsJsonEnabled
Create the table with writeStatsAsJson=true directly instead of
transitioning from struct format, dropping the ALTER TABLE and one
INSERT. Reduces onDelta() calls from 5 to 3 per variant.

If I remember correctly, we intentionally introduced two steps here. Let me look into the details.

tableName, type, bucketName);

onDelta().executeQuery(createTableSql);
Expand All @@ -567,18 +575,10 @@ private void testWriteStatsAsJsonEnabled(Consumer<String> sqlExecutor, String ta
sqlExecutor.accept("INSERT INTO " + qualifiedTableName + " SELECT " + inputValue);
sqlExecutor.accept("INSERT INTO " + qualifiedTableName + " SELECT " + inputValue);

// SET TBLPROPERTIES increments checkpoint
onDelta().executeQuery("" +
"ALTER TABLE default." + tableName + " SET TBLPROPERTIES (" +
"'delta.checkpoint.writeStatsAsJson' = true, " +
"'delta.checkpoint.writeStatsAsStruct' = false)");

sqlExecutor.accept("INSERT INTO " + qualifiedTableName + " SELECT " + inputValue);

assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." + tableName))
.containsOnly(ImmutableList.of(
row("col", dataSize, distinctValues, nullsFraction, null, statsValue, statsValue),
row(null, null, null, null, 3.0, null, null)));
row(null, null, null, null, 2.0, null, null)));
}
finally {
dropDeltaTableWithRetry("default." + tableName);
Expand Down Expand Up @@ -609,10 +609,9 @@ public Object[][] testTrinoCheckpointWriteStatsAsJson()
}

@DataProvider
public Object[][] testDeltaCheckpointWriteStatsAsJson()
public Object[][] testDeltaCheckpointWriteStatsAsJsonNumericTypes()
{
return new Object[][] {
{"boolean", "true", 0.0, null},
{"integer", "1", 0.0, "1"},
{"tinyint", "2", 0.0, "2"},
{"smallint", "3", 0.0, "3"},
Expand All @@ -621,6 +620,14 @@ public Object[][] testDeltaCheckpointWriteStatsAsJson()
{"double", "1.0", 0.0, "1.0"},
{"decimal(3,2)", "3.14", 0.0, "3.14"},
{"decimal(30,1)", "12345", 0.0, "12345.0"},
};
}

@DataProvider
public Object[][] testDeltaCheckpointWriteStatsAsJsonNonNumericTypes()
{
return new Object[][] {
{"boolean", "true", 0.0, null},
{"string", "'test'", 0.0, null},
{"binary", "X'65683F'", 0.0, null},
{"date", "date '2021-02-03'", 0.0, "2021-02-03"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,17 @@ public void testReadFromSchemaChangedShallowCloneTable()

@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testReadFromSchemaChangedDeepCloneTable()
public void testReadFromSchemaChangedDeepCloneTablePartitioned()
{
// Deep Clone is not supported on Delta-Lake OSS
testReadSchemaChangedCloneTable("DEEP", true);
}

@Test(groups = {DELTA_LAKE_DATABRICKS, PROFILE_SPECIFIC_TESTS})
@Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH)
public void testReadFromSchemaChangedDeepCloneTableNonPartitioned()
{
// Deep Clone is not supported on Delta-Lake OSS
testReadSchemaChangedCloneTable("DEEP", false);
}

Expand Down