diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md index ea5aba99aa16..48192bfb1b37 100644 --- a/docs/src/main/sphinx/connector/delta-lake.md +++ b/docs/src/main/sphinx/connector/delta-lake.md @@ -201,6 +201,10 @@ values. Typical usage does not require you to configure them. - Number of threads used for retrieving checkpoint files of each table. Currently, only retrievals of V2 Checkpoint's sidecar files are parallelized. - `4` +* - `delta.enable-clustering-info` + - Controls whether clustered column information is retrieved. + The equivalent catalog session property is `enable_clustering_info`. + - `false` ::: ### Catalog session properties diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java index cb0a4f41388b..5d0f52d69838 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java @@ -95,6 +95,7 @@ public class DeltaLakeConfig private boolean deltaLogFileSystemCacheDisabled; private int metadataParallelism = 8; private int checkpointProcessingParallelism = 4; + private boolean enableClusteringInfo; public Duration getMetadataCacheTtl() { @@ -364,6 +365,19 @@ public DeltaLakeConfig setCompressionCodec(HiveCompressionOption compressionCode return this; } + public boolean isEnableClusteringInfo() + { + return enableClusteringInfo; + } + + @Config("delta.enable-clustering-info") + @ConfigDescription("If show clustered columns in table metadata") + public DeltaLakeConfig setEnableClusteringInfo(boolean enableClusteringInfo) + { + this.enableClusteringInfo = enableClusteringInfo; + return this; + } + @Min(1) public long getPerTransactionMetastoreCacheMaximumSize() { diff --git 
a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index 9e7b1ab30b66..9f71ebba034e 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -241,6 +241,7 @@ import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_TABLE; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getHiveCatalogName; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isCollectExtendedStatisticsColumnStatisticsOnWrite; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isEnableClusteringInfo; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isProjectionPushdownEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isQueryPartitionFilterRequired; @@ -294,6 +295,7 @@ import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.verifySupportedColumnMapping; +import static io.trino.plugin.deltalake.transactionlog.DeltaLakeTableFeatures.CLUSTERED_TABLES_FEATURE_NAME; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeTableFeatures.unsupportedReaderFeatures; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeTableFeatures.unsupportedWriterFeatures; import static io.trino.plugin.deltalake.transactionlog.MetadataEntry.DELTA_CHANGE_DATA_FEED_ENABLED_PROPERTY; @@ -445,7 +447,7 @@ public class DeltaLakeMetadata private static final String CHECK_CONSTRAINT_CONVERT_FAIL_EXPRESSION = "CAST(fail('Failed to convert 
Delta check constraints to Trino expression') AS boolean)"; - private static final int TEMPORAL_TIME_TRAVEL_LINEAR_SEARCH_MAX_SIZE = 1000; + public static final int TEMPORAL_TIME_TRAVEL_LINEAR_SEARCH_MAX_SIZE = 1000; private final DeltaLakeMetastore metastore; private final TransactionLogAccess transactionLogAccess; @@ -744,6 +746,12 @@ public LocatedTableHandle getTableHandle( LOG.debug("Skip %s because the reader version is unsupported: %d", tableName, protocolEntry.minReaderVersion()); return null; } + + Optional> clusteredColumns = Optional.empty(); + if (isEnableClusteringInfo(session) && protocolEntry.writerFeaturesContains(CLUSTERED_TABLES_FEATURE_NAME)) { + clusteredColumns = transactionLogAccess.getClusteredColumns(fileSystem, tableSnapshot); + } + Set unsupportedReaderFeatures = unsupportedReaderFeatures(protocolEntry.readerFeatures().orElse(ImmutableSet.of())); if (!unsupportedReaderFeatures.isEmpty()) { LOG.debug("Skip %s because the table contains unsupported reader features: %s", tableName, unsupportedReaderFeatures); @@ -762,6 +770,7 @@ public LocatedTableHandle getTableHandle( tableLocation, metadataEntry, protocolEntry, + clusteredColumns, TupleDomain.all(), TupleDomain.all(), false, @@ -3571,6 +3580,7 @@ else if (!partitionColumns.contains(column)) { tableHandle.getLocation(), tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), + tableHandle.getClusteredColumns(), // Do not simplify the enforced constraint, the connector is guaranteeing the constraint will be applied as is. // The unenforced constraint will still be checked by the engine. 
tableHandle.getEnforcedPartitionConstraint() @@ -3869,6 +3879,7 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession handle.getLocation(), metadata, handle.getProtocolEntry(), + handle.getClusteredColumns(), TupleDomain.all(), TupleDomain.all(), false, diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java index b13bb565593c..348dd7d2d57c 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java @@ -65,6 +65,7 @@ public final class DeltaLakeSessionProperties private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size"; private static final String IDLE_WRITER_MIN_FILE_SIZE = "idle_writer_min_file_size"; private static final String COMPRESSION_CODEC = "compression_codec"; + private static final String ENABLE_CLUSTERING_INFO = "enable_clustering_info"; // This property is not supported by Delta Lake and exists solely for technical reasons. 
@Deprecated private static final String TIMESTAMP_PRECISION = "timestamp_precision"; @@ -216,6 +217,11 @@ public DeltaLakeSessionProperties( } }, false), + booleanProperty( + ENABLE_CLUSTERING_INFO, + "If show clustered columns in table metadata", + deltaLakeConfig.isEnableClusteringInfo(), + false), booleanProperty( PROJECTION_PUSHDOWN_ENABLED, "Read only required fields from a row type", @@ -334,6 +340,11 @@ public static HiveCompressionOption getCompressionCodec(ConnectorSession session return session.getProperty(COMPRESSION_CODEC, HiveCompressionOption.class); } + public static boolean isEnableClusteringInfo(ConnectorSession session) + { + return session.getProperty(ENABLE_CLUSTERING_INFO, Boolean.class); + } + public static boolean isProjectionPushdownEnabled(ConnectorSession session) { return session.getProperty(PROJECTION_PUSHDOWN_ENABLED, Boolean.class); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java index 12af6a3dcf20..6691825eb830 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java @@ -24,6 +24,7 @@ import io.trino.spi.connector.SchemaTableName; import io.trino.spi.predicate.TupleDomain; +import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -39,6 +40,7 @@ public class DeltaLakeTableHandle private final String location; private final MetadataEntry metadataEntry; private final ProtocolEntry protocolEntry; + private final Optional> clusteredColumns; private final TupleDomain enforcedPartitionConstraint; private final TupleDomain nonPartitionConstraint; private final boolean merge; @@ -65,6 +67,7 @@ public DeltaLakeTableHandle( @JsonProperty("location") String location, @JsonProperty("metadataEntry") MetadataEntry 
metadataEntry, @JsonProperty("protocolEntry") ProtocolEntry protocolEntry, + @JsonProperty("clusteredColumns") Optional> clusteredColumns, @JsonProperty("enforcedPartitionConstraint") TupleDomain enforcedPartitionConstraint, @JsonProperty("nonPartitionConstraint") TupleDomain nonPartitionConstraint, @JsonProperty("merge") boolean merge, @@ -80,6 +83,7 @@ public DeltaLakeTableHandle( location, metadataEntry, protocolEntry, + clusteredColumns, enforcedPartitionConstraint, nonPartitionConstraint, ImmutableSet.of(), @@ -100,6 +104,7 @@ public DeltaLakeTableHandle( String location, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, + Optional> clusteredColumns, TupleDomain enforcedPartitionConstraint, TupleDomain nonPartitionConstraint, Set constraintColumns, @@ -118,6 +123,7 @@ public DeltaLakeTableHandle( this.location = requireNonNull(location, "location is null"); this.metadataEntry = requireNonNull(metadataEntry, "metadataEntry is null"); this.protocolEntry = requireNonNull(protocolEntry, "protocolEntry is null"); + this.clusteredColumns = requireNonNull(clusteredColumns, "clusteredColumns is null"); this.enforcedPartitionConstraint = requireNonNull(enforcedPartitionConstraint, "enforcedPartitionConstraint is null"); this.nonPartitionConstraint = requireNonNull(nonPartitionConstraint, "nonPartitionConstraint is null"); this.merge = merge; @@ -140,6 +146,7 @@ public DeltaLakeTableHandle withProjectedColumns(Set proj location, metadataEntry, protocolEntry, + clusteredColumns, enforcedPartitionConstraint, nonPartitionConstraint, constraintColumns, @@ -162,6 +169,7 @@ public DeltaLakeTableHandle forOptimize(boolean recordScannedFiles, DataSize max location, metadataEntry, protocolEntry, + clusteredColumns, enforcedPartitionConstraint, nonPartitionConstraint, constraintColumns, @@ -184,6 +192,7 @@ public DeltaLakeTableHandle forMerge() location, metadataEntry, protocolEntry, + clusteredColumns, enforcedPartitionConstraint, nonPartitionConstraint, 
constraintColumns, @@ -262,6 +271,12 @@ public ProtocolEntry getProtocolEntry() return protocolEntry; } + @JsonProperty + public Optional> getClusteredColumns() + { + return clusteredColumns; + } + @JsonProperty public TupleDomain getEnforcedPartitionConstraint() { @@ -353,6 +368,7 @@ public boolean equals(Object o) Objects.equals(location, that.location) && Objects.equals(metadataEntry, that.metadataEntry) && Objects.equals(protocolEntry, that.protocolEntry) && + Objects.equals(clusteredColumns, that.clusteredColumns) && Objects.equals(enforcedPartitionConstraint, that.enforcedPartitionConstraint) && Objects.equals(nonPartitionConstraint, that.nonPartitionConstraint) && merge == that.merge && @@ -374,6 +390,7 @@ public int hashCode() location, metadataEntry, protocolEntry, + clusteredColumns, enforcedPartitionConstraint, nonPartitionConstraint, merge, diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/clustering/ClusteringMetadataUtil.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/clustering/ClusteringMetadataUtil.java new file mode 100644 index 000000000000..2ce94783a731 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/clustering/ClusteringMetadataUtil.java @@ -0,0 +1,296 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.deltalake.clustering; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.airlift.concurrent.BoundedExecutor; +import io.airlift.json.ObjectMapperProvider; +import io.airlift.log.Logger; +import io.airlift.units.DataSize; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoInputFile; +import io.trino.plugin.deltalake.transactionlog.CommitInfoEntry; +import io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry; +import io.trino.plugin.deltalake.transactionlog.TableSnapshot; +import io.trino.spi.TrinoException; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.Executor; +import java.util.stream.Stream; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; +import static io.airlift.units.DataSize.Unit.MEGABYTE; +import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR; +import static io.trino.plugin.deltalake.DeltaLakeMetadata.TEMPORAL_TIME_TRAVEL_LINEAR_SEARCH_MAX_SIZE; +import static io.trino.plugin.deltalake.clustering.Operation.CLUSTER_BY; +import static io.trino.plugin.deltalake.clustering.Operation.CREATE_TABLE_KEYWORD; +import static io.trino.plugin.deltalake.clustering.Operation.MERGE; +import 
static io.trino.plugin.deltalake.clustering.Operation.OPTIMIZE; +import static io.trino.plugin.deltalake.clustering.Operation.RENAME_COLUMN; +import static io.trino.plugin.deltalake.clustering.Operation.REPLACE_TABLE_KEYWORD; +import static io.trino.plugin.deltalake.clustering.Operation.RESTORE; +import static io.trino.plugin.deltalake.clustering.Operation.UNKNOW_OPERATION; +import static io.trino.plugin.deltalake.clustering.Operation.WRITE; +import static io.trino.plugin.deltalake.clustering.Operation.fromString; +import static io.trino.plugin.deltalake.transactionlog.TemporalTimeTravelUtil.findLatestVersionUsingTemporal; +import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; +import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogJsonEntryPath; +import static io.trino.plugin.deltalake.transactionlog.checkpoint.TransactionLogTail.getEntriesFromJson; +import static java.lang.String.format; +import static java.time.ZoneOffset.UTC; + +public final class ClusteringMetadataUtil +{ + private static final Logger LOG = Logger.get(ClusteringMetadataUtil.class); + private static final DataSize TRANSACTION_LOG_MAX_CACHED_SIZE = DataSize.of(16, MEGABYTE); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapperProvider().get(); + private static final String CLUSTERING_PARAMETER_KEY = "clusterBy"; + private static final String NEW_CLUSTERING_PARAMETER_KEY = "newClusteringColumns"; + private static final String RENAMED_OLD_COLUMN_KEY = "oldColumnPath"; + private static final String RENAMED_NEW_COLUMN_KEY = "newColumnPath"; + + private static final String RESTORE_VERSION_KEY = "version"; + private static final String RESTORE_TEMPORAL_KEY = "timestamp"; + private static final DateTimeFormatter TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.S").withZone(UTC); + private static final Executor restoredInfoExecutor = new BoundedExecutor(newDirectExecutorService(), 4); + + 
private static final Map OPERATION_CLUSTEINFOKEY_MAP = ImmutableMap.of( + WRITE, CLUSTERING_PARAMETER_KEY, + MERGE, CLUSTERING_PARAMETER_KEY, + OPTIMIZE, CLUSTERING_PARAMETER_KEY, + CREATE_TABLE_KEYWORD, CLUSTERING_PARAMETER_KEY, + REPLACE_TABLE_KEYWORD, CLUSTERING_PARAMETER_KEY, + CLUSTER_BY, NEW_CLUSTERING_PARAMETER_KEY); + + private static final ThreadLocal> OLD_TO_NEW_RENAMED_COLUMNS = ThreadLocal.withInitial(HashMap::new); + + private ClusteringMetadataUtil() + { + } + + public static Optional> getLatestClusteredColumns(TrinoFileSystem fileSystem, TableSnapshot tableSnapshot) + { + long currentVersion = getCurrentVersion(fileSystem, tableSnapshot, tableSnapshot.getVersion()); + Optional commitInfoEntry; + List clusteredColumns = ImmutableList.of(); + while (currentVersion >= 0) { + commitInfoEntry = extractCommitInfo(currentVersion, fileSystem, tableSnapshot); + if (commitInfoEntry.isEmpty()) { + break; + } + clusteredColumns = extractClusteredColumns(commitInfoEntry); + Operation operation = getOperation(commitInfoEntry.get().operation()); + if (shouldStopLookup(operation, clusteredColumns)) { + break; + } + if (operation == RESTORE) { + currentVersion = getCurrentVersion(fileSystem, tableSnapshot, currentVersion); + } + else { + currentVersion--; + } + } + if (!clusteredColumns.isEmpty()) { + clusteredColumns = clusteredColumns.stream() + .map(c -> OLD_TO_NEW_RENAMED_COLUMNS.get().getOrDefault(c, c)) + .collect(toImmutableList()); + } + OLD_TO_NEW_RENAMED_COLUMNS.remove(); + return Optional.of(clusteredColumns); + } + + @VisibleForTesting + static long getCurrentVersion(TrinoFileSystem fileSystem, TableSnapshot tableSnapshot, long currentVersion) + { + long version = currentVersion; + CommitInfoEntry commitInfoEntry = extractCommitInfo(currentVersion, fileSystem, tableSnapshot) + .orElseThrow(() -> new IllegalStateException("No commit info found for table at version " + tableSnapshot.getVersion())); + if (getOperation(commitInfoEntry.operation()) == RESTORE) 
{ + version = getRestoreVersion(commitInfoEntry, fileSystem, tableSnapshot); + } + return version; + } + + @VisibleForTesting + static long getRestoreVersion(CommitInfoEntry commitInfoEntry, TrinoFileSystem fileSystem, TableSnapshot tableSnapshot) + { + if (getOperation(commitInfoEntry.operation()) != RESTORE) { + throw new IllegalArgumentException("The provided commitInfoEntry is not of RESTORE operation"); + } + long version; + String restoredVersion = commitInfoEntry.operationParameters().get(RESTORE_VERSION_KEY); + String restoredTimestamp = commitInfoEntry.operationParameters().get(RESTORE_TEMPORAL_KEY); + if (!Strings.isNullOrEmpty(restoredVersion)) { + version = Long.parseLong(restoredVersion); + } + else if (!Strings.isNullOrEmpty(restoredTimestamp)) { + String tableLocation = tableSnapshot.getTableLocation(); + LocalDateTime localDateTime = LocalDateTime.parse(restoredTimestamp, TIME_FORMATTER); + long epochMillis = localDateTime.toInstant(UTC).toEpochMilli(); // all timestamps recorded in commitInfoEntry are in UTC + try { + version = findLatestVersionUsingTemporal(fileSystem, tableLocation, epochMillis, restoredInfoExecutor, TEMPORAL_TIME_TRAVEL_LINEAR_SEARCH_MAX_SIZE); + } + catch (IOException e) { + throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, + format("Unexpected IO exception occurred while reading the entries under the location %s for finding latest snapshot id before or at %s", + tableLocation, Instant.ofEpochMilli(epochMillis)), e); + } + } + else { + throw new IllegalArgumentException("Both restored version and timestamp are null or empty, should never happen"); + } + return version; + } + + @VisibleForTesting + static Optional extractCommitInfo(Long version, TrinoFileSystem fileSystem, TableSnapshot tableSnapshot) + { + Location transactionLogPath = getTransactionLogJsonEntryPath(getTransactionLogDir(tableSnapshot.getTableLocation()), version); + TrinoInputFile inputFile = fileSystem.newInputFile(transactionLogPath); + + Stream 
transactionLogEntries; + try { + transactionLogEntries = getEntriesFromJson(version, inputFile, TRANSACTION_LOG_MAX_CACHED_SIZE).map(entries -> entries.getEntries(fileSystem)) + // transaction log does not exist. Might have been expired. + .orElseGet(Stream::of); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + + Optional commitInfoEntry = transactionLogEntries + .map(DeltaLakeTransactionLogEntry::getCommitInfo) + .filter(Objects::nonNull) + .findFirst(); + if (commitInfoEntry.isEmpty()) { + LOG.error(String.format("No commit info found for table at version %d", version)); + throw new IllegalStateException(format("No commit info found for table at version %d", version)); + } + return commitInfoEntry; + } + + @VisibleForTesting + static List extractClusteredColumns(Optional commitInfoEntry) + { + if (commitInfoEntry.isEmpty()) { + return ImmutableList.of(); + } + Operation operation = fromString(commitInfoEntry.get().operation()); + if (operation == RENAME_COLUMN) { + recordRenamedColumns(commitInfoEntry.get()); + } + if (operation == UNKNOW_OPERATION) { + LOG.warn(String.format("Unknown operation: %s", commitInfoEntry.get().operation())); + } + String clusteredKey = OPERATION_CLUSTEINFOKEY_MAP.get(operation); + String clusteredValue = commitInfoEntry.get().operationParameters().get(clusteredKey); + if (Strings.isNullOrEmpty(clusteredValue)) { + return ImmutableList.of(); + } + return getClusteredColumnList(clusteredKey, clusteredValue); + } + + @VisibleForTesting + static boolean shouldStopLookup(Operation operation, List clusteredColumns) + { + if (!clusteredColumns.isEmpty()) { + return true; + } + if (operation == OPTIMIZE) { + // this means this optimize could be triggered by Delete, Update, Merge, or Manually triggered Optimize + // the action is just merging small files, + // no clustering-info or partition-info is recorded + // also, no clustered columns changed + // so we should continue to look for the latest clustering info + 
return false; + } + return OPERATION_CLUSTEINFOKEY_MAP.containsKey(operation); + } + + @VisibleForTesting + static Operation getOperation(String operationStr) + { + if (Strings.isNullOrEmpty(operationStr)) { + throw new IllegalArgumentException("Operation parameter is empty"); + } + return fromString(operationStr); + } + + @VisibleForTesting + static void recordRenamedColumns(CommitInfoEntry commitInfoEntry) + { + String oldName = commitInfoEntry.operationParameters().get(RENAMED_OLD_COLUMN_KEY); + String newName = commitInfoEntry.operationParameters().get(RENAMED_NEW_COLUMN_KEY); + if (Strings.isNullOrEmpty(oldName) || Strings.isNullOrEmpty(newName)) { + throw new IllegalArgumentException("old or renamed columns are null or empty, should never happen"); + } + if (OLD_TO_NEW_RENAMED_COLUMNS.get().containsKey(newName)) { + String oldValue = OLD_TO_NEW_RENAMED_COLUMNS.get().get(newName); + OLD_TO_NEW_RENAMED_COLUMNS.get().remove(newName); + OLD_TO_NEW_RENAMED_COLUMNS.get().put(oldName, oldValue); + } + else { + OLD_TO_NEW_RENAMED_COLUMNS.get().put(oldName, newName); + } + } + + @VisibleForTesting + static List getClusteredColumnList(String clusteredKey, String clusteredValue) + { + List clusteredColumns; + if (clusteredKey.equals(CLUSTERING_PARAMETER_KEY)) { + try { + clusteredColumns = ImmutableList.copyOf(OBJECT_MAPPER.readValue(clusteredValue, new TypeReference>() {})); + } + catch (JsonProcessingException e) { + LOG.error("Failed to extract clustering columns from commitInfoEntry: %s", e); + return ImmutableList.of(); + } + } + else if (clusteredKey.equals(NEW_CLUSTERING_PARAMETER_KEY)) { + clusteredColumns = Arrays.stream(clusteredValue.split(",")) + .map(String::trim) + .collect(toImmutableList()); + } + else { + LOG.error("Unknown clustering key: %s", clusteredKey); + return ImmutableList.of(); + } + return clusteredColumns; + } + + @VisibleForTesting + static ThreadLocal> getOldToNewRenamedColumns() + { + return OLD_TO_NEW_RENAMED_COLUMNS; + } +} diff --git 
a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/clustering/Operation.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/clustering/Operation.java new file mode 100644 index 000000000000..570c5c9d0064 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/clustering/Operation.java @@ -0,0 +1,110 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.clustering; + +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.stream.Stream; + +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.util.Locale.ENGLISH; + +public enum Operation +{ + // refer to: https://github.com/delta-io/delta/blob/master/spark/src/main/scala/org/apache/spark/sql/delta/DeltaOperations.scala + ADD_COLUMNS("ADD COLUMNS"), + ADD_CONSTRAINT("ADD CONSTRAINT"), + ADD_DELETION_VECTOR_TOMBSTONES("Deletion Vector Tombstones"), + CHANGE_COLUMN("CHANGE COLUMN"), + CHANGE_COLUMNS("CHANGE COLUMNS"), + CLONE("CLONE"), + CLUSTER_BY("CLUSTER BY"), + COMPUTE_STATS("COMPUTE STATS"), + CONVERT("CONVERT"), + DELETE("DELETE"), + DOMAIN_METADATA_CLEANUP("DOMAIN METADATA CLEANUP"), + DROP_COLUMNS("DROP COLUMNS"), + DROP_CONSTRAINT("DROP CONSTRAINT"), + DROP_TABLE_FEATURE("DROP FEATURE"), + EMPTY_COMMIT("Empty Commit"), + MANUAL_UPDATE("Manual Update"), + MERGE("MERGE"), + OPTIMIZE("OPTIMIZE"), + RENAME_COLUMN("RENAME COLUMN"), + 
REORG("REORG"), + REORG_TABLE_UPGRADE_UNIFORM("REORG TABLE UPGRADE UNIFORM"), + REMOVE_COLUMN_MAPPING("REMOVE COLUMN MAPPING"), + REPLACE_COLUMNS("REPLACE COLUMNS"), + RESTORE("RESTORE"), + ROW_TRACKING_BACKFILL("ROW TRACKING BACKFILL"), + ROW_TRACKING_UNBACKFILL("ROW TRACKING UNBACKFILL"), + SET_TABLE_PROPERTIES("SET TBLPROPERTIES"), + STREAMING_UPDATE("STREAMING UPDATE"), + TRUNCATE("TRUNCATE"), + UNSET_TABLE_PROPERTIES("UNSET TBLPROPERTIES"), + UPDATE("UPDATE"), + UPDATE_COLUMN_METADATA("UPDATE COLUMN METADATA"), + UPDATE_SCHEMA("UPDATE SCHEMA"), + UPGRADE_PROTOCOL("UPGRADE PROTOCOL"), + VACUUM_END("VACUUM END"), + VACUUM_START("VACUUM START"), + WRITE("WRITE"), + + TEST_OPERATION("TEST"), + + CREATE_TABLE_KEYWORD("CREATE TABLE"), + REPLACE_TABLE_KEYWORD("REPLACE TABLE"), + + UNKNOW_OPERATION("UNKNOWN OPERATION"); + + private final String operationName; + + Operation(String operationName) + { + this.operationName = operationName; + } + + public String getOperationName() + { + return operationName; + } + + @Override + public String toString() + { + return operationName; + } + + private static final Map LOWERCASE_NAME_TO_OPERATION = Stream.of(values()) + .collect(toImmutableMap(op -> op.operationName.toLowerCase(ENGLISH), op -> op)); + + private static final Map LOWERCASE_KEYWORD_TO_OPERATION = ImmutableMap.of( + CREATE_TABLE_KEYWORD.getOperationName().toLowerCase(ENGLISH), CREATE_TABLE_KEYWORD, + REPLACE_TABLE_KEYWORD.getOperationName().toLowerCase(ENGLISH), REPLACE_TABLE_KEYWORD); + + public static Operation fromString(String operationName) + { + Operation operation = LOWERCASE_NAME_TO_OPERATION.get(operationName.toLowerCase(ENGLISH)); + if (operation == null) { + for (String keyword : LOWERCASE_KEYWORD_TO_OPERATION.keySet()) { + if (operationName.toLowerCase(ENGLISH).contains(keyword)) { + operation = LOWERCASE_NAME_TO_OPERATION.get(keyword); + break; + } + } + } + return operation == null ? 
UNKNOW_OPERATION : operation; + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTableFeatures.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTableFeatures.java index 04fc6cb87223..26b6b8d556cd 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTableFeatures.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeTableFeatures.java @@ -27,6 +27,7 @@ public final class DeltaLakeTableFeatures public static final String APPEND_ONLY_FEATURE_NAME = "appendOnly"; public static final String CHANGE_DATA_FEED_FEATURE_NAME = "changeDataFeed"; public static final String CHECK_CONSTRAINTS_FEATURE_NAME = "checkConstraints"; + public static final String CLUSTERED_TABLES_FEATURE_NAME = "clustering"; public static final String COLUMN_MAPPING_FEATURE_NAME = "columnMapping"; public static final String DELETION_VECTORS_FEATURE_NAME = "deletionVectors"; public static final String ICEBERG_COMPATIBILITY_V1_FEATURE_NAME = "icebergCompatV1"; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java index b26e0e263486..ef730c472573 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java @@ -83,6 +83,7 @@ public class TableSnapshot private Optional cachedMetadata = Optional.empty(); private Optional cachedProtocol = Optional.empty(); + private Optional> cachedClusteredColumns = Optional.empty(); private TableSnapshot( SchemaTableName table, @@ -189,6 +190,11 @@ public Optional getCachedProtocol() return cachedProtocol; } + public Optional> getCachedClusteredColumns() + { + return 
cachedClusteredColumns; + } + public String getTableLocation() { return tableLocation; @@ -204,6 +210,11 @@ public void setCachedProtocol(Optional cachedProtocol) this.cachedProtocol = cachedProtocol; } + public void setCachedClusteredColumns(Optional> cachedClusteredColumns) + { + this.cachedClusteredColumns = cachedClusteredColumns; + } + public List getJsonTransactionLogEntries(TrinoFileSystem fileSystem) { return logTail.getFileEntries(fileSystem); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java index f9f40dab4cc7..6791188d80bc 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java @@ -91,6 +91,7 @@ import static io.airlift.slice.SizeOf.instanceSize; import static io.trino.cache.CacheUtils.invalidateAllIf; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; +import static io.trino.plugin.deltalake.clustering.ClusteringMetadataUtil.getLatestClusteredColumns; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.readLastCheckpoint; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogJsonEntryPath; @@ -579,6 +580,15 @@ public ProtocolEntry getProtocolEntry(ConnectorSession session, TrinoFileSystem .orElseThrow(() -> new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Protocol entry not found in transaction log for table " + tableSnapshot.getTable())); } + public Optional> getClusteredColumns(TrinoFileSystem fileSystem, TableSnapshot tableSnapshot) + { + if (tableSnapshot.getCachedClusteredColumns().isEmpty()) { + Optional> 
clusteredColumns = getLatestClusteredColumns(fileSystem, tableSnapshot); + tableSnapshot.setCachedClusteredColumns(clusteredColumns); + } + return tableSnapshot.getCachedClusteredColumns(); + } + private Optional getCheckpointEntry( ConnectorSession session, TableSnapshot tableSnapshot, diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 47eb42d76fb0..17d38e200e21 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -156,6 +156,9 @@ public class TestDeltaLakeBasic new ResourceTable("type_widening_nested", "databricks153/type_widening_nested"), new ResourceTable("in_commit_timestamp_history_read", "deltalake/in_commit_timestamp_history_read")); + private static final List CLUSTERED_TABLES = ImmutableList.of( + new ResourceTable("clustered_table_1", "deltalake/liquid_clustering")); + // The col-{uuid} pattern for delta.columnMapping.physicalName private static final Pattern PHYSICAL_COLUMN_NAME_PATTERN = Pattern.compile("^col-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"); @@ -185,7 +188,7 @@ protected QueryRunner createQueryRunner() @BeforeAll public void registerTables() { - for (ResourceTable table : Iterables.concat(PERSON_TABLES, OTHER_TABLES)) { + for (ResourceTable table : Iterables.concat(PERSON_TABLES, OTHER_TABLES, CLUSTERED_TABLES)) { String dataPath = getResourceLocation(table.resourcePath()).toExternalForm(); getQueryRunner().execute( format("CALL system.register_table(CURRENT_SCHEMA, '%s', '%s')", table.tableName(), dataPath)); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java index 
4a5c55667164..5879eca91e7c 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java @@ -75,7 +75,8 @@ public void testDefaults() .setDeletionVectorsEnabled(false) .setDeltaLogFileSystemCacheDisabled(false) .setMetadataParallelism(8) - .setCheckpointProcessingParallelism(4)); + .setCheckpointProcessingParallelism(4) + .setEnableClusteringInfo(false)); } @Test @@ -118,6 +119,7 @@ public void testExplicitPropertyMappings() .put("delta.fs.cache.disable-transaction-log-caching", "true") .put("delta.metadata.parallelism", "10") .put("delta.checkpoint-processing.parallelism", "8") + .put("delta.enable-clustering-info", "true") .buildOrThrow(); DeltaLakeConfig expected = new DeltaLakeConfig() @@ -156,7 +158,8 @@ public void testExplicitPropertyMappings() .setDeletionVectorsEnabled(true) .setDeltaLogFileSystemCacheDisabled(true) .setMetadataParallelism(10) - .setCheckpointProcessingParallelism(8); + .setCheckpointProcessingParallelism(8) + .setEnableClusteringInfo(true); assertFullMapping(properties, expected); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java index e96e7afc6087..b2a8051d6f23 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java @@ -493,6 +493,7 @@ private static DeltaLakeTableHandle createDeltaLakeTableHandle(Set operationParameters; + + public CommitInfoEntryForTestBuilder withOperation(String operation) + { + this.operation = operation; + return this; + } + + public CommitInfoEntryForTestBuilder withOperationParameters(Map operationParameters) + { + this.operationParameters = operationParameters; + return this; + } + + public 
CommitInfoEntry build() + { + requireNonNull(operation, "operation is null"); + requireNonNull(operationParameters, "operationParameters is null"); + long timestamp = Instant.now().toEpochMilli(); + return new CommitInfoEntry( + 0, + OptionalLong.of(timestamp), + timestamp, + "user1", + "user1", + operation, + ImmutableMap.copyOf(operationParameters), + null, + null, + "cluster_1", + 0, + "SnapshotIsolation", + Optional.of(true), + ImmutableMap.of()); + } + } + + @Test + void testGetLatestClusteredColumnsWithAutoOptimize() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).isEmpty(); + } + + @Test + void testGetLatestClusteredColumnsWithCloneTable() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).isEmpty(); + } + + @Test + void testGetLatestClusteredColumnsWithCreateOrReplaceTable() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + 
assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("col_1", "col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithCreateTable() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("col_1", "col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithCreateTableAsSelect() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("col_1", "col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithManuallyOptimize() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("manually_optimize_col_1", "manually_optimize_col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithMerge() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge"; + 
TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("merge_col_1", "merge_col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithClusteredOperation() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("new_col_1", "new_col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithRenameColumn() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("col_rename_latest_time_1", "col_before_rename_2"); + } + + @Test + void testGetLatestClusteredColumnsWithRestoreToTimestamp() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + 
assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("commit_1_col_1", "commit_1_col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithRestoreToVersion() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("commit_2_col_1", "commit_2_col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithRestoreToVersionTimestampRecursive() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("commit_0_col_1", "commit_0_col_2"); + } + + @Test + void testGetLatestClusteredColumnsWithShallowCloneTable() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).isEmpty(); + } + + @Test + void testGetLatestClusteredColumnsWithWrite() + throws IOException + { + String tableLocation = 
"databricks172/liquid_clustering_with_operations/liquid_clustering_with_write"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).containsExactly("commit_0_col_1", "commit_0_col_2"); + } + + @Test + void testGetLatestClusteredColumnsOtherNoClusterOperations() + throws IOException + { + String tableLocation = "databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations"; + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation(tableLocation)) + .build(); + + Optional> result = ClusteringMetadataUtil.getLatestClusteredColumns(FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + assertThat(result.get()).isEmpty(); + } + + @Test + void testGetCurrentVersionWhenNotRestore() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_write")) + .build(); + long version = getCurrentVersion(FILE_SYSTEM, tableSnapshot, 0); + + assertThat(version).isEqualTo(0); + assertThat(tableSnapshot.getVersion()).isEqualTo(0); + } + + @Test + void testGetCurrentVersionWhenRestoreToVersion() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version")) + .build(); + long version = getCurrentVersion(FILE_SYSTEM, tableSnapshot, 4); + + assertThat(version).isEqualTo(2); + assertThat(tableSnapshot.getVersion()).isNotEqualTo(2); + } + + @Test + void testGetCurrentVersionWhenRestoreToTimestamp() + throws IOException + 
{ + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp")) + .build(); + long version = getCurrentVersion(FILE_SYSTEM, tableSnapshot, 4); + + assertThat(version).isEqualTo(1); + assertThat(tableSnapshot.getVersion()).isNotEqualTo(1); + } + + @Test + void testGetCurrentVersionException() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit")) + .build(); + + assertThatThrownBy(() -> getCurrentVersion(FILE_SYSTEM, tableSnapshot, 0)) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("No commit info found for table at version 0"); + } + + @Test + void testGetRestoreVersionWithVersion() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version")) + .build(); + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RESTORE") + .withOperationParameters(ImmutableMap.of("version", "0")) + .build(); + + long version = getRestoreVersion(commitInfoEntry, FILE_SYSTEM, tableSnapshot); + + assertThat(version).isEqualTo(0); + assertThat(tableSnapshot.getVersion()).isNotEqualTo(0); + } + + @Test + void testGetRestoreVersionWithTimestamp() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp")) + .build(); + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RESTORE") + .withOperationParameters(ImmutableMap.of("timestamp", "2025-10-17 
16:45:00.0")) + .build(); + + long version = getRestoreVersion(commitInfoEntry, FILE_SYSTEM, tableSnapshot); + + assertThat(version).isEqualTo(1); + assertThat(tableSnapshot.getVersion()).isNotEqualTo(1); + } + + @Test + void testGetRestoreVersionNoRestoreOperation() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp")) + .build(); + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("WRITE") + .withOperationParameters(ImmutableMap.of()) + .build(); + + assertThatThrownBy(() -> getRestoreVersion(commitInfoEntry, FILE_SYSTEM, tableSnapshot)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("is not of RESTORE operation"); + } + + @Test + void testGetRestoreVersionNoVersionAndTimestamp() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp")) + .build(); + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RESTORE") + .withOperationParameters(ImmutableMap.of()) + .build(); + + assertThatThrownBy(() -> getRestoreVersion(commitInfoEntry, FILE_SYSTEM, tableSnapshot)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Both restored version and timestamp are null or empty, should never happen"); + } + + @Test + void testExtractCommitInfoWithCommitInfo() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_write")) + .build(); + + Optional result = extractCommitInfo(0L, FILE_SYSTEM, tableSnapshot); + + assertThat(result).isPresent(); + 
assertThat(result.get().operation()).isEqualTo("WRITE"); + } + + @Test + void testExtractCommitInfoWithoutCommitInfo() + throws IOException + { + TableSnapshot tableSnapshot = new TableSnapShotBuilderForTestBuilder() + .withTableLocation(getResourceLocation("databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit")) + .build(); + + assertThatThrownBy(() -> extractCommitInfo(0L, FILE_SYSTEM, tableSnapshot)) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("No commit info found for table at version"); + } + + @Test + void testExtractClusteredColumnsValidClusteredValue() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("WRITE") + .withOperationParameters(ImmutableMap.of("clusterBy", "[\"col1\",\"col2\"]")) + .build(); + + Optional commitInfo = Optional.of(commitInfoEntry); + List result = ClusteringMetadataUtil.extractClusteredColumns(commitInfo); + + assertThat(result).containsExactly("col1", "col2"); + } + + @Test + void testExtractClusteredColumnsEmptyClusteredValue() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("WRITE") + .withOperationParameters(ImmutableMap.of("clusterBy", "[]")) + .build(); + + Optional commitInfo = Optional.of(commitInfoEntry); + List result = ClusteringMetadataUtil.extractClusteredColumns(commitInfo); + + assertThat(result).isEmpty(); + } + + @Test + void testExtractClusteredColumnsInValidClusteredValue() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("WRITE") + .withOperationParameters(ImmutableMap.of("clusterBy", "[col1,col2]")) + .build(); + + Optional commitInfo = Optional.of(commitInfoEntry); + List result = ClusteringMetadataUtil.extractClusteredColumns(commitInfo); + + assertThat(result).isEmpty(); + } + + @Test + void testExtractClusteredColumnsWithNoClusterInfo() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + 
.withOperation("TRUNCATE") + .withOperationParameters(ImmutableMap.of()) + .build(); + + Optional commitInfo = Optional.of(commitInfoEntry); + List result = ClusteringMetadataUtil.extractClusteredColumns(commitInfo); + + assertThat(result).isEmpty(); + } + + @Test + void testExtractClusteredColumnsWithUnknowOperation() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("WHATEVER") + .withOperationParameters(ImmutableMap.of()) + .build(); + + Optional commitInfo = Optional.of(commitInfoEntry); + List result = ClusteringMetadataUtil.extractClusteredColumns(commitInfo); + + assertThat(result).isEmpty(); + } + + @Test + void testExtractClusteredColumnsWithRenameOperation() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RENAME COLUMN") + .withOperationParameters(ImmutableMap.of("oldColumnPath", "old_col", "newColumnPath", "new_col")) + .build(); + + Optional commitInfo = Optional.of(commitInfoEntry); + List result = ClusteringMetadataUtil.extractClusteredColumns(commitInfo); + + assertThat(getOldToNewRenamedColumns().get()).isNotEmpty(); + assertThat(getOldToNewRenamedColumns().get().get("old_col")).isEqualTo("new_col"); + + getOldToNewRenamedColumns().remove(); + assertThat(result).isEmpty(); + } + + @Test + void testShouldStopLookup() + { + assertThat(shouldStopLookup(OPTIMIZE, ImmutableList.of("col1"))).isTrue(); + assertThat(shouldStopLookup(OPTIMIZE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(WRITE, ImmutableList.of("col1"))).isTrue(); + assertThat(shouldStopLookup(WRITE, ImmutableList.of())).isTrue(); + assertThat(shouldStopLookup(MERGE, ImmutableList.of("col1"))).isTrue(); + assertThat(shouldStopLookup(MERGE, ImmutableList.of())).isTrue(); + assertThat(shouldStopLookup(CREATE_TABLE_KEYWORD, ImmutableList.of("col1"))).isTrue(); + assertThat(shouldStopLookup(CREATE_TABLE_KEYWORD, ImmutableList.of())).isTrue(); + 
assertThat(shouldStopLookup(REPLACE_TABLE_KEYWORD, ImmutableList.of("col1"))).isTrue(); + assertThat(shouldStopLookup(REPLACE_TABLE_KEYWORD, ImmutableList.of())).isTrue(); + assertThat(shouldStopLookup(CLUSTER_BY, ImmutableList.of("col1"))).isTrue(); + assertThat(shouldStopLookup(CLUSTER_BY, ImmutableList.of())).isTrue(); + assertThat(shouldStopLookup(RENAME_COLUMN, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(RENAME_COLUMN, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(ADD_COLUMNS, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(ADD_CONSTRAINT, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(ADD_DELETION_VECTOR_TOMBSTONES, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(CHANGE_COLUMN, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(CHANGE_COLUMNS, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(CLONE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(COMPUTE_STATS, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(CONVERT, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(DELETE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(DOMAIN_METADATA_CLEANUP, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(DROP_COLUMNS, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(DROP_CONSTRAINT, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(DROP_TABLE_FEATURE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(EMPTY_COMMIT, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(MANUAL_UPDATE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(REORG, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(REORG_TABLE_UPGRADE_UNIFORM, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(REMOVE_COLUMN_MAPPING, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(REPLACE_COLUMNS, ImmutableList.of())).isFalse(); + 
assertThat(shouldStopLookup(RESTORE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(ROW_TRACKING_BACKFILL, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(ROW_TRACKING_UNBACKFILL, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(SET_TABLE_PROPERTIES, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(STREAMING_UPDATE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(TRUNCATE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(UNSET_TABLE_PROPERTIES, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(UPDATE, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(UPDATE_COLUMN_METADATA, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(UPDATE_SCHEMA, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(UPGRADE_PROTOCOL, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(VACUUM_END, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(VACUUM_START, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(TEST_OPERATION, ImmutableList.of())).isFalse(); + assertThat(shouldStopLookup(UNKNOW_OPERATION, ImmutableList.of())).isFalse(); + } + + @Test + void testGetOperation() + { + for (Operation op : Operation.values()) { + Operation resolved = getOperation(op.getOperationName()); + assertThat(op).isEqualTo(resolved); + } + assertThat(UNKNOW_OPERATION).isEqualTo(getOperation("NON_EXISTENT_OPERATION")); + assertThatThrownBy(() -> getOperation("")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Operation parameter is empty"); + } + + @Test + void testRecordRenamedColumnsNormalPut() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RENAME COLUMN") + .withOperationParameters(ImmutableMap.of("oldColumnPath", "old_col", "newColumnPath", "new_col")) + .build(); + + recordRenamedColumns(commitInfoEntry); + + assertThat(getOldToNewRenamedColumns().get()).isNotEmpty(); + 
assertThat(getOldToNewRenamedColumns().get().size()).isEqualTo(1); + assertThat(getOldToNewRenamedColumns().get().get("old_col")).isEqualTo("new_col"); + + getOldToNewRenamedColumns().remove(); + } + + @Test + void testRecordRenamedColumnsPutExisted() + { + getOldToNewRenamedColumns().get().put("new_col", "new_col_2"); + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RENAME COLUMN") + .withOperationParameters(ImmutableMap.of("oldColumnPath", "old_col", "newColumnPath", "new_col")) + .build(); + + recordRenamedColumns(commitInfoEntry); + + assertThat(getOldToNewRenamedColumns().get()).isNotEmpty(); + assertThat(getOldToNewRenamedColumns().get().size()).isEqualTo(1); + assertThat(getOldToNewRenamedColumns().get().get("old_col")).isEqualTo("new_col_2"); + + getOldToNewRenamedColumns().remove(); + } + + @Test + void testRecordRenamedColumnsMissingOldColumn() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RENAME COLUMN") + .withOperationParameters(ImmutableMap.of("oldColumnPath", "", "newColumnPath", "new_col")) + .build(); + + assertThatThrownBy(() -> recordRenamedColumns(commitInfoEntry)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("old or renamed columns are null or empty, should never happen"); + + getOldToNewRenamedColumns().remove(); + } + + @Test + void testRecordRenamedColumnsMissingNewColumn() + { + CommitInfoEntry commitInfoEntry = new CommitInfoEntryForTestBuilder() + .withOperation("RENAME COLUMN") + .withOperationParameters(ImmutableMap.of("oldColumnPath", "old_col", "newColumnPath", "")) + .build(); + + assertThatThrownBy(() -> recordRenamedColumns(commitInfoEntry)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("old or renamed columns are null or empty, should never happen"); + + getOldToNewRenamedColumns().remove(); + } + + @Test + void testGetClusteredColumnListWithClusterBy() + { + String clusteredKey = 
"clusterBy"; + String clusteredValue = "[\"col1\", \"col2\"]"; + + List result = getClusteredColumnList(clusteredKey, clusteredValue); + + assertThat(result).containsExactly("col1", "col2"); + } + + @Test + void testGetClusteredColumnListWithNewClusteringColumns() + { + String clusteredKey = "newClusteringColumns"; + String clusteredValue = "col1,col2"; + + List result = getClusteredColumnList(clusteredKey, clusteredValue); + + assertThat(result).isNotEmpty(); + assertThat(result).containsExactly("col1", "col2"); + } + + @Test + void testGetClusteredColumnListWithInvalidJson() + { + String clusteredKey = "clusterBy"; + String clusteredValue = "col1,col2"; + + List result = getClusteredColumnList(clusteredKey, clusteredValue); + + assertThat(result).isEmpty(); + } + + @Test + void testGetClusteredColumnListWithUnknown() + { + String clusteredKey = "unknown"; + String clusteredValue = "col1,col2"; + + List result = getClusteredColumnList(clusteredKey, clusteredValue); + + assertThat(result).isEmpty(); + } + + private String getResourceLocation(String resourcePath) + { + return requireNonNull(getClass().getClassLoader().getResource(resourcePath)).getPath(); + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/clustering/TestOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/clustering/TestOperations.java new file mode 100644 index 000000000000..088539974e3b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/clustering/TestOperations.java @@ -0,0 +1,69 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.clustering; + +import org.junit.jupiter.api.Test; + +import static io.trino.plugin.deltalake.clustering.Operation.CREATE_TABLE_KEYWORD; +import static io.trino.plugin.deltalake.clustering.Operation.DELETE; +import static io.trino.plugin.deltalake.clustering.Operation.REPLACE_TABLE_KEYWORD; +import static io.trino.plugin.deltalake.clustering.Operation.UNKNOW_OPERATION; +import static io.trino.plugin.deltalake.clustering.Operation.UPDATE_SCHEMA; +import static io.trino.plugin.deltalake.clustering.Operation.fromString; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +final class TestOperations +{ + @Test + void testGetOperationNameAndToStringConsistency() + { + for (Operation op : Operation.values()) { + assertThat(op.getOperationName()).isEqualTo(op.toString()); + } + } + + @Test + void testFromStringExactMatch() + { + assertThat(fromString("DELETE")).isEqualTo(DELETE); + assertThat(fromString("delete")).isEqualTo(DELETE); + assertThat(fromString("UPDATE SCHEMA")).isEqualTo(UPDATE_SCHEMA); + } + + @Test + void testFromStringKeywordMatch() + { + assertThat(fromString("CREATE TABLE")).isEqualTo(CREATE_TABLE_KEYWORD); + assertThat(fromString("CREATE table AS SELECT")).isEqualTo(CREATE_TABLE_KEYWORD); + assertThat(fromString("CREATE OR REPLACE TABLE AS SELECT")).isEqualTo(REPLACE_TABLE_KEYWORD); + assertThat(fromString("CREATE OR replace TABLE")).isEqualTo(REPLACE_TABLE_KEYWORD); + } + + @Test + void testFromStringUnknownOperation() + { + 
assertThat(fromString("NON_EXISTENT_OPERATION")).isEqualTo(UNKNOW_OPERATION); + assertThat(fromString("")).isEqualTo(UNKNOW_OPERATION); + assertThat(fromString("SOMETHING RANDOM")).isEqualTo(UNKNOW_OPERATION); + } + + @Test + void testAllEnumValuesResolvableByFromString() + { + for (Operation op : Operation.values()) { + Operation resolved = Operation.fromString(op.getOperationName()); + assertThat(resolved).isNotNull(); + } + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/statistics/TestDeltaLakeFileBasedTableStatisticsProvider.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/statistics/TestDeltaLakeFileBasedTableStatisticsProvider.java index 5727ce4a4724..4a74e2cac7da 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/statistics/TestDeltaLakeFileBasedTableStatisticsProvider.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/statistics/TestDeltaLakeFileBasedTableStatisticsProvider.java @@ -132,6 +132,7 @@ private DeltaLakeTableHandle registerTable(String tableName, String directoryNam tableLocation, metadataEntry, new ProtocolEntry(1, 2, Optional.empty(), Optional.empty()), + Optional.empty(), TupleDomain.all(), TupleDomain.all(), false, @@ -262,6 +263,7 @@ public void testStatisticsMultipleFiles() tableHandle.getLocation(), tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), + tableHandle.getClusteredColumns(), TupleDomain.all(), TupleDomain.withColumnDomains(ImmutableMap.of((DeltaLakeColumnHandle) COLUMN_HANDLE, Domain.singleValue(DOUBLE, 42.0))), tableHandle.isMerge(), @@ -286,6 +288,7 @@ public void testStatisticsNoRecords() tableHandle.getLocation(), tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), + tableHandle.getClusteredColumns(), TupleDomain.none(), TupleDomain.all(), tableHandle.isMerge(), @@ -300,6 +303,7 @@ public void testStatisticsNoRecords() tableHandle.getLocation(), tableHandle.getMetadataEntry(), 
tableHandle.getProtocolEntry(), + tableHandle.getClusteredColumns(), TupleDomain.all(), TupleDomain.none(), tableHandle.isMerge(), diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/README.md new file mode 100644 index 000000000000..116d4b22af24 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/README.md @@ -0,0 +1,10 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..6344b4c62db9 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ 
+{"commitInfo":{"timestamp":1760712838006,"userId":"user1","userName":"user1","operation":"OPTIMIZE","operationParameters":{"clusterBy":"[]","zOrderBy":"[]","batchId":"0","predicate":"[]","auto":true},"notebook":{"notebookId":"xxxxxx"},"clusterId":"cluster","readVersion":9,"isolationLevel":"SnapshotIsolation","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"2","numRemovedBytes":"5398","p25FileSize":"3647","numDeletionVectorsRemoved":"1","minFileSize":"3647","p75FileSize":"3647","p50FileSize":"3647","numAddedBytes":"3647","numAddedFiles":"1","maxFileSize":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize/_delta_log/README.md new file mode 100644 index 000000000000..5f2e8efab5f1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_auto_optimize/_delta_log/README.md @@ -0,0 +1,29 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +"MERGE" will trigger AUTO OPTIMIZE + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? 
+ +CREATE OR REPLACE TEMP VIEW source_table AS +SELECT + 1 AS col_1, + 2 AS col_2; + +MERGE INTO test_retreve_clustered_fields AS target +USING source_table AS source +ON target.col_1 = source.col_1 +WHEN MATCHED THEN +UPDATE SET + target.col_1 = source.col_1, + target.col_2 = source.col_2 +WHEN NOT MATCHED THEN +INSERT (col_1, col_2) +VALUES (source.col_1, source.col_2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..88375e761d8e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760730565541,"userId":"user1","userName":"user1","operation":"CLONE","operationParameters":{"source":"source_table","sourceVersion":16,"isShallow":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":-1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"removedFilesSize":"0","numRemovedFiles":"0","sourceTableSize":"3776","numCopiedFiles":"1","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","copiedFilesSize":"3776","sourceNumOfFiles":"1"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table/_delta_log/README.md new file mode 100644 index 000000000000..4648c3e11c7d --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_clone_table/_delta_log/README.md @@ -0,0 +1,14 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? + +CREATE TABLE test_retreve_clustered_fields_cloned +DEEP CLONE test_retreve_clustered_fields; +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..be39e1d56065 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760729899778,"userId":"user1","userName":"user1","operation":"CLUSTER BY","operationParameters":{"oldClusteringColumns":"old_col_1,old_col_2","newClusteringColumns":"new_col_1,new_col_2"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"SnapshotIsolation","isBlindAppend":true,"operationMetrics":{},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by/_delta_log/README.md 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by/_delta_log/README.md new file mode 100644 index 000000000000..4db48ea08b28 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_cluster_by/_delta_log/README.md @@ -0,0 +1,14 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +LOCATION ? + +%sql +ALTER TABLE test_retreve_clustered_fields +CLUSTER BY (col_1, col_2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..f938617f96f1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760728115712,"userId":"user1","userName":"user1","operation":"CREATE OR REPLACE TABLE AS 
SELECT","operationParameters":{"partitionBy":"[]","clusterBy":"[\"col_1\",\"col_2\"]","description":null,"isManaged":"true","properties":"{\"delta.enableDeletionVectors\":\"true\",\"delta.enableRowTracking\":\"true\",\"delta.checkpointPolicy\":\"v2\",\"delta.rowTracking.materializedRowCommitVersionColumnName\":\"_row-commit-version-col-2da8df4d-28b2-4900-8e78-fb03cd535c38\",\"delta.rowTracking.materializedRowIdColumnName\":\"_row-id-col-b542c3ab-cea8-49c2-90f5-2d4f680be2a3\"}","statsOnLoad":false,"clusteringOnWriteStatus":"Reason for skipping: Estimated ingestion size is not within the expected range"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numRemovedFiles":"0","numRemovedBytes":"0","numDeletionVectorsRemoved":"0","numOutputRows":"12","numOutputBytes":"1813"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table/_delta_log/README.md new file mode 100644 index 000000000000..27001a02b7d5 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_or_replace_table/_delta_log/README.md @@ -0,0 +1,17 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? 
+ +CREATE OR REPLACE TABLE test_retreve_clustered_fields_copy +USING delta +CLUSTER BY (col_1, col_2) +AS +select * from test_retreve_clustered_fields +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..41baeb67db30 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760728115712,"userId":"user1","userName":"user1","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"col_1\",\"col_2\"]","description":null,"isManaged":"true","properties":"{\"delta.enableDeletionVectors\":\"true\",\"delta.enableRowTracking\":\"true\",\"delta.checkpointPolicy\":\"v2\",\"delta.rowTracking.materializedRowCommitVersionColumnName\":\"_row-commit-version-col-2da8df4d-28b2-4900-8e78-fb03cd535c38\",\"delta.rowTracking.materializedRowIdColumnName\":\"_row-id-col-b542c3ab-cea8-49c2-90f5-2d4f680be2a3\"}","statsOnLoad":false,"clusteringOnWriteStatus":"Reason for skipping: Estimated ingestion size is not within the expected range"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numRemovedFiles":"0","numRemovedBytes":"0","numDeletionVectorsRemoved":"0","numOutputRows":"12","numOutputBytes":"1813"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table/_delta_log/README.md new file mode 100644 index 000000000000..632c522925e3 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table/_delta_log/README.md @@ -0,0 +1,11 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..04ac1c1c2e7f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760728115712,"userId":"user1","userName":"user1","operation":"CREATE TABLE AS 
SELECT","operationParameters":{"partitionBy":"[]","clusterBy":"[\"col_1\",\"col_2\"]","description":null,"isManaged":"true","properties":"{\"delta.enableDeletionVectors\":\"true\",\"delta.enableRowTracking\":\"true\",\"delta.checkpointPolicy\":\"v2\",\"delta.rowTracking.materializedRowCommitVersionColumnName\":\"_row-commit-version-col-2da8df4d-28b2-4900-8e78-fb03cd535c38\",\"delta.rowTracking.materializedRowIdColumnName\":\"_row-id-col-b542c3ab-cea8-49c2-90f5-2d4f680be2a3\"}","statsOnLoad":false,"clusteringOnWriteStatus":"Reason for skipping: Estimated ingestion size is not within the expected range"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numRemovedFiles":"0","numRemovedBytes":"0","numDeletionVectorsRemoved":"0","numOutputRows":"12","numOutputBytes":"1813"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select/_delta_log/README.md new file mode 100644 index 000000000000..77ce3f4757fe --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_create_table_as_select/_delta_log/README.md @@ -0,0 +1,17 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? 
+ +CREATE TABLE test_retreve_clustered_fields_copy +USING delta +CLUSTER BY (col_1, col_2) +AS +select * from test_retreve_clustered_fields +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..514efe0974b0 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760727577629,"userId":"user1","userName":"user1","operation":"OPTIMIZE","operationParameters":{"clusterBy":"[\"manually_optimize_col_1\",\"manually_optimize_col_2\"]","batchId":"-1","predicate":"[]","auto":false,"isFull":false,"zOrderBy":"[]"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"SnapshotIsolation","isBlindAppend":true,"operationMetrics":{},"tags":{"optimizeCommandId":"xxxxxx","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize/_delta_log/README.md new file mode 100644 index 000000000000..b4eaa390b962 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_manually_optimize/_delta_log/README.md @@ -0,0 +1,13 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving 
clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? + +OPTIMIZE test_retreve_clustered_fields +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..ecd8293405ed --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760727407410,"userId":"user1","userName":"user1","operation":"MERGE","operationParameters":{"predicate":"[\"(new_id3#12160L = cast(new_id3#12150 as 
bigint))\"]","clusterBy":"[\"merge_col_1\",\"merge_col_2\"]","matchedPredicates":"[{\"actionType\":\"update\"}]","statsOnLoad":false,"notMatchedBySourcePredicates":"[]","notMatchedPredicates":"[{\"actionType\":\"insert\"}]","clusteringOnWriteStatus":null},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":12,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numTargetRowsCopied":"0","numTargetRowsDeleted":"0","numTargetBytesRemoved":"0","numTargetDeletionVectorsAdded":"0","numTargetRowsMatchedUpdated":"0","numTargetRowsMatchedDeleted":"0","numTargetRowsUpdated":"0","numTargetChangeFilesAdded":"0","numTargetRowsNotMatchedBySourceDeleted":"0","rewriteTimeMs":"772","numTargetFilesAdded":"3","numTargetBytesAdded":"7584","executionTimeMs":"1775","materializeSourceTimeMs":"213","numTargetRowsInserted":"3","numTargetDeletionVectorsUpdated":"0","scanTimeMs":"683","numOutputRows":"3","numTargetDeletionVectorsRemoved":"0","numTargetRowsNotMatchedBySourceUpdated":"0","numSourceRows":"3","numTargetFilesRemoved":"0"},"tags":{"noRowsCopied":"true","delta.rowTracking.preserved":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge/_delta_log/README.md new file mode 100644 index 000000000000..8b87d21f2434 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_merge/_delta_log/README.md @@ -0,0 +1,27 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. 
+Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retreve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? + +CREATE OR REPLACE TEMP VIEW source_table AS +SELECT + 1 AS col_1, + 2 AS col_2; + +MERGE INTO test_retreve_clustered_fields AS target +USING source_table AS source +ON target.col_1 = source.col_1 +WHEN MATCHED THEN +UPDATE SET + target.col_1 = source.col_1, + target.col_2 = source.col_2 +WHEN NOT MATCHED THEN +INSERT (col_1, col_2) +VALUES (source.col_1, source.col_2); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..9633ceefa5e6 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"domainMetadata":{"domain":"delta.rowTracking","configuration":"{\"rowIdHighWaterMark\":44,\"domainName\":\"delta.rowTracking\"}","removed":false}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit/_delta_log/README.md new file mode 100644 index 000000000000..47208c6d4ce5 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_no_commit/_delta_log/README.md @@ -0,0 +1,10 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered 
information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +This transaction log intentionally contains no commitInfo entry at all. + +```sql +-- test no commit info + +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..00b1edb3c6c3 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760563216742,"userId":"user1","userName":"user1","operation":"ADD COLUMNS","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2702"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..44ac5a1c25ce --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000001.json @@
-0,0 +1 @@ +{"commitInfo":{"timestamp":1760648312029,"userId":"user1","userName":"user1","operation":"ADD CONSTRAINT","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2696"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..87a43cbdb85a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000002.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760663272286,"userId":"user1","userName":"user1","operation":"Deletion Vector Tombstones","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":2,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"0","removedFilesSize":"2708","numRemovedFiles":"1","restoredFilesSize":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"2702"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000003.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..bf72715277a7 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000003.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760673149750,"userId":"user1","userName":"user1","operation":"CHANGE COLUMN","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":3,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000004.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000004.json new file mode 100644 index 000000000000..d72a92c1a365 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000004.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760683272286,"userId":"user1","userName":"user1","operation":"CHANGE 
COLUMNS","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":4,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"0","removedFilesSize":"2708","numRemovedFiles":"1","restoredFilesSize":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"2702"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000005.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000005.json new file mode 100644 index 000000000000..350248a5854e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000005.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760694272286,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":5,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000006.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000006.json new file mode 100644 index 000000000000..2ebff17864af --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000006.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"COMPUTE STATS","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000007.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000007.json new file mode 100644 index 000000000000..10c78f13beab --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000007.json @@ -0,0 +1 @@ 
+{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"CONVERT","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000008.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000008.json new file mode 100644 index 000000000000..77fca6735ec3 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000008.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"DELETE","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000009.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000009.json new file mode 100644 index 000000000000..625af63905b8 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000009.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"DOMAIN METADATA CLEANUP","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000010.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000010.json new file mode 100644 index 000000000000..625af63905b8 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000010.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"DOMAIN 
METADATA CLEANUP","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000011.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000011.json new file mode 100644 index 000000000000..f9a8c1e183b2 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000011.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"DROP COLUMNS","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000012.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000012.json new file mode 100644 index 000000000000..f81282228681 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000012.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"DROP CONSTRAINT","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000013.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000013.json new file mode 100644 index 000000000000..0b0f9734f3ea --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000013.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"DROP 
FEATURE","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000014.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000014.json new file mode 100644 index 000000000000..d2ab4cb532ce --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000014.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"Empty Commit","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000015.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000015.json new file mode 100644 index 000000000000..b42122d0bd9c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000015.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"Manual Update","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000016.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000016.json new file mode 100644 index 000000000000..2e239e4444c8 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000016.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"RENAME 
COLUMN","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000017.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000017.json new file mode 100644 index 000000000000..e0459094be16 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000017.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"REORG","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000018.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000018.json new file mode 100644 index 000000000000..50a2b213b4c5 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000018.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"REORG TABLE UPGRADE UNIFORM","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000019.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000019.json new file mode 100644 index 000000000000..8daec2ed6b5f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000019.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"REMOVE COLUMN 
MAPPING","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000020.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000020.json new file mode 100644 index 000000000000..1cb091e3080b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000020.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"REPLACE COLUMNS","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000021.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000021.json new file mode 100644 index 000000000000..549638d6dc17 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000021.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"RESTORE","operationParameters":{"version":2,"timestamp":null},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000022.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000022.json new file mode 100644 index 000000000000..ee6a4d544040 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000022.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"ROW TRACKING 
BACKFILL","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000023.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000023.json new file mode 100644 index 000000000000..e10b6af71ff2 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000023.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"ROW TRACKING UNBACKFILL","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000024.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000024.json new file mode 100644 index 000000000000..73b6ad48197f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000024.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"SET TBLPROPERTIES","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000025.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000025.json new file mode 100644 index 000000000000..a7fa30efce0e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000025.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"STREAMING 
UPDATE","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000026.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000026.json new file mode 100644 index 000000000000..9ff821b69cd6 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000026.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"TRUNCATE","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000027.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000027.json new file mode 100644 index 000000000000..3fe5be1dec4c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000027.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"UNSET TBLPROPERTIES","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000028.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000028.json new file mode 100644 index 000000000000..7f43a8e74799 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000028.json @@ -0,0 +1 @@ 
+{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"UPDATE","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000029.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000029.json new file mode 100644 index 000000000000..5fd49d6a010e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000029.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"UPDATE COLUMN METADATA","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000030.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000030.json new file mode 100644 index 000000000000..2bb95dc001e6 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000030.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"UPDATE SCHEMA","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000031.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000031.json new file mode 100644 index 000000000000..67f96c399ffb --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000031.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"UPGRADE 
PROTOCOL","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000032.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000032.json new file mode 100644 index 000000000000..2503f1839766 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000032.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"VACUUM END","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000033.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000033.json new file mode 100644 index 000000000000..e8ec37f3cfd5 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000033.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"VACUUM START","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000034.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000034.json new file mode 100644 index 000000000000..b9b7e3f41556 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000034.json @@ -0,0 +1 @@ 
+{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"TEST","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000035.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000035.json new file mode 100644 index 000000000000..e040d2c0f4f0 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/00000000000000000035.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"WHAT EVER","operationParameters":{},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/README.md new file mode 100644 index 000000000000..632c522925e3 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_other_no_cluster_operations/_delta_log/README.md @@ -0,0 +1,11 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retrieve_clustered_fields +(col_1 int, col_2 int) +USING delta +CLUSTER BY (col_1,col_2) +LOCATION ? +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..96b12c2c37b8 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760561058135,"userId":"user1","userName":"user1","operation":"CREATE
TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"col_before_rename_1\",\"col_before_rename_2\"]","description":null,"isManaged":"true","properties":"{\"delta.enableDeletionVectors\":\"true\",\"delta.enableRowTracking\":\"true\",\"delta.checkpointPolicy\":\"v2\",\"delta.columnMapping.mode\":\"id\",\"delta.rowTracking.materializedRowIdColumnName\":\"_row-id-col-de4b670f-d201-43c6-8504-6e718d77e162\",\"delta.checkpointInterval\":\"2\",\"delta.columnMapping.maxColumnId\":\"5\",\"delta.rowTracking.materializedRowCommitVersionColumnName\":\"_row-commit-version-col-bba66f41-818a-43a5-b47c-67c8000b5907\"}","statsOnLoad":false},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..069a707f1418 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760641580664,"userId":"user1","userName":"user1","operation":"RENAME 
COLUMN","operationParameters":{"oldColumnPath":"col_before_rename_1","newColumnPath":"col_rename_first_time_1"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..121f15e06339 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000002.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760641580664,"userId":"user1","userName":"user1","operation":"RENAME COLUMN","operationParameters":{"oldColumnPath":"col_rename_first_time_1","newColumnPath":"col_rename_second_time_1"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..1e25f2ff9bb0 --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/00000000000000000003.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760641580664,"userId":"user1","userName":"user1","operation":"RENAME COLUMN","operationParameters":{"oldColumnPath":"col_rename_second_time_1","newColumnPath":"col_rename_latest_time_1"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/README.md new file mode 100644 index 000000000000..65a5776dda0c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_renamed_clustered_column/_delta_log/README.md @@ -0,0 +1,17 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. + +```sql +CREATE TABLE test_retrieve_clustered_fields +(col_before_rename_1 int, col_before_rename_2 int) +USING delta +CLUSTER BY (col_before_rename_1,col_before_rename_2) +LOCATION ? 
+ +ALTER TABLE test_retrieve_clustered_fields RENAME COLUMN col_before_rename_1 TO col_rename_first_time_1; + +ALTER TABLE test_retrieve_clustered_fields RENAME COLUMN col_rename_first_time_1 TO col_rename_second_time_1; + +ALTER TABLE test_retrieve_clustered_fields RENAME COLUMN col_rename_second_time_1 TO col_rename_latest_time_1; +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..25b3c506c3e1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760563216742,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_0_col_1\",\"commit_0_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2702"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000001.json new file
mode 100644 index 000000000000..dc8958b9d820 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760648312029,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_1_col_1\",\"commit_1_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2696"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..3271a080970f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000002.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760733149743,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_2_col_1\",\"commit_2_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering 
triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":17,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..678881d8dba2 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000003.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760733149750,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_3_col_1\",\"commit_3_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":17,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000004.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000004.json new file mode 100644 index 000000000000..581991804590 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/00000000000000000004.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"RESTORE","operationParameters":{"version":null,"timestamp":"2025-10-17 16:45:00.0"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":22,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/README.md new file mode 100644 index 000000000000..d805f9cec613 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_timestamp/_delta_log/README.md @@ -0,0 +1,17 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. 
+ +```sql +CREATE TABLE test_retrieve_clustered_fields +(commit_0_col_1 int, commit_0_col_2 int) +USING delta +CLUSTER BY (commit_0_col_1,commit_0_col_2) +LOCATION ?; + +INSERT INTO test_retrieve_clustered_fields VALUES (1,2); +INSERT INTO test_retrieve_clustered_fields VALUES (3,4); +INSERT INTO test_retrieve_clustered_fields VALUES (5,6); + +RESTORE TABLE test_retrieve_clustered_fields TO TIMESTAMP AS OF '2025-10-17 16:45:00'; +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..25b3c506c3e1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760563216742,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_0_col_1\",\"commit_0_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2702"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000001.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..dc8958b9d820 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760648312029,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_1_col_1\",\"commit_1_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2696"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..3271a080970f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000002.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760733149743,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_2_col_1\",\"commit_2_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering 
triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":17,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..678881d8dba2 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000003.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760733149750,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_3_col_1\",\"commit_3_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":17,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000004.json 
b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000004.json new file mode 100644 index 000000000000..f6ff9052f18e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/00000000000000000004.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760733272286,"userId":"user1","userName":"user1","operation":"RESTORE","operationParameters":{"version":2,"timestamp":null},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":18,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"0","removedFilesSize":"2708","numRemovedFiles":"1","restoredFilesSize":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"2702"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/README.md new file mode 100644 index 000000000000..a30aaf74e6e5 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_to_version/_delta_log/README.md @@ -0,0 +1,18 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. 
+ +```sql +CREATE TABLE test_retrieve_clustered_fields +(commit_0_col_1 int, commit_0_col_2 int) +USING delta +CLUSTER BY (commit_0_col_1,commit_0_col_2) +LOCATION ?; + +INSERT INTO test_retrieve_clustered_fields VALUES (1,2); +INSERT INTO test_retrieve_clustered_fields VALUES (3,4); +INSERT INTO test_retrieve_clustered_fields VALUES (5,6); +INSERT INTO test_retrieve_clustered_fields VALUES (7,8); + +RESTORE TABLE test_retrieve_clustered_fields TO VERSION AS OF 2; +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..25b3c506c3e1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760563216742,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_0_col_1\",\"commit_0_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2702"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..13ae66a6ec9a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760648312029,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_1_col_1\",\"commit_1_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2696"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..8dae6bbab66a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000002.json @@ -0,0 +1 @@ 
+{"commitInfo":{"timestamp":1760663272286,"userId":"user1","userName":"user1","operation":"RESTORE","operationParameters":{"version":0,"timestamp":null},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":2,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"0","removedFilesSize":"2708","numRemovedFiles":"1","restoredFilesSize":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"2702"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..aefa60d67bbf --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000003.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760673149750,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_3_col_1\",\"commit_3_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":3,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000004.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000004.json new file mode 100644 index 000000000000..c573508137da --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000004.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760683272286,"userId":"user1","userName":"user1","operation":"RESTORE","operationParameters":{"version":2,"timestamp":null},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":4,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"0","removedFilesSize":"2708","numRemovedFiles":"1","restoredFilesSize":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"2702"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000005.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000005.json new file mode 100644 index 000000000000..d5e218fd444f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000005.json @@ -0,0 +1 @@ 
+{"commitInfo":{"timestamp":1760694272286,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_5_col_1\",\"commit_6_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":5,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2708"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000006.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000006.json new file mode 100644 index 000000000000..5c1cf7ee61ac --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/00000000000000000006.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760746694603,"userId":"user1","userName":"user1","operation":"RESTORE","operationParameters":{"version":null,"timestamp":"2025-10-17 
09:30:32.0"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":6,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRestoredFiles":"1","removedFilesSize":"5399","numRemovedFiles":"2","restoredFilesSize":"3647","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numOfFilesAfterRestore":"1","tableSizeAfterRestore":"3647"},"tags":{"delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/README.md new file mode 100644 index 000000000000..d132792da024 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_restore_version_timestamp_recursive/_delta_log/README.md @@ -0,0 +1,19 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. 
+
+```sql
+CREATE TABLE test_retrieve_clustered_fields
+(commit_0_col_1 int, commit_0_col_2 int)
+USING delta
+CLUSTER BY (commit_0_col_1,commit_0_col_2)
+LOCATION ?;
+
+INSERT INTO test_retrieve_clustered_fields VALUES (1,2);
+INSERT INTO test_retrieve_clustered_fields VALUES (3,4);
+RESTORE TABLE test_retrieve_clustered_fields TO VERSION AS OF 0;
+INSERT INTO test_retrieve_clustered_fields VALUES (5,6);
+RESTORE TABLE test_retrieve_clustered_fields TO VERSION AS OF 2;
+INSERT INTO test_retrieve_clustered_fields VALUES (7,8);
+RESTORE TABLE test_retrieve_clustered_fields TO TIMESTAMP AS OF '2025-10-17 16:45:00';
+``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..1c2937ccee7d --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760730902334,"userId":"user1","userName":"user1","operation":"CLONE","operationParameters":{"source":"source_table","sourceVersion":16,"isShallow":true},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":-1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"removedFilesSize":"0","numRemovedFiles":"0","sourceTableSize":"3776","numCopiedFiles":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","copiedFilesSize":"0","sourceNumOfFiles":"1"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table/_delta_log/README.md new file mode 100644 index 000000000000..a71d79245c25 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_shallow_clone_table/_delta_log/README.md @@ -0,0 +1,14 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. +
+```sql
+CREATE TABLE source_table
+(commit_0_col_1 int, commit_0_col_2 int)
+USING delta
+CLUSTER BY (commit_0_col_1,commit_0_col_2)
+LOCATION ?;
+
+%sql
+CREATE TABLE cloned_table SHALLOW CLONE source_table;
+``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_write/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_write/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..25b3c506c3e1 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_write/_delta_log/00000000000000000000.json @@ -0,0 +1 @@ +{"commitInfo":{"timestamp":1760563216742,"userId":"user1","userName":"user1","operation":"WRITE","operationParameters":{"partitionBy":"[]","clusterBy":"[\"commit_0_col_1\",\"commit_0_col_2\"]","statsOnLoad":false,"mode":"Append","clusteringOnWriteStatus":"late-stage clustering 
triggered"},"notebook":{"notebookId":"xxxxxx"},"clusterId":"xxxxxx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"2702"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false","delta.rowTracking.preserved":"true"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"xxxxxx"}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_write/_delta_log/README.md b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_write/_delta_log/README.md new file mode 100644 index 000000000000..9e8045eedc78 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks172/liquid_clustering_with_operations/liquid_clustering_with_write/_delta_log/README.md @@ -0,0 +1,13 @@ +Data generated using Databricks 17.2 +Only the transaction log is included here, to test retrieving clustered information across all operation types. +Keep only the commit info for the operations to be tested, and set the version starting from 0. +
+```sql
+CREATE TABLE test_retrieve_clustered_fields
+(commit_0_col_1 int, commit_0_col_2 int)
+USING delta
+CLUSTER BY (commit_0_col_1,commit_0_col_2)
+LOCATION ?;
+
+INSERT INTO test_retrieve_clustered_fields VALUES (1,2);
+```