Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/src/main/sphinx/connector/delta-lake.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ values. Typical usage does not require you to configure them.
- Number of threads used for retrieving checkpoint files of each table. Currently, only
retrievals of V2 Checkpoint's sidecar files are parallelized.
- `4`
* - `delta.enable-clustering-info`
- Controls whether clustered column information is retrieved.
The equivalent catalog session property is `enable_clustering_info`.
- `false`
:::

### Catalog session properties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public class DeltaLakeConfig
private boolean deltaLogFileSystemCacheDisabled;
private int metadataParallelism = 8;
private int checkpointProcessingParallelism = 4;
private boolean enableClusteringInfo;

public Duration getMetadataCacheTtl()
{
Expand Down Expand Up @@ -364,6 +365,19 @@ public DeltaLakeConfig setCompressionCodec(HiveCompressionOption compressionCode
return this;
}

public boolean isEnableClusteringInfo()
{
    // Getter backing the "delta.enable-clustering-info" config property.
    return this.enableClusteringInfo;
}

@Config("delta.enable-clustering-info")
@ConfigDescription("Whether to show clustered columns in table metadata")
public DeltaLakeConfig setEnableClusteringInfo(boolean enableClusteringInfo)
{
    // Fluent setter, consistent with the other config setters in this class.
    this.enableClusteringInfo = enableClusteringInfo;
    return this;
}

@Min(1)
public long getPerTransactionMetastoreCacheMaximumSize()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@
import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_TABLE;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getHiveCatalogName;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isCollectExtendedStatisticsColumnStatisticsOnWrite;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isEnableClusteringInfo;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isProjectionPushdownEnabled;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isQueryPartitionFilterRequired;
Expand Down Expand Up @@ -294,6 +295,7 @@
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.verifySupportedColumnMapping;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeTableFeatures.CLUSTERED_TABLES_FEATURE_NAME;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeTableFeatures.unsupportedReaderFeatures;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeTableFeatures.unsupportedWriterFeatures;
import static io.trino.plugin.deltalake.transactionlog.MetadataEntry.DELTA_CHANGE_DATA_FEED_ENABLED_PROPERTY;
Expand Down Expand Up @@ -445,7 +447,7 @@ public class DeltaLakeMetadata

private static final String CHECK_CONSTRAINT_CONVERT_FAIL_EXPRESSION = "CAST(fail('Failed to convert Delta check constraints to Trino expression') AS boolean)";

private static final int TEMPORAL_TIME_TRAVEL_LINEAR_SEARCH_MAX_SIZE = 1000;
public static final int TEMPORAL_TIME_TRAVEL_LINEAR_SEARCH_MAX_SIZE = 1000;

private final DeltaLakeMetastore metastore;
private final TransactionLogAccess transactionLogAccess;
Expand Down Expand Up @@ -744,6 +746,12 @@ public LocatedTableHandle getTableHandle(
LOG.debug("Skip %s because the reader version is unsupported: %d", tableName, protocolEntry.minReaderVersion());
return null;
}

Optional<List<String>> clusteredColumns = Optional.empty();
if (isEnableClusteringInfo(session) && protocolEntry.writerFeaturesContains(CLUSTERED_TABLES_FEATURE_NAME)) {
clusteredColumns = transactionLogAccess.getClusteredColumns(fileSystem, tableSnapshot);
}

Set<String> unsupportedReaderFeatures = unsupportedReaderFeatures(protocolEntry.readerFeatures().orElse(ImmutableSet.of()));
if (!unsupportedReaderFeatures.isEmpty()) {
LOG.debug("Skip %s because the table contains unsupported reader features: %s", tableName, unsupportedReaderFeatures);
Expand All @@ -762,6 +770,7 @@ public LocatedTableHandle getTableHandle(
tableLocation,
metadataEntry,
protocolEntry,
clusteredColumns,
TupleDomain.all(),
TupleDomain.all(),
false,
Expand Down Expand Up @@ -3571,6 +3580,7 @@ else if (!partitionColumns.contains(column)) {
tableHandle.getLocation(),
tableHandle.getMetadataEntry(),
tableHandle.getProtocolEntry(),
tableHandle.getClusteredColumns(),
// Do not simplify the enforced constraint, the connector is guaranteeing the constraint will be applied as is.
// The unenforced constraint will still be checked by the engine.
tableHandle.getEnforcedPartitionConstraint()
Expand Down Expand Up @@ -3869,6 +3879,7 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession
handle.getLocation(),
metadata,
handle.getProtocolEntry(),
handle.getClusteredColumns(),
TupleDomain.all(),
TupleDomain.all(),
false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public final class DeltaLakeSessionProperties
private static final String TARGET_MAX_FILE_SIZE = "target_max_file_size";
private static final String IDLE_WRITER_MIN_FILE_SIZE = "idle_writer_min_file_size";
private static final String COMPRESSION_CODEC = "compression_codec";
private static final String ENABLE_CLUSTERING_INFO = "enable_clustering_info";
// This property is not supported by Delta Lake and exists solely for technical reasons.
@Deprecated
private static final String TIMESTAMP_PRECISION = "timestamp_precision";
Expand Down Expand Up @@ -216,6 +217,11 @@ public DeltaLakeSessionProperties(
}
},
false),
booleanProperty(
ENABLE_CLUSTERING_INFO,
"If show clustered columns in table metadata",
deltaLakeConfig.isEnableClusteringInfo(),
false),
booleanProperty(
PROJECTION_PUSHDOWN_ENABLED,
"Read only required fields from a row type",
Expand Down Expand Up @@ -334,6 +340,11 @@ public static HiveCompressionOption getCompressionCodec(ConnectorSession session
return session.getProperty(COMPRESSION_CODEC, HiveCompressionOption.class);
}

/**
 * Returns the value of the {@code enable_clustering_info} session property for the given session.
 */
public static boolean isEnableClusteringInfo(ConnectorSession session)
{
    Boolean enabled = session.getProperty(ENABLE_CLUSTERING_INFO, Boolean.class);
    return enabled;
}

public static boolean isProjectionPushdownEnabled(ConnectorSession session)
{
return session.getProperty(PROJECTION_PUSHDOWN_ENABLED, Boolean.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.predicate.TupleDomain;

import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
Expand All @@ -39,6 +40,7 @@ public class DeltaLakeTableHandle
private final String location;
private final MetadataEntry metadataEntry;
private final ProtocolEntry protocolEntry;
private final Optional<List<String>> clusteredColumns;
private final TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint;
private final TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint;
private final boolean merge;
Expand All @@ -65,6 +67,7 @@ public DeltaLakeTableHandle(
@JsonProperty("location") String location,
@JsonProperty("metadataEntry") MetadataEntry metadataEntry,
@JsonProperty("protocolEntry") ProtocolEntry protocolEntry,
@JsonProperty("clusteredColumns") Optional<List<String>> clusteredColumns,
@JsonProperty("enforcedPartitionConstraint") TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint,
@JsonProperty("nonPartitionConstraint") TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint,
@JsonProperty("merge") boolean merge,
Expand All @@ -80,6 +83,7 @@ public DeltaLakeTableHandle(
location,
metadataEntry,
protocolEntry,
clusteredColumns,
enforcedPartitionConstraint,
nonPartitionConstraint,
ImmutableSet.of(),
Expand All @@ -100,6 +104,7 @@ public DeltaLakeTableHandle(
String location,
MetadataEntry metadataEntry,
ProtocolEntry protocolEntry,
Optional<List<String>> clusteredColumns,
TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint,
TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint,
Set<DeltaLakeColumnHandle> constraintColumns,
Expand All @@ -118,6 +123,7 @@ public DeltaLakeTableHandle(
this.location = requireNonNull(location, "location is null");
this.metadataEntry = requireNonNull(metadataEntry, "metadataEntry is null");
this.protocolEntry = requireNonNull(protocolEntry, "protocolEntry is null");
this.clusteredColumns = requireNonNull(clusteredColumns, "clusteredColumns is null");
this.enforcedPartitionConstraint = requireNonNull(enforcedPartitionConstraint, "enforcedPartitionConstraint is null");
this.nonPartitionConstraint = requireNonNull(nonPartitionConstraint, "nonPartitionConstraint is null");
this.merge = merge;
Expand All @@ -140,6 +146,7 @@ public DeltaLakeTableHandle withProjectedColumns(Set<DeltaLakeColumnHandle> proj
location,
metadataEntry,
protocolEntry,
clusteredColumns,
enforcedPartitionConstraint,
nonPartitionConstraint,
constraintColumns,
Expand All @@ -162,6 +169,7 @@ public DeltaLakeTableHandle forOptimize(boolean recordScannedFiles, DataSize max
location,
metadataEntry,
protocolEntry,
clusteredColumns,
enforcedPartitionConstraint,
nonPartitionConstraint,
constraintColumns,
Expand All @@ -184,6 +192,7 @@ public DeltaLakeTableHandle forMerge()
location,
metadataEntry,
protocolEntry,
clusteredColumns,
enforcedPartitionConstraint,
nonPartitionConstraint,
constraintColumns,
Expand Down Expand Up @@ -262,6 +271,12 @@ public ProtocolEntry getProtocolEntry()
return protocolEntry;
}

@JsonProperty
public Optional<List<String>> getClusteredColumns()
{
    // Clustered column names; empty unless populated when the table handle was created.
    return this.clusteredColumns;
}

@JsonProperty
public TupleDomain<DeltaLakeColumnHandle> getEnforcedPartitionConstraint()
{
Expand Down Expand Up @@ -353,6 +368,7 @@ public boolean equals(Object o)
Objects.equals(location, that.location) &&
Objects.equals(metadataEntry, that.metadataEntry) &&
Objects.equals(protocolEntry, that.protocolEntry) &&
Objects.equals(clusteredColumns, that.clusteredColumns) &&
Objects.equals(enforcedPartitionConstraint, that.enforcedPartitionConstraint) &&
Objects.equals(nonPartitionConstraint, that.nonPartitionConstraint) &&
merge == that.merge &&
Expand All @@ -374,6 +390,7 @@ public int hashCode()
location,
metadataEntry,
protocolEntry,
clusteredColumns,
enforcedPartitionConstraint,
nonPartitionConstraint,
merge,
Expand Down
Loading