Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions extensions/iceberg/s3/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@ dependencies {
// but we need to be able to compile against it to implement AwsClientFactory
compileOnly libs.awssdk.dynamodb

// We don't want to explicitly pull in dependencies for KMS (there doesn't seem to be anything in Iceberg that
// actually calls it?), but we need to be able to compile against it to implement AwsClientFactory
compileOnly libs.awssdk.kms
// Needed for AwsClientFactory
implementation libs.awssdk.kms
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed for new KMS related properties added in apache/iceberg#13136


implementation libs.guava

Expand All @@ -64,9 +63,6 @@ dependencies {
testImplementation TestTools.projectDependency(project, 'extensions-s3')
testImplementation TestTools.projectDependency(project, 'extensions-iceberg')

// TODO (DH-19508) : Remove this dependency when https://github.com/apache/iceberg/issues/13133 fix is released
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apache/iceberg#13134 has been released.

runtimeOnly libs.analyticsaccelerator.s3

testRuntimeOnly project(':test-configs')
testRuntimeOnly project(':log-to-slf4j')
testRuntimeOnly libs.slf4j.simple
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ public static Table partitionStats(org.apache.iceberg.Table table, Snapshot snap
.add("PositionDeleteFileCount", int.class, PartitionStats::positionDeleteFileCount)
.add("EqualityDeleteRecordCount", long.class, PartitionStats::equalityDeleteRecordCount)
.add("EqualityDeleteFileCount", int.class, PartitionStats::equalityDeleteFileCount)
.add("TotalRecordCount", long.class, PartitionStats::totalRecordCount)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.add("TotalRecordCount", long.class, PartitionStats::totalRecords)
.add("LastUpdatedAt", Instant.class, Explore::lastUpdatedAt)
.add("LastUpdatedSnapshotId", long.class, PartitionStats::lastUpdatedSnapshotId)
.view(partitionStats);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,26 +263,15 @@ private void validate(
// https://iceberg.apache.org/spec/#partitioning
// The source columns, selected by ids, must be a primitive type and cannot be contained in a map or list,
// but may be nested in a struct.
for (NestedField nestedField : fieldPath) {
// https://github.com/apache/iceberg/issues/12870
if (nestedField.type().isListType()) {
throw new MappingException("Partition fields may not be contained in a list");
}
if (nestedField.type().isMapType()) {
throw new MappingException("Partition fields may not be contained in a map");
}
}
{
// org.apache.iceberg.PartitionSpec.checkCompatibility should typically catch this case, but in certain
// cases (for example, a Catalog / metadata error), we can check for this ourselves.
final NestedField field = fieldPath.get(fieldPath.size() - 1);
if (!field.type().isPrimitiveType()) {
throw new MappingException(
String.format("Cannot partition by non-primitive source field: %s", field.type()));
}
final org.apache.iceberg.types.Type.PrimitiveType inputType = field.type().asPrimitiveType();
IcebergPartitionedLayout.validateSupported(partitionField.transform(), inputType, type);
// org.apache.iceberg.PartitionSpec.checkCompatibility should typically catch this case, but in certain
// cases (for example, a Catalog / metadata error), we can check for this ourselves.
final NestedField field = fieldPath.get(fieldPath.size() - 1);
if (!field.type().isPrimitiveType()) {
throw new MappingException(
String.format("Cannot partition by non-primitive source field: %s", field.type()));
}
final org.apache.iceberg.types.Type.PrimitiveType inputType = field.type().asPrimitiveType();
IcebergPartitionedLayout.validateSupported(partitionField.transform(), inputType, type);
}
checkCompatible(fieldPath, type);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,53 +533,6 @@ void partitionFieldAgainstNonPrimitiveType() {
}
}

@Test
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apache/iceberg#12887 has been released.

void partitionFieldContainedWithinList() {
final Schema schema = new Schema(NestedField.optional(1, "S1", Types.ListType.ofOptional(2, IT)));
// https://iceberg.apache.org/spec/#partitioning
// > The source columns ... cannot be contained in a list
// Iceberg does *not* currently guard against this (may open up a PR later for this)
final PartitionSpec spec = PartitionSpec.builderFor(schema).identity("S1.element").build();
try {
Resolver.builder()
.schema(schema)
.spec(spec)
.definition(TableDefinition.of(
ColumnDefinition.ofInt("S1").withPartitioning()))
.putColumnInstructions("S1", partitionField(spec.fields().get(0).fieldId()))
.build();
failBecauseExceptionWasNotThrown(Resolver.MappingException.class);
} catch (Resolver.MappingException e) {
assertThat(e).hasMessageContaining("Unable to map Deephaven column S1");
assertThat(e).cause().hasMessageContaining("Partition fields may not be contained in a list");
}
}

@Test
void partitionFieldContainedWithinMap() {
final Schema schema = new Schema(NestedField.optional(1, "S1", Types.MapType.ofOptional(2, 3, IT, IT)));
// https://iceberg.apache.org/spec/#partitioning
// > The source columns ... cannot be contained in a map
// Iceberg does *not* currently guard against this (may open up a PR later for this)
for (final PartitionSpec spec : List.of(
PartitionSpec.builderFor(schema).identity("S1.key").build(),
PartitionSpec.builderFor(schema).identity("S1.value").build())) {
try {
Resolver.builder()
.schema(schema)
.spec(spec)
.definition(TableDefinition.of(
ColumnDefinition.ofInt("S1").withPartitioning()))
.putColumnInstructions("S1", partitionField(spec.fields().get(0).fieldId()))
.build();
failBecauseExceptionWasNotThrown(Resolver.MappingException.class);
} catch (Resolver.MappingException e) {
assertThat(e).hasMessageContaining("Unable to map Deephaven column S1");
assertThat(e).cause().hasMessageContaining("Partition fields may not be contained in a map");
}
}
}

@Test
void listType() {
final Schema schema = new Schema(NestedField.optional(1, "MyListField", Types.ListType.ofOptional(2, IT)));
Expand Down
6 changes: 1 addition & 5 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ autoservice = "1.1.1"
avro = "1.12.0"
awssdk = "2.29.52"
aws-s3-tables-catalog-for-iceberg = "0.1.6"
analyticsaccelerator = "1.1.0"
# See dependency matrix for particular gRPC versions at https://github.com/grpc/grpc-java/blob/master/SECURITY.md#netty
boringssl = "2.0.61.Final"
calcite = "1.39.0"
Expand Down Expand Up @@ -42,7 +41,7 @@ gwt = "2.12.2"
gwtJetty = "9.4.44.v20210927"
hadoop = "3.4.1"
hdrhistogram = "2.2.2"
iceberg = "1.9.2"
iceberg = "1.10.0"
immutables = "2.10.1"
jackson = "2.19.2"
jakarta-servlet = "6.0.0"
Expand Down Expand Up @@ -126,9 +125,6 @@ awssdk-sso = { module = "software.amazon.awssdk:sso" }
awssdk-ssooidc = { module = "software.amazon.awssdk:ssooidc" }
awssdk-netty-nio = { module = "software.amazon.awssdk:netty-nio-client" }

# TODO (DH-19508) : Remove this dependency when https://github.com/apache/iceberg/issues/13133 fix is released
analyticsaccelerator-s3 = { module = "software.amazon.s3.analyticsaccelerator:analyticsaccelerator-s3", version.ref = "analyticsaccelerator" }
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apache/iceberg#13134 has been released.


s3-tables-catalog-for-iceberg = { module = "software.amazon.s3tables:s3-tables-catalog-for-iceberg", version.ref = "aws-s3-tables-catalog-for-iceberg" }

boringssl = { module = "io.netty:netty-tcnative-boringssl-static", version.ref = "boringssl" }
Expand Down