diff --git a/core/src/main/java/org/apache/iceberg/PartitionsTable.java b/core/src/main/java/org/apache/iceberg/PartitionsTable.java index 09c6e7893b7e..ce92ca90b90e 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionsTable.java +++ b/core/src/main/java/org/apache/iceberg/PartitionsTable.java @@ -87,10 +87,15 @@ public class PartitionsTable extends BaseMetadataTable { Types.TimestampType.withZone(), "Commit time of snapshot that last updated this partition"), Types.NestedField.optional( - 10, - "last_updated_snapshot_id", - Types.LongType.get(), - "Id of snapshot that last updated this partition")); + 10, + "last_updated_snapshot_id", + Types.LongType.get(), + "Id of snapshot that last updated this partition"), + Types.NestedField.required( + 12, + "total_delete_file_size_in_bytes", + Types.LongType.get(), + "Total size in bytes of delete files")); this.unpartitionedTable = Partitioning.partitionType(table).fields().isEmpty(); } @@ -111,7 +116,8 @@ public Schema schema() { "equality_delete_record_count", "equality_delete_file_count", "last_updated_at", - "last_updated_snapshot_id"); + "last_updated_snapshot_id", + "total_delete_file_size_in_bytes"); } return schema; } @@ -163,7 +169,9 @@ private static StaticDataTask.Row convertPartition(Partition partition) { partition.eqDeleteRecordCount, partition.eqDeleteFileCount, partition.lastUpdatedAt, - partition.lastUpdatedSnapshotId); + partition.lastUpdatedSnapshotId, + partition.deleteFileSizeInBytes); + } private static Iterable partitions(Table table, StaticTableScan scan) { @@ -281,6 +289,7 @@ static class Partition { private int eqDeleteFileCount; private Long lastUpdatedAt; private Long lastUpdatedSnapshotId; + private long deleteFileSizeInBytes; Partition(StructLike key, Types.StructType keyType) { this.partitionData = toPartitionData(key, keyType); @@ -314,10 +323,12 @@ void update(ContentFile file, Snapshot snapshot) { case POSITION_DELETES: this.posDeleteRecordCount += file.recordCount(); this.posDeleteFileCount += 1; + this.deleteFileSizeInBytes += file.fileSizeInBytes(); break; case EQUALITY_DELETES: this.eqDeleteRecordCount += file.recordCount(); this.eqDeleteFileCount += 1; + this.deleteFileSizeInBytes += file.fileSizeInBytes(); break; default: throw new UnsupportedOperationException( diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java b/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java index 4d2971c7f12a..f0aafd3d1221 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java @@ -1195,6 +1195,52 @@ public void testPartitionsTableScanWithPlanExecutor() { .isGreaterThan(0); } + @TestTemplate + public void testPartitionsTableScanWithDeleteFileSize() throws java.io.IOException { + assumeThat(formatVersion).as("Position deletes are not supported by V1 Tables").isNotEqualTo(1); + preparePartitionedTable(); + + Table partitionsTable = new PartitionsTable(table); + TableScan scan = partitionsTable.newScan().select("partition.data_bucket", "total_delete_file_size_in_bytes"); + + List records = Lists.newArrayList(); + try (CloseableIterable tasks = scan.planFiles()) { + for (FileScanTask task : tasks) { + // PartitionsTable is a StaticTable, so the task is a StaticDataTask + StaticDataTask dataTask = (StaticDataTask) task.asDataTask(); + try (CloseableIterable rows = dataTask.rows()) { + rows.forEach(records::add); + } + } + } + + // The returned records are StructLike, and we need to extract fields by position + // based on the projection. + // Projection is: "partition.data_bucket", "total_delete_file_size_in_bytes" + // Resulting schema has two top-level fields. + // Field 0: partition (struct with one field, data_bucket) + // Field 1: total_delete_file_size_in_bytes (long) + + records.sort(Comparator.comparing(r -> r.get(0, StructLike.class).get(0, Integer.class))); + + assertThat(records).hasSize(4); + + // The preparePartitionedTable() method adds one delete file to each of the 4 partitions. + // Each of those delete files (fileADeletes, fileBDeletes, FILE_C2_DELETES, FILE_D2_DELETES) + // is created with a size of 10 bytes in the test base class. + long expectedDeleteFileSize = 10L; + + for (int i = 0; i < 4; i++) { + StructLike record = records.get(i); + StructLike partition = record.get(0, StructLike.class); + Integer partitionValue = partition.get(0, Integer.class); + Long deleteFileSize = record.get(1, Long.class); + + assertThat(partitionValue).isEqualTo(i); + assertThat(deleteFileSize).isEqualTo(expectedDeleteFileSize); + } + } + @TestTemplate public void testAllManifestsTableSnapshotGt() { // Snapshots 1,2,3,4