From 35b94c99f4c0157a0e5b22a27990bd3fdfaf73fd Mon Sep 17 00:00:00 2001 From: Manish Malhotra Date: Sun, 14 Dec 2025 11:30:49 -0800 Subject: [PATCH 1/2] add total files size to partition --- .../iceberg/TestMetadataTableScans.java | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java b/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java index 4d2971c7f12a..92db82d44f49 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java @@ -1195,6 +1195,52 @@ public void testPartitionsTableScanWithPlanExecutor() { .isGreaterThan(0); } + @TestTemplate + public void testPartitionsTableScanWithDeleteFileSize() throws java.io.IOException { + assumeThat(formatVersion).as("Position deletes are not supported by V1 Tables").isNotEqualTo(1); + preparePartitionedTable(); + + Table partitionsTable = new PartitionsTable(table); + TableScan scan = partitionsTable.newScan().select("partition.data_bucket", "total_delete_file_size_in_bytes"); + + List records = Lists.newArrayList(); + try (CloseableIterable tasks = scan.planFiles()) { + for (FileScanTask task : tasks) { + // PartitionsTable is a StaticTable, so the task is a StaticDataTask + StaticDataTask dataTask = (StaticDataTask) task.asDataTask(); + try (CloseableIterable rows = dataTask.rows()) { + rows.forEach(records::add); + } + } + } + + // The returned records are StructLike, and we need to extract fields by position + // based on the projection. + // Projection is: "partition.data_bucket", "total_delete_file_size_in_bytes" + // Resulting schema has two top-level fields. + // Field 0: partition (struct with one field, data_bucket) + // Field 1: total_delete_file_size_in_bytes (long) + + records.sort(Comparator.comparing(r -> r.get(0, StructLike.class).get(0, Integer.class))); + + assertThat(records).hasSize(4); + + // The preparePartitionedTable() method adds one delete file to each of the 4 partitions. + // Each of those delete files (fileADeletes, fileBDeletes, FILE_C2_DELETES, FILE_D2_DELETES) + // is created with a size of 10 bytes in the test base class. + long expectedDeleteFileSize = 10L; + + for (int i = 0; i < 4; i++) { + StructLike record = records.get(i); + StructLike partition = record.get(0, StructLike.class); + Integer partitionValue = partition.get(0, Integer.class); + Long deleteFileSize = record.get(1, Long.class); + + assertThat(partitionValue).isEqualTo(i); + assertThat(deleteFileSize).isEqualTo(expectedDeleteFileSize); + } + } + @TestTemplate public void testAllManifestsTableSnapshotGt() { // Snapshots 1,2,3,4 From ef8dede57e11c47513cba859a9ee1eaf2882569d Mon Sep 17 00:00:00 2001 From: Manish Malhotra Date: Sun, 14 Dec 2025 11:49:19 -0800 Subject: [PATCH 2/2] add total files size to partition --- .../org/apache/iceberg/PartitionsTable.java | 23 ++++++++++++++----- .../iceberg/TestMetadataTableScans.java | 16 ++++++------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/PartitionsTable.java b/core/src/main/java/org/apache/iceberg/PartitionsTable.java index 09c6e7893b7e..ce92ca90b90e 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionsTable.java +++ b/core/src/main/java/org/apache/iceberg/PartitionsTable.java @@ -87,10 +87,15 @@ public class PartitionsTable extends BaseMetadataTable { Types.TimestampType.withZone(), "Commit time of snapshot that last updated this partition"), Types.NestedField.optional( - 10, - "last_updated_snapshot_id", - Types.LongType.get(), - "Id of snapshot that last updated this partition")); + 10, + "last_updated_snapshot_id", + Types.LongType.get(), + "Id of snapshot that last updated this partition"), + Types.NestedField.required( + 12, + "total_delete_file_size_in_bytes", + Types.LongType.get(), + "Total size in bytes of delete files")); this.unpartitionedTable = Partitioning.partitionType(table).fields().isEmpty(); } @@ -111,7 +116,8 @@ public Schema schema() { "equality_delete_record_count", "equality_delete_file_count", "last_updated_at", - "last_updated_snapshot_id"); + "last_updated_snapshot_id", + "total_delete_file_size_in_bytes"); } return schema; } @@ -163,7 +169,9 @@ private static StaticDataTask.Row convertPartition(Partition partition) { partition.eqDeleteRecordCount, partition.eqDeleteFileCount, partition.lastUpdatedAt, - partition.lastUpdatedSnapshotId); + partition.lastUpdatedSnapshotId, + partition.deleteFileSizeInBytes); + } private static Iterable partitions(Table table, StaticTableScan scan) { @@ -281,6 +289,7 @@ static class Partition { private int eqDeleteFileCount; private Long lastUpdatedAt; private Long lastUpdatedSnapshotId; + private long deleteFileSizeInBytes; Partition(StructLike key, Types.StructType keyType) { this.partitionData = toPartitionData(key, keyType); @@ -314,10 +323,12 @@ void update(ContentFile file, Snapshot snapshot) { case POSITION_DELETES: this.posDeleteRecordCount += file.recordCount(); this.posDeleteFileCount += 1; + this.deleteFileSizeInBytes += file.fileSizeInBytes(); break; case EQUALITY_DELETES: this.eqDeleteRecordCount += file.recordCount(); this.eqDeleteFileCount += 1; + this.deleteFileSizeInBytes += file.fileSizeInBytes(); break; default: throw new UnsupportedOperationException( diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java b/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java index 92db82d44f49..f0aafd3d1221 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java @@ -1205,13 +1205,13 @@ public void testPartitionsTableScanWithDeleteFileSize() throws java.io.IOExcepti List records = Lists.newArrayList(); try (CloseableIterable tasks = scan.planFiles()) { - for (FileScanTask task : tasks) { - // PartitionsTable is a StaticTable, so the task is a StaticDataTask - StaticDataTask dataTask = (StaticDataTask) task.asDataTask(); - try (CloseableIterable rows = dataTask.rows()) { - rows.forEach(records::add); - } + for (FileScanTask task : tasks) { + // PartitionsTable is a StaticTable, so the task is a StaticDataTask + StaticDataTask dataTask = (StaticDataTask) task.asDataTask(); + try (CloseableIterable rows = dataTask.rows()) { + rows.forEach(records::add); } + } } // The returned records are StructLike, and we need to extract fields by position @@ -1220,9 +1220,9 @@ public void testPartitionsTableScanWithDeleteFileSize() throws java.io.IOExcepti // Resulting schema has two top-level fields. // Field 0: partition (struct with one field, data_bucket) // Field 1: total_delete_file_size_in_bytes (long) - + records.sort(Comparator.comparing(r -> r.get(0, StructLike.class).get(0, Integer.class))); - + assertThat(records).hasSize(4); // The preparePartitionedTable() method adds one delete file to each of the 4 partitions.