apache · gaborkaszab · May 11, 2026 · anuragmantri · May 11, 2026 · gaborkaszab
diff --git a/core/src/main/java/org/apache/iceberg/ColumnFileInfo.java b/core/src/main/java/org/apache/iceberg/ColumnFileInfo.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.util.List;
+import org.apache.iceberg.types.Types;
+
+/** Describes a column file: its field IDs, location, size in bytes, and sequence number. */
+interface ColumnFileInfo {
+  Types.NestedField FIELD_IDS =
+      Types.NestedField.required(
+          159,
+          "field_ids",
+          Types.ListType.ofRequired(160, Types.IntegerType.get()),
+          "Field IDs this column file contains");
+  Types.NestedField LOCATION =
+      Types.NestedField.required(
+          161, "location", Types.StringType.get(), "Location of the column file");
+  Types.NestedField FILE_SIZE_IN_BYTES =
+      Types.NestedField.required(
+          162, "file_size_in_bytes", Types.LongType.get(), "Total column file size in bytes");
+  Types.NestedField SEQUENCE_NUMBER =
+      Types.NestedField.optional(
+          163, "sequence_number", Types.LongType.get(), "Sequence number of the column file");
+
+  static Types.StructType schema() {
+    return Types.StructType.of(FIELD_IDS, LOCATION, FILE_SIZE_IN_BYTES, SEQUENCE_NUMBER);
+  }
+
+  /** Returns the IDs of the fields whose data is stored in this column file. */
+  List<Integer> fieldIds();
+
+  /** Returns the location of the column file. */
+  String location();
+
+  /** Returns the total size of the column file in bytes. */
+  long fileSizeInBytes();
+
+  /** Returns the sequence number of the column file, or null if not set (the field is optional). */
+  Long sequenceNumber();
+
+  /** Returns a copy of this column file info. */
+  ColumnFileInfo copy();
+}
diff --git a/core/src/main/java/org/apache/iceberg/TrackedFile.java b/core/src/main/java/org/apache/iceberg/TrackedFile.java
@@ -91,6 +91,12 @@ interface TrackedFile {
           "equality_ids",
           Types.ListType.ofRequired(136, Types.IntegerType.get()),
           "Field ids used to determine row equality in equality delete files");
+  Types.NestedField COLUMN_FILES =
+      Types.NestedField.optional(
+          157, // field ID of column_files; the list element ID is 158
+          "column_files",
+          Types.ListType.ofRequired(158, ColumnFileInfo.schema()),
+          "Column update files");
 
   static Types.StructType schemaWithContentStats(
       Types.StructType partitionType, Types.StructType contentStatsType) {
@@ -110,7 +116,8 @@ static Types.StructType schemaWithContentStats(
         MANIFEST_INFO,
         KEY_METADATA,
         SPLIT_OFFSETS,
-        EQUALITY_IDS);
+        EQUALITY_IDS,
+        COLUMN_FILES);
   }
 
   /** Returns the tracking information for this entry. */
@@ -158,6 +165,9 @@ static Types.StructType schemaWithContentStats(
   /** Returns the set of field IDs used for equality comparison in equality delete files. */
   List<Integer> equalityIds();
 
+  /** Returns the column files for this file. */
+  List<ColumnFileInfo> columnFiles();
+
   /** Copies this tracked file. */
   TrackedFile copy();
 

diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java
@@ -21,8 +21,10 @@
 import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Set;
+import java.util.stream.Collectors;
 import org.apache.iceberg.avro.SupportsIndexProjection;
 import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
 import org.apache.iceberg.types.Type;
@@ -65,7 +67,8 @@ public PartitionData copy() {
           TrackedFile.MANIFEST_INFO,
           TrackedFile.KEY_METADATA,
           TrackedFile.SPLIT_OFFSETS,
-          TrackedFile.EQUALITY_IDS);
+          TrackedFile.EQUALITY_IDS,
+          TrackedFile.COLUMN_FILES);
 
   private FileContent contentType = null;
   private String location = null;
@@ -81,6 +84,7 @@ public PartitionData copy() {
   private Integer sortOrderId = null;
   private DeletionVector deletionVector = null;
   private ManifestInfo manifestInfo = null;
+  private List<ColumnFileInfo> columnFiles = null;
   private byte[] keyMetadata = null;
   private long[] splitOffsets = null;
   private int[] equalityIds = null;
@@ -155,6 +159,10 @@ private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, Set<Integ
         toCopy.equalityIds != null
             ? Arrays.copyOf(toCopy.equalityIds, toCopy.equalityIds.length)
             : null;
+    this.columnFiles =
+        toCopy.columnFiles != null
+            ? toCopy.columnFiles.stream().map(ColumnFileInfo::copy).collect(Collectors.toList())
+            : null;
   }
 
   @Override
@@ -232,6 +240,11 @@ public List<Integer> equalityIds() {
     return equalityIds != null ? ArrayUtil.toUnmodifiableIntList(equalityIds) : null;
   }
 
+  @Override
+  public List<ColumnFileInfo> columnFiles() {
+    return columnFiles == null ? null : Collections.unmodifiableList(columnFiles);
+  }
+
   @Override
   public TrackedFile copy() {
     return new TrackedFileStruct(this, true, null);
@@ -279,6 +292,8 @@ private Object getByPos(int pos) {
         return splitOffsets();
       case 14:
         return equalityIds();
+      case 15:
+        return columnFiles(); // unmodifiable view, like the accessor-based cases above
       default:
         throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
     }
@@ -333,6 +348,14 @@ protected <T> void internalSet(int pos, T value) {
       case 14:
         this.equalityIds = ArrayUtil.toIntArray((List<Integer>) value);
         break;
+      case 15:
+        // column_files is an optional field, so a null value must be stored as null
+        this.columnFiles =
+            value == null
+                ? null
+                : ((List<ColumnFileInfo>) value)
+                    .stream().map(ColumnFileInfo::copy).collect(Collectors.toList());
+        break;
       default:
         // ignore the object, it must be from a newer version of the format
     }
@@ -356,6 +376,7 @@ public String toString() {
         .add("key_metadata", keyMetadata == null ? "null" : "(redacted)")
         .add("split_offsets", splitOffsets == null ? "null" : splitOffsets())
         .add("equality_ids", equalityIds == null ? "null" : equalityIds())
+        .add("column_files", columnFiles == null ? "null" : columnFiles)
         .toString();
   }
 }
diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFile.java b/core/src/test/java/org/apache/iceberg/TestTrackedFile.java
@@ -59,7 +59,8 @@ public void schemaWithContentStatsFieldOrder() {
             "manifest_info",
             "key_metadata",
             "split_offsets",
-            "equality_ids");
+            "equality_ids",
+            "column_files");
   }
 
   @Test
@@ -69,7 +70,8 @@ public void schemaWithContentStatsFieldIds() {
 
     assertThat(fields)
         .extracting(Types.NestedField::fieldId)
-        .containsExactly(147, 134, 100, 101, 103, 104, 141, 102, 146, 140, 148, 150, 131, 132, 135);
+        .containsExactly(
+            147, 134, 100, 101, 103, 104, 141, 102, 146, 140, 148, 150, 131, 132, 135, 157);
   }
 
   @Test

diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java
@@ -215,7 +215,7 @@ void testCopyIsDeep() {
   @Test
   void testStructLikeSize() {
     TrackedFileStruct file = new TrackedFileStruct();
-    assertThat(file.size()).isEqualTo(15);
+    assertThat(file.size()).isEqualTo(16); // 15 pre-existing fields plus column_files
   }
 
   @Test