3333import io .trino .spi .type .TypeManager ;
3434import org .apache .iceberg .ContentFile ;
3535import org .apache .iceberg .DeleteFile ;
36+ import org .apache .iceberg .IcebergManifestUtils .FileEntryWithMetadata ;
3637import org .apache .iceberg .ManifestReader ;
3738import org .apache .iceberg .MetricsUtil ;
3839import org .apache .iceberg .PartitionField ;
6667import static io .trino .plugin .iceberg .IcebergUtil .primitiveFieldTypes ;
6768import static io .trino .plugin .iceberg .IcebergUtil .readerForManifest ;
6869import static io .trino .plugin .iceberg .StructLikeWrapperWithFieldIdToIndex .createStructLikeWrapper ;
70+ import static io .trino .plugin .iceberg .system .FilesTable .ADDED_SNAPSHOT_ID_COLUMN_NAME ;
6971import static io .trino .plugin .iceberg .system .FilesTable .COLUMN_SIZES_COLUMN_NAME ;
7072import static io .trino .plugin .iceberg .system .FilesTable .CONTENT_COLUMN_NAME ;
73+ import static io .trino .plugin .iceberg .system .FilesTable .CONTENT_OFFSET_COLUMN_NAME ;
74+ import static io .trino .plugin .iceberg .system .FilesTable .CONTENT_SIZE_IN_BYTES_COLUMN_NAME ;
75+ import static io .trino .plugin .iceberg .system .FilesTable .DATA_SEQUENCE_NUMBER_COLUMN_NAME ;
7176import static io .trino .plugin .iceberg .system .FilesTable .EQUALITY_IDS_COLUMN_NAME ;
7277import static io .trino .plugin .iceberg .system .FilesTable .FILE_FORMAT_COLUMN_NAME ;
7378import static io .trino .plugin .iceberg .system .FilesTable .FILE_PATH_COLUMN_NAME ;
79+ import static io .trino .plugin .iceberg .system .FilesTable .FILE_SEQUENCE_NUMBER_COLUMN_NAME ;
7480import static io .trino .plugin .iceberg .system .FilesTable .FILE_SIZE_IN_BYTES_COLUMN_NAME ;
81+ import static io .trino .plugin .iceberg .system .FilesTable .FIRST_ROW_ID_COLUMN_NAME ;
7582import static io .trino .plugin .iceberg .system .FilesTable .KEY_METADATA_COLUMN_NAME ;
7683import static io .trino .plugin .iceberg .system .FilesTable .LOWER_BOUNDS_COLUMN_NAME ;
84+ import static io .trino .plugin .iceberg .system .FilesTable .MANIFEST_LOCATION_COLUMN_NAME ;
7785import static io .trino .plugin .iceberg .system .FilesTable .NAN_VALUE_COUNTS_COLUMN_NAME ;
7886import static io .trino .plugin .iceberg .system .FilesTable .NULL_VALUE_COUNTS_COLUMN_NAME ;
7987import static io .trino .plugin .iceberg .system .FilesTable .PARTITION_COLUMN_NAME ;
88+ import static io .trino .plugin .iceberg .system .FilesTable .POS_COLUMN_NAME ;
8089import static io .trino .plugin .iceberg .system .FilesTable .READABLE_METRICS_COLUMN_NAME ;
8190import static io .trino .plugin .iceberg .system .FilesTable .RECORD_COUNT_COLUMN_NAME ;
91+ import static io .trino .plugin .iceberg .system .FilesTable .REFERENCED_DATA_FILE_COLUMN_NAME ;
8292import static io .trino .plugin .iceberg .system .FilesTable .SORT_ORDER_ID_COLUMN_NAME ;
8393import static io .trino .plugin .iceberg .system .FilesTable .SPEC_ID_COLUMN_NAME ;
8494import static io .trino .plugin .iceberg .system .FilesTable .SPLIT_OFFSETS_COLUMN_NAME ;
95105import static io .trino .spi .type .VarbinaryType .VARBINARY ;
96106import static io .trino .spi .type .VarcharType .VARCHAR ;
97107import static java .util .Objects .requireNonNull ;
108+ import static org .apache .iceberg .IcebergManifestUtils .liveEntriesWithMetadata ;
98109import static org .apache .iceberg .MetricsUtil .readableMetricsStruct ;
99110
100111public final class FilesTablePageSource
@@ -108,7 +119,7 @@ public final class FilesTablePageSource
108119 private final List <PartitionField > partitionFields ;
109120 private final Optional <IcebergPartitionColumn > partitionColumnType ;
110121 private final List <Types .NestedField > primitiveFields ;
111- private final Iterator <? extends ContentFile <?>> contentIterator ;
122+ private final Iterator <FileEntryWithMetadata > entryIterator ;
112123 private final Map <String , Integer > columnNameToIndex ;
113124 private final PageBuilder pageBuilder ;
114125 private final long completedBytes ;
@@ -137,7 +148,7 @@ public FilesTablePageSource(
137148 .collect (toImmutableList ());
138149 ManifestReader <? extends ContentFile <?>> manifestReader = closer .register (readerForManifest (split .manifestFile (), fileIoFactory .create (trinoFileSystem ), idToPartitionSpecMapping ));
139150 // TODO figure out why selecting the specific column causes null to be returned for split_offsets
140- this .contentIterator = closer .register (requireNonNull (manifestReader , "manifestReader is null" ).iterator ());
151+ this .entryIterator = closer .register (liveEntriesWithMetadata ( requireNonNull (manifestReader , "manifestReader is null" ) ).iterator ());
141152 this .pageBuilder = new PageBuilder (requiredColumns .stream ().map (column -> {
142153 if (column .equals (PARTITION_COLUMN_NAME )) {
143154 return split .partitionColumnType ().orElseThrow ();
@@ -184,10 +195,11 @@ public SourcePage getNextSourcePage()
184195 return null ;
185196 }
186197
187- while (contentIterator .hasNext () && !pageBuilder .isFull ()) {
198+ while (entryIterator .hasNext () && !pageBuilder .isFull ()) {
188199 pageBuilder .declarePosition ();
189200 long start = System .nanoTime ();
190- ContentFile <?> contentFile = contentIterator .next ();
201+ FileEntryWithMetadata entry = entryIterator .next ();
202+ ContentFile <?> contentFile = entry .file ();
191203
192204 writeValueOrNull (pageBuilder , CONTENT_COLUMN_NAME , () -> contentFile .content ().id (), INTEGER ::writeInt );
193205 writeValueOrNull (pageBuilder , FILE_PATH_COLUMN_NAME , contentFile ::location , VARCHAR ::writeString );
@@ -218,22 +230,23 @@ public SourcePage getNextSourcePage()
218230 (blkBldr , value ) -> INTEGER .writeLong (blkBldr , value ));
219231 writeValueOrNull (pageBuilder , READABLE_METRICS_COLUMN_NAME , () -> metadataSchema .findField (MetricsUtil .READABLE_METRICS ),
220232 (blkBldr , value ) -> VARCHAR .writeString (blkBldr , readableMetricsToJson (readableMetricsStruct (schema , contentFile , value .type ().asStructType ()), primitiveFields )));
221- writeValueOrNull (pageBuilder , FilesTable .FILE_SEQUENCE_NUMBER_COLUMN_NAME , contentFile ::fileSequenceNumber , BIGINT ::writeLong );
222- writeValueOrNull (pageBuilder , FilesTable .DATA_SEQUENCE_NUMBER_COLUMN_NAME , contentFile ::dataSequenceNumber , BIGINT ::writeLong );
233+ writeValueOrNull (pageBuilder , ADDED_SNAPSHOT_ID_COLUMN_NAME , entry ::snapshotId , BIGINT ::writeLong );
234+ writeValueOrNull (pageBuilder , FILE_SEQUENCE_NUMBER_COLUMN_NAME , contentFile ::fileSequenceNumber , BIGINT ::writeLong );
235+ writeValueOrNull (pageBuilder , DATA_SEQUENCE_NUMBER_COLUMN_NAME , contentFile ::dataSequenceNumber , BIGINT ::writeLong );
223236 if (contentFile instanceof DeleteFile deleteFile ) {
224- writeValueOrNull (pageBuilder , FilesTable . REFERENCED_DATA_FILE_COLUMN_NAME , deleteFile ::referencedDataFile , VARCHAR ::writeString );
225- writeValueOrNull (pageBuilder , FilesTable . CONTENT_OFFSET_COLUMN_NAME , deleteFile ::contentOffset , BIGINT ::writeLong );
226- writeValueOrNull (pageBuilder , FilesTable . CONTENT_SIZE_IN_BYTES_COLUMN_NAME , deleteFile ::contentSizeInBytes , BIGINT ::writeLong );
237+ writeValueOrNull (pageBuilder , REFERENCED_DATA_FILE_COLUMN_NAME , deleteFile ::referencedDataFile , VARCHAR ::writeString );
238+ writeValueOrNull (pageBuilder , CONTENT_OFFSET_COLUMN_NAME , deleteFile ::contentOffset , BIGINT ::writeLong );
239+ writeValueOrNull (pageBuilder , CONTENT_SIZE_IN_BYTES_COLUMN_NAME , deleteFile ::contentSizeInBytes , BIGINT ::writeLong );
227240 }
228241 else {
229242 // For non-delete files, these columns should be null
230- writeNull (pageBuilder , FilesTable . REFERENCED_DATA_FILE_COLUMN_NAME );
231- writeNull (pageBuilder , FilesTable . CONTENT_OFFSET_COLUMN_NAME );
232- writeNull (pageBuilder , FilesTable . CONTENT_SIZE_IN_BYTES_COLUMN_NAME );
243+ writeNull (pageBuilder , REFERENCED_DATA_FILE_COLUMN_NAME );
244+ writeNull (pageBuilder , CONTENT_OFFSET_COLUMN_NAME );
245+ writeNull (pageBuilder , CONTENT_SIZE_IN_BYTES_COLUMN_NAME );
233246 }
234- writeValueOrNull (pageBuilder , FilesTable . POS_COLUMN_NAME , contentFile ::pos , BIGINT ::writeLong );
235- writeValueOrNull (pageBuilder , FilesTable . MANIFEST_LOCATION_COLUMN_NAME , contentFile ::manifestLocation , VARCHAR ::writeString );
236- writeValueOrNull (pageBuilder , FilesTable . FIRST_ROW_ID_COLUMN_NAME , contentFile ::firstRowId , BIGINT ::writeLong );
247+ writeValueOrNull (pageBuilder , POS_COLUMN_NAME , contentFile ::pos , BIGINT ::writeLong );
248+ writeValueOrNull (pageBuilder , MANIFEST_LOCATION_COLUMN_NAME , contentFile ::manifestLocation , VARCHAR ::writeString );
249+ writeValueOrNull (pageBuilder , FIRST_ROW_ID_COLUMN_NAME , contentFile ::firstRowId , BIGINT ::writeLong );
237250 readTimeNanos += System .nanoTime () - start ;
238251 }
239252
0 commit comments