Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,39 @@ public static PositionDeleteIndex deserialize(byte[] bytes, DeleteFile deleteFil
return new BitmapPositionDeleteIndex(bitmap, deleteFile);
}

/**
 * Locates the serialized Roaring bitmap inside a deletion vector envelope without copying or
 * deserializing it.
 *
 * <p>Envelope format: [4B length BE][4B magic LE][roaring LE][4B CRC BE]
 *
 * <p>The checksum is verified first and the magic number second; either check failing raises an
 * {@link IllegalArgumentException}. The returned {@link ByteSlice} shares the given array and
 * covers only the "roaring" portion of the envelope.
 *
 * @param bytes the complete envelope
 * @param deleteFile the delete file the envelope belongs to, used when reading the length
 * @return a view over the Roaring bytes that follow the magic number
 */
static ByteSlice extractRoaringBitmap(byte[] bytes, DeleteFile deleteFile) {
  ByteBuffer envelope = ByteBuffer.wrap(bytes);
  int dataLength = readBitmapDataLength(envelope, deleteFile);

  // the stored checksum sits directly behind the bitmap data (magic + roaring)
  int actualCrc = computeChecksum(bytes, dataLength);
  int storedCrc = envelope.getInt(LENGTH_SIZE_BYTES + dataLength);
  Preconditions.checkArgument(actualCrc == storedCrc, "Invalid CRC");

  // the bitmap data must open with the magic number
  int magic = pointToBitmapData(bytes, dataLength).getInt();
  Preconditions.checkArgument(
      magic == MAGIC_NUMBER, "Invalid magic number: %s, expected %s", magic, MAGIC_NUMBER);

  // everything between the magic number and the checksum is the Roaring payload
  return new ByteSlice(
      bytes, LENGTH_SIZE_BYTES + MAGIC_NUMBER_SIZE_BYTES, dataLength - MAGIC_NUMBER_SIZE_BYTES);
}

// computes and validates the length of the bitmap data (magic bytes + bitmap)
private static int computeBitmapDataLength(RoaringPositionBitmap bitmap) {
long length = MAGIC_NUMBER_SIZE_BYTES + bitmap.serializedSizeInBytes();
Expand Down
51 changes: 51 additions & 0 deletions core/src/main/java/org/apache/iceberg/deletes/ByteSlice.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.deletes;

/**
 * A zero-copy view over a region of a byte array.
 *
 * <p>The slice keeps a reference to the array it was created with; it does not copy it. Changes
 * made to the array after construction are therefore visible through {@link #data()} and {@link
 * #toByteArray()}.
 */
public class ByteSlice {
  private final byte[] data;
  private final int offset;
  private final int length;

  /**
   * Creates a view over {@code length} bytes of {@code data} starting at {@code offset}.
   *
   * @param data the backing array, shared and not copied
   * @param offset index of the first byte of the slice
   * @param length number of bytes in the slice
   * @throws IllegalArgumentException if {@code data} is null, {@code offset} or {@code length} is
   *     negative, or the region extends past the end of {@code data}
   */
  public ByteSlice(byte[] data, int offset, int length) {
    if (data == null) {
      throw new IllegalArgumentException("Invalid data: null");
    }

    if (offset < 0 || offset > data.length) {
      throw new IllegalArgumentException(
          String.format("Invalid offset: %s (array length %s)", offset, data.length));
    }

    // compare against the remaining bytes so that offset + length cannot overflow
    if (length < 0 || length > data.length - offset) {
      throw new IllegalArgumentException(
          String.format(
              "Invalid length: %s (offset %s, array length %s)", length, offset, data.length));
    }

    this.data = data;
    this.offset = offset;
    this.length = length;
  }

  /** Returns the backing array itself (not a copy); the slice is only a part of it. */
  public byte[] data() {
    return data;
  }

  /** Returns the index in {@link #data()} of the first byte of the slice. */
  public int offset() {
    return offset;
  }

  /** Returns the number of bytes in the slice. */
  public int length() {
    return length;
  }

  /** Returns a copy of just the slice. */
  public byte[] toByteArray() {
    byte[] copy = new byte[length];
    System.arraycopy(data, offset, copy, 0, length);
    return copy;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,19 @@ static PositionDeleteIndex deserialize(byte[] bytes, DeleteFile deleteFile) {
return BitmapPositionDeleteIndex.deserialize(bytes, deleteFile);
}

/**
 * Returns a view over the Roaring bitmap bytes stored inside an Iceberg deletion vector envelope.
 *
 * <p>This delegates to {@link BitmapPositionDeleteIndex#extractRoaringBitmap(byte[], DeleteFile)},
 * which validates the magic number and CRC. The result is a zero-copy {@link ByteSlice} that
 * points into {@code bytes} rather than a new array.
 *
 * @param bytes the full DV blob (length + magic + bitmap + CRC)
 * @param deleteFile the DV file for validation
 * @return a slice over the raw Roaring bitmap bytes (little-endian, portable format)
 */
static ByteSlice extractRoaringBitmap(byte[] bytes, DeleteFile deleteFile) {
  ByteSlice roaringBytes = BitmapPositionDeleteIndex.extractRoaringBitmap(bytes, deleteFile);
  return roaringBytes;
}

/** Returns an empty immutable position delete index. */
static PositionDeleteIndex empty() {
return EmptyPositionDeleteIndex.get();
Expand Down
14 changes: 14 additions & 0 deletions core/src/main/java/org/apache/iceberg/formats/ReadBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.util.Map;
import org.apache.iceberg.Schema;
import org.apache.iceberg.deletes.ByteSlice;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.mapping.NameMapping;
Expand Down Expand Up @@ -119,6 +120,19 @@ default ReadBuilder<D, S> setAll(Map<String, String> properties) {
/** Sets a mapping from external schema names to Iceberg type IDs. */
ReadBuilder<D, S> withNameMapping(NameMapping nameMapping);

/**
* Pushes position deletes into the reader so that deleted rows are excluded during scanning. The
* bitmap is a portable Roaring bitmap (little-endian) where each set bit represents a deleted row
* position. Formats that support this can skip deleted rows at the scan level rather than
* filtering them after the fact. Formats that do not support this can safely ignore the bitmap.
*
* <p>The default implementation does not use the bitmap at all: it returns this builder
* unchanged. A caller therefore cannot tell from the return value of the default implementation
* whether the deletes will be applied by the reader.
*
* @param bitmap a slice over portable Roaring bitmap bytes representing deleted row positions
* @return this for method chaining
*/
default ReadBuilder<D, S> positionDeleteBitmap(ByteSlice bitmap) {
return this;
}

/** Builds the reader. */
CloseableIterable<D> build();
}
52 changes: 52 additions & 0 deletions data/src/main/java/org/apache/iceberg/data/BaseDeleteLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
Expand All @@ -30,6 +32,7 @@
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.deletes.ByteSlice;
import org.apache.iceberg.deletes.Deletes;
import org.apache.iceberg.deletes.PositionDeleteIndex;
import org.apache.iceberg.deletes.PositionDeleteIndexUtil;
Expand All @@ -50,6 +53,7 @@
import org.apache.iceberg.util.StructLikeSet;
import org.apache.iceberg.util.Tasks;
import org.apache.iceberg.util.ThreadPools;
import org.roaringbitmap.RoaringBitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -168,6 +172,54 @@ public PositionDeleteIndex loadPositionDeletes(
}
}

/**
 * Loads position deletes and returns the deleted positions as a portable Roaring bitmap
 * (little-endian, per the <a href="https://github.com/RoaringBitmap/RoaringFormatSpec">Roaring
 * format spec</a>). For deletion vectors, the raw bitmap bytes are extracted directly from the
 * file without deserializing, returned as a zero-copy {@link ByteSlice}. For position delete
 * files, positions are loaded and a new bitmap is built.
 *
 * <p>The bitmap built for position delete files is a 32-bit Roaring bitmap, so every position
 * must fit in an unsigned 32-bit value. Positions outside that range are rejected instead of
 * being truncated, because a truncated position would mark the wrong row as deleted.
 *
 * <p>NOTE(review): the deletion vector branch returns whatever bytes follow the magic number in
 * the DV blob, while the other branch serializes an {@code org.roaringbitmap.RoaringBitmap}.
 * Confirm that both branches produce the same layout for consumers of the returned slice.
 *
 * @param deleteFiles position delete files or a deletion vector
 * @param filePath the data file path for which to load deletes
 * @return a slice over portable Roaring bitmap bytes, or null if there are no deletes
 * @throws IllegalArgumentException if a position loaded from position delete files is negative or
 *     larger than the maximum unsigned 32-bit value
 */
public ByteSlice loadPositionDeleteBitmap(
    Iterable<DeleteFile> deleteFiles, CharSequence filePath) {
  if (ContentFileUtil.containsSingleDV(deleteFiles)) {
    DeleteFile dv = Iterables.getOnlyElement(deleteFiles);
    validateDV(dv, filePath);
    return readDVBitmap(dv);
  }

  PositionDeleteIndex index = getOrReadPosDeletes(deleteFiles, filePath);
  if (index == null || index.isEmpty()) {
    return null;
  }

  // RoaringBitmap stores unsigned 32-bit values; anything larger would wrap around on the cast
  final long maxPosition = 0xFFFFFFFFL;
  RoaringBitmap bitmap = new RoaringBitmap();
  index.forEach(
      pos -> {
        if (pos < 0 || pos > maxPosition) {
          throw new IllegalArgumentException(
              String.format(
                  "Cannot encode position %s of %s in a 32-bit Roaring bitmap", pos, filePath));
        }

        // the cast keeps the low 32 bits, which RoaringBitmap interprets as an unsigned value
        bitmap.add((int) pos);
      });

  // run-length encode where that is smaller, before asking for the serialized size
  bitmap.runOptimize();
  ByteBuffer buf = ByteBuffer.allocate(bitmap.serializedSizeInBytes());
  buf.order(ByteOrder.LITTLE_ENDIAN);
  bitmap.serialize(buf);
  byte[] bytes = buf.array();
  return new ByteSlice(bytes, 0, bytes.length);
}

/**
 * Reads the blob of the given deletion vector into memory and returns a view over the Roaring
 * bitmap bytes it contains, as produced by {@link
 * PositionDeleteIndex#extractRoaringBitmap(byte[], DeleteFile)}.
 *
 * @param dv the deletion vector; its content offset and content size locate the blob
 * @return a slice over the bitmap bytes inside the blob that was read
 * @throws UncheckedIOException if reading the blob fails
 */
private ByteSlice readDVBitmap(DeleteFile dv) {
  LOG.trace("Reading DV bitmap bytes without deserializing {}", dv.location());
  InputFile dvFile = loadInputFile.apply(dv);
  long blobOffset = dv.contentOffset();
  int blobLength = dv.contentSizeInBytes().intValue();
  byte[] blob = new byte[blobLength];

  try {
    IOUtil.readFully(dvFile, blobOffset, blob, 0, blobLength);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }

  return PositionDeleteIndex.extractRoaringBitmap(blob, dv);
}

private PositionDeleteIndex readDV(DeleteFile dv) {
LOG.trace("Opening DV file {}", dv.location());
InputFile inputFile = loadInputFile.apply(dv);
Expand Down
28 changes: 27 additions & 1 deletion data/src/main/java/org/apache/iceberg/data/DeleteFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.Optional;
import org.apache.iceberg.Accessor;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.deletes.ByteSlice;
import org.apache.iceberg.deletes.DeleteCounter;
import org.apache.iceberg.deletes.Deletes;
import org.apache.iceberg.deletes.PositionDeleteIndex;
Expand Down Expand Up @@ -65,6 +67,7 @@ public abstract class DeleteFilter<T> {
private PositionDeleteIndex deleteRowPositions = null;
private List<Predicate<T>> isInDeleteSets = null;
private Predicate<T> eqDeleteRows = null;
private boolean posDeletesPushedDown = false;

protected DeleteFilter(
String filePath,
Expand Down Expand Up @@ -258,8 +261,31 @@ public PositionDeleteIndex deletedRowPositions() {
return deleteRowPositions;
}

private CloseableIterable<T> applyPosDeletes(CloseableIterable<T> records) {
/**
 * Returns the position deletes of this filter as a {@link ByteSlice} of Roaring bitmap bytes, for
 * pushdown into a format-native scanner.
 *
 * <p>A value is only produced when there are position deletes, the delete loader is a {@link
 * BaseDeleteLoader}, and that loader returns a non-null bitmap. In every other case the result is
 * empty and this filter is left unchanged.
 *
 * <p>Side effect: when a value is returned, position deletes are marked as handled and {@link
 * #filter(CloseableIterable)} will not re-apply them. The flag is set here, not by the reader, so
 * callers must only invoke this method when the reader they build actually applies the bitmap.
 */
public Optional<ByteSlice> bitmapBytes() {
  if (posDeletes.isEmpty()) {
    return Optional.empty();
  }

  DeleteLoader loader = deleteLoader();
  if (!(loader instanceof BaseDeleteLoader)) {
    return Optional.empty();
  }

  ByteSlice bitmap = ((BaseDeleteLoader) loader).loadPositionDeleteBitmap(posDeletes, filePath);
  if (bitmap == null) {
    return Optional.empty();
  }

  // from here on filter() skips position deletes
  this.posDeletesPushedDown = true;
  return Optional.of(bitmap);
}

private CloseableIterable<T> applyPosDeletes(CloseableIterable<T> records) {
if (posDeletes.isEmpty() || posDeletesPushedDown) {
return records;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public static void register() {
(icebergSchema, fileSchema, engineSchema) ->
GenericVortexWriter.buildWriter(icebergSchema),
(VortexFormatModel.ReaderFunction<Record>) GenericVortexReader::buildReader));

FormatModelRegistry.register(VortexFormatModel.forPositionDeletes());
}

private GenericFormatModels() {}
Expand Down
16 changes: 14 additions & 2 deletions data/src/main/java/org/apache/iceberg/data/GenericReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@

import java.io.Serializable;
import java.util.Map;
import java.util.Optional;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.deletes.ByteSlice;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
Expand Down Expand Up @@ -66,7 +68,12 @@ public CloseableIterable<Record> open(FileScanTask task) {
DeleteFilter<Record> deletes = new GenericDeleteFilter(io, task, tableSchema, projection);
Schema readSchema = deletes.requiredSchema();

CloseableIterable<Record> records = openFile(task, readSchema);
// Try to extract position deletes as bitmap bytes for pushdown into the format reader.
// If the format supports it, position deletes are handled at scan level and the filter
// will skip re-applying them.
Optional<ByteSlice> bitmapBytes = deletes.bitmapBytes();

CloseableIterable<Record> records = openFile(task, readSchema, bitmapBytes.orElse(null));
records = deletes.filter(records);
records = applyResidual(records, readSchema, task.residual());

Expand All @@ -84,7 +91,8 @@ private CloseableIterable<Record> applyResidual(
return records;
}

private CloseableIterable<Record> openFile(FileScanTask task, Schema fileProjection) {
private CloseableIterable<Record> openFile(
FileScanTask task, Schema fileProjection, ByteSlice posDeleteBitmap) {
InputFile input = io.newInputFile(task.file());
Map<Integer, ?> partition =
PartitionUtil.constantsMap(task, IdentityPartitionConverters::convertConstant);
Expand All @@ -95,6 +103,10 @@ private CloseableIterable<Record> openFile(FileScanTask task, Schema fileProject
builder = builder.reuseContainers();
}

if (posDeleteBitmap != null) {
builder = builder.positionDeleteBitmap(posDeleteBitmap);
}

return builder
.project(fileProjection)
.idToConstant(partition)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.data.vortex;

import java.nio.charset.StandardCharsets;
import java.util.stream.Stream;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.iceberg.FieldMetrics;
import org.apache.iceberg.deletes.PositionDelete;
import org.apache.iceberg.vortex.VortexValueWriter;

/**
 * A {@link VortexValueWriter} that copies one {@link PositionDelete} into a row of an Arrow
 * {@link VectorSchemaRoot} for Vortex position delete file output.
 *
 * <p>The output schema is [file_path: string, pos: long].
 */
public class PositionDeleteVortexWriter<D> implements VortexValueWriter<PositionDelete<D>> {
  // column ordinals of the output schema
  private static final int PATH_ORDINAL = 0;
  private static final int POS_ORDINAL = 1;

  /**
   * Writes the path (UTF-8 encoded) and the position of {@code datum} at {@code rowIndex}.
   *
   * @param datum the position delete to write
   * @param root the vectors to write into; the first is used for the path, the second for the
   *     position
   * @param rowIndex the row to set in both vectors
   */
  @Override
  public void write(PositionDelete<D> datum, VectorSchemaRoot root, int rowIndex) {
    VarCharVector paths = (VarCharVector) root.getVector(PATH_ORDINAL);
    paths.setSafe(rowIndex, datum.path().toString().getBytes(StandardCharsets.UTF_8));

    BigIntVector positions = (BigIntVector) root.getVector(POS_ORDINAL);
    positions.setSafe(rowIndex, datum.pos());
  }

  /** Returns an empty stream; this writer reports no field metrics. */
  @Override
  public Stream<FieldMetrics<?>> metrics() {
    return Stream.empty();
  }
}
Loading
Loading