Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ public VortexValueReader<?> list(

@Override
public VortexValueReader<?> primitive(Type.PrimitiveType iPrimitive, Field primField) {
if (VortexSchemas.isVariantField(primField)) {
return GenericVortexReaders.variants();
}

if ((iPrimitive != null && iPrimitive.typeId() == Type.TypeID.UUID)
|| VortexSchemas.isUuidField(primField)) {
return GenericVortexReaders.uuids();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDate;
Expand Down Expand Up @@ -50,6 +51,9 @@
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.DateTimeUtil;
import org.apache.iceberg.util.UUIDUtil;
import org.apache.iceberg.variants.Variant;
import org.apache.iceberg.variants.VariantMetadata;
import org.apache.iceberg.variants.VariantValue;
import org.apache.iceberg.vortex.VortexValueReader;

public class GenericVortexReaders {
Expand Down Expand Up @@ -91,6 +95,10 @@ public static VortexValueReader<UUID> uuids() {
return UuidReader.INSTANCE;
}

public static VortexValueReader<Variant> variants() {
return VariantReader.INSTANCE;
}

public static VortexValueReader<LocalDate> date(boolean isMillis) {
return new DateReader(isMillis);
}
Expand Down Expand Up @@ -265,6 +273,77 @@ static FixedSizeBinaryVector uuidStorage(FieldVector vector) {
return (FixedSizeBinaryVector) vector;
}

private static class VariantReader implements VortexValueReader<Variant> {
static final VariantReader INSTANCE = new VariantReader();

private VariantReader() {}

@Override
public Variant read(FieldVector vector, int row) {
StructVector storage = variantStorage(vector);
VarBinaryVector valueVector = storage.getChild("value", VarBinaryVector.class);
if (vector.isNull(row) || isMissingBinary(valueVector, row)) {
FieldVector typedValueVector = (FieldVector) storage.getChild("typed_value");
if (typedValueVector != null && !typedValueVector.isNull(row)) {
throw new UnsupportedOperationException(
"Reading shredded Variant values from Vortex is not supported yet");
}

return null;
}

return readVariant(storage, valueVector, row);
}

@Override
public Variant readNonNull(FieldVector vector, int row) {
StructVector storage = variantStorage(vector);
VarBinaryVector valueVector = storage.getChild("value", VarBinaryVector.class);
if (isMissingBinary(valueVector, row)) {
throw new UnsupportedOperationException(
"Reading shredded Variant values from Vortex is not supported yet");
}

return readVariant(storage, valueVector, row);
}

private Variant readVariant(StructVector storage, VarBinaryVector valueVector, int row) {
VarBinaryVector metadataVector = storage.getChild("metadata", VarBinaryVector.class);

if (metadataVector == null || metadataVector.isNull(row)) {
throw new IllegalStateException("Invalid Vortex variant: metadata is null");
}

byte[] metadataBytes = metadataVector.get(row);
byte[] valueBytes = valueVector.get(row);
if (metadataBytes.length == 0 || valueBytes.length == 0) {
throw new IllegalStateException(
"Invalid Vortex variant: serialized value is empty (metadata="
+ metadataBytes.length
+ ", value="
+ valueBytes.length
+ ")");
}

VariantMetadata metadata =
VariantMetadata.from(ByteBuffer.wrap(metadataBytes).order(ByteOrder.LITTLE_ENDIAN));
VariantValue value =
VariantValue.from(metadata, ByteBuffer.wrap(valueBytes).order(ByteOrder.LITTLE_ENDIAN));
return Variant.of(metadata, value);
}
}

private static boolean isMissingBinary(VarBinaryVector vector, int row) {
return vector == null || vector.isNull(row) || vector.get(row).length == 0;
}

private static StructVector variantStorage(FieldVector vector) {
if (vector instanceof ExtensionTypeVector<?> ext) {
return (StructVector) ext.getUnderlyingVector();
}
return (StructVector) vector;
}

private static class DateReader implements VortexValueReader<LocalDate> {
private final boolean isMillis;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
Expand Down Expand Up @@ -49,12 +50,18 @@
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.iceberg.FieldMetrics;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ByteBuffers;
import org.apache.iceberg.util.UUIDUtil;
import org.apache.iceberg.variants.Serialized;
import org.apache.iceberg.variants.Variant;
import org.apache.iceberg.variants.VariantMetadata;
import org.apache.iceberg.variants.VariantValue;
import org.apache.iceberg.vortex.VortexValueWriter;

/** Writes Iceberg generic {@link Record} objects to Arrow vectors for Vortex file output. */
Expand Down Expand Up @@ -88,7 +95,12 @@ public void write(Record datum, VectorSchemaRoot root, int rowIndex) {

ColumnMetricsTracker<Object> tracker = (ColumnMetricsTracker<Object>) trackers[fieldIndex];
if (value == null) {
vector.setNull(rowIndex);
if (field.isRequired()) {
throw new IllegalArgumentException(
"Cannot write null value for required field: " + field);
}

writeNull(vector, field.type(), rowIndex);
tracker.addNull();
continue;
}
Expand Down Expand Up @@ -180,13 +192,71 @@ private static void writeValue(
((TimeStampNanoVector) vector).setSafe(rowIndex, localEpochNanos);
}

break;
case VARIANT:
writeVariant((StructVector) vector, (Variant) value, rowIndex);

break;
default:
throw new UnsupportedOperationException(
"Unsupported Iceberg type for Vortex write: " + type);
}
}

private static void writeNull(FieldVector vector, Type type, int rowIndex) {
if (type.isVariantType()) {
writeNullVariant((StructVector) vector, rowIndex);
} else {
vector.setNull(rowIndex);
}
}

private static void writeNullVariant(StructVector vector, int rowIndex) {
vector.setNull(rowIndex);
writeVariantMetadata(
vector.getChild("metadata", VarBinaryVector.class), VariantMetadata.empty(), rowIndex);

VarBinaryVector valueVector = vector.getChild("value", VarBinaryVector.class);
if (valueVector != null) {
valueVector.setNull(rowIndex);
}
}

private static void writeVariant(StructVector vector, Variant variant, int rowIndex) {
vector.setIndexDefined(rowIndex);

writeVariantMetadata(
vector.getChild("metadata", VarBinaryVector.class), variant.metadata(), rowIndex);
writeVariantValue(vector.getChild("value", VarBinaryVector.class), variant.value(), rowIndex);
}

private static void writeVariantMetadata(
VarBinaryVector vector, VariantMetadata metadata, int rowIndex) {
if (metadata instanceof Serialized serialized) {
writeSerialized(vector, serialized, rowIndex);
return;
}

ByteBuffer buffer = ByteBuffer.allocate(metadata.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN);
int length = metadata.writeTo(buffer, 0);
vector.setSafe(rowIndex, buffer, 0, length);
}

private static void writeVariantValue(VarBinaryVector vector, VariantValue value, int rowIndex) {
if (value instanceof Serialized serialized) {
writeSerialized(vector, serialized, rowIndex);
return;
}

ByteBuffer buffer = ByteBuffer.allocate(value.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN);
int length = value.writeTo(buffer, 0);
vector.setSafe(rowIndex, buffer, 0, length);
}

private static void writeSerialized(VarBinaryVector vector, Serialized serialized, int rowIndex) {
vector.setSafe(rowIndex, ByteBuffers.toByteArray(serialized.buffer()));
}

@SuppressWarnings({"unchecked", "rawtypes"})
private static ColumnMetricsTracker<?> newTracker(Types.NestedField field) {
switch (field.type().typeId()) {
Expand Down Expand Up @@ -228,6 +298,8 @@ private static ColumnMetricsTracker<?> newTracker(Types.NestedField field) {
Comparator.naturalOrder(),
v -> ChronoUnit.NANOS.between(LOCAL_EPOCH, (LocalDateTime) v));
}
case VARIANT:
return new ColumnMetricsTracker<Object>(field.fieldId());
default:
return new ColumnMetricsTracker<>(field.fieldId(), (Comparator) Comparator.naturalOrder());
}
Expand All @@ -247,11 +319,14 @@ static class ColumnMetricsTracker<T> {
private T min;
private T max;

ColumnMetricsTracker(int fieldId) {
this(fieldId, null, null);
}

ColumnMetricsTracker(int fieldId, Comparator<T> comparator) {
this(fieldId, comparator, null);
}

@SuppressWarnings("unchecked")
ColumnMetricsTracker(
int fieldId, Comparator<T> comparator, java.util.function.Function<Object, T> converter) {
this.fieldId = fieldId;
Expand All @@ -271,6 +346,11 @@ void incrementValueCount() {
@SuppressWarnings("unchecked")
void addValue(Object value) {
valueCount++;

if (comparator == null) {
return;
}

T typedValue = converter != null ? converter.apply(value) : (T) value;
if (min == null || comparator.compare(typedValue, min) < 0) {
min = typedValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ public static <T> T visit(
}

public static <T> T visit(Type iType, Field field, VortexSchemaWithTypeVisitor<T> visitor) {
if ((iType != null && iType.isVariantType()) || VortexSchemas.isVariantField(field)) {
return visitor.primitive(null, field);
}

ArrowType arrowType = field.getType();
if (arrowType instanceof ArrowType.Struct) {
return visitStruct(iType != null ? iType.asStructType() : null, field.getChildren(), visitor);
Expand Down
Loading
Loading