From 61a3f93b98af2345c232b212e88fa9dc3f62557c Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 27 May 2026 12:04:15 +0100 Subject: [PATCH 1/2] Variant support for Iceberg Signed-off-by: Adam Gutglick --- .../data/vortex/GenericVortexWriter.java | 59 ++++++++++++++++++- .../apache/iceberg/vortex/VortexSchemas.java | 38 ++++++++++++ .../iceberg/vortex/TestGenericVortex.java | 7 ++- 3 files changed, 102 insertions(+), 2 deletions(-) diff --git a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java index a73ce14cf3c5..871d583d35bf 100644 --- a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java +++ b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java @@ -20,6 +20,7 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.time.LocalDateTime; @@ -49,12 +50,17 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.StructVector; import org.apache.iceberg.FieldMetrics; import org.apache.iceberg.Schema; import org.apache.iceberg.data.Record; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.UUIDUtil; +import org.apache.iceberg.variants.Serialized; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantMetadata; +import org.apache.iceberg.variants.VariantValue; import org.apache.iceberg.vortex.VortexValueWriter; /** Writes Iceberg generic {@link Record} objects to Arrow vectors for Vortex file output. 
*/ @@ -181,12 +187,53 @@ private static void writeValue( } break; + case VARIANT: + writeVariant((StructVector) vector, (Variant) value, rowIndex); + + break; + default: throw new UnsupportedOperationException( "Unsupported Iceberg type for Vortex write: " + type); } } + private static void writeVariant(StructVector vector, Variant variant, int rowIndex) { + vector.setIndexDefined(rowIndex); + + writeVariantMetadata( + vector.getChild("metadata", VarBinaryVector.class), variant.metadata(), rowIndex); + writeVariantValue(vector.getChild("value", VarBinaryVector.class), variant.value(), rowIndex); + } + + private static void writeVariantMetadata( + VarBinaryVector vector, VariantMetadata metadata, int rowIndex) { + if (metadata instanceof Serialized serialized) { + writeSerialized(vector, serialized, rowIndex); + return; + } + + ByteBuffer buffer = ByteBuffer.allocate(metadata.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + int length = metadata.writeTo(buffer, 0); + vector.setSafe(rowIndex, buffer, 0, length); + } + + private static void writeVariantValue(VarBinaryVector vector, VariantValue value, int rowIndex) { + if (value instanceof Serialized serialized) { + writeSerialized(vector, serialized, rowIndex); + return; + } + + ByteBuffer buffer = ByteBuffer.allocate(value.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + int length = value.writeTo(buffer, 0); + vector.setSafe(rowIndex, buffer, 0, length); + } + + private static void writeSerialized(VarBinaryVector vector, Serialized serialized, int rowIndex) { + ByteBuffer buffer = serialized.buffer(); + vector.setSafe(rowIndex, buffer, buffer.position(), buffer.remaining()); + } + @SuppressWarnings({"unchecked", "rawtypes"}) private static ColumnMetricsTracker newTracker(Types.NestedField field) { switch (field.type().typeId()) { @@ -228,6 +275,8 @@ private static ColumnMetricsTracker newTracker(Types.NestedField field) { Comparator.naturalOrder(), v -> ChronoUnit.NANOS.between(LOCAL_EPOCH, (LocalDateTime) v)); } + 
case VARIANT: + return new ColumnMetricsTracker(field.fieldId()); default: return new ColumnMetricsTracker<>(field.fieldId(), (Comparator) Comparator.naturalOrder()); } @@ -247,11 +296,14 @@ static class ColumnMetricsTracker { private T min; private T max; + ColumnMetricsTracker(int fieldId) { + this(fieldId, null, null); + } + ColumnMetricsTracker(int fieldId, Comparator comparator) { this(fieldId, comparator, null); } - @SuppressWarnings("unchecked") ColumnMetricsTracker( int fieldId, Comparator comparator, java.util.function.Function converter) { this.fieldId = fieldId; @@ -271,6 +323,11 @@ void incrementValueCount() { @SuppressWarnings("unchecked") void addValue(Object value) { valueCount++; + + if (comparator == null) { + return; + } + T typedValue = converter != null ? converter.apply(value) : (T) value; if (min == null || comparator.compare(typedValue, min) < 0) { min = typedValue; diff --git a/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java b/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java index 386230479903..e61e8cd46a30 100644 --- a/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java +++ b/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java @@ -40,6 +40,12 @@ public final class VortexSchemas { /** Canonical Arrow extension name for UUIDs (matches {@code arrow.vector.extension.UuidType}). */ static final String UUID_EXTENSION_NAME = "arrow.uuid"; + /** + * Canonical Arrow extension name for Parquet variant (matches {@code + * arrow.vector.extension.ParquetVariant}). + */ + static final String VARIANT_EXTENSION_NAME = "arrow.parquet.variant"; + private VortexSchemas() {} /** Convert a Vortex file's Arrow {@link org.apache.arrow.vector.types.pojo.Schema} to Iceberg. 
*/ @@ -169,6 +175,25 @@ yield new Field( yield new Field( name, new FieldType(nullable, ArrowType.Struct.INSTANCE, null), children.build()); } + case VARIANT -> { + Map extMetadata = + ImmutableMap.of( + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, + VARIANT_EXTENSION_NAME, + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, + ""); + + ImmutableList.Builder children = ImmutableList.builder(); + children.add( + new Field("metadata", new FieldType(false, ArrowType.Binary.INSTANCE, null), null)); + children.add( + new Field("value", new FieldType(true, ArrowType.Binary.INSTANCE, null), null)); + + yield new Field( + name, + new FieldType(nullable, ArrowType.Struct.INSTANCE, null, extMetadata), + children.build()); + } default -> throw new UnsupportedOperationException( "Unsupported Iceberg type for Arrow conversion: " + type); @@ -186,6 +211,11 @@ private static Type toIcebergType(Field field) { if (isUuidField(field)) { return Types.UUIDType.get(); } + + if (isVariantField(field)) { + return Types.VariantType.get(); + } + ArrowType arrowType = field.getType(); if (arrowType instanceof ArrowType.Int intType) { return intType.getBitWidth() <= Integer.SIZE ? 
Types.IntegerType.get() : Types.LongType.get(); @@ -257,4 +287,12 @@ public static boolean isUuidField(Field field) { return UUID_EXTENSION_NAME.equals( field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME)); } + + public static boolean isVariantField(Field field) { + if (field.getType() instanceof ArrowType.ExtensionType ext) { + return VARIANT_EXTENSION_NAME.equals(ext.extensionName()); + } + return VARIANT_EXTENSION_NAME.equals( + field.getMetadata().get(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME)); + } } diff --git a/vortex/src/test/java/org/apache/iceberg/vortex/TestGenericVortex.java b/vortex/src/test/java/org/apache/iceberg/vortex/TestGenericVortex.java index fc7f2ebaa866..6d136436217a 100644 --- a/vortex/src/test/java/org/apache/iceberg/vortex/TestGenericVortex.java +++ b/vortex/src/test/java/org/apache/iceberg/vortex/TestGenericVortex.java @@ -48,7 +48,12 @@ protected boolean supportsUnknown() { @Override protected boolean supportsVariant() { - return false; + return true; + } + + @Override + protected boolean supportsTimestampNanos() { + return true; } @Override From bee87b4b280d33ea31c72b64043720e00d8a30bf Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 27 May 2026 15:13:53 +0100 Subject: [PATCH 2/2] Vortex: Add variant reader, null writes, and Arrow schema validation Signed-off-by: Adam Gutglick --- .../data/vortex/GenericVortexReader.java | 4 + .../data/vortex/GenericVortexReaders.java | 79 +++++++++ .../data/vortex/GenericVortexWriter.java | 31 +++- .../vortex/VortexSchemaWithTypeVisitor.java | 4 + .../apache/iceberg/vortex/VortexSchemas.java | 65 +++++++ .../iceberg/vortex/TestVortexSchemas.java | 158 ++++++++++++++++++ 6 files changed, 337 insertions(+), 4 deletions(-) create mode 100644 vortex/src/test/java/org/apache/iceberg/vortex/TestVortexSchemas.java diff --git a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReader.java b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReader.java index 73c2d929ee91..c552c6455cf4 100644 
--- a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReader.java +++ b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReader.java @@ -104,6 +104,10 @@ public VortexValueReader list( @Override public VortexValueReader primitive(Type.PrimitiveType iPrimitive, Field primField) { + if (VortexSchemas.isVariantField(primField)) { + return GenericVortexReaders.variants(); + } + if ((iPrimitive != null && iPrimitive.typeId() == Type.TypeID.UUID) || VortexSchemas.isUuidField(primField)) { return GenericVortexReaders.uuids(); diff --git a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReaders.java b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReaders.java index e10374314763..cf866138b593 100644 --- a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReaders.java +++ b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexReaders.java @@ -20,6 +20,7 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDate; @@ -50,6 +51,9 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.UUIDUtil; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantMetadata; +import org.apache.iceberg.variants.VariantValue; import org.apache.iceberg.vortex.VortexValueReader; public class GenericVortexReaders { @@ -91,6 +95,10 @@ public static VortexValueReader uuids() { return UuidReader.INSTANCE; } + public static VortexValueReader variants() { + return VariantReader.INSTANCE; + } + public static VortexValueReader date(boolean isMillis) { return new DateReader(isMillis); } @@ -265,6 +273,77 @@ static FixedSizeBinaryVector uuidStorage(FieldVector vector) { return (FixedSizeBinaryVector) vector; } + private static class VariantReader implements VortexValueReader { + static final 
VariantReader INSTANCE = new VariantReader(); + + private VariantReader() {} + + @Override + public Variant read(FieldVector vector, int row) { + StructVector storage = variantStorage(vector); + VarBinaryVector valueVector = storage.getChild("value", VarBinaryVector.class); + if (vector.isNull(row) || isMissingBinary(valueVector, row)) { + FieldVector typedValueVector = (FieldVector) storage.getChild("typed_value"); + if (typedValueVector != null && !typedValueVector.isNull(row)) { + throw new UnsupportedOperationException( + "Reading shredded Variant values from Vortex is not supported yet"); + } + + return null; + } + + return readVariant(storage, valueVector, row); + } + + @Override + public Variant readNonNull(FieldVector vector, int row) { + StructVector storage = variantStorage(vector); + VarBinaryVector valueVector = storage.getChild("value", VarBinaryVector.class); + if (isMissingBinary(valueVector, row)) { + throw new UnsupportedOperationException( + "Reading shredded Variant values from Vortex is not supported yet"); + } + + return readVariant(storage, valueVector, row); + } + + private Variant readVariant(StructVector storage, VarBinaryVector valueVector, int row) { + VarBinaryVector metadataVector = storage.getChild("metadata", VarBinaryVector.class); + + if (metadataVector == null || metadataVector.isNull(row)) { + throw new IllegalStateException("Invalid Vortex variant: metadata is null"); + } + + byte[] metadataBytes = metadataVector.get(row); + byte[] valueBytes = valueVector.get(row); + if (metadataBytes.length == 0 || valueBytes.length == 0) { + throw new IllegalStateException( + "Invalid Vortex variant: serialized value is empty (metadata=" + + metadataBytes.length + + ", value=" + + valueBytes.length + + ")"); + } + + VariantMetadata metadata = + VariantMetadata.from(ByteBuffer.wrap(metadataBytes).order(ByteOrder.LITTLE_ENDIAN)); + VariantValue value = + VariantValue.from(metadata, ByteBuffer.wrap(valueBytes).order(ByteOrder.LITTLE_ENDIAN)); + 
return Variant.of(metadata, value); + } + } + + private static boolean isMissingBinary(VarBinaryVector vector, int row) { + return vector == null || vector.isNull(row) || vector.get(row).length == 0; + } + + private static StructVector variantStorage(FieldVector vector) { + if (vector instanceof ExtensionTypeVector ext) { + return (StructVector) ext.getUnderlyingVector(); + } + return (StructVector) vector; + } + private static class DateReader implements VortexValueReader { private final boolean isMillis; diff --git a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java index 871d583d35bf..e585cbdef15c 100644 --- a/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java +++ b/vortex/src/main/java/org/apache/iceberg/data/vortex/GenericVortexWriter.java @@ -54,6 +54,7 @@ import org.apache.iceberg.FieldMetrics; import org.apache.iceberg.Schema; import org.apache.iceberg.data.Record; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.UUIDUtil; @@ -94,7 +95,12 @@ public void write(Record datum, VectorSchemaRoot root, int rowIndex) { ColumnMetricsTracker tracker = (ColumnMetricsTracker) trackers[fieldIndex]; if (value == null) { - vector.setNull(rowIndex); + if (field.isRequired()) { + throw new IllegalArgumentException( + "Cannot write null value for required field: " + field); + } + + writeNull(vector, field.type(), rowIndex); tracker.addNull(); continue; } @@ -191,13 +197,31 @@ private static void writeValue( writeVariant((StructVector) vector, (Variant) value, rowIndex); break; - default: throw new UnsupportedOperationException( "Unsupported Iceberg type for Vortex write: " + type); } } + private static void writeNull(FieldVector vector, Type type, int rowIndex) { + if (type.isVariantType()) { + writeNullVariant((StructVector) vector, 
rowIndex); + } else { + vector.setNull(rowIndex); + } + } + + private static void writeNullVariant(StructVector vector, int rowIndex) { + vector.setNull(rowIndex); + writeVariantMetadata( + vector.getChild("metadata", VarBinaryVector.class), VariantMetadata.empty(), rowIndex); + + VarBinaryVector valueVector = vector.getChild("value", VarBinaryVector.class); + if (valueVector != null) { + valueVector.setNull(rowIndex); + } + } + private static void writeVariant(StructVector vector, Variant variant, int rowIndex) { vector.setIndexDefined(rowIndex); @@ -230,8 +254,7 @@ private static void writeVariantValue(VarBinaryVector vector, VariantValue value } private static void writeSerialized(VarBinaryVector vector, Serialized serialized, int rowIndex) { - ByteBuffer buffer = serialized.buffer(); - vector.setSafe(rowIndex, buffer, buffer.position(), buffer.remaining()); + vector.setSafe(rowIndex, ByteBuffers.toByteArray(serialized.buffer())); } @SuppressWarnings({"unchecked", "rawtypes"}) diff --git a/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemaWithTypeVisitor.java b/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemaWithTypeVisitor.java index 8bd72d55896e..fd790a799bac 100644 --- a/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemaWithTypeVisitor.java +++ b/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemaWithTypeVisitor.java @@ -45,6 +45,10 @@ public static T visit( } public static T visit(Type iType, Field field, VortexSchemaWithTypeVisitor visitor) { + if ((iType != null && iType.isVariantType()) || VortexSchemas.isVariantField(field)) { + return visitor.primitive(null, field); + } + ArrowType arrowType = field.getType(); if (arrowType instanceof ArrowType.Struct) { return visitStruct(iType != null ? 
iType.asStructType() : null, field.getChildren(), visitor); diff --git a/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java b/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java index e61e8cd46a30..9ea824f734f0 100644 --- a/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java +++ b/vortex/src/main/java/org/apache/iceberg/vortex/VortexSchemas.java @@ -30,6 +30,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.iceberg.Schema; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -213,6 +214,7 @@ private static Type toIcebergType(Field field) { } if (isVariantField(field)) { + validateVariantField(field); return Types.VariantType.get(); } @@ -260,6 +262,69 @@ private static Type toIcebergFloatingPoint(ArrowType.FloatingPoint fpType) { }; } + private static void validateVariantField(Field field) { + Preconditions.checkArgument( + field.getType() instanceof ArrowType.Struct, + "Invalid Arrow variant field %s: expected struct storage type, found %s", + field.getName(), + field.getType()); + + Field metadata = findChild(field, "metadata"); + Preconditions.checkArgument( + metadata != null, + "Invalid Arrow variant field %s: missing metadata child", + field.getName()); + Preconditions.checkArgument( + !metadata.isNullable(), + "Invalid Arrow variant field %s: metadata child must be non-nullable", + field.getName()); + Preconditions.checkArgument( + isBinaryLike(metadata.getType()), + "Invalid Arrow variant field %s: metadata child must be binary, found %s", + field.getName(), + metadata.getType()); + + Field value = findChild(field, "value"); + if (value != null) { + Preconditions.checkArgument( + value.isNullable(), 
+ "Invalid Arrow variant field %s: value child must be nullable", + field.getName()); + Preconditions.checkArgument( + isBinaryLike(value.getType()), + "Invalid Arrow variant field %s: value child must be binary, found %s", + field.getName(), + value.getType()); + } + + Field typedValue = findChild(field, "typed_value"); + if (typedValue != null) { + Preconditions.checkArgument( + typedValue.isNullable(), + "Invalid Arrow variant field %s: typed_value child must be nullable", + field.getName()); + } + + Preconditions.checkArgument( + value != null || typedValue != null, + "Invalid Arrow variant field %s: expected value or typed_value child", + field.getName()); + } + + private static Field findChild(Field field, String name) { + for (Field child : field.getChildren()) { + if (name.equals(child.getName())) { + return child; + } + } + + return null; + } + + private static boolean isBinaryLike(ArrowType arrowType) { + return arrowType instanceof ArrowType.Binary || arrowType instanceof ArrowType.LargeBinary; + } + private static Type toIcebergTimestamp(ArrowType.Timestamp tsType) { boolean isNano = tsType.getUnit() == TimeUnit.NANOSECOND; if (tsType.getTimezone() == null) { diff --git a/vortex/src/test/java/org/apache/iceberg/vortex/TestVortexSchemas.java b/vortex/src/test/java/org/apache/iceberg/vortex/TestVortexSchemas.java new file mode 100644 index 000000000000..35eb20e2e556 --- /dev/null +++ b/vortex/src/test/java/org/apache/iceberg/vortex/TestVortexSchemas.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.vortex; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.List; +import java.util.Map; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.iceberg.Schema; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +class TestVortexSchemas { + private static final Map VARIANT_METADATA = + ImmutableMap.of( + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME, + VortexSchemas.VARIANT_EXTENSION_NAME, + ArrowType.ExtensionType.EXTENSION_METADATA_KEY_METADATA, + ""); + + @Test + void variantToArrowUsesCanonicalUnshreddedStorage() { + Schema icebergSchema = + new Schema( + required(1, "id", Types.LongType.get()), optional(2, "v", Types.VariantType.get())); + + Field variant = VortexSchemas.toArrowSchema(icebergSchema).findField("v"); + + assertThat(VortexSchemas.isVariantField(variant)).isTrue(); + assertThat(variant.isNullable()).isTrue(); + assertThat(variant.getType()).isEqualTo(ArrowType.Struct.INSTANCE); + assertThat(variant.getChildren()).hasSize(2); + + Field metadata = 
variant.getChildren().get(0); + assertThat(metadata.getName()).isEqualTo("metadata"); + assertThat(metadata.isNullable()).isFalse(); + assertThat(metadata.getType()).isEqualTo(ArrowType.Binary.INSTANCE); + + Field value = variant.getChildren().get(1); + assertThat(value.getName()).isEqualTo("value"); + assertThat(value.isNullable()).isTrue(); + assertThat(value.getType()).isEqualTo(ArrowType.Binary.INSTANCE); + } + + @Test + void variantFromArrowAcceptsTypedValueOnlyStorage() { + Field variant = + variantField( + "v", + true, + ImmutableList.of( + binaryField("metadata", false), + new Field( + "typed_value", + new FieldType(true, new ArrowType.Int(Integer.SIZE, true), null), + null))); + + Schema converted = + VortexSchemas.convert( + new org.apache.arrow.vector.types.pojo.Schema(ImmutableList.of(variant))); + + assertThat(converted.columns()).containsExactly(optional(0, "v", Types.VariantType.get())); + } + + @Test + void variantFromArrowRequiresMetadataChild() { + Field variant = variantField("v", true, ImmutableList.of(binaryField("value", true))); + + assertThatThrownBy( + () -> + VortexSchemas.convert( + new org.apache.arrow.vector.types.pojo.Schema(ImmutableList.of(variant)))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("metadata"); + } + + @Test + void variantFromArrowRequiresValueOrTypedValueChild() { + Field variant = variantField("v", true, ImmutableList.of(binaryField("metadata", false))); + + assertThatThrownBy( + () -> + VortexSchemas.convert( + new org.apache.arrow.vector.types.pojo.Schema(ImmutableList.of(variant)))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("value or typed_value"); + } + + @Test + void variantFromArrowRequiresNullableValueChild() { + Field variant = + variantField( + "v", + true, + ImmutableList.of(binaryField("metadata", false), binaryField("value", false))); + + assertThatThrownBy( + () -> + VortexSchemas.convert( + new 
org.apache.arrow.vector.types.pojo.Schema(ImmutableList.of(variant)))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("value child must be nullable"); + } + + @Test + void variantFromArrowRequiresNullableTypedValueChild() { + Field variant = + variantField( + "v", + true, + ImmutableList.of( + binaryField("metadata", false), + new Field( + "typed_value", + new FieldType(false, new ArrowType.Int(Integer.SIZE, true), null), + null))); + + assertThatThrownBy( + () -> + VortexSchemas.convert( + new org.apache.arrow.vector.types.pojo.Schema(ImmutableList.of(variant)))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("typed_value child must be nullable"); + } + + private static Field variantField(String name, boolean nullable, List children) { + return new Field( + name, new FieldType(nullable, ArrowType.Struct.INSTANCE, null, VARIANT_METADATA), children); + } + + private static Field binaryField(String name, boolean nullable) { + return new Field(name, new FieldType(nullable, ArrowType.Binary.INSTANCE, null), null); + } +}