diff --git a/jdbc-core/build.gradle.kts b/jdbc-core/build.gradle.kts index 42fad85b..7bdbaf5a 100644 --- a/jdbc-core/build.gradle.kts +++ b/jdbc-core/build.gradle.kts @@ -44,3 +44,11 @@ dependencies { tasks.named("compileJava") { dependsOn(":jdbc-grpc:compileJava") } + +// Iceberg sets -Darrow.enable_null_check_for_get=false on the JVM. With that flag off, Arrow's +// VarCharVector/VarBinaryVector/FixedSizeBinaryVector .get(int) skip the validity check and return +// stale buffer bytes for null rows instead of null. Run the test suite under that condition so +// the existing null-handling assertions cover the Iceberg scenario. +tasks.test { + systemProperty("arrow.enable_null_check_for_get", "false") +} diff --git a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BaseListVectorAccessor.java b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BaseListVectorAccessor.java index e3624ae2..8a5ea47b 100644 --- a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BaseListVectorAccessor.java +++ b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BaseListVectorAccessor.java @@ -32,6 +32,8 @@ public Class getObjectClass() { } protected List getListObject(VectorProvider vectorProvider) throws SQLException { + // ListVector/LargeListVector.getObject is validity-correct (not gated by + // arrow.enable_null_check_for_get), so a null return reliably indicates a null row. List object = vectorProvider.getObject(getCurrentRow()); this.wasNull = object == null; return object; diff --git a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BinaryVectorAccessor.java b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BinaryVectorAccessor.java index 9d85d510..374ff44e 100644 --- a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BinaryVectorAccessor.java +++ b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/BinaryVectorAccessor.java @@ -6,6 +6,7 @@ import com.salesforce.datacloud.jdbc.core.accessor.QueryJDBCAccessor; import java.nio.charset.StandardCharsets; +import java.util.function.IntPredicate; import java.util.function.IntSupplier; import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.arrow.vector.LargeVarBinaryVector; @@ -18,30 +19,37 @@ private interface ByteArrayGetter { } private final ByteArrayGetter getter; + private final IntPredicate nullChecker; public BinaryVectorAccessor(FixedSizeBinaryVector vector, IntSupplier currentRowSupplier) { - this(vector::get, currentRowSupplier); + this(vector::get, vector::isNull, currentRowSupplier); } public BinaryVectorAccessor(VarBinaryVector vector, IntSupplier currentRowSupplier) { - this(vector::get, currentRowSupplier); + this(vector::get, vector::isNull, currentRowSupplier); } public BinaryVectorAccessor(LargeVarBinaryVector vector, IntSupplier currentRowSupplier) { - this(vector::get, currentRowSupplier); + this(vector::get, vector::isNull, currentRowSupplier); } - private BinaryVectorAccessor(ByteArrayGetter getter, IntSupplier currentRowSupplier) { + private BinaryVectorAccessor(ByteArrayGetter getter, IntPredicate nullChecker, IntSupplier currentRowSupplier) { super(currentRowSupplier); this.getter = getter; + this.nullChecker = nullChecker; } @Override public byte[] getBytes() { - byte[] bytes = getter.get(getCurrentRow()); - this.wasNull = bytes == null; - - return bytes; + // Arrow's vector.get(int) skips the isSet check when arrow.enable_null_check_for_get=false + // (e.g. set by Iceberg on the JVM), so a null entry returns garbage rather than null. + // Check the validity buffer explicitly via isNull(int). + final int row = getCurrentRow(); + this.wasNull = nullChecker.test(row); + if (this.wasNull) { + return null; + } + return getter.get(row); } @Override diff --git a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/DecimalVectorAccessor.java b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/DecimalVectorAccessor.java index b00fb8f9..4fe9c706 100644 --- a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/DecimalVectorAccessor.java +++ b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/DecimalVectorAccessor.java @@ -25,6 +25,8 @@ public Class getObjectClass() { @Override public BigDecimal getBigDecimal() { + // DecimalVector.getObject is validity-correct (not gated by arrow.enable_null_check_for_get), + // so a null return reliably indicates a null row even when Iceberg disables that flag. final BigDecimal value = vector.getObject(getCurrentRow()); this.wasNull = value == null; return value; diff --git a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/VarCharVectorAccessor.java b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/VarCharVectorAccessor.java index 0eacb249..f6175f56 100644 --- a/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/VarCharVectorAccessor.java +++ b/jdbc-core/src/main/java/com/salesforce/datacloud/jdbc/core/accessor/impl/VarCharVectorAccessor.java @@ -6,6 +6,7 @@ import com.salesforce.datacloud.jdbc.core.accessor.QueryJDBCAccessor; import java.nio.charset.StandardCharsets; +import java.util.function.IntPredicate; import java.util.function.IntSupplier; import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.VarCharVector; @@ -18,18 +19,20 @@ interface Getter { } private final Getter getter; + private final IntPredicate nullChecker; public VarCharVectorAccessor(VarCharVector vector, IntSupplier currenRowSupplier) { - this(vector::get, currenRowSupplier); + this(vector::get, vector::isNull, currenRowSupplier); } public VarCharVectorAccessor(LargeVarCharVector vector, IntSupplier currenRowSupplier) { - this(vector::get, currenRowSupplier); + this(vector::get, vector::isNull, currenRowSupplier); } - VarCharVectorAccessor(Getter getter, IntSupplier currentRowSupplier) { + VarCharVectorAccessor(Getter getter, IntPredicate nullChecker, IntSupplier currentRowSupplier) { super(currentRowSupplier); this.getter = getter; + this.nullChecker = nullChecker; } @Override @@ -39,9 +42,15 @@ public Class getObjectClass() { @Override public byte[] getBytes() { - final byte[] bytes = this.getter.get(getCurrentRow()); - this.wasNull = bytes == null; - return this.getter.get(getCurrentRow()); + // Arrow's vector.get(int) skips the isSet check when arrow.enable_null_check_for_get=false + // (e.g. set by Iceberg on the JVM), so a null entry returns an empty byte[] rather than null. + // Check the validity buffer explicitly via isNull(int). + final int row = getCurrentRow(); + this.wasNull = nullChecker.test(row); + if (this.wasNull) { + return null; + } + return this.getter.get(row); } @Override