Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions jdbc-core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,11 @@ dependencies {
tasks.named("compileJava") {
dependsOn(":jdbc-grpc:compileJava")
}

// Iceberg sets -Darrow.enable_null_check_for_get=false on the JVM. With that flag off, Arrow's
// VarCharVector/VarBinaryVector/FixedSizeBinaryVector .get(int) skip the validity check and return
// stale buffer bytes for null rows instead of null. Run the test suite under that condition so
// the existing null-handling assertions cover the Iceberg scenario.
tasks.test {
systemProperty("arrow.enable_null_check_for_get", "false")
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public Class<?> getObjectClass() {
}

protected List<?> getListObject(VectorProvider vectorProvider) throws SQLException {
// ListVector/LargeListVector.getObject is validity-correct (not gated by
// arrow.enable_null_check_for_get), so a null return reliably indicates a null row.
List<?> object = vectorProvider.getObject(getCurrentRow());
this.wasNull = object == null;
return object;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import com.salesforce.datacloud.jdbc.core.accessor.QueryJDBCAccessor;
import java.nio.charset.StandardCharsets;
import java.util.function.IntPredicate;
import java.util.function.IntSupplier;
import org.apache.arrow.vector.FixedSizeBinaryVector;
import org.apache.arrow.vector.LargeVarBinaryVector;
Expand All @@ -18,30 +19,37 @@ private interface ByteArrayGetter {
}

private final ByteArrayGetter getter;
private final IntPredicate nullChecker;

public BinaryVectorAccessor(FixedSizeBinaryVector vector, IntSupplier currentRowSupplier) {
this(vector::get, currentRowSupplier);
this(vector::get, vector::isNull, currentRowSupplier);
}

public BinaryVectorAccessor(VarBinaryVector vector, IntSupplier currentRowSupplier) {
this(vector::get, currentRowSupplier);
this(vector::get, vector::isNull, currentRowSupplier);
}

public BinaryVectorAccessor(LargeVarBinaryVector vector, IntSupplier currentRowSupplier) {
this(vector::get, currentRowSupplier);
this(vector::get, vector::isNull, currentRowSupplier);
}

private BinaryVectorAccessor(ByteArrayGetter getter, IntSupplier currentRowSupplier) {
private BinaryVectorAccessor(ByteArrayGetter getter, IntPredicate nullChecker, IntSupplier currentRowSupplier) {
super(currentRowSupplier);
this.getter = getter;
this.nullChecker = nullChecker;
}

@Override
public byte[] getBytes() {
byte[] bytes = getter.get(getCurrentRow());
this.wasNull = bytes == null;

return bytes;
// Arrow's vector.get(int) skips the isSet check when arrow.enable_null_check_for_get=false
// (e.g. set by Iceberg on the JVM), so a null entry returns garbage rather than null.
// Check the validity buffer explicitly via isNull(int).
final int row = getCurrentRow();
this.wasNull = nullChecker.test(row);
if (this.wasNull) {
return null;
}
return getter.get(row);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public Class<?> getObjectClass() {

@Override
public BigDecimal getBigDecimal() {
// DecimalVector.getObject is validity-correct (not gated by arrow.enable_null_check_for_get),
// so a null return reliably indicates a null row even when Iceberg disables that flag.
final BigDecimal value = vector.getObject(getCurrentRow());
this.wasNull = value == null;
return value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import com.salesforce.datacloud.jdbc.core.accessor.QueryJDBCAccessor;
import java.nio.charset.StandardCharsets;
import java.util.function.IntPredicate;
import java.util.function.IntSupplier;
import org.apache.arrow.vector.LargeVarCharVector;
import org.apache.arrow.vector.VarCharVector;
Expand All @@ -18,18 +19,20 @@ interface Getter {
}

private final Getter getter;
private final IntPredicate nullChecker;

public VarCharVectorAccessor(VarCharVector vector, IntSupplier currenRowSupplier) {
this(vector::get, currenRowSupplier);
this(vector::get, vector::isNull, currenRowSupplier);
}

public VarCharVectorAccessor(LargeVarCharVector vector, IntSupplier currenRowSupplier) {
this(vector::get, currenRowSupplier);
this(vector::get, vector::isNull, currenRowSupplier);
}

VarCharVectorAccessor(Getter getter, IntSupplier currentRowSupplier) {
VarCharVectorAccessor(Getter getter, IntPredicate nullChecker, IntSupplier currentRowSupplier) {
super(currentRowSupplier);
this.getter = getter;
this.nullChecker = nullChecker;
}

@Override
Expand All @@ -39,9 +42,15 @@ public Class<?> getObjectClass() {

@Override
public byte[] getBytes() {
final byte[] bytes = this.getter.get(getCurrentRow());
this.wasNull = bytes == null;
return this.getter.get(getCurrentRow());
// Arrow's vector.get(int) skips the isSet check when arrow.enable_null_check_for_get=false
// (e.g. set by Iceberg on the JVM), so a null entry returns an empty byte[] rather than null.
// Check the validity buffer explicitly via isNull(int).
final int row = getCurrentRow();
this.wasNull = nullChecker.test(row);
if (this.wasNull) {
return null;
}
return this.getter.get(row);
}

@Override
Expand Down
Loading