Skip to content

Commit 0187d5b

Browse files
pramodsatya and meta-codesync[bot]
authored and committed
fix(cudf): Guard hash join build debug logging against empty inputs (#17221)
Summary: When the build side of a hash join has no data (e.g. an anti-join with an empty result), `inputs_` is empty. With debug logging enabled (`CudfConfig::getInstance().debugEnabled`), the logging in `CudfHashJoinBuild::doNoMoreInput()` accesses `inputs_[0]` without bounds checking, which can result in a `SIGSEGV`. This fixes TPC-H Q16, which uses NOT IN (an anti-join) where the build side can be empty for some partitions.
Pull Request resolved: #17221
Reviewed By: kagamiori
Differential Revision: D101671299
Pulled By: peterenescu
fbshipit-source-id: 0971d85671e67810789d8148ce3891e757faf4f7
1 parent dc9e9f3 commit 0187d5b

2 files changed

Lines changed: 24 additions & 4 deletions

File tree

velox/experimental/cudf/exec/CudfHashJoin.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -204,9 +204,11 @@ void CudfHashJoinBuild::doNoMoreInput() {
204204
};
205205

206206
if (CudfConfig::getInstance().debugEnabled) {
207-
VLOG(1) << "CudfHashJoinBuild: build batches";
208-
VLOG(1) << "Build batches number of columns: "
209-
<< inputs_[0]->getTableView().num_columns();
207+
VLOG(1) << "CudfHashJoinBuild: build batches count: " << inputs_.size();
208+
if (!inputs_.empty()) {
209+
VLOG(1) << "Build batches number of columns: "
210+
<< inputs_[0]->getTableView().num_columns();
211+
}
210212
for (auto i = 0; i < inputs_.size(); i++) {
211213
VLOG(1) << "Build batch " << i
212214
<< ": number of rows: " << inputs_[i]->getTableView().num_rows();
@@ -224,7 +226,7 @@ void CudfHashJoinBuild::doNoMoreInput() {
224226
for (auto const& tbl : tbls) {
225227
VELOX_CHECK_NOT_NULL(tbl);
226228
}
227-
if (CudfConfig::getInstance().debugEnabled) {
229+
if (CudfConfig::getInstance().debugEnabled && !tbls.empty()) {
228230
VLOG(1) << "Build table number of columns: " << tbls[0]->num_columns();
229231
for (auto i = 0; i < tbls.size(); i++) {
230232
VLOG(1) << "Build table " << i

velox/experimental/cudf/tests/HashJoinTest.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8493,4 +8493,22 @@ DEBUG_ONLY_TEST_F(HashJoinTest, hashTableCleanupAfterProbeFinish) {
84938493
ASSERT_TRUE(tableEmpty);
84948494
}
84958495

8496+
TEST_F(HashJoinTest, emptyBuildWithDebugEnabled) {
8497+
cudf_velox::CudfConfig::getInstance().debugEnabled = true;
8498+
SCOPE_EXIT {
8499+
cudf_velox::CudfConfig::getInstance().debugEnabled = false;
8500+
};
8501+
8502+
HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get())
8503+
.injectSpill(false)
8504+
.numDrivers(1)
8505+
.keyTypes({BIGINT()})
8506+
.probeVectors(100, 5)
8507+
.buildVectors(0, 5)
8508+
.referenceQuery(
8509+
"SELECT t_k0, t_data, u_k0, u_data FROM t, u WHERE t_k0 = u_k0")
8510+
.checkSpillStats(false)
8511+
.run();
8512+
}
8513+
84968514
} // namespace

0 commit comments

Comments
 (0)