Skip to content

Commit a0f963c

Browse files
committed
test: cover null-eq join key paths
1 parent 3683cb8 commit a0f963c

File tree

2 files changed

+224
-22
lines changed

2 files changed

+224
-22
lines changed

contrib/tipb

Submodule tipb updated from 3f0090f to fe7badb

dbms/src/Interpreters/tests/gtest_join_null_eq.cpp

Lines changed: 223 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include <Columns/ColumnFixedString.h>
16+
#include <Columns/ColumnString.h>
1517
#include <Columns/ColumnsNumber.h>
18+
#include <DataTypes/DataTypeFixedString.h>
1619
#include <DataTypes/DataTypeNullable.h>
20+
#include <DataTypes/DataTypeString.h>
1721
#include <DataTypes/DataTypesNumber.h>
1822
#include <Functions/FunctionFactory.h>
1923
#include <Functions/registerFunctions.h>
@@ -47,6 +51,10 @@ constexpr auto * mixed_build_value_name = "build_multi_v";
4751
constexpr auto * full_other_cond_name = "full_other_cond";
4852
constexpr auto * full_flag_helper_name = "__full_flag_helper";
4953

54+
// Forward declarations of the key-type-parameterised helpers. The overloads
// that default to a Nullable(Int32) key are defined earlier in the file than
// these parameterised versions and delegate to them, so the declarations must
// be visible first.
Block makeOuterProbeSampleBlock(const DataTypePtr & key_type, bool include_filter);
55+
Block makeOuterBuildSampleBlock(const DataTypePtr & key_type, bool include_filter);
56+
void prepareAndFinalizeMixedJoin(const JoinPtr & join, const DataTypePtr & key_type);
57+
5058
void ensureFunctionsRegistered()
5159
{
5260
static std::once_flag once;
@@ -130,10 +138,10 @@ JoinNonEqualConditions makeOuterJoinSideConditions(
130138

131139
JoinPtr makeOuterJoinTestJoin(
132140
ASTTableJoin::Kind kind,
141+
const DataTypePtr & key_type,
133142
const JoinNonEqualConditions & non_equal_conditions = JoinNonEqualConditions{},
134143
const String & flag_helper_name = "")
135144
{
136-
auto nullable_int_type = makeNullable(std::make_shared<DataTypeInt32>());
137145
auto nullable_value_type = makeNullable(std::make_shared<DataTypeInt32>());
138146
SpillConfig build_spill_config("/tmp", "join_null_eq_build", 0, 0, 0, nullptr);
139147
SpillConfig probe_spill_config("/tmp", "join_null_eq_probe", 0, 0, 0, nullptr);
@@ -149,9 +157,9 @@ JoinPtr makeOuterJoinTestJoin(
149157
probe_spill_config,
150158
RestoreConfig{1, 0, 0},
151159
NamesAndTypes{
152-
{outer_probe_key_name, nullable_int_type},
160+
{outer_probe_key_name, key_type},
153161
{outer_probe_value_name, nullable_value_type},
154-
{outer_build_key_name, nullable_int_type},
162+
{outer_build_key_name, key_type},
155163
{outer_build_value_name, nullable_value_type},
156164
},
157165
RegisterOperatorSpillContext{},
@@ -166,6 +174,18 @@ JoinPtr makeOuterJoinTestJoin(
166174
true);
167175
}
168176

177+
JoinPtr makeOuterJoinTestJoin(
178+
ASTTableJoin::Kind kind,
179+
const JoinNonEqualConditions & non_equal_conditions = JoinNonEqualConditions{},
180+
const String & flag_helper_name = "")
181+
{
182+
return makeOuterJoinTestJoin(
183+
kind,
184+
makeNullable(std::make_shared<DataTypeInt32>()),
185+
non_equal_conditions,
186+
flag_helper_name);
187+
}
188+
169189
JoinPtr makeSemiJoinTestJoin(ASTTableJoin::Kind kind)
170190
{
171191
auto nullable_int_type = makeNullable(std::make_shared<DataTypeInt32>());
@@ -242,10 +262,14 @@ JoinPtr makeMixedKeyJoin(const std::vector<UInt8> & is_null_eq)
242262

243263
// Default-key overload: forwards to the key-type-aware
// makeOuterProbeSampleBlock with a Nullable(Int32) key type.
Block makeOuterProbeSampleBlock(bool include_filter = false)
{
    const DataTypePtr default_key_type = makeNullable(std::make_shared<DataTypeInt32>());
    return makeOuterProbeSampleBlock(default_key_type, include_filter);
}
267+
268+
Block makeOuterProbeSampleBlock(const DataTypePtr & key_type, bool include_filter = false)
269+
{
246270
auto int_type = std::make_shared<DataTypeInt32>();
247271
auto block = Block{
248-
{nullable_int_type->createColumn(), nullable_int_type, outer_probe_key_name},
272+
{key_type->createColumn(), key_type, outer_probe_key_name},
249273
{int_type->createColumn(), int_type, outer_probe_value_name},
250274
};
251275
if (include_filter)
@@ -258,10 +282,14 @@ Block makeOuterProbeSampleBlock(bool include_filter = false)
258282

259283
// Default-key overload: forwards to the key-type-aware
// makeOuterBuildSampleBlock with a Nullable(Int32) key type.
Block makeOuterBuildSampleBlock(bool include_filter = false)
{
    const DataTypePtr default_key_type = makeNullable(std::make_shared<DataTypeInt32>());
    return makeOuterBuildSampleBlock(default_key_type, include_filter);
}
287+
288+
Block makeOuterBuildSampleBlock(const DataTypePtr & key_type, bool include_filter = false)
289+
{
262290
auto int_type = std::make_shared<DataTypeInt32>();
263291
auto block = Block{
264-
{nullable_int_type->createColumn(), nullable_int_type, outer_build_key_name},
292+
{key_type->createColumn(), key_type, outer_build_key_name},
265293
{int_type->createColumn(), int_type, outer_build_value_name},
266294
};
267295
if (include_filter)
@@ -282,11 +310,6 @@ Block makeMixedProbeSampleBlock(const DataTypePtr & key_type)
282310
};
283311
}
284312

285-
Block makeMixedProbeSampleBlock()
286-
{
287-
return makeMixedProbeSampleBlock(makeNullable(std::make_shared<DataTypeInt32>()));
288-
}
289-
290313
Block makeMixedBuildSampleBlock(const DataTypePtr & key_type)
291314
{
292315
auto int_type = std::make_shared<DataTypeInt32>();
@@ -297,14 +320,10 @@ Block makeMixedBuildSampleBlock(const DataTypePtr & key_type)
297320
};
298321
}
299322

300-
Block makeMixedBuildSampleBlock()
301-
{
302-
return makeMixedBuildSampleBlock(makeNullable(std::make_shared<DataTypeInt32>()));
303-
}
304-
305-
ColumnPtr makeNullableInt32Column(std::initializer_list<std::optional<Int32>> values)
323+
template <typename T, typename ColumnType>
324+
ColumnPtr makeNullableNumberColumn(std::initializer_list<std::optional<T>> values)
306325
{
307-
auto nested = ColumnInt32::create();
326+
auto nested = ColumnType::create();
308327
auto null_map = ColumnUInt8::create();
309328
nested->reserve(values.size());
310329
null_map->reserve(values.size());
@@ -326,6 +345,60 @@ ColumnPtr makeNullableInt32Column(std::initializer_list<std::optional<Int32>> va
326345
return ColumnNullable::create(std::move(nested), std::move(null_map));
327346
}
328347

348+
// Builds a Nullable(Int32) column from the given optional values by
// instantiating the generic makeNullableNumberColumn helper.
ColumnPtr makeNullableInt32Column(std::initializer_list<std::optional<Int32>> values)
{
    using ValueType = Int32;
    using NestedColumnType = ColumnInt32;
    return makeNullableNumberColumn<ValueType, NestedColumnType>(values);
}
352+
353+
// Builds a Nullable(Int64) column from the given optional values by
// instantiating the generic makeNullableNumberColumn helper.
ColumnPtr makeNullableInt64Column(std::initializer_list<std::optional<Int64>> values)
{
    using ValueType = Int64;
    using NestedColumnType = ColumnInt64;
    return makeNullableNumberColumn<ValueType, NestedColumnType>(values);
}
357+
358+
// Builds a nullable string column from a list of optional strings.
// For every entry one row is appended to the nested ColumnString and one flag
// to the null map: present values are copied with flag 0; for std::nullopt an
// empty payload is inserted as a placeholder with flag 1.
ColumnPtr makeNullableStringColumn(std::initializer_list<std::optional<String>> values)
{
    auto strings = ColumnString::create();
    auto nulls = ColumnUInt8::create();
    nulls->reserve(values.size());
    auto & null_flags = nulls->getData();

    for (const auto & item : values)
    {
        const bool is_null = !item.has_value();
        if (is_null)
            strings->insertData("", 0);
        else
            strings->insertData(item->data(), item->size());
        null_flags.push_back(is_null ? 1 : 0);
    }

    return ColumnNullable::create(std::move(strings), std::move(nulls));
}
379+
380+
// Builds a nullable fixed-string column whose nested ColumnFixedString is
// created with `string_size`.
// For every entry one row is appended to the nested column and one flag to the
// null map: present values are inserted with flag 0; for std::nullopt an empty
// payload is inserted as a placeholder with flag 1.
ColumnPtr makeNullableFixedStringColumn(size_t string_size, std::initializer_list<std::optional<String>> values)
{
    auto fixed_strings = ColumnFixedString::create(string_size);
    auto nulls = ColumnUInt8::create();
    nulls->reserve(values.size());
    auto & null_flags = nulls->getData();

    for (const auto & item : values)
    {
        const bool is_null = !item.has_value();
        if (is_null)
            fixed_strings->insertData("", 0);
        else
            fixed_strings->insertData(item->data(), item->size());
        null_flags.push_back(is_null ? 1 : 0);
    }

    return ColumnNullable::create(std::move(fixed_strings), std::move(nulls));
}
401+
329402
ColumnPtr makeUInt8Column(std::initializer_list<UInt8> values)
330403
{
331404
auto column = ColumnUInt8::create();
@@ -390,6 +463,22 @@ void prepareAndFinalizeOuterJoin(
390463
});
391464
}
392465

466+
void prepareAndFinalizeOuterJoin(
467+
const JoinPtr & join,
468+
const DataTypePtr & key_type,
469+
bool include_probe_filter = false,
470+
bool include_build_filter = false)
471+
{
472+
join->initBuild(makeOuterBuildSampleBlock(key_type, include_build_filter), 1);
473+
join->initProbe(makeOuterProbeSampleBlock(key_type, include_probe_filter), 1);
474+
join->finalize(Names{
475+
outer_probe_key_name,
476+
outer_probe_value_name,
477+
outer_build_key_name,
478+
outer_build_value_name,
479+
});
480+
}
481+
393482
void prepareAndFinalizeSemiJoin(const JoinPtr & join)
394483
{
395484
join->initBuild(makeOuterBuildSampleBlock(), 1);
@@ -402,8 +491,13 @@ void prepareAndFinalizeSemiJoin(const JoinPtr & join)
402491

403492
void prepareAndFinalizeMixedJoin(const JoinPtr & join)
404493
{
405-
join->initBuild(makeMixedBuildSampleBlock(), 1);
406-
join->initProbe(makeMixedProbeSampleBlock(), 1);
494+
prepareAndFinalizeMixedJoin(join, makeNullable(std::make_shared<DataTypeInt32>()));
495+
}
496+
497+
void prepareAndFinalizeMixedJoin(const JoinPtr & join, const DataTypePtr & key_type)
498+
{
499+
join->initBuild(makeMixedBuildSampleBlock(key_type), 1);
500+
join->initProbe(makeMixedProbeSampleBlock(key_type), 1);
407501
join->finalize(Names{
408502
mixed_probe_key1_name,
409503
mixed_probe_key2_name,
@@ -433,6 +527,114 @@ TEST(JoinNullEqTest, NullableMixedNullEqKeysCanUseNullableKeys256JoinMapMethod)
433527
ASSERT_EQ(join->getJoinMapMethod(), JoinMapMethod::nullable_keys256);
434528
}
435529

530+
// Two Nullable(Int64) join keys, both flagged in the is_null_eq list given to
// makeMixedKeyJoin. After checking that the nullable_keys256 map method was
// selected, one build row and one probe row with keys (NULL, 11) are joined
// and exactly one output row carrying both value columns is expected.
TEST(JoinNullEqTest, NullableMixedNullEqKeys256JoinProducesJoinedRow)
{
    auto value_type = std::make_shared<DataTypeInt32>();
    auto key_type = makeNullable(std::make_shared<DataTypeInt64>());

    auto join = makeMixedKeyJoin({1, 1}, key_type);
    prepareAndFinalizeMixedJoin(join, key_type);

    ASSERT_EQ(join->getJoinMapMethod(), JoinMapMethod::nullable_keys256);

    // Build side: a single row with keys (NULL, 11) and value 100.
    join->setInitActiveBuildThreads();
    join->insertFromBlock(
        Block{
            {makeNullableInt64Column({std::nullopt}), key_type, mixed_build_key1_name},
            {makeNullableInt64Column({11}), key_type, mixed_build_key2_name},
            {makeInt32Column({100}), value_type, mixed_build_value_name},
        },
        0);
    ASSERT_TRUE(join->finishOneBuild(0));
    join->finalizeBuild();

    // Probe side: a single row with the same keys (NULL, 11) and value 10.
    ProbeProcessInfo probe_info(1024, 0);
    probe_info.resetBlock(Block{
        {makeNullableInt64Column({std::nullopt}), key_type, mixed_probe_key1_name},
        {makeNullableInt64Column({11}), key_type, mixed_probe_key2_name},
        {makeInt32Column({10}), value_type, mixed_probe_value_name},
    });
    Block joined = join->joinBlock(probe_info);

    ASSERT_EQ(joined.rows(), 1);
    EXPECT_EQ(getInt32Value(joined, mixed_probe_value_name, 0), 10);
    EXPECT_EQ(getInt32Value(joined, mixed_build_value_name, 0), 100);
}
562+
563+
// Nullable(String) join key on an inner join built via makeOuterJoinTestJoin.
// After checking that the serialized map method was selected, build rows
// {NULL, "alpha"} are probed with {NULL, "alpha", "beta"}; two output rows are
// expected, pairing probe values 10/20 with build values 100/200.
TEST(JoinNullEqTest, NullableStringNullEqFallsBackToSerializedJoinMapMethod)
{
    auto value_type = std::make_shared<DataTypeInt32>();
    auto key_type = makeNullable(std::make_shared<DataTypeString>());

    auto join = makeOuterJoinTestJoin(ASTTableJoin::Kind::Inner, key_type);
    prepareAndFinalizeOuterJoin(join, key_type);

    ASSERT_EQ(join->getJoinMapMethod(), JoinMapMethod::serialized);

    // Build side: keys {NULL, "alpha"} with values {100, 200}.
    join->setInitActiveBuildThreads();
    join->insertFromBlock(
        Block{
            {makeNullableStringColumn({std::nullopt, "alpha"}), key_type, outer_build_key_name},
            {makeInt32Column({100, 200}), value_type, outer_build_value_name},
        },
        0);
    ASSERT_TRUE(join->finishOneBuild(0));
    join->finalizeBuild();

    // Probe side: keys {NULL, "alpha", "beta"} with values {10, 20, 30}.
    ProbeProcessInfo probe_info(1024, 0);
    probe_info.resetBlock(Block{
        {makeNullableStringColumn({std::nullopt, "alpha", "beta"}), key_type, outer_probe_key_name},
        {makeInt32Column({10, 20, 30}), value_type, outer_probe_value_name},
    });
    Block joined = join->joinBlock(probe_info);

    ASSERT_EQ(joined.rows(), 2);
    // Row 0 is expected to be the NULL-key pair.
    EXPECT_EQ(getInt32Value(joined, outer_probe_value_name, 0), 10);
    EXPECT_EQ(getInt32Value(joined, outer_build_value_name, 0), 100);
    // Row 1 is expected to be the "alpha" pair.
    EXPECT_EQ(getInt32Value(joined, outer_probe_value_name, 1), 20);
    EXPECT_EQ(getInt32Value(joined, outer_build_value_name, 1), 200);
}
595+
596+
// Two Nullable(FixedString(16)) join keys, both flagged in the is_null_eq list
// given to makeMixedKeyJoin. After checking that the serialized map method was
// selected, one build row (NULL, "abcdefghijklmnop") is probed with two rows
// that differ only in the second key; exactly one output row is expected.
TEST(JoinNullEqTest, OversizedNullableFixedKeysFallBackToSerializedJoinMapMethod)
{
    constexpr size_t key_width = 16;
    auto value_type = std::make_shared<DataTypeInt32>();
    auto key_type = makeNullable(std::make_shared<DataTypeFixedString>(key_width));

    auto join = makeMixedKeyJoin({1, 1}, key_type);
    prepareAndFinalizeMixedJoin(join, key_type);

    ASSERT_EQ(join->getJoinMapMethod(), JoinMapMethod::serialized);

    // Build side: a single row with keys (NULL, "abcdefghijklmnop") and value 100.
    join->setInitActiveBuildThreads();
    join->insertFromBlock(
        Block{
            {makeNullableFixedStringColumn(key_width, {std::nullopt}), key_type, mixed_build_key1_name},
            {makeNullableFixedStringColumn(key_width, {"abcdefghijklmnop"}), key_type, mixed_build_key2_name},
            {makeInt32Column({100}), value_type, mixed_build_value_name},
        },
        0);
    ASSERT_TRUE(join->finishOneBuild(0));
    join->finalizeBuild();

    // Probe side: two rows with a NULL first key; only the first row's second
    // key equals the build row's second key.
    ProbeProcessInfo probe_info(1024, 0);
    probe_info.resetBlock(Block{
        {makeNullableFixedStringColumn(key_width, {std::nullopt, std::nullopt}), key_type, mixed_probe_key1_name},
        {makeNullableFixedStringColumn(key_width, {"abcdefghijklmnop", "qrstuvwxyzabcdef"}),
         key_type,
         mixed_probe_key2_name},
        {makeInt32Column({10, 20}), value_type, mixed_probe_value_name},
    });
    Block joined = join->joinBlock(probe_info);

    ASSERT_EQ(joined.rows(), 1);
    EXPECT_EQ(getInt32Value(joined, mixed_probe_value_name, 0), 10);
    EXPECT_EQ(getInt32Value(joined, mixed_build_value_name, 0), 100);
}
637+
436638
TEST(JoinNullEqTest, DefaultMethodSelectionRemainsForOtherCases)
437639
{
438640
auto nullable_int_type = makeNullable(std::make_shared<DataTypeInt32>());

0 commit comments

Comments
 (0)