1212// See the License for the specific language governing permissions and
1313// limitations under the License.
1414
15+ #include < Columns/ColumnFixedString.h>
16+ #include < Columns/ColumnString.h>
1517#include < Columns/ColumnsNumber.h>
18+ #include < DataTypes/DataTypeFixedString.h>
1619#include < DataTypes/DataTypeNullable.h>
20+ #include < DataTypes/DataTypeString.h>
1721#include < DataTypes/DataTypesNumber.h>
1822#include < Functions/FunctionFactory.h>
1923#include < Functions/registerFunctions.h>
@@ -47,6 +51,10 @@ constexpr auto * mixed_build_value_name = "build_multi_v";
4751constexpr auto * full_other_cond_name = " full_other_cond" ;
4852constexpr auto * full_flag_helper_name = " __full_flag_helper" ;
4953
54+ Block makeOuterProbeSampleBlock (const DataTypePtr & key_type, bool include_filter);
55+ Block makeOuterBuildSampleBlock (const DataTypePtr & key_type, bool include_filter);
56+ void prepareAndFinalizeMixedJoin (const JoinPtr & join, const DataTypePtr & key_type);
57+
5058void ensureFunctionsRegistered ()
5159{
5260 static std::once_flag once;
@@ -130,10 +138,10 @@ JoinNonEqualConditions makeOuterJoinSideConditions(
130138
131139JoinPtr makeOuterJoinTestJoin (
132140 ASTTableJoin::Kind kind,
141+ const DataTypePtr & key_type,
133142 const JoinNonEqualConditions & non_equal_conditions = JoinNonEqualConditions{},
134143 const String & flag_helper_name = " " )
135144{
136- auto nullable_int_type = makeNullable (std::make_shared<DataTypeInt32>());
137145 auto nullable_value_type = makeNullable (std::make_shared<DataTypeInt32>());
138146 SpillConfig build_spill_config (" /tmp" , " join_null_eq_build" , 0 , 0 , 0 , nullptr );
139147 SpillConfig probe_spill_config (" /tmp" , " join_null_eq_probe" , 0 , 0 , 0 , nullptr );
@@ -149,9 +157,9 @@ JoinPtr makeOuterJoinTestJoin(
149157 probe_spill_config,
150158 RestoreConfig{1 , 0 , 0 },
151159 NamesAndTypes{
152- {outer_probe_key_name, nullable_int_type },
160+ {outer_probe_key_name, key_type },
153161 {outer_probe_value_name, nullable_value_type},
154- {outer_build_key_name, nullable_int_type },
162+ {outer_build_key_name, key_type },
155163 {outer_build_value_name, nullable_value_type},
156164 },
157165 RegisterOperatorSpillContext{},
@@ -166,6 +174,18 @@ JoinPtr makeOuterJoinTestJoin(
166174 true );
167175}
168176
177+ JoinPtr makeOuterJoinTestJoin (
178+ ASTTableJoin::Kind kind,
179+ const JoinNonEqualConditions & non_equal_conditions = JoinNonEqualConditions{},
180+ const String & flag_helper_name = " " )
181+ {
182+ return makeOuterJoinTestJoin (
183+ kind,
184+ makeNullable (std::make_shared<DataTypeInt32>()),
185+ non_equal_conditions,
186+ flag_helper_name);
187+ }
188+
169189JoinPtr makeSemiJoinTestJoin (ASTTableJoin::Kind kind)
170190{
171191 auto nullable_int_type = makeNullable (std::make_shared<DataTypeInt32>());
@@ -242,10 +262,14 @@ JoinPtr makeMixedKeyJoin(const std::vector<UInt8> & is_null_eq)
242262
243263Block makeOuterProbeSampleBlock (bool include_filter = false )
244264{
245- auto nullable_int_type = makeNullable (std::make_shared<DataTypeInt32>());
265+ return makeOuterProbeSampleBlock (makeNullable (std::make_shared<DataTypeInt32>()), include_filter);
266+ }
267+
268+ Block makeOuterProbeSampleBlock (const DataTypePtr & key_type, bool include_filter = false )
269+ {
246270 auto int_type = std::make_shared<DataTypeInt32>();
247271 auto block = Block{
248- {nullable_int_type ->createColumn (), nullable_int_type , outer_probe_key_name},
272+ {key_type ->createColumn (), key_type , outer_probe_key_name},
249273 {int_type->createColumn (), int_type, outer_probe_value_name},
250274 };
251275 if (include_filter)
@@ -258,10 +282,14 @@ Block makeOuterProbeSampleBlock(bool include_filter = false)
258282
259283Block makeOuterBuildSampleBlock (bool include_filter = false )
260284{
261- auto nullable_int_type = makeNullable (std::make_shared<DataTypeInt32>());
285+ return makeOuterBuildSampleBlock (makeNullable (std::make_shared<DataTypeInt32>()), include_filter);
286+ }
287+
288+ Block makeOuterBuildSampleBlock (const DataTypePtr & key_type, bool include_filter = false )
289+ {
262290 auto int_type = std::make_shared<DataTypeInt32>();
263291 auto block = Block{
264- {nullable_int_type ->createColumn (), nullable_int_type , outer_build_key_name},
292+ {key_type ->createColumn (), key_type , outer_build_key_name},
265293 {int_type->createColumn (), int_type, outer_build_value_name},
266294 };
267295 if (include_filter)
@@ -282,11 +310,6 @@ Block makeMixedProbeSampleBlock(const DataTypePtr & key_type)
282310 };
283311}
284312
285- Block makeMixedProbeSampleBlock ()
286- {
287- return makeMixedProbeSampleBlock (makeNullable (std::make_shared<DataTypeInt32>()));
288- }
289-
290313Block makeMixedBuildSampleBlock (const DataTypePtr & key_type)
291314{
292315 auto int_type = std::make_shared<DataTypeInt32>();
@@ -297,14 +320,10 @@ Block makeMixedBuildSampleBlock(const DataTypePtr & key_type)
297320 };
298321}
299322
300- Block makeMixedBuildSampleBlock ()
301- {
302- return makeMixedBuildSampleBlock (makeNullable (std::make_shared<DataTypeInt32>()));
303- }
304-
305- ColumnPtr makeNullableInt32Column (std::initializer_list<std::optional<Int32>> values)
323+ template <typename T, typename ColumnType>
324+ ColumnPtr makeNullableNumberColumn (std::initializer_list<std::optional<T>> values)
306325{
307- auto nested = ColumnInt32 ::create ();
326+ auto nested = ColumnType ::create ();
308327 auto null_map = ColumnUInt8::create ();
309328 nested->reserve (values.size ());
310329 null_map->reserve (values.size ());
@@ -326,6 +345,60 @@ ColumnPtr makeNullableInt32Column(std::initializer_list<std::optional<Int32>> va
326345 return ColumnNullable::create (std::move (nested), std::move (null_map));
327346}
328347
348+ ColumnPtr makeNullableInt32Column (std::initializer_list<std::optional<Int32>> values)
349+ {
350+ return makeNullableNumberColumn<Int32, ColumnInt32>(values);
351+ }
352+
353+ ColumnPtr makeNullableInt64Column (std::initializer_list<std::optional<Int64>> values)
354+ {
355+ return makeNullableNumberColumn<Int64, ColumnInt64>(values);
356+ }
357+
358+ ColumnPtr makeNullableStringColumn (std::initializer_list<std::optional<String>> values)
359+ {
360+ auto nested = ColumnString::create ();
361+ auto null_map = ColumnUInt8::create ();
362+ null_map->reserve (values.size ());
363+ auto & null_map_data = null_map->getData ();
364+ for (const auto & value : values)
365+ {
366+ if (value.has_value ())
367+ {
368+ nested->insertData (value->data (), value->size ());
369+ null_map_data.push_back (0 );
370+ }
371+ else
372+ {
373+ nested->insertData (" " , 0 );
374+ null_map_data.push_back (1 );
375+ }
376+ }
377+ return ColumnNullable::create (std::move (nested), std::move (null_map));
378+ }
379+
380+ ColumnPtr makeNullableFixedStringColumn (size_t string_size, std::initializer_list<std::optional<String>> values)
381+ {
382+ auto nested = ColumnFixedString::create (string_size);
383+ auto null_map = ColumnUInt8::create ();
384+ null_map->reserve (values.size ());
385+ auto & null_map_data = null_map->getData ();
386+ for (const auto & value : values)
387+ {
388+ if (value.has_value ())
389+ {
390+ nested->insertData (value->data (), value->size ());
391+ null_map_data.push_back (0 );
392+ }
393+ else
394+ {
395+ nested->insertData (" " , 0 );
396+ null_map_data.push_back (1 );
397+ }
398+ }
399+ return ColumnNullable::create (std::move (nested), std::move (null_map));
400+ }
401+
329402ColumnPtr makeUInt8Column (std::initializer_list<UInt8> values)
330403{
331404 auto column = ColumnUInt8::create ();
@@ -390,6 +463,22 @@ void prepareAndFinalizeOuterJoin(
390463 });
391464}
392465
466+ void prepareAndFinalizeOuterJoin (
467+ const JoinPtr & join,
468+ const DataTypePtr & key_type,
469+ bool include_probe_filter = false ,
470+ bool include_build_filter = false )
471+ {
472+ join->initBuild (makeOuterBuildSampleBlock (key_type, include_build_filter), 1 );
473+ join->initProbe (makeOuterProbeSampleBlock (key_type, include_probe_filter), 1 );
474+ join->finalize (Names{
475+ outer_probe_key_name,
476+ outer_probe_value_name,
477+ outer_build_key_name,
478+ outer_build_value_name,
479+ });
480+ }
481+
393482void prepareAndFinalizeSemiJoin (const JoinPtr & join)
394483{
395484 join->initBuild (makeOuterBuildSampleBlock (), 1 );
@@ -402,8 +491,13 @@ void prepareAndFinalizeSemiJoin(const JoinPtr & join)
402491
403492void prepareAndFinalizeMixedJoin (const JoinPtr & join)
404493{
405- join->initBuild (makeMixedBuildSampleBlock (), 1 );
406- join->initProbe (makeMixedProbeSampleBlock (), 1 );
494+ prepareAndFinalizeMixedJoin (join, makeNullable (std::make_shared<DataTypeInt32>()));
495+ }
496+
497+ void prepareAndFinalizeMixedJoin (const JoinPtr & join, const DataTypePtr & key_type)
498+ {
499+ join->initBuild (makeMixedBuildSampleBlock (key_type), 1 );
500+ join->initProbe (makeMixedProbeSampleBlock (key_type), 1 );
407501 join->finalize (Names{
408502 mixed_probe_key1_name,
409503 mixed_probe_key2_name,
@@ -433,6 +527,114 @@ TEST(JoinNullEqTest, NullableMixedNullEqKeysCanUseNullableKeys256JoinMapMethod)
433527 ASSERT_EQ (join->getJoinMapMethod (), JoinMapMethod::nullable_keys256);
434528}
435529
530+ TEST (JoinNullEqTest, NullableMixedNullEqKeys256JoinProducesJoinedRow)
531+ {
532+ auto nullable_int64_type = makeNullable (std::make_shared<DataTypeInt64>());
533+ auto int_type = std::make_shared<DataTypeInt32>();
534+ auto join = makeMixedKeyJoin ({1 , 1 }, nullable_int64_type);
535+ prepareAndFinalizeMixedJoin (join, nullable_int64_type);
536+
537+ ASSERT_EQ (join->getJoinMapMethod (), JoinMapMethod::nullable_keys256);
538+
539+ join->setInitActiveBuildThreads ();
540+ join->insertFromBlock (
541+ Block{
542+ {makeNullableInt64Column ({std::nullopt }), nullable_int64_type, mixed_build_key1_name},
543+ {makeNullableInt64Column ({11 }), nullable_int64_type, mixed_build_key2_name},
544+ {makeInt32Column ({100 }), int_type, mixed_build_value_name},
545+ },
546+ 0 );
547+ ASSERT_TRUE (join->finishOneBuild (0 ));
548+ join->finalizeBuild ();
549+
550+ ProbeProcessInfo probe_process_info (1024 , 0 );
551+ probe_process_info.resetBlock (Block{
552+ {makeNullableInt64Column ({std::nullopt }), nullable_int64_type, mixed_probe_key1_name},
553+ {makeNullableInt64Column ({11 }), nullable_int64_type, mixed_probe_key2_name},
554+ {makeInt32Column ({10 }), int_type, mixed_probe_value_name},
555+ });
556+ Block probe_result = join->joinBlock (probe_process_info);
557+
558+ ASSERT_EQ (probe_result.rows (), 1 );
559+ EXPECT_EQ (getInt32Value (probe_result, mixed_probe_value_name, 0 ), 10 );
560+ EXPECT_EQ (getInt32Value (probe_result, mixed_build_value_name, 0 ), 100 );
561+ }
562+
563+ TEST (JoinNullEqTest, NullableStringNullEqFallsBackToSerializedJoinMapMethod)
564+ {
565+ auto nullable_string_type = makeNullable (std::make_shared<DataTypeString>());
566+ auto int_type = std::make_shared<DataTypeInt32>();
567+ auto join = makeOuterJoinTestJoin (ASTTableJoin::Kind::Inner, nullable_string_type);
568+ prepareAndFinalizeOuterJoin (join, nullable_string_type);
569+
570+ ASSERT_EQ (join->getJoinMapMethod (), JoinMapMethod::serialized);
571+
572+ join->setInitActiveBuildThreads ();
573+ join->insertFromBlock (
574+ Block{
575+ {makeNullableStringColumn ({std::nullopt , " alpha" }), nullable_string_type, outer_build_key_name},
576+ {makeInt32Column ({100 , 200 }), int_type, outer_build_value_name},
577+ },
578+ 0 );
579+ ASSERT_TRUE (join->finishOneBuild (0 ));
580+ join->finalizeBuild ();
581+
582+ ProbeProcessInfo probe_process_info (1024 , 0 );
583+ probe_process_info.resetBlock (Block{
584+ {makeNullableStringColumn ({std::nullopt , " alpha" , " beta" }), nullable_string_type, outer_probe_key_name},
585+ {makeInt32Column ({10 , 20 , 30 }), int_type, outer_probe_value_name},
586+ });
587+ Block probe_result = join->joinBlock (probe_process_info);
588+
589+ ASSERT_EQ (probe_result.rows (), 2 );
590+ EXPECT_EQ (getInt32Value (probe_result, outer_probe_value_name, 0 ), 10 );
591+ EXPECT_EQ (getInt32Value (probe_result, outer_build_value_name, 0 ), 100 );
592+ EXPECT_EQ (getInt32Value (probe_result, outer_probe_value_name, 1 ), 20 );
593+ EXPECT_EQ (getInt32Value (probe_result, outer_build_value_name, 1 ), 200 );
594+ }
595+
596+ TEST (JoinNullEqTest, OversizedNullableFixedKeysFallBackToSerializedJoinMapMethod)
597+ {
598+ constexpr size_t fixed_string_size = 16 ;
599+ auto nullable_fixed_string_type = makeNullable (std::make_shared<DataTypeFixedString>(fixed_string_size));
600+ auto int_type = std::make_shared<DataTypeInt32>();
601+ auto join = makeMixedKeyJoin ({1 , 1 }, nullable_fixed_string_type);
602+ prepareAndFinalizeMixedJoin (join, nullable_fixed_string_type);
603+
604+ ASSERT_EQ (join->getJoinMapMethod (), JoinMapMethod::serialized);
605+
606+ join->setInitActiveBuildThreads ();
607+ join->insertFromBlock (
608+ Block{
609+ {makeNullableFixedStringColumn (fixed_string_size, {std::nullopt }),
610+ nullable_fixed_string_type,
611+ mixed_build_key1_name},
612+ {makeNullableFixedStringColumn (fixed_string_size, {" abcdefghijklmnop" }),
613+ nullable_fixed_string_type,
614+ mixed_build_key2_name},
615+ {makeInt32Column ({100 }), int_type, mixed_build_value_name},
616+ },
617+ 0 );
618+ ASSERT_TRUE (join->finishOneBuild (0 ));
619+ join->finalizeBuild ();
620+
621+ ProbeProcessInfo probe_process_info (1024 , 0 );
622+ probe_process_info.resetBlock (Block{
623+ {makeNullableFixedStringColumn (fixed_string_size, {std::nullopt , std::nullopt }),
624+ nullable_fixed_string_type,
625+ mixed_probe_key1_name},
626+ {makeNullableFixedStringColumn (fixed_string_size, {" abcdefghijklmnop" , " qrstuvwxyzabcdef" }),
627+ nullable_fixed_string_type,
628+ mixed_probe_key2_name},
629+ {makeInt32Column ({10 , 20 }), int_type, mixed_probe_value_name},
630+ });
631+ Block probe_result = join->joinBlock (probe_process_info);
632+
633+ ASSERT_EQ (probe_result.rows (), 1 );
634+ EXPECT_EQ (getInt32Value (probe_result, mixed_probe_value_name, 0 ), 10 );
635+ EXPECT_EQ (getInt32Value (probe_result, mixed_build_value_name, 0 ), 100 );
636+ }
637+
436638TEST (JoinNullEqTest, DefaultMethodSelectionRemainsForOtherCases)
437639{
438640 auto nullable_int_type = makeNullable (std::make_shared<DataTypeInt32>());
0 commit comments