Skip to content

Commit 53815c1

Browse files
ddl: Fix default value filling with finer granularity (#10682) (#10686)
close #10680 ddl: Fix default value filling with finer granularity - Tightens addDefaultValueToColumnIfPossible - For NOT NULL missing columns, force_decode=false now returns false unless there is an origin default. - This forces a schema sync instead of silently filling 0. - force_decode=true still fills a default value (best-effort). Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io> Co-authored-by: JaySon <tshent@qq.com>
1 parent e07cede commit 53815c1

File tree

8 files changed

+407
-18
lines changed

8 files changed

+407
-18
lines changed

dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
179179
{
180180
VersionColResolver<ReadList> version_col_resolver;
181181
version_col_resolver.check(block, schema_snapshot->column_defines->size());
182+
// The column_ids to read according to schema_snapshot, each elem is (column_id, block_pos)
182183
const auto & read_column_ids = schema_snapshot->getColId2BlockPosMap();
183184
const auto & pk_column_ids = schema_snapshot->pk_column_ids;
184185
const auto & pk_pos_map = schema_snapshot->pk_pos_map;
@@ -269,6 +270,8 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
269270
else
270271
{
271272
// Parse column value from encoded value
273+
// Decode the column_ids from `column_ids_iter` to `read_column_ids.end()`
274+
// and insert into `block` at position starting from `next_column_pos`
272275
if (!appendRowToBlock(
273276
*value_ptr,
274277
column_ids_iter,

dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,5 +692,178 @@ try
692692
}
693693
CATCH
694694

695+
TEST_F(RegionBlockReaderTest, ReadFromRegionDefaultValue)
696+
try
697+
{
698+
// With this table_info, column "c1" is "NOT NULL" and has no origin default
699+
TableInfo table_info_c1_not_null_no_origin_default(
700+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
701+
NullspaceID);
702+
703+
// With this table_info, column "c1" is "NOT NULL" and has no default value
704+
TableInfo table_info_c1_not_null_no_default_value(
705+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":4097,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
706+
NullspaceID);
707+
708+
// With this table_info, column "c1" has the "NOT NULL" flag and has origin default "-56083770"
709+
TableInfo table_info_c1_not_null_with_origin_default(
710+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"origin_default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
711+
NullspaceID);
712+
713+
// With this table_info, column "c1" has the "NOT NULL" flag and has origin default "-56083770", but the state is not public
714+
TableInfo table_info_c1_not_null_with_origin_default_non_public(
715+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"origin_default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":3,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
716+
NullspaceID);
717+
718+
// With this table_info, column "c1" does not have the "NOT NULL" flag
719+
TableInfo table_info_c1_nullable(
720+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})",
721+
NullspaceID);
722+
723+
RegionID region_id = 4;
724+
// the start_key and end_key for table_id = 68
725+
String region_start_key(bytesFromHexString("7480000000000002FF7C5F720000000000FA"));
726+
String region_end_key(bytesFromHexString("7480000000000002FF7D00000000000000F8"));
727+
auto region = RegionBench::makeRegionForRange(region_id, region_start_key, region_end_key);
728+
// the hex kv dump from RaftLog
729+
std::vector<std::tuple<std::string_view, std::string_view>> kvs = {
730+
{
731+
"7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA",
732+
"50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339"
733+
"1ABC85",
734+
},
735+
{
736+
"7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8",
737+
"50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339"
738+
"1ABC85",
739+
},
740+
{
741+
"7480000000000002FFA95F728000000000FF0000020000000000FAF9901806DE33FFE8",
742+
"509680B08E92FFF9B706762580000300000004050608000F001600BF720CDD400000000A0080000000010A00800000393C",
743+
},
744+
};
745+
for (const auto & [k, v] : kvs)
746+
{
747+
region->insertDebug("write", TiKVKey(bytesFromHexString(k)), TiKVValue(bytesFromHexString(v)));
748+
}
749+
750+
auto data_list_read = ReadRegionCommitCache(region, true);
751+
ASSERT_TRUE(data_list_read.has_value());
752+
753+
// Test with `table_info_c1_not_null_no_origin_default`
754+
auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_no_origin_default);
755+
{
756+
// force_decode=false can not decode because there are
757+
// missing value for column with not null flag.
758+
auto reader = RegionBlockReader(decoding_schema);
759+
Block res_block = createBlockSortByColumnID(decoding_schema);
760+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
761+
}
762+
{
763+
// force_decode=true can decode the block, and filling the default value for c1
764+
auto reader = RegionBlockReader(decoding_schema);
765+
Block res_block = createBlockSortByColumnID(decoding_schema);
766+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
767+
LOG_INFO(
768+
Logger::get(),
769+
"Decoded block:\n{}",
770+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
771+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
772+
// verify the default value is filled correctly
773+
ASSERT_COLUMN_EQ( //
774+
res_block.getByName("c1"),
775+
createColumn<Int64>({-2051270087, -2051270087, 0}));
776+
}
777+
778+
// Test with `table_info_c1_not_null_no_default_value`
779+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_no_default_value);
780+
{
781+
// force_decode=false can not decode because there are
782+
// missing value for column with not null flag.
783+
auto reader = RegionBlockReader(decoding_schema);
784+
Block res_block = createBlockSortByColumnID(decoding_schema);
785+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
786+
}
787+
{
788+
// force_decode=true can decode the block, and filling the default value for c1
789+
auto reader = RegionBlockReader(decoding_schema);
790+
Block res_block = createBlockSortByColumnID(decoding_schema);
791+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
792+
LOG_INFO(
793+
Logger::get(),
794+
"Decoded block:\n{}",
795+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
796+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
797+
// verify the default value is filled correctly
798+
ASSERT_COLUMN_EQ( //
799+
res_block.getByName("c1"),
800+
createColumn<Int64>({-2051270087, -2051270087, 0}));
801+
}
802+
803+
// Test with `table_info_c1_not_null_with_origin_default`
804+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_with_origin_default);
805+
{
806+
// force_decode=false can decode because origin_default exists and NoDefaultValue flag is not set
807+
// so RegionBlockReader can use origin_default to fill the missing value`
808+
auto reader = RegionBlockReader(decoding_schema);
809+
Block res_block = createBlockSortByColumnID(decoding_schema);
810+
ASSERT_TRUE(reader.read(res_block, *data_list_read, false));
811+
LOG_INFO(
812+
Logger::get(),
813+
"Decoded block:\n{}",
814+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
815+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
816+
// verify the default value is filled correctly
817+
ASSERT_COLUMN_EQ( //
818+
res_block.getByName("c1"),
819+
// the thrid elem is filled wih origin_default
820+
createColumn<Int64>({-2051270087, -2051270087, -56083770}));
821+
}
822+
823+
// Test with `table_info_c1_not_null_with_origin_default_non_public`
824+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_with_origin_default_non_public);
825+
{
826+
// force_decode=false can not decode because there are
827+
// missing value for column with not null flag and the state is not public
828+
auto reader = RegionBlockReader(decoding_schema);
829+
Block res_block = createBlockSortByColumnID(decoding_schema);
830+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
831+
}
832+
{
833+
// force_decode=true can decode the block, and filling the default value for c1
834+
auto reader = RegionBlockReader(decoding_schema);
835+
Block res_block = createBlockSortByColumnID(decoding_schema);
836+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
837+
LOG_INFO(
838+
Logger::get(),
839+
"Decoded block:\n{}",
840+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
841+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
842+
// verify the default value is filled correctly
843+
ASSERT_COLUMN_EQ( //
844+
res_block.getByName("c1"),
845+
// the thrid elem is filled wih origin_default
846+
createColumn<Int64>({-2051270087, -2051270087, -56083770}));
847+
}
848+
849+
// Test with `table_info_c1_nullable`
850+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_nullable);
851+
{
852+
// force_decode=false should be able to decode because c1 is nullable
853+
auto reader = RegionBlockReader(decoding_schema);
854+
Block res_block = createBlockSortByColumnID(decoding_schema);
855+
ASSERT_TRUE(reader.read(res_block, *data_list_read, false));
856+
LOG_INFO(
857+
Logger::get(),
858+
"Decoded block:\n{}",
859+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
860+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Nullable(Int64)");
861+
// verify the default value is filled with NULL correctly at the last row
862+
ASSERT_COLUMN_EQ( //
863+
res_block.getByName("c1"),
864+
createNullableColumn<Int64>({-2051270087, -2051270087, 0}, {0, 0, 1}));
865+
}
866+
}
867+
CATCH
695868

696869
} // namespace DB::tests

0 commit comments

Comments
 (0)