Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
{
VersionColResolver<ReadList> version_col_resolver;
version_col_resolver.check(block, schema_snapshot->column_defines->size());
// The column_ids to read according to schema_snapshot, each elem is (column_id, block_pos)
const auto & read_column_ids = schema_snapshot->getColId2BlockPosMap();
const auto & pk_column_ids = schema_snapshot->pk_column_ids;
const auto & pk_pos_map = schema_snapshot->pk_pos_map;
Expand Down Expand Up @@ -269,6 +270,8 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
else
{
// Parse column value from encoded value
// Decode the column_ids from `column_ids_iter` to `read_column_ids.end()`
// and insert into `block` at position starting from `next_column_pos`
if (!appendRowToBlock(
*value_ptr,
column_ids_iter,
Expand Down
77 changes: 77 additions & 0 deletions dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,5 +692,82 @@ try
}
CATCH

// Exercises RegionBlockReader::read on a single committed row under two table schemas:
// first with c1 flagged NOT NULL (Flag=1) and no "origin_default", then with c1 nullable (Flag=0).
// For each schema it asserts whether decoding succeeds for the given `force_decode` setting.
// NOTE(review): the row-v2 payload below appears to carry only column ids 4, 5 and 6,
// i.e. c1 (id 7) is absent from the encoded row — confirm against the SST dump.
TEST_F(RegionBlockReaderTest, ReadFromRegionDefaultValue)
try
{
// In this table_info, c1 (id 7) has the NOT NULL flag (Flag=1) and a "default" of "-56083770",
// but no "origin_default".
// NOTE(review): the original note said c1 is filled with "0" according to ori_default — presumably
// this is the value produced by defaultValueToField() when the origin default is empty; confirm.
TableInfo table_info(
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":711,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
NullspaceID);

RegionID region_id = 4;
// The start_key and end_key of the region that holds the test row.
// NOTE(review): originally annotated as "table_id = 68", while the table_info objects in this
// test use ids 711 and 681 — confirm which table these keys actually encode.
String region_start_key(bytesFromHexString("7480000000000002FF7C5F720000000000FA"));
String region_end_key(bytesFromHexString("7480000000000002FF7D00000000000000F8"));
auto region = RegionBench::makeRegionForRange(region_id, region_start_key, region_end_key);
// Hex-encoded (key, value) pairs dumped from an SST file; the first two entries are kept
// commented out and only the last one is inserted into the region.
std::vector<std::tuple<std::string_view, std::string_view>> kvs = {
// {
// "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA",
// "50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339"
// "1ABC85",
// },
// {
// "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8",
// "50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339"
// "1ABC85",
// },
{
"7480000000000002FFA95F728000000000FF0000020000000000FAF9901806DE33FFE8",
"509680B08E92FFF9B706762580000300000004050608000F001600BF720CDD400000000A0080000000010A00800000393C",
},
};
// Insert every pair into the region's "write" column family.
for (const auto & [k, v] : kvs)
{
region->insertDebug("write", TiKVKey(bytesFromHexString(k)), TiKVValue(bytesFromHexString(v)));
}

// Read the committed data back; the same data list is reused by all decoding attempts below.
auto data_list_read = ReadRegionCommitCache(region, true);
ASSERT_TRUE(data_list_read.has_value());

auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info);
{
// force_decode=false cannot decode because the value of a column
// with the NOT NULL flag is missing from the row.
auto reader = RegionBlockReader(decoding_schema);
Block res_block = createBlockSortByColumnID(decoding_schema);
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
}
{
// force_decode=true can decode the block, filling in the default value for c1.
auto reader = RegionBlockReader(decoding_schema);
Block res_block = createBlockSortByColumnID(decoding_schema);
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
// TODO: verify that the default value is filled correctly; for now the block is only logged.
LOG_INFO(
Logger::get(),
"Decoded block:\n{}",
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
}

// In this table_info, c1 has neither the NOT NULL flag (Flag=0) nor a "default".
TableInfo table_info_c1_nullable(
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":681,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})",
NullspaceID);

// Rebuild the decoding schema from the nullable-c1 table_info and decode the same data list again.
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_nullable);
{
// force_decode=false should be able to decode because c1 is nullable.
auto reader = RegionBlockReader(decoding_schema);
Block res_block = createBlockSortByColumnID(decoding_schema);
ASSERT_TRUE(reader.read(res_block, *data_list_read, false));
// TODO: verify that the default value is filled correctly; for now the block is only logged.
LOG_INFO(
Logger::get(),
"Decoded block:\n{}",
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
}
}
CATCH

} // namespace DB::tests
65 changes: 51 additions & 14 deletions dbms/src/TiDB/Decode/RowCodec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,19 +388,19 @@ bool appendRowToBlock(
}
}

// When the `column_info` is missing in the encoded row, we try to add default value to the `block` at `block_column_pos`.
// Return true if we could add default value to the column. Otherwise false and the caller should trigger schema sync.
inline bool addDefaultValueToColumnIfPossible(
Copy link
Copy Markdown
Contributor Author

@JaySon-Huang JaySon-Huang Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary of the logical changes, by aspect:

| Aspect | Before PR (old logic) | After PR (latest logic) |
| --- | --- | --- |
| PK handle column (pk_is_handle/common handle) | If ignore_pk_if_absent → return true; else force_decode=false → false, force_decode=true → fill | Same behavior |
| NOT NULL + NoDefaultValueFlag | force_decode=false → false; force_decode=true → fill | Same behavior |
| NOT NULL + no origin default (but has default) | Filled with defaultValueToField() even when force_decode=false (often zero) | force_decode=false → return false unless hasOriginDefaultValue(); force_decode=true still fills |
| NOT NULL + origin default present | Filled with defaultValueToField() | Still filled, and now explicitly allowed without schema sync |
| NOT NULL + non‑public state | No special handling (would try to fill) | force_decode=false → return false if state not public |
| Nullable column missing | Filled by defaultValueToField() (NULL if no origin default) | Same behavior |
| Schema sync triggered for missing NOT NULL column | Only when NoDefaultValueFlag | Also when no origin default or non‑public state |
| Safety vs NOT NULL → NULLABLE mismatch | Could silently insert zero | Forces schema sync to avoid wrong zero fill |

const ColumnInfo & column_info,
Block & block,
size_t block_column_pos,
bool ignore_pk_if_absent,
bool force_decode)
{
// We consider that a missing column can be safely filled with NULL, unless it has no default value and is NOT NULL.
// This saves lots of unnecessary schema syncs for old data with a newer schema that has newly added columns.

if (column_info.hasPriKeyFlag())
{
// For clustered index or pk_is_handle, if the pk column does not exist, it can still be decoded from the key
// For clustered index or pk_is_handle, if the pk column does not exist, it can still be decoded from the key.
// just skip this column.
if (ignore_pk_if_absent)
return true;

Expand All @@ -409,18 +409,44 @@ inline bool addDefaultValueToColumnIfPossible(
return false;
// Else non-clustered index, and not pk_is_handle, it could be a row encoded by older schema,
// we need to fill the column which has primary key flag with default value.
// fallthrough to fill default value when force_decode
// fallthrough to fill default value when `force_decode==true`
}

if (column_info.hasNoDefaultValueFlag() && column_info.hasNotNullFlag())
if (column_info.hasNotNullFlag())
{
if (!force_decode)
return false;
// Else the row does not contain this "not null" / "no default value" column,
// it could be a row encoded by older schema.
// fallthrough to fill default value when force_decode
{
if (column_info.hasNoDefaultValueFlag())
{
// This is a column that is defined as NOT NULL but has no default value. In this case, the user
// must provide the column value when inserting data. But in the encoded value, the
// datum of this column is missing.
// It could be a row encoded by a newer schema after turning `NOT NULL` into `NULLABLE`.
// Return false to trigger a schema sync when `force_decode==false`.
return false;
}

assert(!column_info.hasNoDefaultValueFlag());
if (!column_info.hasOriDefaultValue())
{
// This is a column that is defined as NOT NULL with a default value. In this case, tidb-server
// should fill the column value when inserting data, unless the column's default value is null
// and the value equals that default while the column has no origin default.
// Reference: https://github.com/pingcap/tidb/blob/v8.5.5/pkg/table/tables/tables.go#L1463-L1489
// Now in the encoded value, the datum of this column is missing. It could be a row encoded by an
// older schema after turning `NOT NULL` into `NULLABLE`. If the column_info has no origin default value,
// return false to trigger a schema sync when `force_decode==false`.
return false;
}
// Else the column has a non-null origin default value; the key-value should have been encoded with an old schema in which
// this column was not yet added. Fall through to fill the column with the original default value.
}
// Else force_decode == true, the row does not contain this "not null" / "no default value" column.
// It could be a row encoded by older schema, fallthrough to fill the column with original default value.
}
// not null or has no default value, tidb will fill with specific value.

// We consider that a missing column can be safely filled with NULL or the original default value.
// This saves lots of unnecessary schema syncs for old data with a newer schema that has newly added columns.
auto * raw_column = const_cast<IColumn *>((block.getByPosition(block_column_pos)).column.get());
raw_column->insert(column_info.defaultValueToField());
return true;
Expand Down Expand Up @@ -460,7 +486,9 @@ bool appendRowV2ToBlockImpl(
num_not_null_columns,
value_offsets);
size_t values_start_pos = cursor;
// how many not null columns have been processed
size_t idx_not_null = 0;
// how many null columns have been processed
size_t idx_null = 0;
// Merge ordered not null/null columns to keep order.
while (idx_not_null < not_null_column_ids.size() || idx_null < null_column_ids.size())
Expand All @@ -481,20 +509,21 @@ bool appendRowV2ToBlockImpl(
const auto next_column_id = column_ids_iter->first;
if (next_column_id > next_datum_column_id)
{
// The next column id to read is bigger than the column id of next datum in encoded row.
// The next_column_id to read is bigger than the next_datum_column_id in encoded row.
// It means this is the datum of extra column. May happen when reading after dropping
// a column.
// For `force_decode == false`, we should return false to let upper layer trigger schema sync.
if (!force_decode)
return false;
// Ignore the extra column and continue to parse other datum
// For `force_decode == true`, we just skip this extra column and continue to parse other datum.
if (is_null)
idx_null++;
else
idx_not_null++;
}
else if (next_column_id < next_datum_column_id)
{
// The next column id to read is less than the column id of next datum in encoded row.
// The next_column_id to read is less than the next_datum_column_id in encoded row.
// It means this is the datum of missing column. May happen when reading after adding
// a column.
// Fill with default value and continue to read data for next column id.
Expand All @@ -505,7 +534,9 @@ bool appendRowV2ToBlockImpl(
block_column_pos,
ignore_pk_if_absent,
force_decode))
{
return false;
}
column_ids_iter++;
block_column_pos++;
}
Expand Down Expand Up @@ -570,8 +601,12 @@ bool appendRowV2ToBlockImpl(
block_column_pos++;
}
}

// There are more columns to read other than the datum encoded in the row.
while (column_ids_iter != column_ids_iter_end)
{
// Skip if the column_id is the same as `pk_handle_id`. The value of column
// `pk_handle_id` will be filled in upper layer but not in this function.
if (column_ids_iter->first != pk_handle_id)
{
const auto & column_info = column_infos[column_ids_iter->second];
Expand All @@ -581,7 +616,9 @@ bool appendRowV2ToBlockImpl(
block_column_pos,
ignore_pk_if_absent,
force_decode))
{
return false;
}
}
column_ids_iter++;
block_column_pos++;
Expand Down
5 changes: 5 additions & 0 deletions dbms/src/TiDB/Schema/TiDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ ColumnInfo::ColumnInfo(Poco::JSON::Object::Ptr json)
}


/// Reports whether this column carries an origin default, i.e. whether either
/// `origin_default_value` or `origin_default_bit_value` is non-empty.
bool ColumnInfo::hasOriDefaultValue() const
{
    // A non-empty plain origin default is enough; the bit value is then not consulted.
    if (!origin_default_value.isEmpty())
        return true;

    // Otherwise the answer depends solely on the bit-typed origin default.
    return !origin_default_bit_value.isEmpty();
}

Field ColumnInfo::defaultValueToField() const
{
const auto & value = origin_default_value;
Expand Down
1 change: 1 addition & 0 deletions dbms/src/TiDB/Schema/TiDB.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ struct ColumnInfo
COLUMN_FLAGS(M)
#undef M

bool hasOriDefaultValue() const;
DB::Field defaultValueToField() const;
CodecFlag getCodecFlag() const;
DB::Field getDecimalValue(const String &) const;
Expand Down