|
12 | 12 | // See the License for the specific language governing permissions and |
13 | 13 | // limitations under the License. |
14 | 14 |
|
15 | | -#pragma GCC diagnostic push |
16 | | -#pragma GCC diagnostic ignored "-Wsign-compare" |
17 | | -#include <gtest/gtest.h> |
18 | | -#pragma GCC diagnostic pop |
| 15 | +#include <Common/Logger.h> |
19 | 16 | #include <IO/BaseFile/PosixRandomAccessFile.h> |
20 | 17 | #include <IO/BaseFile/PosixWritableFile.h> |
21 | 18 | #include <IO/BaseFile/RateLimiter.h> |
|
28 | 25 | #include <Poco/File.h> |
29 | 26 | #include <Storages/DeltaMerge/DMChecksumConfig.h> |
30 | 27 | #include <Storages/Page/PageUtil.h> |
| 28 | +#include <TestUtils/TiFlashTestBasic.h> |
31 | 29 | #include <fmt/format.h> |
32 | 30 |
|
| 31 | +#include <ext/scope_guard.h> |
33 | 32 | #include <random> |
34 | 33 |
|
35 | | -namespace DB |
36 | | -{ |
37 | | -namespace tests |
| 34 | +namespace DB::tests |
38 | 35 | { |
| 36 | + |
39 | 37 | namespace |
40 | 38 | { |
41 | 39 | std::random_device dev; // NOLINT(cert-err58-cpp) |
@@ -372,5 +370,129 @@ TEST_STACKED_SEEKING(CRC32) |
372 | 370 | TEST_STACKED_SEEKING(CRC64) |
373 | 371 | TEST_STACKED_SEEKING(City128) |
374 | 372 | TEST_STACKED_SEEKING(XXH3) |
375 | | -} // namespace tests |
376 | | -} // namespace DB |
| 373 | + |
| 374 | +template <ChecksumAlgo D> |
| 375 | +void runCompressedSeekableReaderBufferTest() |
| 376 | +try |
| 377 | +{ |
| 378 | + auto log = Logger::get(); |
| 379 | + // Create a temporary file for testing |
| 380 | + const std::string temp_file_path = "/tmp/tiflash_compressed_seek_test.dat"; |
| 381 | + SCOPE_EXIT({ |
| 382 | + Poco::File file(temp_file_path); |
| 383 | + if (file.exists()) |
| 384 | + file.remove(); |
| 385 | + }); |
| 386 | + // Test data - create multiple blocks with different patterns |
| 387 | + std::vector<std::string> test_blocks; |
| 388 | + |
| 389 | + test_blocks = { |
| 390 | + std::string(1500, 'A') + "BLOCK0_END", |
| 391 | + std::string(800, 'B') + "BLOCK1_END", |
| 392 | + "", // Block 2 is empty |
| 393 | + "", // Block 3 is empty |
| 394 | + }; |
| 395 | + |
| 396 | + std::vector<size_t> block_compressed_offsets; |
| 397 | + std::vector<size_t> block_decompressed_sizes; |
| 398 | + |
| 399 | + auto [limiter, provider] = prepareIO(); |
| 400 | + auto config = DM::DMChecksumConfig{{}, TIFLASH_DEFAULT_CHECKSUM_FRAME_SIZE, D}; |
| 401 | + |
| 402 | + // Write compressed data to file |
| 403 | + { |
| 404 | + auto plain_file = ChecksumWriteBufferBuilder::build( |
| 405 | + true, |
| 406 | + provider, |
| 407 | + temp_file_path, |
| 408 | + EncryptionPath(temp_file_path, temp_file_path), |
| 409 | + false, |
| 410 | + limiter->getWriteLimiter(), |
| 411 | + config.getChecksumAlgorithm(), |
| 412 | + config.getChecksumFrameLength(), |
| 413 | + /*flags*/ |
| 414 | + -1, |
| 415 | + /*mode*/ 0666, |
| 416 | + 1048576); |
| 417 | + auto compressed_buf |
| 418 | + = CompressedWriteBuffer<>::build(*plain_file, CompressionSettings(CompressionMethod::LZ4), false); |
| 419 | + |
| 420 | + for (const auto & block_data : test_blocks) |
| 421 | + { |
| 422 | + // Record the compressed file offset before writing this block |
| 423 | + block_compressed_offsets.push_back(plain_file->count()); |
| 424 | + block_decompressed_sizes.push_back(block_data.size()); |
| 425 | + |
| 426 | + // Write the block data |
| 427 | + compressed_buf->write(block_data.data(), block_data.size()); |
| 428 | + compressed_buf->next(); // Force compression of this block |
| 429 | + } |
| 430 | + } |
| 431 | + |
| 432 | + LOG_INFO(log, "Created compressed file with {} blocks", test_blocks.size()); |
| 433 | + for (size_t i = 0; i < block_compressed_offsets.size(); ++i) |
| 434 | + { |
| 435 | + LOG_INFO( |
| 436 | + log, |
| 437 | + "Block {}: compressed_offset={}, decompressed_size={}", |
| 438 | + i, |
| 439 | + block_compressed_offsets[i], |
| 440 | + block_decompressed_sizes[i]); |
| 441 | + } |
| 442 | + |
| 443 | + |
| 444 | + auto compressed_in = CompressedReadBufferFromFileBuilder::build( |
| 445 | + provider, |
| 446 | + temp_file_path, |
| 447 | + EncryptionPath(temp_file_path, temp_file_path), |
| 448 | + config.getChecksumFrameLength(), |
| 449 | + limiter->getReadLimiter(), |
| 450 | + config.getChecksumAlgorithm(), |
| 451 | + config.getChecksumFrameLength()); |
| 452 | + |
| 453 | + // 1. Check seek + read |
| 454 | + for (size_t i = 0; i < test_blocks.size(); ++i) |
| 455 | + { |
| 456 | + // Seek to the start of each block |
| 457 | + LOG_INFO(log, "Seeking to block {} at offset {}", i, block_compressed_offsets[i]); |
| 458 | + compressed_in->seek(block_compressed_offsets[i], 0); |
| 459 | + |
| 460 | + // Read the data |
| 461 | + std::string read_data; |
| 462 | + read_data.resize(block_decompressed_sizes[i]); |
| 463 | + compressed_in->readBig(read_data.data(), block_decompressed_sizes[i]); |
| 464 | + |
| 465 | + // Verify the data matches |
| 466 | + ASSERT_EQ(read_data, test_blocks[i]) << "Block " << i << " data mismatch"; |
| 467 | + } |
| 468 | + |
| 469 | + // Seek in inverse order to test seek again |
| 470 | + for (size_t i = 0; i < test_blocks.size(); ++i) |
| 471 | + { |
| 472 | + assert(i + 1 <= test_blocks.size()); |
| 473 | + const size_t target_block = test_blocks.size() - i - 1; |
| 474 | + compressed_in->seek(block_compressed_offsets[target_block], 0); |
| 475 | + std::string read_data; |
| 476 | + read_data.resize(block_decompressed_sizes[target_block]); |
| 477 | + size_t num_read = compressed_in->readBig(read_data.data(), test_blocks[target_block].size()); |
| 478 | + ASSERT_EQ(num_read, test_blocks[target_block].size()); |
| 479 | + read_data.resize(num_read); |
| 480 | + ASSERT_EQ(read_data, test_blocks[target_block]) |
| 481 | + << "Block " << target_block << " data mismatch after seek again"; |
| 482 | + } |
| 483 | +} |
| 484 | +CATCH |
| 485 | + |
| 486 | +#define TEST_COMPRESSEDSEEKABLE(ALGO) \ |
| 487 | + TEST(DMChecksumBuffer##ALGO, CompressedSeekable) \ |
| 488 | + { \ |
| 489 | + runCompressedSeekableReaderBufferTest<ChecksumAlgo::ALGO>(); \ |
| 490 | + } // NOLINT(cert-err58-cpp) |
| 491 | + |
| 492 | +TEST_COMPRESSEDSEEKABLE(None) |
| 493 | +TEST_COMPRESSEDSEEKABLE(CRC32) |
| 494 | +TEST_COMPRESSEDSEEKABLE(CRC64) |
| 495 | +TEST_COMPRESSEDSEEKABLE(City128) |
| 496 | +TEST_COMPRESSEDSEEKABLE(XXH3) |
| 497 | + |
| 498 | +} // namespace DB::tests |
0 commit comments