man-group · IvoDD · Jun 4, 2026
diff --git a/Makefile b/Makefile
@@ -32,9 +32,9 @@ _VENV_PIP := $(VENV_DIR)/$(VENV_NAME)/bin/pip
 # ── Phony targets ────────────────────────────────────────────────────────────
 .PHONY: help setup protoc venv activate lint lint-check \
         build build-debug configure configure-debug \
-        test-cpp test-cpp-debug symlink symlink-debug \
+        test-cpp test-cpp-debug test-cpp-rapidcheck test-cpp-rapidcheck-debug symlink symlink-debug \
         test-py build-and-test-py build-and-test-py-debug \
-        wheel bench-cpp bench-py install-editable
+        wheel bench-cpp bench-cpp-build bench-py install-editable
 
 # ── help ─────────────────────────────────────────────────────────────────────
 help: ## Show this help
@@ -136,6 +136,15 @@ test-cpp-debug: $(_DEBUG_BUILD_DIR)/.configure-stamp ## Build and run C++ unit t
 	cmake --build $(_DEBUG_BUILD_DIR) -j $(CMAKE_JOBS) --target test_unit_arcticdb
 	$(_DEBUG_BUILD_DIR)/arcticdb/test_unit_arcticdb $(if $(FILTER),--gtest_filter=$(FILTER))
 
+# ── test-cpp-rapidcheck ──────────────────────────────────────────────────────
+test-cpp-rapidcheck: $(_RELEASE_BUILD_DIR)/.configure-stamp ## Build and run C++ rapidcheck tests (release, FILTER= for gtest_filter)
+	cmake --build $(_RELEASE_BUILD_DIR) -j $(CMAKE_JOBS) --target arcticdb_rapidcheck_tests
+	$(_RELEASE_BUILD_DIR)/arcticdb/arcticdb_rapidcheck_tests $(if $(FILTER),--gtest_filter=$(FILTER))
+
+test-cpp-rapidcheck-debug: $(_DEBUG_BUILD_DIR)/.configure-stamp ## Build and run C++ rapidcheck tests (debug, FILTER= for gtest_filter)
+	cmake --build $(_DEBUG_BUILD_DIR) -j $(CMAKE_JOBS) --target arcticdb_rapidcheck_tests
+	$(_DEBUG_BUILD_DIR)/arcticdb/arcticdb_rapidcheck_tests $(if $(FILTER),--gtest_filter=$(FILTER))
+
 # ── symlink ──────────────────────────────────────────────────────────────────
 _EXT_SUFFIX := $(shell python3 -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))")
 
@@ -169,8 +178,10 @@ wheel: ## Build a pip wheel
 		$(PROXY_CMD) $(_VENV_PIP) wheel . --no-deps -w dist/
 
 # ── bench-cpp ────────────────────────────────────────────────────────────────
-bench-cpp: $(_RELEASE_BUILD_DIR)/.configure-stamp ## Build and run C++ benchmarks (release, FILTER= for benchmark_filter)
+bench-cpp-build: $(_RELEASE_BUILD_DIR)/.configure-stamp ## Build C++ benchmarks without running (release)
 	cmake --build $(_RELEASE_BUILD_DIR) -j $(CMAKE_JOBS) --target benchmarks
+
+bench-cpp: bench-cpp-build ## Build and run C++ benchmarks (release, FILTER= for benchmark_filter)
 	$(_RELEASE_BUILD_DIR)/arcticdb/benchmarks $(if $(FILTER),--benchmark_filter=$(FILTER))
 
 # ── install-editable ─────────────────────────────────────────────────────────

diff --git a/cpp/arcticdb/column_store/column_algorithms.hpp b/cpp/arcticdb/column_store/column_algorithms.hpp
@@ -355,14 +355,17 @@ typename TDT::DataTypeTag::raw_type value_at(const ColumnData::ColumnDataIterato
 // For lower_bound that is `probe < value`; for upper_bound it is `probe <= value`.
 // `within_block_bisect` is std::lower_bound or std::upper_bound run on the contiguous block memory.
 template<typename TDT, IteratorType IT, IteratorDensity ID, typename IsBeforeAnswer, typename WithinBlockBisect>
+requires(ID == IteratorDensity::DENSE) && (TDT::dimension() == Dimension::Dim0) &&
+        std::predicate<IsBeforeAnswer, typename TDT::DataTypeTag::raw_type, typename TDT::DataTypeTag::raw_type> &&
+        std::invocable<
+                WithinBlockBisect, const typename TDT::DataTypeTag::raw_type*,
+                const typename TDT::DataTypeTag::raw_type*, typename TDT::DataTypeTag::raw_type>
 ColumnData::ColumnDataIterator<TDT, IT, ID, true> bound_search(
         const ColumnData::ColumnDataIterator<TDT, IT, ID, true>& begin,
         const ColumnData::ColumnDataIterator<TDT, IT, ID, true>& end, typename TDT::DataTypeTag::raw_type value,
-        IsBeforeAnswer is_before, WithinBlockBisect bisect
+        IsBeforeAnswer&& is_before, WithinBlockBisect&& bisect
 ) {
     using RawType = typename TDT::DataTypeTag::raw_type;
-    static_assert(ID == IteratorDensity::DENSE, "Sorted search currently supports DENSE only");
-    static_assert(TDT::dimension() == Dimension::Dim0, "Sorted search supports Dim0 only");
     util::check(begin.parent() == end.parent(), "bound_search: begin and end have different parents");
 
     if (begin == end) {
@@ -414,11 +417,12 @@ ColumnData::ColumnDataIterator<TDT, IT, ID, true> bound_search(
 // Gallop forward from `begin` in steps of 2**n until an element after value is reached.
 // Returns the exponential range known to contain the first element for which `!is_before`.
 template<typename TDT, IteratorType IT, IteratorDensity ID, typename IsBeforeAnswer>
-std::pair<ColumnData::ColumnDataIterator<TDT, IT, ID, true>, ColumnData::ColumnDataIterator<TDT, IT, ID, true>>
-gallop_bracket(
+requires(ID == IteratorDensity::DENSE) && (TDT::dimension() == Dimension::Dim0) &&
+        std::predicate<IsBeforeAnswer, typename TDT::DataTypeTag::raw_type, typename TDT::DataTypeTag::raw_type>
+std::pair<ColumnData::ColumnDataIterator<TDT, IT, ID, true>, ColumnData::ColumnDataIterator<TDT, IT, ID, true>> gallop_bracket(
         const ColumnData::ColumnDataIterator<TDT, IT, ID, true>& begin,
         const ColumnData::ColumnDataIterator<TDT, IT, ID, true>& end, typename TDT::DataTypeTag::raw_type value,
-        IsBeforeAnswer is_before
+        IsBeforeAnswer&& is_before
 ) {
     using RawType = typename TDT::DataTypeTag::raw_type;
     if (begin == end) {
@@ -481,8 +485,7 @@ gallop_bracket(
     // We iterate until `first_offset+step < up_to - 1` because we'll later explicitly probe at
     // the last element of the first block
     const size_t up_to = end_block_idx > first_block_idx ? first_block_row_count : end_in_block_offset;
-    size_t step = 1;
-    for (; first_offset + step + 1 < up_to; step *= 2) {
+    for (size_t step = 1; first_offset + step + 1 < up_to; step *= 2) {
         const size_t probe_offset = first_offset + step;
         if (!record_probe_in_first_block(probe_offset + 1, first_block_data[probe_offset])) {
             return {make_iter_in_first_block(prev_offset), make_iter_in_first_block(cur_offset)};
@@ -500,8 +503,7 @@ gallop_bracket(
     }
 
     // Answer is after the first block — probe the last elements of blocks at first_idx + 2**n
-    step = 1;
-    for (; first_block_idx + step < end_block_idx; step *= 2) {
+    for (size_t step = 1; first_block_idx + step < end_block_idx; step *= 2) {
         const size_t block_idx = first_block_idx + step;
         const RawType last_in_block = block_data_at(block_idx)[block_row_count_at(block_idx) - 1];
         if (!record_probe(block_idx + 1, 0, last_in_block)) {

diff --git a/cpp/arcticdb/column_store/test/benchmark_column.cpp b/cpp/arcticdb/column_store/test/benchmark_column.cpp
@@ -13,6 +13,7 @@
 #include <benchmark/benchmark.h>
 #include <arcticdb/column_store/column.hpp>
 #include <arcticdb/column_store/column_algorithms.hpp>
+#include <arcticdb/util/test/test_utils.hpp>
 
 using namespace arcticdb;
 
@@ -25,7 +26,7 @@ static std::mt19937 gen(rd());
 
 // ─── Sorted-search benchmarks across block layouts ────────────────────────────────────────────────
 //
-// Four column shapes — single-block (PRESIZED memcpy), regular blocks (presized_in_blocks),
+// Four column shapes — single-block (PRESIZED), regular blocks (presized_in_blocks),
 // irregular blocks of size 1000 (DETACHABLE), irregular blocks of size 1 (DETACHABLE).
 
 namespace {
@@ -41,51 +42,21 @@ std::vector<timestamp> make_sorted_data(size_t num_rows, std::mt19937& rng) {
     return data;
 }
 
-void populate(Column& col, const std::vector<timestamp>& data) {
-    for (size_t i = 0; i < data.size(); ++i) {
-        col.reference_at<timestamp>(i) = data[i];
-    }
-}
-
-Column make_single_block(const std::vector<timestamp>& data) {
-    Column col(
-            make_scalar_type(DataType::NANOSECONDS_UTC64),
-            data.size(),
-            AllocationType::PRESIZED,
-            Sparsity::NOT_PERMITTED
-    );
-    memcpy(col.ptr(), data.data(), data.size() * sizeof(timestamp));
-    col.set_row_data(data.size() - 1);
-    return col;
-}
-
-Column make_regular_blocks(const std::vector<timestamp>& data) {
-    Column col(
-            make_scalar_type(DataType::NANOSECONDS_UTC64),
-            Sparsity::NOT_PERMITTED,
-            ChunkedBuffer::presized_in_blocks(data.size() * sizeof(timestamp))
-    );
-    populate(col, data);
-    return col;
-}
-
-// DETACHABLE allocation routes lookups through ChunkedBuffer::block_offsets_ even with uniform
-// block sizes, so these stress the irregular path while keeping block sizes consistent.
-Column make_irregular_blocks(const std::vector<timestamp>& data, size_t block_size) {
-    Column col(make_scalar_type(DataType::NANOSECONDS_UTC64), 0, AllocationType::DETACHABLE, Sparsity::NOT_PERMITTED);
-    size_t remaining = data.size();
-    while (remaining > 0) {
-        const size_t alloc = std::min(remaining, block_size);
-        col.allocate_data(alloc * sizeof(timestamp));
-        col.advance_data(alloc * sizeof(timestamp));
-        remaining -= alloc;
-    }
-    populate(col, data);
-    return col;
-}
-
-auto make_irregular_blocks_1000 = [](const std::vector<timestamp>& data) { return make_irregular_blocks(data, 1000); };
-auto make_irregular_blocks_1 = [](const std::vector<timestamp>& data) { return make_irregular_blocks(data, 1); };
+// Data type of every benchmark column built below; NANOSECONDS_UTC64 keeps it consistent with BenchTDT.
+constexpr DataType index_data_type = DataType::NANOSECONDS_UTC64;
+
+auto make_single_block = [](const std::vector<timestamp>& data) {  // "single-block" shape
+    return make_single_block_column<timestamp>(data, index_data_type);
+};
+auto make_regular_blocks = [](const std::vector<timestamp>& data) {  // "regular blocks" shape
+    return make_regular_blocks_column<timestamp>(data, index_data_type);
+};
+auto make_irregular_blocks_1000 = [](const std::vector<timestamp>& data) {  // "irregular blocks of size 1000"
+    return make_irregular_blocks_column<timestamp>(data, uniform_block_sizes(data.size(), 1000), index_data_type);
+};
+auto make_irregular_blocks_1 = [](const std::vector<timestamp>& data) {  // "irregular blocks of size 1"
+    return make_irregular_blocks_column<timestamp>(data, uniform_block_sizes(data.size(), 1), index_data_type);
+};
 
 } // namespace
 

diff --git a/cpp/arcticdb/column_store/test/rapidcheck_column.cpp b/cpp/arcticdb/column_store/test/rapidcheck_column.cpp
@@ -151,30 +151,35 @@ RC_GTEST_PROP(Column, SearchSorted, (const std::vector<int64_t>& input, int64_t
     auto n = sorted_input.size();
     auto smallest_value = sorted_input[0];
     auto largest_value = sorted_input[n - 1];
-    using TDT = TypeDescriptorTag<DataTypeTag<DataType::INT64>, DimensionTag<Dimension::Dim0>>;
-    Column column(static_cast<TypeDescriptor>(TDT{}), 0, AllocationType::DYNAMIC, Sparsity::NOT_PERMITTED);
-    for (size_t idx = 0; idx < n; ++idx) {
-        column.set_scalar<int64_t>(idx, sorted_input[idx]);
-    }
-    auto left_idx = lower_bound_idx<int64_t>(column, value_to_find);
-    auto right_idx = upper_bound_idx<int64_t>(column, value_to_find);
-    RC_ASSERT(left_idx <= n);
-    RC_ASSERT(right_idx <= n);
-    if (left_idx == 0) {
-        RC_ASSERT(value_to_find <= smallest_value);
-    } else if (left_idx == n) {
-        RC_ASSERT(value_to_find > largest_value);
-    } else {
-        RC_ASSERT(value_to_find > sorted_input[left_idx - 1]);
-        RC_ASSERT(value_to_find <= sorted_input[left_idx]);
-    }
-    if (right_idx == 0) {
-        RC_ASSERT(value_to_find <= smallest_value);
-    } else if (right_idx == n) {
-        RC_ASSERT(value_to_find >= largest_value);
-    } else {
-        RC_ASSERT(value_to_find >= sorted_input[right_idx - 1]);
-        RC_ASSERT(value_to_find < sorted_input[right_idx]);
+
+    // Run against single / regular / irregular block layouts so block-jumping is exercised, not just
+    // the contiguous case.
+    std::vector<Column> columns;
+    columns.push_back(make_single_block_column<int64_t>(sorted_input, DataType::INT64));
+    columns.push_back(make_regular_blocks_column<int64_t>(sorted_input, DataType::INT64));
+    columns.push_back(make_irregular_blocks_column<int64_t>(sorted_input, DataType::INT64));
+
+    for (const auto& column : columns) {
+        auto left_idx = lower_bound_idx<int64_t>(column, value_to_find);
+        auto right_idx = upper_bound_idx<int64_t>(column, value_to_find);
+        RC_ASSERT(left_idx <= n);
+        RC_ASSERT(right_idx <= n);
+        if (left_idx == 0) {
+            RC_ASSERT(value_to_find <= smallest_value);
+        } else if (left_idx == n) {
+            RC_ASSERT(value_to_find > largest_value);
+        } else {
+            RC_ASSERT(value_to_find > sorted_input[left_idx - 1]);
+            RC_ASSERT(value_to_find <= sorted_input[left_idx]);
+        }
+        if (right_idx == 0) {  // upper bound at 0: every element is strictly greater than the probe
+            RC_ASSERT(value_to_find < smallest_value);
+        } else if (right_idx == n) {  // upper bound at n: no element is greater than the probe
+            RC_ASSERT(value_to_find >= largest_value);
+        } else {  // right_idx is the first element strictly greater than the probe
+            RC_ASSERT(value_to_find >= sorted_input[right_idx - 1]);
+            RC_ASSERT(value_to_find < sorted_input[right_idx]);
+        }
     }
 }
 

diff --git a/cpp/arcticdb/column_store/test/test_column.cpp b/cpp/arcticdb/column_store/test/test_column.cpp
@@ -279,12 +279,11 @@ TEST(ColumnData, Iterator) {
     }
 }
 
-TEST(ColumnData, IteratorSkipsEmptyBlocks) {
+TEST(ColumnData, IteratorSkipsTrailingEmptyBlock) {
     using namespace arcticdb;
 
     using TDT = TypeDescriptorTag<DataTypeTag<DataType::INT64>, DimensionTag<Dimension::Dim0>>;
 
-    // Trailing empty block
     Column col(static_cast<TypeDescriptor>(TDT{}), 0, AllocationType::DYNAMIC, Sparsity::PERMITTED);
     std::array<int64_t, 3> data{10, 20, 30};
     col.set_external_block(0, data.data(), data.size());
@@ -302,8 +301,14 @@ TEST(ColumnData, IteratorSkipsEmptyBlocks) {
         visited.push_back(*it);
     }
     EXPECT_EQ(visited, (std::vector<int64_t>{10, 20, 30}));
+}
+
+TEST(ColumnData, IteratorOnAllEmptyColumn) {
+    using namespace arcticdb;
 
-    // All-empty column: a single zero-size external block. begin must compare equal to end.
+    using TDT = TypeDescriptorTag<DataTypeTag<DataType::INT64>, DimensionTag<Dimension::Dim0>>;
+
+    // A single zero-size external block. begin must compare equal to end.
     Column empty_col(static_cast<TypeDescriptor>(TDT{}), 0, AllocationType::DYNAMIC, Sparsity::PERMITTED);
     empty_col.set_external_block(0, static_cast<int64_t*>(nullptr), 0);
     ASSERT_EQ(empty_col.buffer().num_blocks(), 1u);
@@ -477,56 +482,20 @@ namespace {
 using namespace arcticdb;
 using SearchTDT = TypeDescriptorTag<DataTypeTag<DataType::INT64>, DimensionTag<Dimension::Dim0>>;
 
-void populate(Column& col, const std::vector<int64_t>& values) {
-    for (size_t i = 0; i < values.size(); ++i) {
-        col.reference_at<int64_t>(i) = values[i];
-    }
-}
-
-// Three column shapes exercise the three random_accessor paths: SINGLE / REGULAR / IRREGULAR.
 Column make_single_block(const std::vector<int64_t>& values) {
-    Column col(
-            static_cast<TypeDescriptor>(SearchTDT{}), values.size(), AllocationType::PRESIZED, Sparsity::NOT_PERMITTED
-    );
-    populate(col, values);
-    return col;
+    return make_single_block_column<int64_t>(values, DataType::INT64);  // SINGLE random_accessor path
 }
 
 Column make_regular_blocks(const std::vector<int64_t>& values) {
-    Column col(
-            static_cast<TypeDescriptor>(SearchTDT{}),
-            Sparsity::NOT_PERMITTED,
-            ChunkedBuffer::presized_in_blocks(values.size() * sizeof(int64_t))
-    );
-    populate(col, values);
-    return col;
+    return make_regular_blocks_column<int64_t>(values, DataType::INT64);  // REGULAR random_accessor path
 }
 
 Column make_irregular_blocks(const std::vector<int64_t>& values, const std::vector<size_t>& block_sizes) {
-    Column col(static_cast<TypeDescriptor>(SearchTDT{}), 0, AllocationType::DETACHABLE, Sparsity::NOT_PERMITTED);
-    for (size_t block_size : block_sizes) {
-        col.allocate_data(block_size * sizeof(int64_t));
-        col.advance_data(block_size * sizeof(int64_t));
-    }
-    populate(col, values);
-    return col;
-}
-
-// Default irregular pattern: [1, 1, 1, 3, 1, 5, 1, 7, ...] — alternates 1-element and i-element blocks.
-std::vector<size_t> default_irregular_sizes(size_t total) {
-    std::vector<size_t> sizes;
-    size_t remaining = total;
-    for (size_t i = 0; remaining > 0; ++i) {
-        size_t current = i % 2 == 0 ? 1 : i;
-        current = std::min(current, remaining);
-        sizes.push_back(current);
-        remaining -= current;
-    }
-    return sizes;
+    return make_irregular_blocks_column<int64_t>(values, block_sizes, DataType::INT64);  // IRREGULAR path
 }
 
 Column make_irregular_blocks(const std::vector<int64_t>& values) {
-    return make_irregular_blocks(values, default_irregular_sizes(values.size()));
+    return make_irregular_blocks_column<int64_t>(values, DataType::INT64);  // no sizes passed: helper picks them
 }
 
 // Cross-checks our search functions against std::lower_bound / upper_bound on the reference vector.
@@ -585,10 +554,14 @@ TEST(ColumnSearch, BasicRegular) {
     auto column_data = col.data();
     auto begin = column_data.cbegin<SearchTDT, IteratorType::REGULAR, IteratorDensity::DENSE>();
     auto end = column_data.cend<SearchTDT, IteratorType::REGULAR, IteratorDensity::DENSE>();
+    // 20 is duplicated at indices 4 and 5; lower_bound must land on the first (4) and upper_bound past
+    // the last (6).
     auto lb = lower_bound<SearchTDT, IteratorType::REGULAR, IteratorDensity::DENSE>(begin, end, int64_t{20});
     ASSERT_EQ(*lb, 20);
+    ASSERT_EQ(std::distance(begin, lb), 4);
     auto ub = upper_bound<SearchTDT, IteratorType::REGULAR, IteratorDensity::DENSE>(begin, end, int64_t{20});
     ASSERT_EQ(*ub, 25);
+    ASSERT_EQ(std::distance(begin, ub), 6);
 }
 
 TEST(ColumnSearch, BasicEnumerated) {