From 5f3e02917d813463716f7e2f009d6096d89148da Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Mon, 27 Apr 2026 10:43:48 -0700
Subject: [PATCH 1/8] perf: expand profiling benchmarks around public API
 workflows

- Run profiling comparisons with the checked-out crate toolchain by default
- Add local `just profile` support for comparing code refs and compiler versions
- Expand `ci_performance_suite` beyond construction to cover hulls, boundary traversal, validation, and bistellar flips
- Emit a versioned API benchmark manifest so benchmark logs show which public workflows were measured
---
 .github/workflows/profiling-benchmarks.yml |  79 +++-
 benches/ci_performance_suite.rs            | 476 ++++++++++++++++++++-
 justfile                                   | 128 +++++-
 3 files changed, 649 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/profiling-benchmarks.yml b/.github/workflows/profiling-benchmarks.yml
index e4291800..bc70788a 100644
--- a/.github/workflows/profiling-benchmarks.yml
+++ b/.github/workflows/profiling-benchmarks.yml
@@ -41,7 +41,6 @@ permissions:
 env:
   CARGO_TERM_COLOR: always
   RUST_BACKTRACE: 1
-  RUST_TOOLCHAIN: 1.92.0
 
 jobs:
   comprehensive-profiling:
@@ -56,7 +55,6 @@ jobs:
       - name: Install Rust toolchain
         uses: actions-rust-lang/setup-rust-toolchain@2b1f5e9b395427c92ee4e3331786ca3c37afe2d7 # v1.16.0
         with:
-          toolchain: ${{ env.RUST_TOOLCHAIN }}
           cache: false
           rustflags: ""
 
@@ -112,6 +110,46 @@ jobs:
             } >> "$GITHUB_ENV"
           fi
 
+      - name: Capture profiling environment metadata
+        env:
+          BENCH_FILTER_VALUE: ${{ github.event.inputs.benchmark_filter || '' }}
+        run: |
+          set -euo pipefail
+
+          mkdir -p profiling-results
+          # Metadata is informational: a missing file or key is recorded as unknown instead of failing the job.
+          declared_toolchain="$(
+            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null \
+              | head -n 1 \
+              | cut -d '=' -f 2 \
+              | tr -d ' "' || true
+          )"
+          rust_version="$(
+            grep -E '^rust-version[[:space:]]*=' Cargo.toml \
+              | head -n 1 \
+              | cut -d '=' -f 2 \
+              | tr -d ' "' || true
+          )"
+          profiling_mode="production"
+          if [[ "${PROFILING_DEV_MODE:-}" == "1" ]]; then
+            profiling_mode="development"
+          fi
+
+          {
+            echo "# Profiling Environment"
+            echo
+            echo "- Code ref: ${GITHUB_REF_NAME}"
+            echo "- Commit: $(git rev-parse HEAD)"
+            echo "- Declared rust-toolchain.toml channel: ${declared_toolchain:-unknown}"
+            echo "- Cargo.toml rust-version: ${rust_version:-unknown}"
+            echo "- rustc: $(rustc --version)"
+            echo "- cargo: $(cargo --version)"
+            echo "- Cargo profile: perf"
+            echo "- Benchmark filter: ${BENCH_FILTER_VALUE:-All benchmarks}"
+            echo "- Profiling mode: ${profiling_mode}"
+            echo "- Runner: ${RUNNER_OS}"
+          } > profiling-results/environment_metadata.md
+
       - name: Build profiling suite
         run: |
           # Build with the same perf profile used by `cargo bench --profile perf`
@@ -197,6 +235,7 @@ jobs:
 
           - \`profiling_output.log\`: Complete benchmark output
           - \`memory_profiling_detailed.log\`: Detailed memory allocation analysis
+          - \`environment_metadata.md\`: Code ref, compiler, profile, and filter metadata
           - \`criterion/\`: HTML reports and detailed timing data
 
           EOF
@@ -253,7 +292,6 @@ jobs:
       - name: Install Rust toolchain
         uses: actions-rust-lang/setup-rust-toolchain@2b1f5e9b395427c92ee4e3331786ca3c37afe2d7 # v1.16.0
         with:
-          toolchain: ${{ env.RUST_TOOLCHAIN }}
           cache: false
           rustflags: ""
 
@@ -273,6 +311,40 @@ jobs:
           echo "Running allocation API tests..."
           cargo test --test allocation_api --features count-allocations --verbose
 
+      - name: Capture memory profiling environment metadata
+        run: |
+          set -euo pipefail
+
+          mkdir -p profiling-results
+          # Metadata is informational: a missing file or key is recorded as unknown instead of failing the job.
+          declared_toolchain="$(
+            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null \
+              | head -n 1 \
+              | cut -d '=' -f 2 \
+              | tr -d ' "' || true
+          )"
+          rust_version="$(
+            grep -E '^rust-version[[:space:]]*=' Cargo.toml \
+              | head -n 1 \
+              | cut -d '=' -f 2 \
+              | tr -d ' "' || true
+          )"
+
+          {
+            echo "# Memory Profiling Environment"
+            echo
+            echo "- Code ref: ${GITHUB_REF_NAME}"
+            echo "- Commit: $(git rev-parse HEAD)"
+            echo "- Declared rust-toolchain.toml channel: ${declared_toolchain:-unknown}"
+            echo "- Cargo.toml rust-version: ${rust_version:-unknown}"
+            echo "- rustc: $(rustc --version)"
+            echo "- cargo: $(cargo --version)"
+            echo "- Cargo profile: perf"
+            echo "- Benchmark filter: memory_profiling"
+            echo "- Profiling mode: development"
+            echo "- Runner: ${RUNNER_OS}"
+          } > profiling-results/environment_metadata.md
+
       - name: Run memory scaling benchmarks
         env:
           PROFILING_DEV_MODE: "1"
@@ -292,5 +364,6 @@ jobs:
         with:
           name: memory-stress-results-${{ github.run_number }}
           path: |
+            profiling-results/
             target/criterion/
           retention-days: 14
diff --git a/benches/ci_performance_suite.rs b/benches/ci_performance_suite.rs
index 4267f5ce..62442fb3 100644
--- a/benches/ci_performance_suite.rs
+++ b/benches/ci_performance_suite.rs
@@ -1,15 +1,17 @@
-//! CI Performance Suite - Optimized performance regression testing for CI/CD
+//! CI Performance Suite - optimized performance regression testing for CI/CD
 //!
-//! This benchmark consolidates the most critical performance tests from across
-//! the delaunay library into a single, CI-optimized suite that provides:
+//! This benchmark is the small, durable performance contract for the delaunay
+//! library. It covers the user-facing workflows that should stay fast across
+//! releases without duplicating every specialized microbenchmark:
 //!
-//! 1. Core triangulation performance (3D/4D/5D at key scales)
-//! 2. Critical circumsphere operations (`insphere_lifted` focus)
-//! 3. Key algorithmic bottlenecks (neighbor assignment, deduplication)
-//! 4. Basic memory footprint tracking
+//! 1. Delaunay construction across 2D-5D at CI-sized scales
+//! 2. Convex hull extraction from completed triangulations
+//! 3. Boundary facet traversal
+//! 4. Full validation (Levels 1-4)
+//! 5. Explicit bistellar flip roundtrips on a stable 4D PL-manifold case
 //!
-//! Designed for ~5-10 minute CI runtime while maintaining comprehensive
-//! regression detection across all performance-critical code paths.
+//! Predicate microbenchmarks, allocation-focused measurements, and large-scale
+//! stress tests live in the dedicated benchmark targets under `benches/`.
 //!
 //! ## Sample Size Strategy
 //!
@@ -24,21 +26,99 @@
 //! - 2D: Fundamental triangulation case
 //! - 3D-5D: Higher-dimensional triangulations as documented in README.md
 
-use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
 use delaunay::core::vertex::Vertex;
+use delaunay::geometry::algorithms::convex_hull::ConvexHull;
+use delaunay::geometry::kernel::{AdaptiveKernel, RobustKernel};
 use delaunay::geometry::point::Point;
 use delaunay::geometry::util::generate_random_points_seeded;
-use delaunay::prelude::{ConstructionOptions, DelaunayTriangulation, RetryPolicy};
+use delaunay::prelude::triangulation::flips::{
+    BistellarFlips, CellKey, EdgeKey, FacetHandle, RidgeHandle, TopologyGuarantee, TriangleHandle,
+};
+use delaunay::prelude::{
+    ConstructionOptions, DelaunayTriangulation, InsertionOrderStrategy, RetryPolicy,
+};
 use delaunay::vertex;
 use std::hint::black_box;
 use std::num::NonZeroUsize;
+use std::sync::Once;
 use tracing::{error, warn};
 
 /// Default point counts for 2D–4D benchmarks.
 const COUNTS: &[usize] = &[10, 25, 50];
 /// Reduced point counts for 5D (50-point construction is prohibitively slow).
 const COUNTS_5D: &[usize] = &[10, 25];
+/// Representative operation count for 2D-4D non-construction workflows.
+const OPERATION_COUNT: usize = 50;
+/// Representative operation count for 5D non-construction workflows.
+const OPERATION_COUNT_5D: usize = 25;
 type SeedSearchResult<const D: usize> = Option<(u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>)>;
+type BenchTriangulation<const D: usize> = DelaunayTriangulation<AdaptiveKernel<f64>, (), (), D>;
+type FlipTriangulation4 = DelaunayTriangulation<RobustKernel<f64>, (), (), 4>;
+
+struct ApiBenchmarkEntry {
+    group: &'static str,
+    public_api: &'static str,
+    dimensions: &'static str,
+    benchmark_ids: &'static str,
+    note: &'static str,
+}
+
+static API_BENCHMARK_MANIFEST: Once = Once::new();
+
+const API_BENCHMARK_ENTRIES: &[ApiBenchmarkEntry] = &[
+    ApiBenchmarkEntry {
+        group: "construction",
+        public_api: "DelaunayTriangulation::new_with_options",
+        dimensions: "2,3,4,5",
+        benchmark_ids: "tds_new_2d/tds_new/{10,25,50};tds_new_3d/tds_new/{10,25,50};tds_new_4d/tds_new/{10,25,50};tds_new_5d/tds_new/{10,25}",
+        note: "construct_from_seeded_vertices",
+    },
+    ApiBenchmarkEntry {
+        group: "boundary_facets",
+        public_api: "DelaunayTriangulation::boundary_facets",
+        dimensions: "2,3,4,5",
+        benchmark_ids: "boundary_facets/boundary_facets_2d/50;boundary_facets/boundary_facets_3d/50;boundary_facets/boundary_facets_4d/50;boundary_facets/boundary_facets_5d/25",
+        note: "iterate_boundary_facets",
+    },
+    ApiBenchmarkEntry {
+        group: "convex_hull",
+        public_api: "ConvexHull::from_triangulation",
+        dimensions: "2,3,4,5",
+        benchmark_ids: "convex_hull/from_triangulation_2d/50;convex_hull/from_triangulation_3d/50;convex_hull/from_triangulation_4d/50;convex_hull/from_triangulation_5d/25",
+        note: "extract_hull_from_completed_triangulation",
+    },
+    ApiBenchmarkEntry {
+        group: "validation",
+        public_api: "DelaunayTriangulation::validate",
+        dimensions: "3,4,5",
+        benchmark_ids: "validation/validate_3d/50;validation/validate_4d/50;validation/validate_5d/25",
+        note: "levels_1_through_4",
+    },
+    ApiBenchmarkEntry {
+        group: "bistellar_flips",
+        public_api: "BistellarFlips::{flip_k1_insert,flip_k1_remove,flip_k2,flip_k2_inverse_from_edge,flip_k3,flip_k3_inverse_from_triangle}",
+        dimensions: "4",
+        benchmark_ids: "bistellar_flips_4d/k1_roundtrip;bistellar_flips_4d/k2_roundtrip;bistellar_flips_4d/k3_roundtrip",
+        note: "stable_pl_manifold_roundtrips",
+    },
+];
+
+/// Stable 4D PL-manifold configuration used for explicit bistellar flips.
+const STABLE_POINTS_4D: &[[f64; 4]] = &[
+    [0.0, 0.0, 0.0, 0.0],
+    [1.0, 0.0, 0.0, 0.0],
+    [0.0, 1.0, 0.0, 0.0],
+    [0.0, 0.0, 1.0, 0.0],
+    [0.0, 0.0, 0.0, 1.0],
+    [0.10, 0.10, 0.10, 0.10],
+    [0.15, 0.10, 0.10, 0.10],
+    [0.10, 0.15, 0.10, 0.10],
+    [0.10, 0.10, 0.15, 0.10],
+    [0.12, 0.12, 0.12, 0.12],
+    [0.20, 0.15, 0.10, 0.05],
+    [0.08, 0.18, 0.12, 0.14],
+];
 
 /// Pre-computed seeds for each (dimension, count) pair.
 ///
@@ -78,6 +158,21 @@ fn known_seed(dim: usize, count: usize) -> Option<u64> {
         .map(|&(_, _, seed)| seed)
 }
 
+fn print_api_benchmark_manifest_once() {
+    API_BENCHMARK_MANIFEST.call_once(|| {
+        println!(
+            "api_benchmark_manifest crate=delaunay version={} benchmark=ci_performance_suite schema=1",
+            env!("CARGO_PKG_VERSION")
+        );
+        for entry in API_BENCHMARK_ENTRIES {
+            println!(
+                "api_benchmark group={} public_api={} dimensions={} benchmark_ids={} note={}",
+                entry.group, entry.public_api, entry.dimensions, entry.benchmark_ids, entry.note
+            );
+        }
+    });
+}
+
 /// Prepare benchmark inputs by looking up a pre-computed seed, falling back
 /// to a runtime search only if the known seed is missing or invalid.
 fn prepare_benchmark_data<const D: usize>(
@@ -112,6 +207,20 @@ fn prepare_benchmark_data<const D: usize>(
     )
 }
 
+fn prepare_triangulation<const D: usize>(dim_seed: u64, count: usize) -> BenchTriangulation<D> {
+    let bounds = (-100.0, 100.0);
+    let attempts = NonZeroUsize::new(6).expect("retry attempts must be non-zero");
+    let (seed, _, vertices) = prepare_benchmark_data::<D>(dim_seed, count, bounds, attempts);
+    let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled {
+        attempts,
+        base_seed: Some(seed),
+    });
+
+    BenchTriangulation::<D>::new_with_options(&vertices, options).unwrap_or_else(|err| {
+        panic!("failed to prepare {D}D benchmark triangulation with {count} vertices: {err}");
+    })
+}
+
 fn find_seed_and_vertices<const D: usize>(
     start_seed: u64,
     count: usize,
@@ -140,6 +249,162 @@ fn find_seed_and_vertices<const D: usize>(
     None
 }
 
+fn stable_vertices_4d() -> Vec<Vertex<f64, (), 4>> {
+    STABLE_POINTS_4D
+        .iter()
+        .map(|coords| vertex!(*coords))
+        .collect()
+}
+
+fn build_flip_triangulation_4d() -> FlipTriangulation4 {
+    let vertices = stable_vertices_4d();
+    let options =
+        ConstructionOptions::default().with_insertion_order(InsertionOrderStrategy::Input);
+    DelaunayTriangulation::with_topology_guarantee_and_options(
+        &RobustKernel::new(),
+        &vertices,
+        TopologyGuarantee::PLManifold,
+        options,
+    )
+    .unwrap_or_else(|err| panic!("failed to build stable 4D flip triangulation: {err}"))
+}
+
+fn cell_centroid_4d(dt: &FlipTriangulation4, cell_key: CellKey) -> [f64; 4] {
+    let cell = dt
+        .tds()
+        .get_cell(cell_key)
+        .expect("cell key should exist in benchmark triangulation");
+
+    let mut coords = [0.0_f64; 4];
+    for &vkey in cell.vertices() {
+        let vertex = dt
+            .tds()
+            .get_vertex_by_key(vkey)
+            .expect("vertex key should exist in benchmark triangulation");
+        let vcoords = vertex.point().coords();
+        for i in 0..4 {
+            coords[i] += vcoords[i];
+        }
+    }
+
+    let vertex_count =
+        u32::try_from(cell.vertices().len()).expect("cell vertex count should fit in u32");
+    let inv = 1.0_f64 / f64::from(vertex_count);
+    for coord in &mut coords {
+        *coord *= inv;
+    }
+    coords
+}
+
+fn roundtrip_k1_4d(dt: &mut FlipTriangulation4) {
+    let cell_key = dt
+        .cells()
+        .next()
+        .map(|(cell_key, _)| cell_key)
+        .expect("benchmark triangulation should have cells");
+    let centroid = cell_centroid_4d(dt, cell_key);
+    let new_vertex = vertex!(centroid);
+    let new_uuid = new_vertex.uuid();
+
+    dt.flip_k1_insert(cell_key, new_vertex)
+        .expect("k=1 insert should succeed on stable 4D benchmark triangulation");
+
+    let new_key = dt
+        .tds()
+        .vertex_key_from_uuid(&new_uuid)
+        .expect("inserted vertex should be present after k=1 insert");
+
+    dt.flip_k1_remove(new_key)
+        .expect("k=1 remove should invert k=1 insert");
+}
+
+fn collect_interior_facets_4d(dt: &FlipTriangulation4) -> Vec<FacetHandle> {
+    let mut facets = Vec::new();
+    for (cell_key, cell) in dt.cells() {
+        if let Some(neighbors) = cell.neighbors() {
+            for (facet_index, neighbor) in neighbors.iter().enumerate() {
+                if neighbor.is_some() {
+                    let facet_index = u8::try_from(facet_index).expect("facet index fits in u8");
+                    facets.push(FacetHandle::new(cell_key, facet_index));
+                }
+            }
+        }
+    }
+    facets
+}
+
+fn roundtrip_k2_4d(dt: &mut FlipTriangulation4) {
+    let mut last_error = None;
+    for facet in collect_interior_facets_4d(dt) {
+        match dt.flip_k2(facet) {
+            Ok(info) => {
+                assert_eq!(
+                    info.inserted_face_vertices.len(),
+                    2,
+                    "k=2 flip should insert an edge"
+                );
+                let edge = EdgeKey::new(
+                    info.inserted_face_vertices[0],
+                    info.inserted_face_vertices[1],
+                );
+                dt.flip_k2_inverse_from_edge(edge)
+                    .expect("k=2 inverse should succeed after k=2 flip");
+                return;
+            }
+            Err(err) => last_error = Some(format!("{err}")),
+        }
+    }
+
+    panic!(
+        "no flippable interior facet found for k=2 benchmark (last error: {})",
+        last_error.unwrap_or_else(|| "none".to_string())
+    );
+}
+
+fn collect_ridges_4d(dt: &FlipTriangulation4) -> Vec<RidgeHandle> {
+    let mut ridges = Vec::new();
+    for (cell_key, cell) in dt.cells() {
+        let vertex_count = cell.number_of_vertices();
+        for i in 0..vertex_count {
+            for j in (i + 1)..vertex_count {
+                let omit_a = u8::try_from(i).expect("ridge index fits in u8");
+                let omit_b = u8::try_from(j).expect("ridge index fits in u8");
+                ridges.push(RidgeHandle::new(cell_key, omit_a, omit_b));
+            }
+        }
+    }
+    ridges
+}
+
+fn roundtrip_k3_4d(dt: &mut FlipTriangulation4) {
+    let mut last_error = None;
+    for ridge in collect_ridges_4d(dt) {
+        match dt.flip_k3(ridge) {
+            Ok(info) => {
+                assert_eq!(
+                    info.inserted_face_vertices.len(),
+                    3,
+                    "k=3 flip should insert a triangle"
+                );
+                let triangle = TriangleHandle::new(
+                    info.inserted_face_vertices[0],
+                    info.inserted_face_vertices[1],
+                    info.inserted_face_vertices[2],
+                );
+                dt.flip_k3_inverse_from_triangle(triangle)
+                    .expect("k=3 inverse should succeed after k=3 flip");
+                return;
+            }
+            Err(err) => last_error = Some(format!("{err}")),
+        }
+    }
+
+    panic!(
+        "no flippable ridge found for k=3 benchmark (last error: {})",
+        last_error.unwrap_or_else(|| "none".to_string())
+    );
+}
+
 fn bench_logging_enabled() -> bool {
     std::env::var("DELAUNAY_BENCH_LOG").is_ok_and(|value| value != "0")
 }
@@ -165,6 +430,7 @@ macro_rules! benchmark_tds_new_dimension {
     ($dim:literal, $func_name:ident, $seed:literal, $counts:expr) => {
         /// Benchmark triangulation creation for D-dimensional triangulations
         fn $func_name(c: &mut Criterion) {
+            print_api_benchmark_manifest_once();
             let counts = $counts;
 
             // Opt-in helper for discovering stable seeds without paying Criterion warmup/
@@ -302,6 +568,188 @@ benchmark_tds_new_dimension!(3, benchmark_tds_new_3d, 123, COUNTS);
 benchmark_tds_new_dimension!(4, benchmark_tds_new_4d, 456, COUNTS);
 benchmark_tds_new_dimension!(5, benchmark_tds_new_5d, 789, COUNTS_5D);
 
+fn benchmark_boundary_facets(c: &mut Criterion) {
+    print_api_benchmark_manifest_once();
+    let mut group = c.benchmark_group("boundary_facets");
+    group.sample_size(25);
+
+    let dt_2d = prepare_triangulation::<2>(42, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(
+        BenchmarkId::new("boundary_facets_2d", OPERATION_COUNT),
+        |b| {
+            b.iter(|| black_box(dt_2d.boundary_facets().count()));
+        },
+    );
+
+    let dt_3d = prepare_triangulation::<3>(123, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(
+        BenchmarkId::new("boundary_facets_3d", OPERATION_COUNT),
+        |b| {
+            b.iter(|| black_box(dt_3d.boundary_facets().count()));
+        },
+    );
+
+    let dt_4d = prepare_triangulation::<4>(456, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(
+        BenchmarkId::new("boundary_facets_4d", OPERATION_COUNT),
+        |b| {
+            b.iter(|| black_box(dt_4d.boundary_facets().count()));
+        },
+    );
+
+    let dt_5d = prepare_triangulation::<5>(789, OPERATION_COUNT_5D);
+    group.throughput(Throughput::Elements(OPERATION_COUNT_5D as u64));
+    group.bench_function(
+        BenchmarkId::new("boundary_facets_5d", OPERATION_COUNT_5D),
+        |b| {
+            b.iter(|| black_box(dt_5d.boundary_facets().count()));
+        },
+    );
+
+    group.finish();
+}
+
+fn benchmark_convex_hull(c: &mut Criterion) {
+    print_api_benchmark_manifest_once();
+    let mut group = c.benchmark_group("convex_hull");
+    group.sample_size(20);
+
+    let dt_2d = prepare_triangulation::<2>(42, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(
+        BenchmarkId::new("from_triangulation_2d", OPERATION_COUNT),
+        |b| {
+            b.iter(|| {
+                black_box(
+                    ConvexHull::from_triangulation(dt_2d.as_triangulation())
+                        .expect("2D convex hull extraction should succeed"),
+                );
+            });
+        },
+    );
+
+    let dt_3d = prepare_triangulation::<3>(123, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(
+        BenchmarkId::new("from_triangulation_3d", OPERATION_COUNT),
+        |b| {
+            b.iter(|| {
+                black_box(
+                    ConvexHull::from_triangulation(dt_3d.as_triangulation())
+                        .expect("3D convex hull extraction should succeed"),
+                );
+            });
+        },
+    );
+
+    let dt_4d = prepare_triangulation::<4>(456, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(
+        BenchmarkId::new("from_triangulation_4d", OPERATION_COUNT),
+        |b| {
+            b.iter(|| {
+                black_box(
+                    ConvexHull::from_triangulation(dt_4d.as_triangulation())
+                        .expect("4D convex hull extraction should succeed"),
+                );
+            });
+        },
+    );
+
+    let dt_5d = prepare_triangulation::<5>(789, OPERATION_COUNT_5D);
+    group.throughput(Throughput::Elements(OPERATION_COUNT_5D as u64));
+    group.bench_function(
+        BenchmarkId::new("from_triangulation_5d", OPERATION_COUNT_5D),
+        |b| {
+            b.iter(|| {
+                black_box(
+                    ConvexHull::from_triangulation(dt_5d.as_triangulation())
+                        .expect("5D convex hull extraction should succeed"),
+                );
+            });
+        },
+    );
+
+    group.finish();
+}
+
+fn benchmark_validation(c: &mut Criterion) {
+    print_api_benchmark_manifest_once();
+    let mut group = c.benchmark_group("validation");
+    group.sample_size(15);
+
+    let dt_3d = prepare_triangulation::<3>(123, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(BenchmarkId::new("validate_3d", OPERATION_COUNT), |b| {
+        b.iter(|| {
+            black_box(dt_3d.validate()).expect("3D benchmark triangulation should validate");
+        });
+    });
+
+    let dt_4d = prepare_triangulation::<4>(456, OPERATION_COUNT);
+    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+    group.bench_function(BenchmarkId::new("validate_4d", OPERATION_COUNT), |b| {
+        b.iter(|| {
+            black_box(dt_4d.validate()).expect("4D benchmark triangulation should validate");
+        });
+    });
+
+    let dt_5d = prepare_triangulation::<5>(789, OPERATION_COUNT_5D);
+    group.throughput(Throughput::Elements(OPERATION_COUNT_5D as u64));
+    group.bench_function(BenchmarkId::new("validate_5d", OPERATION_COUNT_5D), |b| {
+        b.iter(|| {
+            black_box(dt_5d.validate()).expect("5D benchmark triangulation should validate");
+        });
+    });
+
+    group.finish();
+}
+
+fn benchmark_bistellar_flips(c: &mut Criterion) {
+    print_api_benchmark_manifest_once();
+    let mut group = c.benchmark_group("bistellar_flips_4d");
+    group.sample_size(10);
+    let base_dt = build_flip_triangulation_4d();
+
+    group.bench_function("k1_roundtrip", |b| {
+        b.iter_batched(
+            || base_dt.clone(),
+            |mut dt| {
+                roundtrip_k1_4d(&mut dt);
+                black_box(dt);
+            },
+            BatchSize::LargeInput,
+        );
+    });
+
+    group.bench_function("k2_roundtrip", |b| {
+        b.iter_batched(
+            || base_dt.clone(),
+            |mut dt| {
+                roundtrip_k2_4d(&mut dt);
+                black_box(dt);
+            },
+            BatchSize::LargeInput,
+        );
+    });
+
+    group.bench_function("k3_roundtrip", |b| {
+        b.iter_batched(
+            || base_dt.clone(),
+            |mut dt| {
+                roundtrip_k3_4d(&mut dt);
+                black_box(dt);
+            },
+            BatchSize::LargeInput,
+        );
+    });
+
+    group.finish();
+}
+
 criterion_group!(
     name = benches;
     config = Criterion::default();
@@ -309,6 +757,10 @@ criterion_group!(
         benchmark_tds_new_2d,
         benchmark_tds_new_3d,
         benchmark_tds_new_4d,
-        benchmark_tds_new_5d
+        benchmark_tds_new_5d,
+        benchmark_boundary_facets,
+        benchmark_convex_hull,
+        benchmark_validation,
+        benchmark_bistellar_flips
 );
 criterion_main!(benches);
diff --git a/justfile b/justfile
index 8dc09a21..3ec98a81 100644
--- a/justfile
+++ b/justfile
@@ -127,13 +127,6 @@ bench-compare: _ensure-uv
 bench-compile:
     cargo bench --workspace --no-run
 
-# Compile benchmarks and integration tests without running. This catches
-# release-profile-only warnings (e.g. cfg-gated unused-mut) that debug-mode
-# clippy/test won't see.
-bench-test-compile:
-    cargo bench --workspace --no-run
-    cargo test --tests --release --no-run
-
 # Development benchmark comparison: perf profile with reduced sample sizes.
 bench-dev: _ensure-uv
     CRIT_SAMPLE_SIZE=10 CRIT_MEASUREMENT_MS=1000 CRIT_WARMUP_MS=500 uv run benchmark-utils compare --baseline baseline-artifact/baseline_results.txt --dev
@@ -147,6 +140,13 @@ bench-perf-summary: _ensure-uv
 bench-smoke:
     CRIT_SAMPLE_SIZE=10 CRIT_MEASUREMENT_MS=500 CRIT_WARMUP_MS=200 cargo bench --workspace --profile perf
 
+# Compile benchmarks and integration tests without running. This catches
+# release-profile-only warnings (e.g. cfg-gated unused-mut) that debug-mode
+# clippy/test won't see.
+bench-test-compile:
+    cargo bench --workspace --no-run
+    cargo test --tests --release --no-run
+
 # Build commands
 build:
     cargo build
@@ -277,7 +277,7 @@ help-workflows:
     @echo "  just debug-large-scale-3d [n] # Issue #341: 3D scalability (default n=10000)"
     @echo "  just debug-large-scale-5d [n] # Issue #342: 5D feasibility (default n=1000)"
     @echo ""
-    @echo "Benchmark workflows (explicit perf-profile runs):"
+    @echo "Benchmark workflows:"
     @echo "  just bench-smoke        # Smoke-test benchmark harnesses (minimal samples)"
     @echo "  just bench              # Run all benchmarks with perf profile (ThinLTO)"
     @echo "  just bench-baseline     # Generate perf-profile performance baseline"
@@ -285,6 +285,7 @@ help-workflows:
     @echo "  just bench-compare      # Compare against baseline with perf profile"
     @echo "  just bench-dev          # Reduced-sample perf-profile comparison (~1-2 min)"
     @echo "  just bench-perf-summary # Generate perf-profile release summary (~30-45 min)"
+    @echo "  just profile [toolchain] [code_ref] # Run ci_performance_suite for a compiler/code pair"
     @echo ""
     @echo "Larger/optional workflows:"
     @echo "  just ci-slow             # CI + slow tests (100+ vertices)"
@@ -371,9 +372,11 @@ perf-help:
     @echo "  just bench-smoke           # Smoke-test benchmark harnesses"
     @echo ""
     @echo "Profiling Commands:"
-    @echo "  just profile               # Profile full triangulation_scaling benchmark"
-    @echo "  just profile-dev           # Profile 3D dev mode (faster iteration)"
-    @echo "  just profile-mem           # Profile memory allocations (with count-allocations feature)"
+    @echo "  just profile               # Run ci_performance_suite for the current tree/toolchain"
+    @echo "  just profile [toolchain] [code_ref]"
+    @echo "                              # Run ci_performance_suite for a compiler/code pair"
+    @echo "  just profile-dev           # Samply profile 3D dev mode (faster iteration)"
+    @echo "  just profile-mem           # Samply profile memory allocations (with count-allocations feature)"
     @echo ""
     @echo "Benchmark System (Delaunay-specific):"
     @echo "  just bench-baseline        # Generate baseline via benchmark-utils"
@@ -395,10 +398,97 @@ perf-help:
     @echo "  just bench-dev             # Reduced-sample benchmark iteration"
     @echo "  CRIT_SAMPLE_SIZE=100 just bench  # Custom sample size"
     @echo "  just bench-ci              # Final optimized CI-suite benchmark run"
+    @echo "  just profile v0.7.5        # v0.7.5 code on its declared Rust toolchain"
+    @echo "  just profile 1.95          # Current tree on Rust 1.95"
+    @echo "  just profile 1.95 v0.7.5   # v0.7.5 code on Rust 1.95"
+
+# Run ci_performance_suite for one compiler/code pair (a lone non-toolchain argument is the code ref).
+profile toolchain="" code_ref="current":
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    command -v rustup >/dev/null || { echo "❌ 'rustup' not found. Install Rust via https://rustup.rs"; exit 1; }
+
+    repo_root="$(pwd)"
+    requested_toolchain="{{toolchain}}"
+    requested_ref="{{code_ref}}"
+    workdir="$repo_root"
+    cleanup_worktree=0
+
+    if [[ "$requested_ref" == "current" && -n "$requested_toolchain" ]]; then
+        if [[ ! "$requested_toolchain" =~ ^([0-9]+(\.[0-9]+){0,2}|stable|beta|nightly)([-+].*)?$ ]]; then
+            requested_ref="$requested_toolchain"
+            requested_toolchain=""
+        fi
+    fi
+
+    cleanup() {
+        if [[ "$cleanup_worktree" -eq 1 ]]; then
+            git worktree remove --force "$workdir" >/dev/null 2>&1 || true
+            rm -rf "$(dirname "$workdir")"
+        fi
+    }
+    trap cleanup EXIT  # registered before the worktree is created so a failed checkout still removes the temp dir
 
-# Profiling
-profile:
-    samply record cargo bench --profile perf --bench profiling_suite -- triangulation_scaling
+    if [[ "$requested_ref" != "current" && "$requested_ref" != "." ]]; then
+        tmp_parent="$(mktemp -d "${TMPDIR:-/tmp}/delaunay-profile.XXXXXX")"
+        workdir="$tmp_parent/worktree"
+        cleanup_worktree=1
+        git worktree add --detach "$workdir" "$requested_ref"
+    fi
+
+    if [[ -z "$requested_toolchain" ]]; then  # best-effort lookup; an empty result is reported below
+        requested_toolchain="$(
+            grep -E '^[[:space:]]*channel[[:space:]]*=' "$workdir/rust-toolchain.toml" 2>/dev/null \
+                | head -n 1 \
+                | cut -d '=' -f 2 \
+                | tr -d ' "' || true
+        )"
+    fi
+
+    if [[ -z "$requested_toolchain" ]]; then
+        echo "❌ No toolchain argument provided and no rust-toolchain.toml channel found."
+        exit 1
+    fi
+
+    safe_ref="$(
+        if [[ "$requested_ref" == "current" || "$requested_ref" == "." ]]; then
+            printf 'current'
+        else
+            printf '%s' "$requested_ref"
+        fi | tr -c 'A-Za-z0-9._-' '_'
+    )"
+    safe_toolchain="$(printf '%s' "$requested_toolchain" | tr -c 'A-Za-z0-9._-' '_')"
+    run_dir="$repo_root/target/profile-runs/${safe_ref}-${safe_toolchain}"
+    mkdir -p "$run_dir"
+
+    echo "📌 Code ref: $requested_ref"
+    echo "🦀 Rust toolchain: $requested_toolchain"
+    echo "📊 Benchmark: ci_performance_suite"
+    echo "📁 Results: $run_dir"
+
+    rustup toolchain install "$requested_toolchain" --profile minimal
+
+    {
+        echo "# Profile Run"
+        echo
+        echo "- Code ref: $requested_ref"
+        echo "- Workdir: $workdir"
+        echo "- Commit: $(git -C "$workdir" rev-parse HEAD)"
+        echo "- Dirty tree: $(if [[ "$workdir" == "$repo_root" && -n "$(git status --short)" ]]; then echo yes; else echo no; fi)"
+        echo "- Requested toolchain: $requested_toolchain"
+        echo "- rustc: $(rustup run "$requested_toolchain" rustc --version)"
+        echo "- cargo: $(rustup run "$requested_toolchain" cargo --version)"
+        echo "- Cargo profile: cargo bench default"
+        echo "- Benchmark harness: ci_performance_suite"
+    } > "$run_dir/profile_metadata.md"
+
+    (
+        cd "$workdir"
+        CARGO_TARGET_DIR="$run_dir/target" \
+            rustup run "$requested_toolchain" cargo bench --bench ci_performance_suite \
+            2>&1 | tee "$run_dir/ci_performance_suite.log"
+    )
 
 profile-dev:
     PROFILING_DEV_MODE=1 samply record cargo bench --profile perf --bench profiling_suite -- "triangulation_scaling_3d/tds_new/random_3d"
@@ -715,11 +805,6 @@ tag-force version: python-sync
 test: bench-test-compile test-all
     @echo "✅ Test workflow passed!"
 
-# test-unit: runs lib and doc tests.
-test-unit:
-    cargo test --lib --verbose
-    cargo test --doc --verbose
-
 # test-all: runs lib, doc, integration, and Python tests (comprehensive)
 test-all: test-unit test-integration test-python
     @echo "✅ All tests passed!"
@@ -759,6 +844,11 @@ test-slow:
 test-slow-release:
     cargo test --release --features slow-tests
 
+# test-unit: runs lib and doc tests.
+test-unit:
+    cargo test --lib --verbose
+    cargo test --doc --verbose
+
 toml-fmt: _ensure-taplo
     #!/usr/bin/env bash
     set -euo pipefail

From a0ab0febe8cc17057ee6f56d2f01a7ad96dc13de Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Mon, 27 Apr 2026 14:15:16 -0700
Subject: [PATCH 2/8] perf: refresh profiling benchmark coverage

- Run profiling benchmarks against the selected code and Rust toolchain versions.
- Expand ci_performance_suite around public API workflows, adversarial inputs, incremental insertion, validation, convex hulls, and bistellar flip roundtrips.
- Retire microbenchmarks in favor of CI and profiling suite coverage.
- Update benchmark summary generation and performance results for the expanded CI suite, with circumsphere results grouped separately.
- Add focused prelude exports for generator-heavy doctests, examples, tests, and benchmarks.
---
 .github/workflows/profiling-benchmarks.yml |  20 +-
 Cargo.toml                                 |   5 -
 benches/PERFORMANCE_RESULTS.md             | 171 ++++-
 benches/README.md                          |  23 +-
 benches/ci_performance_suite.rs            | 782 ++++++++++++++++-----
 benches/microbenchmarks.rs                 | 432 ------------
 benches/profiling_suite.rs                 | 191 ++---
 docs/code_organization.md                  |   1 -
 examples/convex_hull_3d_100_points.rs      |  58 +-
 examples/pachner_roundtrip_4d.rs           |   5 +-
 justfile                                   |   7 +-
 scripts/README.md                          |  11 +-
 scripts/benchmark_utils.py                 | 322 ++++++++-
 scripts/tests/test_benchmark_utils.py      | 116 ++-
 src/lib.rs                                 |  70 +-
 tests/prelude_exports.rs                   |  38 +
 16 files changed, 1428 insertions(+), 824 deletions(-)
 delete mode 100644 benches/microbenchmarks.rs
 create mode 100644 tests/prelude_exports.rs

diff --git a/.github/workflows/profiling-benchmarks.yml b/.github/workflows/profiling-benchmarks.yml
index bc70788a..824477fe 100644
--- a/.github/workflows/profiling-benchmarks.yml
+++ b/.github/workflows/profiling-benchmarks.yml
@@ -119,16 +119,18 @@ jobs:
           mkdir -p profiling-results
 
           declared_toolchain="$(
-            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml \
+            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null \
               | head -n 1 \
               | cut -d '=' -f 2 \
-              | tr -d ' "'
+              | tr -d ' "' \
+              || true
           )"
           rust_version="$(
-            grep -E '^rust-version[[:space:]]*=' Cargo.toml \
+            grep -E '^rust-version[[:space:]]*=' Cargo.toml 2>/dev/null \
               | head -n 1 \
               | cut -d '=' -f 2 \
-              | tr -d ' "'
+              | tr -d ' "' \
+              || true
           )"
           profiling_mode="production"
           if [[ "${PROFILING_DEV_MODE:-}" == "1" ]]; then
@@ -318,16 +320,18 @@ jobs:
           mkdir -p profiling-results
 
           declared_toolchain="$(
-            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml \
+            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null \
               | head -n 1 \
               | cut -d '=' -f 2 \
-              | tr -d ' "'
+              | tr -d ' "' \
+              || true
           )"
           rust_version="$(
-            grep -E '^rust-version[[:space:]]*=' Cargo.toml \
+            grep -E '^rust-version[[:space:]]*=' Cargo.toml 2>/dev/null \
               | head -n 1 \
               | cut -d '=' -f 2 \
-              | tr -d ' "'
+              | tr -d ' "' \
+              || true
           )"
 
           {
diff --git a/Cargo.toml b/Cargo.toml
index 20fd7cd5..1f6dff1c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -67,11 +67,6 @@ name = "circumsphere_containment"
 path = "benches/circumsphere_containment.rs"
 harness = false
 
-[[bench]]
-name = "microbenchmarks"
-path = "benches/microbenchmarks.rs"
-harness = false
-
 [[bench]]
 name = "topology_guarantee_construction"
 path = "benches/topology_guarantee_construction.rs"
diff --git a/benches/PERFORMANCE_RESULTS.md b/benches/PERFORMANCE_RESULTS.md
index 068f335a..b9f19eff 100644
--- a/benches/PERFORMANCE_RESULTS.md
+++ b/benches/PERFORMANCE_RESULTS.md
@@ -3,9 +3,9 @@
 This file contains performance benchmarks and analysis for the delaunay library.
 The results are automatically generated and updated by the benchmark infrastructure.
 
-**Last Updated**: 2026-04-25 15:39:16 UTC
+**Last Updated**: 2026-04-27 19:30:43 UTC
 **Generated By**: benchmark_utils.py
-**Git Commit**: 7e42be8fba9abe571d0137710fbd7ed0151ebc85
+**Git Commit**: 5f3e02917d813463716f7e2f009d6096d89148da
 **Hardware**: Apple M4 Max (16 cores)
 **Memory**: 64.0 GB
 **OS**: macOS
@@ -13,7 +13,113 @@ The results are automatically generated and updated by the benchmark infrastruct
 
 ## Performance Results Summary
 
-### Circumsphere Performance Results
+### Public API Performance Contract (`ci_performance_suite`)
+
+This suite is the versioned benchmark contract for public Delaunay workflows.
+It covers construction, hull extraction, validation, incremental insertion,
+boundary traversal, and explicit bistellar flip roundtrips.
+
+#### Construction
+
+Public API: `DelaunayTriangulation::new_with_options`
+
+| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |
+|--------------|-----------|-------|---------|------|--------|
+| `tds_new_2d/tds_new/10` | 2D | 10 | well-conditioned | 143.4 µs | 143.1 µs - 143.7 µs |
+| `tds_new_2d/tds_new_adversarial/10` | 2D | 10 | adversarial | 336.1 µs | 334.7 µs - 337.6 µs |
+| `tds_new_2d/tds_new/25` | 2D | 25 | well-conditioned | 904.6 µs | 902.6 µs - 906.8 µs |
+| `tds_new_2d/tds_new_adversarial/25` | 2D | 25 | adversarial | 3.557 ms | 3.526 ms - 3.586 ms |
+| `tds_new_2d/tds_new/50` | 2D | 50 | well-conditioned | 3.055 ms | 3.046 ms - 3.065 ms |
+| `tds_new_2d/tds_new_adversarial/50` | 2D | 50 | adversarial | 16.089 ms | 16.055 ms - 16.121 ms |
+| `tds_new_3d/tds_new/10` | 3D | 10 | well-conditioned | 1.004 ms | 999.9 µs - 1.009 ms |
+| `tds_new_3d/tds_new_adversarial/10` | 3D | 10 | adversarial | 2.876 ms | 2.868 ms - 2.884 ms |
+| `tds_new_3d/tds_new/25` | 3D | 25 | well-conditioned | 14.925 ms | 14.882 ms - 14.969 ms |
+| `tds_new_3d/tds_new_adversarial/25` | 3D | 25 | adversarial | 33.642 ms | 33.512 ms - 33.773 ms |
+| `tds_new_3d/tds_new/50` | 3D | 50 | well-conditioned | 74.230 ms | 73.980 ms - 74.482 ms |
+| `tds_new_3d/tds_new_adversarial/50` | 3D | 50 | adversarial | 167.721 ms | 166.922 ms - 168.499 ms |
+| `tds_new_4d/tds_new/10` | 4D | 10 | well-conditioned | 12.852 ms | 12.774 ms - 12.936 ms |
+| `tds_new_4d/tds_new_adversarial/10` | 4D | 10 | adversarial | 9.161 ms | 9.115 ms - 9.206 ms |
+| `tds_new_4d/tds_new/25` | 4D | 25 | well-conditioned | 287.991 ms | 286.462 ms - 289.393 ms |
+| `tds_new_4d/tds_new_adversarial/25` | 4D | 25 | adversarial | 231.443 ms | 230.582 ms - 232.428 ms |
+| `tds_new_4d/tds_new/50` | 4D | 50 | well-conditioned | 1.632 s | 1.624 s - 1.645 s |
+| `tds_new_4d/tds_new_adversarial/50` | 4D | 50 | adversarial | 1.283 s | 1.280 s - 1.286 s |
+| `tds_new_5d/tds_new/10` | 5D | 10 | well-conditioned | 24.993 ms | 24.906 ms - 25.072 ms |
+| `tds_new_5d/tds_new_adversarial/10` | 5D | 10 | adversarial | 27.704 ms | 27.550 ms - 27.834 ms |
+| `tds_new_5d/tds_new/25` | 5D | 25 | well-conditioned | 1.461 s | 1.457 s - 1.466 s |
+| `tds_new_5d/tds_new_adversarial/25` | 5D | 25 | adversarial | 1.353 s | 1.350 s - 1.357 s |
+
+#### Boundary facets
+
+Public API: `DelaunayTriangulation::boundary_facets`
+
+| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |
+|--------------|-----------|-------|---------|------|--------|
+| `boundary_facets/boundary_facets_2d/50` | 2D | 50 | well-conditioned | 15.9 µs | 15.9 µs - 15.9 µs |
+| `boundary_facets/boundary_facets_2d_adversarial/50` | 2D | 50 | adversarial | 16.4 µs | 16.3 µs - 16.4 µs |
+| `boundary_facets/boundary_facets_3d/50` | 3D | 50 | well-conditioned | 66.2 µs | 65.8 µs - 66.5 µs |
+| `boundary_facets/boundary_facets_3d_adversarial/50` | 3D | 50 | adversarial | 65.4 µs | 65.1 µs - 65.8 µs |
+| `boundary_facets/boundary_facets_4d/50` | 4D | 50 | well-conditioned | 270.1 µs | 267.8 µs - 272.3 µs |
+| `boundary_facets/boundary_facets_4d_adversarial/50` | 4D | 50 | adversarial | 255.7 µs | 253.8 µs - 257.6 µs |
+| `boundary_facets/boundary_facets_5d/25` | 5D | 25 | well-conditioned | 245.5 µs | 242.4 µs - 248.5 µs |
+| `boundary_facets/boundary_facets_5d_adversarial/25` | 5D | 25 | adversarial | 233.8 µs | 231.4 µs - 236.3 µs |
+
+#### Convex hull
+
+Public API: `ConvexHull::from_triangulation`
+
+| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |
+|--------------|-----------|-------|---------|------|--------|
+| `convex_hull/from_triangulation_2d/50` | 2D | 50 | well-conditioned | 16.0 µs | 16.0 µs - 16.1 µs |
+| `convex_hull/from_triangulation_2d_adversarial/50` | 2D | 50 | adversarial | 16.5 µs | 16.5 µs - 16.6 µs |
+| `convex_hull/from_triangulation_3d/50` | 3D | 50 | well-conditioned | 66.3 µs | 66.0 µs - 66.6 µs |
+| `convex_hull/from_triangulation_3d_adversarial/50` | 3D | 50 | adversarial | 66.3 µs | 66.0 µs - 66.5 µs |
+| `convex_hull/from_triangulation_4d/50` | 4D | 50 | well-conditioned | 271.7 µs | 270.0 µs - 273.3 µs |
+| `convex_hull/from_triangulation_4d_adversarial/50` | 4D | 50 | adversarial | 256.6 µs | 254.9 µs - 258.4 µs |
+| `convex_hull/from_triangulation_5d/25` | 5D | 25 | well-conditioned | 247.4 µs | 245.4 µs - 249.2 µs |
+| `convex_hull/from_triangulation_5d_adversarial/25` | 5D | 25 | adversarial | 229.6 µs | 227.0 µs - 232.3 µs |
+
+#### Validation
+
+Public API: `DelaunayTriangulation::validate`
+
+| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |
+|--------------|-----------|-------|---------|------|--------|
+| `validation/validate_3d/50` | 3D | 50 | well-conditioned | 1.071 ms | 1.057 ms - 1.088 ms |
+| `validation/validate_3d_adversarial/50` | 3D | 50 | adversarial | 1.652 ms | 1.643 ms - 1.662 ms |
+| `validation/validate_4d/50` | 4D | 50 | well-conditioned | 43.553 ms | 43.383 ms - 43.729 ms |
+| `validation/validate_4d_adversarial/50` | 4D | 50 | adversarial | 39.152 ms | 38.994 ms - 39.326 ms |
+| `validation/validate_5d/25` | 5D | 25 | well-conditioned | 78.675 ms | 78.339 ms - 78.994 ms |
+| `validation/validate_5d_adversarial/25` | 5D | 25 | adversarial | 72.246 ms | 71.893 ms - 72.631 ms |
+
+#### Incremental insert
+
+Public API: `DelaunayTriangulation::insert`
+
+| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |
+|--------------|-----------|-------|---------|------|--------|
+| `incremental_insert/insert_2d/10` | 2D | 10 | well-conditioned | 1.098 ms | 1.095 ms - 1.102 ms |
+| `incremental_insert/insert_2d_adversarial/10` | 2D | 10 | adversarial | 2.071 ms | 2.067 ms - 2.075 ms |
+| `incremental_insert/insert_3d/10` | 3D | 10 | well-conditioned | 5.988 ms | 5.960 ms - 6.018 ms |
+| `incremental_insert/insert_3d_adversarial/10` | 3D | 10 | adversarial | 48.951 ms | 48.658 ms - 49.245 ms |
+| `incremental_insert/insert_4d/6` | 4D | 6 | well-conditioned | 259.223 ms | 258.041 ms - 260.310 ms |
+| `incremental_insert/insert_4d_adversarial/6` | 4D | 6 | adversarial | 431.328 ms | 429.736 ms - 433.006 ms |
+| `incremental_insert/insert_5d/4` | 5D | 4 | well-conditioned | 930.065 ms | 927.662 ms - 932.270 ms |
+| `incremental_insert/insert_5d_adversarial/4` | 5D | 4 | adversarial | 445.154 ms | 443.820 ms - 446.406 ms |
+
+#### Bistellar flips
+
+Public API: `BistellarFlips`
+
+| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |
+|--------------|-----------|-------|---------|------|--------|
+| `bistellar_flips_4d/k1_roundtrip` | 4D | roundtrip | well-conditioned | 38.0 µs | 37.8 µs - 38.2 µs |
+| `bistellar_flips_4d/k2_roundtrip` | 4D | roundtrip | well-conditioned | 40.6 µs | 40.4 µs - 40.8 µs |
+| `bistellar_flips_4d/k3_roundtrip` | 4D | roundtrip | well-conditioned | 40.1 µs | 40.0 µs - 40.3 µs |
+
+### Circumsphere Predicate Performance
+
+This focused predicate suite tracks `la-stack`-backed circumsphere and
+insphere query performance independently from full triangulation workflows.
 
 #### Version 0.7.6 Results (2026-04-25)
 
@@ -21,33 +127,33 @@ The results are automatically generated and updated by the benchmark infrastruct
 
 | Test Case | insphere | insphere_distance | insphere_lifted | Winner |
 |-----------|----------|------------------|-----------------|---------|
-| Basic 2D | 15 ns | 25 ns | 7 ns | **insphere_lifted** |
-| Boundary vertex | 2 ns | 24 ns | 196 ns | **insphere** |
-| Far vertex | 15 ns | 25 ns | 7 ns | **insphere_lifted** |
+| Basic 2D | 15 ns | 26 ns | 7 ns | **insphere_lifted** |
+| Boundary vertex | 2 ns | 25 ns | 260 ns | **insphere** |
+| Far vertex | 15 ns | 24 ns | 8 ns | **insphere_lifted** |
 
 #### Single Query Performance (3D)
 
 | Test Case | insphere | insphere_distance | insphere_lifted | Winner |
 |-----------|----------|------------------|-----------------|---------|
-| Basic 3D | 2.1 µs | 25 ns | 17 ns | **insphere_lifted** |
-| Boundary vertex | 2 ns | 26 ns | 432 ns | **insphere** |
-| Far vertex | 2.1 µs | 26 ns | 17 ns | **insphere_lifted** |
+| Basic 3D | 2.8 µs | 26 ns | 18 ns | **insphere_lifted** |
+| Boundary vertex | 2 ns | 26 ns | 563 ns | **insphere** |
+| Far vertex | 2.8 µs | 26 ns | 17 ns | **insphere_lifted** |
 
 #### Single Query Performance (4D)
 
 | Test Case | insphere | insphere_distance | insphere_lifted | Winner |
 |-----------|----------|------------------|-----------------|---------|
-| Basic 4D | 5.1 µs | 53 ns | 2.9 µs | **insphere_distance** |
-| Boundary vertex | 2 ns | 60 ns | 1.5 µs | **insphere** |
-| Far vertex | 3.2 µs | 53 ns | 1.8 µs | **insphere_distance** |
+| Basic 4D | 6.7 µs | 56 ns | 3.7 µs | **insphere_distance** |
+| Boundary vertex | 2 ns | 57 ns | 1.9 µs | **insphere** |
+| Far vertex | 4.4 µs | 54 ns | 2.5 µs | **insphere_distance** |
 
 #### Single Query Performance (5D)
 
 | Test Case | insphere | insphere_distance | insphere_lifted | Winner |
 |-----------|----------|------------------|-----------------|---------|
-| Basic 5D | 8.3 µs | 80 ns | 4.8 µs | **insphere_distance** |
-| Boundary vertex | 2 ns | 81 ns | 2.3 µs | **insphere** |
-| Far vertex | 4.9 µs | 79 ns | 2.8 µs | **insphere_distance** |
+| Basic 5D | 10.4 µs | 82 ns | 6.0 µs | **insphere_distance** |
+| Boundary vertex | 2 ns | 82 ns | 2.9 µs | **insphere** |
+| Far vertex | 6.3 µs | 81 ns | 3.8 µs | **insphere_distance** |
 
 ## Triangulation Data Structure Performance
 
@@ -88,13 +194,13 @@ The results are automatically generated and updated by the benchmark infrastruct
 | 10 | 27.463 ms | 0.364 Kelem/s | 1.0x |
 | 25 | 5956.682 ms | 0.004 Kelem/s | 216.9x |
 
-## Key Findings
+## Circumsphere Predicate Analysis
 
 ### Performance Ranking
 
 1. **insphere_distance** - (best in 4D, 5D) - Best average performance
-2. **insphere_lifted** - (best in 2D, 3D) - ~33.6x average vs fastest
-3. **insphere** - ~70.4x slower than fastest on average
+2. **insphere_lifted** - (best in 2D, 3D) - ~42.6x average vs fastest
+3. **insphere** - ~89.2x slower than fastest on average
 
 ### Numerical Accuracy Analysis
 
@@ -105,19 +211,19 @@ Based on random test cases:
 - **insphere_distance vs insphere_lifted**: 100.0% agreement
 - **All three methods agree**: 100.0% (expected due to different numerical approaches)
 
-## Recommendations
+### Recommendations
 
-### Method Selection Guide
+#### Method Selection Guide
 
 **All three methods are mathematically correct** (they produce valid insphere test results).
 Choose based on your specific requirements:
 
-#### Performance Optimization by Dimension
+##### Performance Optimization by Dimension
 
 - **`insphere_distance`**: (best in 4D, 5D) - Best average performance
-- **`insphere_lifted`**: (best in 2D, 3D) - ~33.6x average vs fastest
+- **`insphere_lifted`**: (best in 2D, 3D) - ~42.6x average vs fastest
 
-#### General Recommendations
+##### General Recommendations
 
 **For maximum performance**: Choose the method that performs best in your target dimension (see above)
 
@@ -127,20 +233,20 @@ and uses the standard determinant-based approach with well-understood numerical
 **For algorithm transparency**: `insphere_distance` explicitly calculates the circumcenter,
 making it excellent for educational purposes, debugging, and algorithm validation
 
-#### Performance Comparison
+##### Performance Comparison
 
 Average performance across all non-boundary test cases:
 
-- `insphere_distance`: 46 ns (best in 4D, 5D)
-- `insphere_lifted`: 1.5 µs (best in 2D, 3D)
-- `insphere`: 3.2 µs (third fastest)
+- `insphere_distance`: 47 ns (best in 4D, 5D)
+- `insphere_lifted`: 2.0 µs (best in 2D, 3D)
+- `insphere`: 4.2 µs (third fastest)
 
-## Conclusion
+### Conclusion
 
 All three methods are mathematically correct and produce valid results. Performance characteristics vary by dimension:
 
 - `insphere_distance` (best in 4D, 5D) - Best average performance
-- `insphere_lifted` (best in 2D, 3D) - ~33.6x average vs fastest
+- `insphere_lifted` (best in 2D, 3D) - ~42.6x average vs fastest
 
 For general-purpose applications, choose based on your primary use case:
 
@@ -188,6 +294,11 @@ The disagreements between methods are expected due to:
 
 ## Benchmark Structure
 
+The `ci_performance_suite.rs` benchmark is the primary regression and
+release-summary suite. It emits a versioned `api_benchmark_manifest` and
+covers public construction, hull, validation, insertion, boundary, and
+bistellar-flip workflows across supported dimensions.
+
 The `circumsphere_containment.rs` benchmark includes:
 
 - **Random queries**: Batch processing performance with 1000 random test points
@@ -203,7 +314,7 @@ This file is automatically generated from benchmark results. To update:
 # Generate performance summary with current data
 uv run benchmark-utils generate-summary
 
-# Run fresh perf-profile benchmarks and generate summary (includes numerical accuracy)
+# Run fresh perf-profile public API and circumsphere benchmarks, then generate the summary
 uv run benchmark-utils generate-summary --run-benchmarks --profile perf
 
 # Generate baseline results for regression testing
diff --git a/benches/README.md b/benches/README.md
index 93992d41..fde5e694 100644
--- a/benches/README.md
+++ b/benches/README.md
@@ -12,10 +12,9 @@ This directory contains performance benchmarks for the delaunay library, organiz
 
 | Benchmark | Purpose | Scale | Runtime | Used By |
 |-----------|---------|-------|---------|----------|
-| `ci_performance_suite.rs` | **CI regression detection** | 10–50 vertices | ~5-10 min | CI workflows, baseline generation |
-| `circumsphere_containment.rs` | Algorithm comparison | Random queries | ~5 min | Performance summary generation |
+| `ci_performance_suite.rs` | **CI regression detection** | 10–50 vertices | ~5-10 min | CI workflows, baseline generation, performance summary |
+| `circumsphere_containment.rs` | Predicate algorithm comparison | Random queries | ~5 min | Performance summary predicate subsection |
 | `large_scale_performance.rs` | **Phase 4 SlotMap evaluation** | 1k–10k vertices | ~10-30 min (default); ~2-3 hours (BENCH_LARGE_SCALE=1) | Manual |
-| `microbenchmarks.rs` | Core operations | Various | ~10 min | Manual |
 | `profiling_suite.rs` | Comprehensive profiling | 10³–10⁶ vertices | 1-2 hours | Monthly profiling, manual |
 | `topology_guarantee_construction.rs` | Topology guarantee construction overhead | 2D–5D (small/medium point counts) | ~5–15 min | Manual |
 | ~~`triangulation_creation.rs`~~ | ~~Simple construction~~ | ~~1000 vertices~~ | ~~N/A~~ | **DEPRECATED / REMOVED** |
@@ -25,11 +24,12 @@ This directory contains performance benchmarks for the delaunay library, organiz
 | Use Case | Benchmark | Command |
 |----------|-----------|----------|
 | CI regression check | `ci_performance_suite.rs` | `just bench-ci` or `cargo bench --profile perf --bench ci_performance_suite` |
-| Release performance summary | `circumsphere_containment.rs` | `just bench-perf-summary` |
+| Release performance summary | `ci_performance_suite.rs` + `circumsphere_containment.rs` | `just bench-perf-summary` |
 | Smoke-test benchmark harnesses | Workspace benches | `just bench-smoke` |
 | Phase 4 SlotMap evaluation | `large_scale_performance.rs` | `cargo bench --profile perf --bench large_scale_performance` |
 | Deep profiling (1-2 hours) | `profiling_suite.rs` | `cargo bench --profile perf --bench profiling_suite` |
 | Memory analysis | `profiling_suite.rs` (memory groups) | `cargo bench --profile perf --bench profiling_suite -- memory_profiling` |
+| Validation layer diagnostics | `profiling_suite.rs` (validation components) | `cargo bench --profile perf --bench profiling_suite -- validation_components` |
 | Algorithm comparison | `circumsphere_containment.rs` | `cargo bench --profile perf --bench circumsphere_containment` |
 | Topology guarantee overhead | `topology_guarantee_construction.rs` | See section below |
 
@@ -73,7 +73,7 @@ numbers. Do not treat `bench-smoke` output as performance data.
 cargo bench --profile perf --bench ci_performance_suite
 ```
 
-The CI Performance Suite is the primary benchmarking suite used for automated performance-regression testing:
+The CI Performance Suite is the primary benchmarking suite used for automated performance-regression testing and generated performance summaries:
 
 - **Purpose**: Fast performance regression detection for regular CI/CD
 - **Dimensions**: 2D–5D triangulations
@@ -102,7 +102,9 @@ cargo bench --bench circumsphere_containment -- --test
 
 📊 **[View Detailed Performance Results](PERFORMANCE_RESULTS.md)**
 
-Comprehensive performance benchmarks, analysis, and recommendations have been moved to a dedicated file for easier maintenance and automated updates.
+Comprehensive performance benchmarks, analysis, and recommendations have been moved to a dedicated file for easier
+maintenance and automated updates. Circumsphere performance remains a dedicated subsection because these predicates
+exercise `la-stack` code paths that are important to tune independently.
 
 ##### Quick Summary
 
@@ -190,14 +192,6 @@ just compare-storage-large # Large scale comparison (~8-12 hours, compute cluste
 It measures iteration speed, memory usage, query performance, and validation - all critical
 for SlotMap comparison.
 
-### Microbenchmarks (`microbenchmarks.rs`)
-
-A collection of smaller benchmarks for core operations (varies by module).
-
-```bash
-cargo bench --profile perf --bench microbenchmarks
-```
-
 ### Profiling Suite (`profiling_suite.rs`) (comprehensive)
 
 ```bash
@@ -215,6 +209,7 @@ cargo bench --profile perf --bench profiling_suite --features count-allocations
 cargo bench --profile perf --bench profiling_suite --features count-allocations -- memory_profiling
 cargo bench --profile perf --bench profiling_suite --features count-allocations -- query_latency
 cargo bench --profile perf --bench profiling_suite --features count-allocations -- algorithmic_bottlenecks
+cargo bench --profile perf --bench profiling_suite --features count-allocations -- validation_components
 
 # Run only memory profiling group (useful for focused analysis)
 cargo bench --profile perf --bench profiling_suite --features count-allocations -- "memory_profiling"
diff --git a/benches/ci_performance_suite.rs b/benches/ci_performance_suite.rs
index 62442fb3..cafc9750 100644
--- a/benches/ci_performance_suite.rs
+++ b/benches/ci_performance_suite.rs
@@ -8,7 +8,8 @@
 //! 2. Convex hull extraction from completed triangulations
 //! 3. Boundary facet traversal
 //! 4. Full validation (Levels 1-4)
-//! 5. Explicit bistellar flip roundtrips on a stable 4D PL-manifold case
+//! 5. Incremental vertex insertion
+//! 6. Explicit bistellar flip roundtrips on a stable 4D PL-manifold case
 //!
 //! Predicate microbenchmarks, allocation-focused measurements, and large-scale
 //! stress tests live in the dedicated benchmark targets under `benches/`.
@@ -26,22 +27,21 @@
 //! - 2D: Fundamental triangulation case
 //! - 3D-5D: Higher-dimensional triangulations as documented in README.md
 
-use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
-use delaunay::core::vertex::Vertex;
-use delaunay::geometry::algorithms::convex_hull::ConvexHull;
-use delaunay::geometry::kernel::{AdaptiveKernel, RobustKernel};
-use delaunay::geometry::point::Point;
-use delaunay::geometry::util::generate_random_points_seeded;
+use criterion::measurement::WallTime;
+use criterion::{
+    BatchSize, BenchmarkGroup, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main,
+};
+use delaunay::prelude::generators::generate_random_points_seeded;
+use delaunay::prelude::geometry::{AdaptiveKernel, Coordinate, Point, RobustKernel};
+use delaunay::prelude::query::ConvexHull;
 use delaunay::prelude::triangulation::flips::{
     BistellarFlips, CellKey, EdgeKey, FacetHandle, RidgeHandle, TopologyGuarantee, TriangleHandle,
 };
-use delaunay::prelude::{
-    ConstructionOptions, DelaunayTriangulation, InsertionOrderStrategy, RetryPolicy,
+use delaunay::prelude::triangulation::{
+    ConstructionOptions, DelaunayTriangulation, InsertionOrderStrategy, RetryPolicy, Vertex,
 };
 use delaunay::vertex;
-use std::hint::black_box;
-use std::num::NonZeroUsize;
-use std::sync::Once;
+use std::{env, hint::black_box, num::NonZeroUsize, sync::Once};
 use tracing::{error, warn};
 
 /// Default point counts for 2D–4D benchmarks.
@@ -52,10 +52,31 @@ const COUNTS_5D: &[usize] = &[10, 25];
 const OPERATION_COUNT: usize = 50;
 /// Representative operation count for 5D non-construction workflows.
 const OPERATION_COUNT_5D: usize = 25;
+/// Small insert batch for 2D-3D incremental insertion benchmarks.
+const INSERT_COUNT: usize = 10;
+/// Reduced insert batch for 4D incremental insertion benchmarks.
+const INSERT_COUNT_4D: usize = 6;
+/// Reduced insert batch for 5D incremental insertion benchmarks.
+const INSERT_COUNT_5D: usize = 4;
 type SeedSearchResult<const D: usize> = Option<(u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>)>;
 type BenchTriangulation<const D: usize> = DelaunayTriangulation<AdaptiveKernel<f64>, (), (), D>;
 type FlipTriangulation4 = DelaunayTriangulation<RobustKernel<f64>, (), (), 4>;
 
+#[derive(Clone, Copy)]
+enum Dataset {
+    WellConditioned,
+    Adversarial,
+}
+
+impl Dataset {
+    const fn suffix(self) -> &'static str {
+        match self {
+            Self::WellConditioned => "",
+            Self::Adversarial => "_adversarial",
+        }
+    }
+}
+
 struct ApiBenchmarkEntry {
     group: &'static str,
     public_api: &'static str,
@@ -71,29 +92,36 @@ const API_BENCHMARK_ENTRIES: &[ApiBenchmarkEntry] = &[
         group: "construction",
         public_api: "DelaunayTriangulation::new_with_options",
         dimensions: "2,3,4,5",
-        benchmark_ids: "tds_new_2d/tds_new/{10,25,50};tds_new_3d/tds_new/{10,25,50};tds_new_4d/tds_new/{10,25,50};tds_new_5d/tds_new/{10,25}",
-        note: "construct_from_seeded_vertices",
+        benchmark_ids: "tds_new_2d/{tds_new,tds_new_adversarial}/{10,25,50};tds_new_3d/{tds_new,tds_new_adversarial}/{10,25,50};tds_new_4d/{tds_new,tds_new_adversarial}/{10,25,50};tds_new_5d/{tds_new,tds_new_adversarial}/{10,25}",
+        note: "construct_from_seeded_vertices_and_adversarial_large_coordinate_inputs",
     },
     ApiBenchmarkEntry {
         group: "boundary_facets",
         public_api: "DelaunayTriangulation::boundary_facets",
         dimensions: "2,3,4,5",
-        benchmark_ids: "boundary_facets/boundary_facets_2d/50;boundary_facets/boundary_facets_3d/50;boundary_facets/boundary_facets_4d/50;boundary_facets/boundary_facets_5d/25",
-        note: "iterate_boundary_facets",
+        benchmark_ids: "boundary_facets/{boundary_facets_2d,boundary_facets_2d_adversarial}/50;boundary_facets/{boundary_facets_3d,boundary_facets_3d_adversarial}/50;boundary_facets/{boundary_facets_4d,boundary_facets_4d_adversarial}/50;boundary_facets/{boundary_facets_5d,boundary_facets_5d_adversarial}/25",
+        note: "iterate_boundary_facets_on_well_conditioned_and_adversarial_inputs",
     },
     ApiBenchmarkEntry {
         group: "convex_hull",
         public_api: "ConvexHull::from_triangulation",
         dimensions: "2,3,4,5",
-        benchmark_ids: "convex_hull/from_triangulation_2d/50;convex_hull/from_triangulation_3d/50;convex_hull/from_triangulation_4d/50;convex_hull/from_triangulation_5d/25",
-        note: "extract_hull_from_completed_triangulation",
+        benchmark_ids: "convex_hull/{from_triangulation_2d,from_triangulation_2d_adversarial}/50;convex_hull/{from_triangulation_3d,from_triangulation_3d_adversarial}/50;convex_hull/{from_triangulation_4d,from_triangulation_4d_adversarial}/50;convex_hull/{from_triangulation_5d,from_triangulation_5d_adversarial}/25",
+        note: "extract_hull_from_well_conditioned_and_adversarial_triangulations",
     },
     ApiBenchmarkEntry {
         group: "validation",
         public_api: "DelaunayTriangulation::validate",
         dimensions: "3,4,5",
-        benchmark_ids: "validation/validate_3d/50;validation/validate_4d/50;validation/validate_5d/25",
-        note: "levels_1_through_4",
+        benchmark_ids: "validation/{validate_3d,validate_3d_adversarial}/50;validation/{validate_4d,validate_4d_adversarial}/50;validation/{validate_5d,validate_5d_adversarial}/25",
+        note: "levels_1_through_4_on_well_conditioned_and_adversarial_inputs",
+    },
+    ApiBenchmarkEntry {
+        group: "incremental_insert",
+        public_api: "DelaunayTriangulation::insert",
+        dimensions: "2,3,4,5",
+        benchmark_ids: "incremental_insert/{insert_2d,insert_2d_adversarial}/10;incremental_insert/{insert_3d,insert_3d_adversarial}/10;incremental_insert/{insert_4d,insert_4d_adversarial}/6;incremental_insert/{insert_5d,insert_5d_adversarial}/4",
+        note: "insert_batches_into_prebuilt_well_conditioned_and_adversarial_triangulations",
     },
     ApiBenchmarkEntry {
         group: "bistellar_flips",
@@ -158,7 +186,7 @@ fn known_seed(dim: usize, count: usize) -> Option<u64> {
         .map(|&(_, _, seed)| seed)
 }
 
-fn print_api_benchmark_manifest_once() {
+fn print_manifest_once() {
     API_BENCHMARK_MANIFEST.call_once(|| {
         println!(
             "api_benchmark_manifest crate=delaunay version={} benchmark=ci_performance_suite schema=1",
@@ -175,7 +203,7 @@ fn print_api_benchmark_manifest_once() {
 
 /// Prepare benchmark inputs by looking up a pre-computed seed, falling back
 /// to a runtime search only if the known seed is missing or invalid.
-fn prepare_benchmark_data<const D: usize>(
+fn prepare_data<const D: usize>(
     dim_seed: u64,
     count: usize,
     bounds: (f64, f64),
@@ -183,7 +211,7 @@ fn prepare_benchmark_data<const D: usize>(
 ) -> (u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>) {
     // Fast path: use the pre-computed seed (single verification construction)
     if let Some(seed) = known_seed(D, count) {
-        if let Some(result) = find_seed_and_vertices::<D>(seed, count, bounds, 1, attempts) {
+        if let Some(result) = find_seed_vertices::<D>(seed, count, bounds, 1, attempts) {
             return result;
         }
         warn!(
@@ -196,21 +224,19 @@ fn prepare_benchmark_data<const D: usize>(
 
     // Slow fallback: runtime search from the base seed
     let base_seed = dim_seed.wrapping_add(count as u64);
-    let search_limit = bench_seed_search_limit();
-    find_seed_and_vertices::<D>(base_seed, count, bounds, search_limit, attempts).unwrap_or_else(
-        || {
-            panic!(
-                "No stable benchmark seed found for {D}D/{count}: \
+    let search_limit = seed_search_limit();
+    find_seed_vertices::<D>(base_seed, count, bounds, search_limit, attempts).unwrap_or_else(|| {
+        panic!(
+            "No stable benchmark seed found for {D}D/{count}: \
                  start_seed={base_seed}; search_limit={search_limit}; bounds={bounds:?}"
-            )
-        },
-    )
+        )
+    })
 }
 
-fn prepare_triangulation<const D: usize>(dim_seed: u64, count: usize) -> BenchTriangulation<D> {
+fn prepare_dt<const D: usize>(dim_seed: u64, count: usize) -> BenchTriangulation<D> {
     let bounds = (-100.0, 100.0);
     let attempts = NonZeroUsize::new(6).expect("retry attempts must be non-zero");
-    let (seed, _, vertices) = prepare_benchmark_data::<D>(dim_seed, count, bounds, attempts);
+    let (seed, _, vertices) = prepare_data::<D>(dim_seed, count, bounds, attempts);
     let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled {
         attempts,
         base_seed: Some(seed),
@@ -221,7 +247,38 @@ fn prepare_triangulation<const D: usize>(dim_seed: u64, count: usize) -> BenchTr
     })
 }
 
-fn find_seed_and_vertices<const D: usize>(
+/// Build an adversarial `D`-dimensional benchmark triangulation; panics if construction fails.
+fn prepare_adv_dt<const D: usize>(dim_seed: u64, count: usize) -> BenchTriangulation<D> {
+    let retries = NonZeroUsize::new(8).expect("retry attempts must be non-zero");
+    let (stable_seed, _, vertices) = prepare_adv_data::<D>(dim_seed, count, retries);
+    let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled {
+        attempts: retries,
+        base_seed: Some(stable_seed),
+    });
+    BenchTriangulation::<D>::new_with_options(&vertices, options).unwrap_or_else(|err| {
+        panic!(
+            "failed to prepare adversarial {D}D benchmark triangulation with {count} vertices: {err}"
+        );
+    })
+}
+
+fn prepare_inserts<const D: usize>(
+    dim_seed: u64,
+    count: usize,
+    dataset: Dataset,
+) -> Vec<Vertex<f64, (), D>> {
+    let insert_seed = dim_seed.wrapping_add(0x5151_5151);
+    let insert_points = match dataset {
+        Dataset::Adversarial => generate_adv_points::<D>(count, insert_seed),
+        Dataset::WellConditioned => {
+            generate_random_points_seeded::<f64, D>(count, (-50.0, 50.0), insert_seed)
+                .unwrap_or_else(|error| panic!("insert point generation failed for {D}D: {error}"))
+        }
+    };
+    insert_points.iter().map(|point| vertex!(*point)).collect()
+}
+
+fn find_seed_vertices<const D: usize>(
     start_seed: u64,
     count: usize,
     bounds: (f64, f64),
@@ -249,6 +306,62 @@ fn find_seed_and_vertices<const D: usize>(
     None
 }
 
+/// Find a seed whose adversarial points triangulate; return it with the points and vertices.
+/// Tries up to `seed_search_limit()` consecutive seeds and panics when none of them construct.
+fn prepare_adv_data<const D: usize>(
+    dim_seed: u64,
+    count: usize,
+    attempts: NonZeroUsize,
+) -> (u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>) {
+    let start_seed = dim_seed
+        .wrapping_mul(17)
+        .wrapping_add(count as u64)
+        .wrapping_add(0xA5A5_A5A5);
+    let search_limit = seed_search_limit();
+    for candidate_seed in (0..search_limit).map(|offset| start_seed.wrapping_add(offset as u64)) {
+        let points = generate_adv_points::<D>(count, candidate_seed);
+        let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
+        let retry_policy = RetryPolicy::Shuffled {
+            attempts,
+            base_seed: Some(candidate_seed),
+        };
+        let options = ConstructionOptions::default().with_retry_policy(retry_policy);
+        if BenchTriangulation::<D>::new_with_options(&vertices, options).is_ok() {
+            return (candidate_seed, points, vertices);
+        }
+    }
+    // Every candidate seed within the search limit failed to construct.
+    panic!(
+        "No stable adversarial benchmark seed found for {D}D/{count}: \
+         start_seed={start_seed}; search_limit={search_limit}"
+    );
+}
+
+/// Generate `count` seeded points scaled by `1.0e3` and offset by roughly `1.0e9` on every axis.
+fn generate_adv_points<const D: usize>(count: usize, seed: u64) -> Vec<Point<f64, D>> {
+    let base_points = generate_random_points_seeded::<f64, D>(count, (-1.0, 1.0), seed)
+        .unwrap_or_else(|error| {
+            panic!("generate_random_points_seeded failed for adversarial {D}D: {error}");
+        });
+    base_points
+        .iter()
+        .enumerate()
+        .map(|(index, base_point)| {
+            let index = u32::try_from(index).expect("benchmark point index should fit in u32");
+            // The cluster offset depends only on the point index, so compute it once per point.
+            let cluster_offset = f64::from(index % 7) * 1.0e-3;
+            let coords: [f64; D] = std::array::from_fn(|axis| {
+                let axis_number = u32::try_from(axis + 1).expect("axis should fit in u32");
+                let axis_offset = f64::from(axis_number) * 0.25;
+                let perturbation = f64::from((index + axis_number) % 11) * 1.0e-6;
+                let shift = 1.0e9 + axis_offset + cluster_offset + perturbation;
+                base_point.coords()[axis].mul_add(1.0e3, shift)
+            });
+            Point::new(coords)
+        })
+        .collect()
+}
+
 fn stable_vertices_4d() -> Vec<Vertex<f64, (), 4>> {
     STABLE_POINTS_4D
         .iter()
@@ -256,7 +369,7 @@ fn stable_vertices_4d() -> Vec<Vertex<f64, (), 4>> {
         .collect()
 }
 
-fn build_flip_triangulation_4d() -> FlipTriangulation4 {
+fn build_flip_dt_4d() -> FlipTriangulation4 {
     let vertices = stable_vertices_4d();
     let options =
         ConstructionOptions::default().with_insertion_order(InsertionOrderStrategy::Input);
@@ -318,7 +431,7 @@ fn roundtrip_k1_4d(dt: &mut FlipTriangulation4) {
         .expect("k=1 remove should invert k=1 insert");
 }
 
-fn collect_interior_facets_4d(dt: &FlipTriangulation4) -> Vec<FacetHandle> {
+fn interior_facets_4d(dt: &FlipTriangulation4) -> Vec<FacetHandle> {
     let mut facets = Vec::new();
     for (cell_key, cell) in dt.cells() {
         if let Some(neighbors) = cell.neighbors() {
@@ -333,10 +446,11 @@ fn collect_interior_facets_4d(dt: &FlipTriangulation4) -> Vec<FacetHandle> {
     facets
 }
 
-fn roundtrip_k2_4d(dt: &mut FlipTriangulation4) {
+fn flippable_k2_facet_4d(dt: &FlipTriangulation4) -> FacetHandle {
     let mut last_error = None;
-    for facet in collect_interior_facets_4d(dt) {
-        match dt.flip_k2(facet) {
+    for facet in interior_facets_4d(dt) {
+        let mut trial = dt.clone();
+        match trial.flip_k2(facet) {
             Ok(info) => {
                 assert_eq!(
                     info.inserted_face_vertices.len(),
@@ -347,9 +461,10 @@ fn roundtrip_k2_4d(dt: &mut FlipTriangulation4) {
                     info.inserted_face_vertices[0],
                     info.inserted_face_vertices[1],
                 );
-                dt.flip_k2_inverse_from_edge(edge)
+                trial
+                    .flip_k2_inverse_from_edge(edge)
                     .expect("k=2 inverse should succeed after k=2 flip");
-                return;
+                return facet;
             }
             Err(err) => last_error = Some(format!("{err}")),
         }
@@ -361,7 +476,24 @@ fn roundtrip_k2_4d(dt: &mut FlipTriangulation4) {
     );
 }
 
-fn collect_ridges_4d(dt: &FlipTriangulation4) -> Vec<RidgeHandle> {
+/// Run one k=2 bistellar round trip on `dt` using a preselected `facet`.
+///
+/// The flip must succeed and report exactly two inserted face vertices; the edge
+/// formed by those two vertices is then handed to the inverse flip. Any failure
+/// panics with the matching `expect`/`assert_eq!` message.
+fn roundtrip_k2_4d(dt: &mut FlipTriangulation4, facet: FacetHandle) {
+    let info = dt
+        .flip_k2(facet)
+        .expect("k=2 flip should succeed for preselected 4D benchmark facet");
+    let inserted = &info.inserted_face_vertices;
+    assert_eq!(inserted.len(), 2, "k=2 flip should insert an edge");
+    // Apply the inverse flip through the edge the forward flip inserted.
+    let edge = EdgeKey::new(inserted[0], inserted[1]);
+    dt.flip_k2_inverse_from_edge(edge)
+        .expect("k=2 inverse should succeed after k=2 flip");
+}
+
+fn ridges_4d(dt: &FlipTriangulation4) -> Vec<RidgeHandle> {
     let mut ridges = Vec::new();
     for (cell_key, cell) in dt.cells() {
         let vertex_count = cell.number_of_vertices();
@@ -376,10 +508,11 @@ fn collect_ridges_4d(dt: &FlipTriangulation4) -> Vec<RidgeHandle> {
     ridges
 }
 
-fn roundtrip_k3_4d(dt: &mut FlipTriangulation4) {
+fn flippable_k3_ridge_4d(dt: &FlipTriangulation4) -> RidgeHandle {
     let mut last_error = None;
-    for ridge in collect_ridges_4d(dt) {
-        match dt.flip_k3(ridge) {
+    for ridge in ridges_4d(dt) {
+        let mut trial = dt.clone();
+        match trial.flip_k3(ridge) {
             Ok(info) => {
                 assert_eq!(
                     info.inserted_face_vertices.len(),
@@ -391,9 +524,10 @@ fn roundtrip_k3_4d(dt: &mut FlipTriangulation4) {
                     info.inserted_face_vertices[1],
                     info.inserted_face_vertices[2],
                 );
-                dt.flip_k3_inverse_from_triangle(triangle)
+                trial
+                    .flip_k3_inverse_from_triangle(triangle)
                     .expect("k=3 inverse should succeed after k=3 flip");
-                return;
+                return ridge;
             }
             Err(err) => last_error = Some(format!("{err}")),
         }
@@ -405,16 +539,34 @@ fn roundtrip_k3_4d(dt: &mut FlipTriangulation4) {
     );
 }
 
+/// Run one k=3 bistellar round trip on `dt` using a preselected `ridge`.
+///
+/// The flip must succeed and report exactly three inserted face vertices; the
+/// triangle formed by those three vertices is then handed to the inverse flip.
+/// Any failure panics with the matching `expect`/`assert_eq!` message, so a
+/// returning call means both the forward and the inverse flip were accepted.
+fn roundtrip_k3_4d(dt: &mut FlipTriangulation4, ridge: RidgeHandle) {
+    let info = dt
+        .flip_k3(ridge)
+        .expect("k=3 flip should succeed for preselected 4D benchmark ridge");
+    let inserted = &info.inserted_face_vertices;
+    assert_eq!(inserted.len(), 3, "k=3 flip should insert a triangle");
+    // Apply the inverse flip through the triangle the forward flip inserted.
+    let triangle = TriangleHandle::new(inserted[0], inserted[1], inserted[2]);
+    dt.flip_k3_inverse_from_triangle(triangle)
+        .expect("k=3 inverse should succeed after k=3 flip");
+}
+
 fn bench_logging_enabled() -> bool {
-    std::env::var("DELAUNAY_BENCH_LOG").is_ok_and(|value| value != "0")
+    env::var("DELAUNAY_BENCH_LOG").is_ok_and(|value| value != "0")
 }
 
-fn bench_discover_seeds_enabled() -> bool {
-    std::env::var("DELAUNAY_BENCH_DISCOVER_SEEDS").is_ok_and(|value| value != "0")
+fn discover_seeds_enabled() -> bool {
+    env::var("DELAUNAY_BENCH_DISCOVER_SEEDS").is_ok_and(|value| value != "0")
 }
 
-fn bench_seed_search_limit() -> usize {
-    std::env::var("DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT")
+fn seed_search_limit() -> usize {
+    env::var("DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT")
         .ok()
         .and_then(|value| value.parse::<usize>().ok())
         .unwrap_or(2000)
@@ -429,8 +581,9 @@ fn bench_seed_search_limit() -> usize {
 macro_rules! benchmark_tds_new_dimension {
     ($dim:literal, $func_name:ident, $seed:literal, $counts:expr) => {
         /// Benchmark triangulation creation for D-dimensional triangulations
+        #[allow(clippy::too_many_lines)]
         fn $func_name(c: &mut Criterion) {
-            print_api_benchmark_manifest_once();
+            print_manifest_once();
             let counts = $counts;
 
             // Opt-in helper for discovering stable seeds without paying Criterion warmup/
@@ -448,9 +601,9 @@ macro_rules! benchmark_tds_new_dimension {
             //
             // We avoid `std::process::exit` here so that destructors run and Criterion
             // can clean up state on both success and failure.
-            if bench_discover_seeds_enabled() {
+            if discover_seeds_enabled() {
                 let bounds = (-100.0, 100.0);
-                let filters: Vec<String> = std::env::args()
+                let filters: Vec<String> = env::args()
                     .skip(1)
                     .filter(|arg| !arg.starts_with('-'))
                     .collect();
@@ -464,12 +617,12 @@ macro_rules! benchmark_tds_new_dimension {
                     }
 
                     let seed = ($seed as u64).wrapping_add(count as u64);
-                    let limit = bench_seed_search_limit();
+                    let limit = seed_search_limit();
                     let attempts =
                         NonZeroUsize::new(6).expect("retry attempts must be non-zero");
 
                     if let Some((candidate_seed, _, _)) =
-                        find_seed_and_vertices::<$dim>(seed, count, bounds, limit, attempts)
+                        find_seed_vertices::<$dim>(seed, count, bounds, limit, attempts)
                     {
                         println!(
                             "seed_search_found dim={} count={} seed={}",
@@ -510,7 +663,7 @@ macro_rules! benchmark_tds_new_dimension {
                     let attempts =
                         NonZeroUsize::new(6).expect("retry attempts must be non-zero");
                     let (seed, points, vertices) =
-                        prepare_benchmark_data::<$dim>($seed, count, bounds, attempts);
+                        prepare_data::<$dim>($seed, count, bounds, attempts);
                     let sample_points = points.iter().take(5).collect::<Vec<_>>();
 
                     // In benchmarks we compile in release mode, where the default retry policy is
@@ -555,6 +708,55 @@ macro_rules! benchmark_tds_new_dimension {
                         }
                     });
                 });
+
+                group.bench_with_input(
+                    BenchmarkId::new("tds_new_adversarial", count),
+                    &count,
+                    |b, &count| {
+                        let attempts =
+                            NonZeroUsize::new(8).expect("retry attempts must be non-zero");
+                        let (seed, points, vertices) =
+                            prepare_adv_data::<$dim>($seed, count, attempts);
+                        let sample_points = points.iter().take(5).collect::<Vec<_>>();
+                        let options = ConstructionOptions::default().with_retry_policy(
+                            RetryPolicy::Shuffled {
+                                attempts,
+                                base_seed: Some(seed),
+                            },
+                        );
+
+                        b.iter(|| {
+                            match DelaunayTriangulation::<_, (), (), $dim>::new_with_options(
+                                &vertices,
+                                options,
+                            ) {
+                                Ok(dt) => {
+                                    black_box(dt);
+                                }
+                                Err(err) => {
+                                    let error = format!("{err:?}");
+                                    if bench_logging_enabled() {
+                                        error!(
+                                            dim = $dim,
+                                            count,
+                                            seed,
+                                            sample_points = ?sample_points,
+                                            error = %error,
+                                            "adversarial DelaunayTriangulation::new failed"
+                                        );
+                                    }
+                                    panic!(
+                                        "adversarial DelaunayTriangulation::new failed for {}D: {error}; dim={}; count={}; seed={}; sample_points={sample_points:?}",
+                                        $dim,
+                                        $dim,
+                                        count,
+                                        seed
+                                    );
+                                }
+                            }
+                        });
+                    },
+                );
             }
 
             group.finish();
@@ -568,151 +770,406 @@ benchmark_tds_new_dimension!(3, benchmark_tds_new_3d, 123, COUNTS);
 benchmark_tds_new_dimension!(4, benchmark_tds_new_4d, 456, COUNTS);
 benchmark_tds_new_dimension!(5, benchmark_tds_new_5d, 789, COUNTS_5D);
 
-fn benchmark_boundary_facets(c: &mut Criterion) {
-    print_api_benchmark_manifest_once();
-    let mut group = c.benchmark_group("boundary_facets");
-    group.sample_size(25);
-
-    let dt_2d = prepare_triangulation::<2>(42, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+fn bench_boundary_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    dt: &BenchTriangulation<D>,
+) {
+    group.throughput(Throughput::Elements(count as u64));
     group.bench_function(
-        BenchmarkId::new("boundary_facets_2d", OPERATION_COUNT),
+        BenchmarkId::new(
+            format!("boundary_facets_{dimension}d{}", dataset.suffix()),
+            count,
+        ),
         |b| {
-            b.iter(|| black_box(dt_2d.boundary_facets().count()));
+            b.iter(|| black_box(dt.boundary_facets().count()));
         },
     );
+}
 
-    let dt_3d = prepare_triangulation::<3>(123, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+fn bench_hull_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    dt: &BenchTriangulation<D>,
+) {
+    group.throughput(Throughput::Elements(count as u64));
     group.bench_function(
-        BenchmarkId::new("boundary_facets_3d", OPERATION_COUNT),
+        BenchmarkId::new(
+            format!("from_triangulation_{dimension}d{}", dataset.suffix()),
+            count,
+        ),
         |b| {
-            b.iter(|| black_box(dt_3d.boundary_facets().count()));
+            b.iter(|| {
+                black_box(
+                    ConvexHull::from_triangulation(dt.as_triangulation()).unwrap_or_else(|err| {
+                        panic!("{dimension}D convex hull extraction should succeed: {err}")
+                    }),
+                );
+            });
         },
     );
+}
 
-    let dt_4d = prepare_triangulation::<4>(456, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
+fn bench_validate_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    dt: &BenchTriangulation<D>,
+) {
+    group.throughput(Throughput::Elements(count as u64));
     group.bench_function(
-        BenchmarkId::new("boundary_facets_4d", OPERATION_COUNT),
+        BenchmarkId::new(format!("validate_{dimension}d{}", dataset.suffix()), count),
         |b| {
-            b.iter(|| black_box(dt_4d.boundary_facets().count()));
+            b.iter(|| {
+                black_box(dt.validate()).unwrap_or_else(|err| {
+                    panic!("{dimension}D benchmark triangulation should validate: {err}");
+                });
+            });
         },
     );
+}
 
-    let dt_5d = prepare_triangulation::<5>(789, OPERATION_COUNT_5D);
-    group.throughput(Throughput::Elements(OPERATION_COUNT_5D as u64));
+fn bench_insert_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    base_dt: &BenchTriangulation<D>,
+    insert_vertices: &[Vertex<f64, (), D>],
+) {
+    group.throughput(Throughput::Elements(count as u64));
     group.bench_function(
-        BenchmarkId::new("boundary_facets_5d", OPERATION_COUNT_5D),
+        BenchmarkId::new(format!("insert_{dimension}d{}", dataset.suffix()), count),
         |b| {
-            b.iter(|| black_box(dt_5d.boundary_facets().count()));
+            b.iter_batched(
+                || (base_dt.clone(), insert_vertices.to_vec()),
+                |(mut dt, vertices)| {
+                    for vertex in vertices {
+                        black_box(dt.insert(vertex)).unwrap_or_else(|err| {
+                            panic!("{dimension}D incremental insert should succeed: {err}");
+                        });
+                    }
+                    black_box(dt);
+                },
+                BatchSize::LargeInput,
+            );
         },
     );
+}
+
+fn benchmark_boundary_facets(c: &mut Criterion) {
+    print_manifest_once();
+    let mut group = c.benchmark_group("boundary_facets");
+    group.sample_size(25);
+
+    let dt_2d = prepare_dt::<2>(42, OPERATION_COUNT);
+    bench_boundary_case(
+        &mut group,
+        2,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_2d,
+    );
+    let dt_2d_adversarial = prepare_adv_dt::<2>(42, OPERATION_COUNT);
+    bench_boundary_case(
+        &mut group,
+        2,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_2d_adversarial,
+    );
+
+    let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT);
+    bench_boundary_case(
+        &mut group,
+        3,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_3d,
+    );
+    let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT);
+    bench_boundary_case(
+        &mut group,
+        3,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_3d_adversarial,
+    );
+
+    let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT);
+    bench_boundary_case(
+        &mut group,
+        4,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_4d,
+    );
+    let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT);
+    bench_boundary_case(
+        &mut group,
+        4,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_4d_adversarial,
+    );
+
+    let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D);
+    bench_boundary_case(
+        &mut group,
+        5,
+        Dataset::WellConditioned,
+        OPERATION_COUNT_5D,
+        &dt_5d,
+    );
+    let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D);
+    bench_boundary_case(
+        &mut group,
+        5,
+        Dataset::Adversarial,
+        OPERATION_COUNT_5D,
+        &dt_5d_adversarial,
+    );
 
     group.finish();
 }
 
 fn benchmark_convex_hull(c: &mut Criterion) {
-    print_api_benchmark_manifest_once();
+    print_manifest_once();
     let mut group = c.benchmark_group("convex_hull");
     group.sample_size(20);
 
-    let dt_2d = prepare_triangulation::<2>(42, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
-    group.bench_function(
-        BenchmarkId::new("from_triangulation_2d", OPERATION_COUNT),
-        |b| {
-            b.iter(|| {
-                black_box(
-                    ConvexHull::from_triangulation(dt_2d.as_triangulation())
-                        .expect("2D convex hull extraction should succeed"),
-                );
-            });
-        },
+    let dt_2d = prepare_dt::<2>(42, OPERATION_COUNT);
+    bench_hull_case(
+        &mut group,
+        2,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_2d,
+    );
+    let dt_2d_adversarial = prepare_adv_dt::<2>(42, OPERATION_COUNT);
+    bench_hull_case(
+        &mut group,
+        2,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_2d_adversarial,
     );
 
-    let dt_3d = prepare_triangulation::<3>(123, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
-    group.bench_function(
-        BenchmarkId::new("from_triangulation_3d", OPERATION_COUNT),
-        |b| {
-            b.iter(|| {
-                black_box(
-                    ConvexHull::from_triangulation(dt_3d.as_triangulation())
-                        .expect("3D convex hull extraction should succeed"),
-                );
-            });
-        },
+    let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT);
+    bench_hull_case(
+        &mut group,
+        3,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_3d,
+    );
+    let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT);
+    bench_hull_case(
+        &mut group,
+        3,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_3d_adversarial,
     );
 
-    let dt_4d = prepare_triangulation::<4>(456, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
-    group.bench_function(
-        BenchmarkId::new("from_triangulation_4d", OPERATION_COUNT),
-        |b| {
-            b.iter(|| {
-                black_box(
-                    ConvexHull::from_triangulation(dt_4d.as_triangulation())
-                        .expect("4D convex hull extraction should succeed"),
-                );
-            });
-        },
+    let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT);
+    bench_hull_case(
+        &mut group,
+        4,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_4d,
+    );
+    let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT);
+    bench_hull_case(
+        &mut group,
+        4,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_4d_adversarial,
     );
 
-    let dt_5d = prepare_triangulation::<5>(789, OPERATION_COUNT_5D);
-    group.throughput(Throughput::Elements(OPERATION_COUNT_5D as u64));
-    group.bench_function(
-        BenchmarkId::new("from_triangulation_5d", OPERATION_COUNT_5D),
-        |b| {
-            b.iter(|| {
-                black_box(
-                    ConvexHull::from_triangulation(dt_5d.as_triangulation())
-                        .expect("5D convex hull extraction should succeed"),
-                );
-            });
-        },
+    let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D);
+    bench_hull_case(
+        &mut group,
+        5,
+        Dataset::WellConditioned,
+        OPERATION_COUNT_5D,
+        &dt_5d,
+    );
+    let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D);
+    bench_hull_case(
+        &mut group,
+        5,
+        Dataset::Adversarial,
+        OPERATION_COUNT_5D,
+        &dt_5d_adversarial,
     );
 
     group.finish();
 }
 
 fn benchmark_validation(c: &mut Criterion) {
-    print_api_benchmark_manifest_once();
+    print_manifest_once();
     let mut group = c.benchmark_group("validation");
     group.sample_size(15);
 
-    let dt_3d = prepare_triangulation::<3>(123, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
-    group.bench_function(BenchmarkId::new("validate_3d", OPERATION_COUNT), |b| {
-        b.iter(|| {
-            black_box(dt_3d.validate()).expect("3D benchmark triangulation should validate");
-        });
-    });
+    let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT);
+    bench_validate_case(
+        &mut group,
+        3,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_3d,
+    );
+    let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT);
+    bench_validate_case(
+        &mut group,
+        3,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_3d_adversarial,
+    );
 
-    let dt_4d = prepare_triangulation::<4>(456, OPERATION_COUNT);
-    group.throughput(Throughput::Elements(OPERATION_COUNT as u64));
-    group.bench_function(BenchmarkId::new("validate_4d", OPERATION_COUNT), |b| {
-        b.iter(|| {
-            black_box(dt_4d.validate()).expect("4D benchmark triangulation should validate");
-        });
-    });
+    let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT);
+    bench_validate_case(
+        &mut group,
+        4,
+        Dataset::WellConditioned,
+        OPERATION_COUNT,
+        &dt_4d,
+    );
+    let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT);
+    bench_validate_case(
+        &mut group,
+        4,
+        Dataset::Adversarial,
+        OPERATION_COUNT,
+        &dt_4d_adversarial,
+    );
 
-    let dt_5d = prepare_triangulation::<5>(789, OPERATION_COUNT_5D);
-    group.throughput(Throughput::Elements(OPERATION_COUNT_5D as u64));
-    group.bench_function(BenchmarkId::new("validate_5d", OPERATION_COUNT_5D), |b| {
-        b.iter(|| {
-            black_box(dt_5d.validate()).expect("5D benchmark triangulation should validate");
-        });
-    });
+    let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D);
+    bench_validate_case(
+        &mut group,
+        5,
+        Dataset::WellConditioned,
+        OPERATION_COUNT_5D,
+        &dt_5d,
+    );
+    let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D);
+    bench_validate_case(
+        &mut group,
+        5,
+        Dataset::Adversarial,
+        OPERATION_COUNT_5D,
+        &dt_5d_adversarial,
+    );
+
+    group.finish();
+}
+
+fn benchmark_insert(c: &mut Criterion) {
+    print_manifest_once();
+    let mut group = c.benchmark_group("incremental_insert");
+    group.sample_size(15);
+
+    let dt_2d = prepare_dt::<2>(42, OPERATION_COUNT);
+    let insert_2d = prepare_inserts::<2>(42, INSERT_COUNT, Dataset::WellConditioned);
+    bench_insert_case(
+        &mut group,
+        2,
+        Dataset::WellConditioned,
+        INSERT_COUNT,
+        &dt_2d,
+        &insert_2d,
+    );
+    let dt_2d_adversarial = prepare_adv_dt::<2>(42, OPERATION_COUNT);
+    let insert_2d_adversarial = prepare_inserts::<2>(42, INSERT_COUNT, Dataset::Adversarial);
+    bench_insert_case(
+        &mut group,
+        2,
+        Dataset::Adversarial,
+        INSERT_COUNT,
+        &dt_2d_adversarial,
+        &insert_2d_adversarial,
+    );
+
+    let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT);
+    let insert_3d = prepare_inserts::<3>(123, INSERT_COUNT, Dataset::WellConditioned);
+    bench_insert_case(
+        &mut group,
+        3,
+        Dataset::WellConditioned,
+        INSERT_COUNT,
+        &dt_3d,
+        &insert_3d,
+    );
+    let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT);
+    let insert_3d_adversarial = prepare_inserts::<3>(123, INSERT_COUNT, Dataset::Adversarial);
+    bench_insert_case(
+        &mut group,
+        3,
+        Dataset::Adversarial,
+        INSERT_COUNT,
+        &dt_3d_adversarial,
+        &insert_3d_adversarial,
+    );
+
+    let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT);
+    let insert_4d = prepare_inserts::<4>(456, INSERT_COUNT_4D, Dataset::WellConditioned);
+    bench_insert_case(
+        &mut group,
+        4,
+        Dataset::WellConditioned,
+        INSERT_COUNT_4D,
+        &dt_4d,
+        &insert_4d,
+    );
+    let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT);
+    let insert_4d_adversarial = prepare_inserts::<4>(456, INSERT_COUNT_4D, Dataset::Adversarial);
+    bench_insert_case(
+        &mut group,
+        4,
+        Dataset::Adversarial,
+        INSERT_COUNT_4D,
+        &dt_4d_adversarial,
+        &insert_4d_adversarial,
+    );
+
+    let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D);
+    let insert_5d = prepare_inserts::<5>(789, INSERT_COUNT_5D, Dataset::WellConditioned);
+    bench_insert_case(
+        &mut group,
+        5,
+        Dataset::WellConditioned,
+        INSERT_COUNT_5D,
+        &dt_5d,
+        &insert_5d,
+    );
+    let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D);
+    let insert_5d_adversarial = prepare_inserts::<5>(789, INSERT_COUNT_5D, Dataset::Adversarial);
+    bench_insert_case(
+        &mut group,
+        5,
+        Dataset::Adversarial,
+        INSERT_COUNT_5D,
+        &dt_5d_adversarial,
+        &insert_5d_adversarial,
+    );
 
     group.finish();
 }
 
 fn benchmark_bistellar_flips(c: &mut Criterion) {
-    print_api_benchmark_manifest_once();
+    print_manifest_once();
     let mut group = c.benchmark_group("bistellar_flips_4d");
     group.sample_size(10);
-    let base_dt = build_flip_triangulation_4d();
+    let base_dt = build_flip_dt_4d();
 
     group.bench_function("k1_roundtrip", |b| {
         b.iter_batched(
@@ -727,9 +1184,13 @@ fn benchmark_bistellar_flips(c: &mut Criterion) {
 
     group.bench_function("k2_roundtrip", |b| {
         b.iter_batched(
-            || base_dt.clone(),
-            |mut dt| {
-                roundtrip_k2_4d(&mut dt);
+            || {
+                let dt = base_dt.clone();
+                let facet = flippable_k2_facet_4d(&dt);
+                (dt, facet)
+            },
+            |(mut dt, facet)| {
+                roundtrip_k2_4d(&mut dt, facet);
                 black_box(dt);
             },
             BatchSize::LargeInput,
@@ -738,9 +1199,13 @@ fn benchmark_bistellar_flips(c: &mut Criterion) {
 
     group.bench_function("k3_roundtrip", |b| {
         b.iter_batched(
-            || base_dt.clone(),
-            |mut dt| {
-                roundtrip_k3_4d(&mut dt);
+            || {
+                let dt = base_dt.clone();
+                let ridge = flippable_k3_ridge_4d(&dt);
+                (dt, ridge)
+            },
+            |(mut dt, ridge)| {
+                roundtrip_k3_4d(&mut dt, ridge);
                 black_box(dt);
             },
             BatchSize::LargeInput,
@@ -761,6 +1226,7 @@ criterion_group!(
         benchmark_boundary_facets,
         benchmark_convex_hull,
         benchmark_validation,
+        benchmark_insert,
         benchmark_bistellar_flips
 );
 criterion_main!(benches);
diff --git a/benches/microbenchmarks.rs b/benches/microbenchmarks.rs
deleted file mode 100644
index 40d6daee..00000000
--- a/benches/microbenchmarks.rs
+++ /dev/null
@@ -1,432 +0,0 @@
-//! Microbenchmarks for key delaunay methods
-//!
-//! This benchmark suite focuses on measuring the performance of individual key methods
-//! in the delaunay triangulation library, particularly those that are performance-critical:
-//!
-//! 1. **`DelaunayTriangulation::with_kernel`**: Complete triangulation creation
-//! 2. **Layered validation**: `dt.tds().is_valid()/validate()`, `dt.as_triangulation().is_valid()/validate()`, `dt.is_valid()`, `dt.validate()`
-//! 3. **Incremental construction**: Performance of `insert()` method for vertex insertion
-//! 4. **Memory usage patterns**: Allocation and deallocation patterns
-//!
-//! These benchmarks measure the effectiveness of the optimization implementations
-//! completed as part of the Pure Incremental Delaunay Triangulation refactoring project.
-
-use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
-use delaunay::geometry::kernel::RobustKernel;
-use delaunay::geometry::util::generate_random_points_seeded;
-use delaunay::prelude::query::*;
-use delaunay::triangulation::delaunay::DelaunayTriangulation;
-use delaunay::vertex;
-use std::hint::black_box;
-use std::sync::OnceLock;
-
-#[cfg(feature = "bench-logging")]
-fn init_tracing() {
-    static INIT: std::sync::Once = std::sync::Once::new();
-    INIT.call_once(|| {
-        let filter = tracing_subscriber::EnvFilter::try_from_default_env()
-            .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
-        let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
-    });
-}
-
-#[cfg(not(feature = "bench-logging"))]
-const fn init_tracing() {}
-
-macro_rules! bench_info {
-    ($($arg:tt)*) => {{
-        #[cfg(feature = "bench-logging")]
-        {
-            init_tracing();
-            tracing::info!($($arg)*);
-        }
-    }};
-}
-
-macro_rules! bench_warn {
-    ($($arg:tt)*) => {{
-        #[cfg(feature = "bench-logging")]
-        {
-            init_tracing();
-            tracing::warn!($($arg)*);
-        }
-    }};
-}
-
-/// Get the deterministic seed for random point generation.
-/// Reads `DELAUNAY_BENCH_SEED` (decimal or 0x-hex). Defaults to 0xD1EA.
-/// Logs the resolved seed once on first use if `PRINT_BENCH_SEED` is set and
-/// the `bench-logging` feature is enabled.
-fn get_benchmark_seed() -> u64 {
-    static SEED: OnceLock<u64> = OnceLock::new();
-    *SEED.get_or_init(|| {
-        let seed = std::env::var("DELAUNAY_BENCH_SEED")
-            .ok()
-            .and_then(|s| {
-                let s = s.trim();
-                s.strip_prefix("0x")
-                    .or_else(|| s.strip_prefix("0X"))
-                    .map_or_else(|| s.parse().ok(), |hex| u64::from_str_radix(hex, 16).ok())
-            })
-            .unwrap_or(0xD1EA);
-        if std::env::var("PRINT_BENCH_SEED").is_ok() {
-            bench_info!("Benchmark seed: 0x{seed:X} ({seed})");
-        }
-        seed
-    })
-}
-
-/// Macro to generate comprehensive dimensional benchmarks for core algorithms
-macro_rules! generate_dimensional_benchmarks {
-    ($dim:literal) => {
-        pastey::paste! {
-            /// Benchmark incremental Delaunay triangulation for [<$dim>]D
-            fn [<benchmark_delaunay_triangulation_ $dim d>](c: &mut Criterion) {
-                let point_counts = [10, 25, 50, 100, 250];
-                let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations
-
-                let mut group = c.benchmark_group(concat!("delaunay_triangulation_", stringify!([<$dim>]), "d"));
-
-                for &n_points in &point_counts {
-                    let throughput = n_points as u64;
-                    group.throughput(Throughput::Elements(throughput));
-
-                    group.bench_with_input(
-                        BenchmarkId::new("with_kernel", n_points),
-                        &n_points,
-                        |b, &n_points| {
-                            b.iter_batched(
-                                || {
-                                    let points: Vec<Point<f64, $dim>> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap();
-                                    points.iter().map(|p| vertex!(*p)).collect::<Vec<_>>()
-                                },
-                                |vertices| black_box(DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap()),
-                                BatchSize::LargeInput,
-                            );
-                        },
-                    );
-                }
-
-                group.finish();
-            }
-        }
-    };
-}
-
-// Generate comprehensive benchmarks for dimensions 2-5
-generate_dimensional_benchmarks!(2);
-generate_dimensional_benchmarks!(3);
-generate_dimensional_benchmarks!(4);
-generate_dimensional_benchmarks!(5);
-
-/// Macro to generate memory usage benchmarks for all dimensions
-macro_rules! generate_memory_usage_benchmarks {
-    ($dim:literal) => {
-        pastey::paste! {
-            /// Benchmark memory allocation patterns for [<$dim>]D
-            fn [<benchmark_memory_usage_ $dim d>](c: &mut Criterion) {
-                let point_counts: &[usize] = if $dim <= 3 { &[50, 100, 200] } else { &[20, 50, 100] };
-                let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations
-
-                let mut group = c.benchmark_group(&format!("memory_usage_{}d", $dim));
-
-                for &n_points in point_counts {
-                    group.bench_with_input(
-                        BenchmarkId::new("triangulation_memory", n_points),
-                        &n_points,
-                        |b, &n_points| {
-                            b.iter(|| {
-                                // Measure complete triangulation creation and destruction
-                                let points: Vec<Point<f64, $dim>> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap();
-                                let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
-                                let dt = DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap();
-                                black_box((dt.tds().number_of_vertices(), dt.tds().number_of_cells()))
-                            });
-                        },
-                    );
-                }
-
-                group.finish();
-            }
-        }
-    };
-}
-
-// Generate memory usage benchmarks for dimensions 2-5
-generate_memory_usage_benchmarks!(2);
-generate_memory_usage_benchmarks!(3);
-generate_memory_usage_benchmarks!(4);
-generate_memory_usage_benchmarks!(5);
-
-/// Macro to generate validation method benchmarks for all dimensions
-macro_rules! generate_validation_benchmarks {
-    ($dim:literal) => {
-        pastey::paste! {
-            /// Benchmark validation methods performance for [<$dim>]D
-            fn [<benchmark_validation_methods_ $dim d>](c: &mut Criterion) {
-                let point_counts: &[usize] = if $dim <= 3 { &[10, 25, 50, 100] } else { &[10, 25, 50] };
-                let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations
-
-                let mut group = c.benchmark_group(&format!("validation_methods_{}d", $dim));
-
-                for &n_points in point_counts {
-                    let throughput = n_points as u64;
-                    group.throughput(Throughput::Elements(throughput));
-
-                    group.bench_with_input(
-                        BenchmarkId::new("validate", n_points),
-                        &n_points,
-                        |b, &n_points| {
-                            b.iter_batched(
-                                || {
-                                    let points: Vec<Point<f64, $dim>> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap();
-                                    let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
-                                    DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap()
-
-                                },
-                                |dt| {
-                                    dt.validate().unwrap();
-                                    black_box(dt);
-                                },
-                                BatchSize::LargeInput,
-                            );
-                        },
-                    );
-
-                    group.bench_with_input(
-                        BenchmarkId::new("is_valid_delaunay", n_points),
-                        &n_points,
-                        |b, &n_points| {
-                            b.iter_batched(
-                                || {
-                                    let points: Vec<Point<f64, $dim>> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap();
-                                    let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
-                                    DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap()
-                                },
-                                |dt| {
-                                    dt.is_valid().unwrap();
-                                    black_box(dt);
-                                },
-                                BatchSize::LargeInput,
-                            );
-                        },
-                    );
-                }
-
-                group.finish();
-            }
-
-            /// Benchmark individual validation components for [<$dim>]D
-            fn [<benchmark_validation_components_ $dim d>](c: &mut Criterion) {
-                let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations
-                let n_points = if $dim <= 3 { 50 } else { 25 }; // Fixed size for component benchmarks
-                let points: Vec<Point<f64, $dim>> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap();
-                let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
-                let dt = DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap();
-
-                let mut group = c.benchmark_group(&format!("validation_components_{}d", $dim));
-
-                group.bench_function("tds_is_valid", |b| {
-                    b.iter(|| {
-                        dt.tds().is_valid().unwrap();
-                        // Black box to prevent dead code elimination
-                        black_box(());
-                    });
-                });
-
-                group.bench_function("tri_is_valid", |b| {
-                    b.iter(|| {
-                        dt.as_triangulation().is_valid().unwrap();
-                        // Black box to prevent dead code elimination
-                        black_box(());
-                    });
-                });
-
-                group.bench_function("is_valid_delaunay", |b| {
-                    b.iter(|| {
-                        dt.is_valid().unwrap();
-                        // Black box to prevent dead code elimination
-                        black_box(());
-                    });
-                });
-
-                group.bench_function("validate", |b| {
-                    b.iter(|| {
-                        dt.validate().unwrap();
-                        // Black box to prevent dead code elimination
-                        black_box(());
-                    });
-                });
-
-                group.finish();
-            }
-        }
-    };
-}
-
-// Generate validation benchmarks for dimensions 2-5
-generate_validation_benchmarks!(2);
-generate_validation_benchmarks!(3);
-generate_validation_benchmarks!(4);
-generate_validation_benchmarks!(5);
-
-/// Macro to generate incremental construction benchmarks for all dimensions
-macro_rules! generate_incremental_construction_benchmarks {
-    ($dim:literal) => {
-        pastey::paste! {
-            /// Benchmark incremental vertex addition for [<$dim>]D
-            fn [<benchmark_incremental_construction_ $dim d>](c: &mut Criterion) {
-                let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations
-                let mut group = c.benchmark_group(&format!("incremental_construction_{}d", $dim));
-
-                // Generate initial simplex for the given dimension
-                let mut initial_coords = Vec::new();
-                for i in 0..=$dim {
-                    let mut coords = vec![0.0; $dim];
-                    if i < $dim {
-                        coords[i] = 1.0;
-                    }
-                    initial_coords.push(coords);
-                }
-                let initial_vertices: Vec<_> = initial_coords
-                    .into_iter()
-                    .map(|coords| {
-                        let mut array = [0.0; $dim];
-                        array.copy_from_slice(&coords);
-                        vertex!(array)
-                    })
-                    .collect();
-
-                // Test single vertex addition
-                let additional_coords = vec![0.5; $dim];
-                let mut additional_array = [0.0; $dim];
-                additional_array.copy_from_slice(&additional_coords);
-                // Note: additional_vertex is Copy, so we can use the same value in each benchmark iteration
-                let additional_vertex = vertex!(additional_array);
-
-                group.bench_function("single_vertex_addition", |b| {
-                    b.iter_batched(
-                        || DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &initial_vertices).unwrap(),
-                        |mut dt| {
-                            dt.insert(additional_vertex).unwrap();
-                            black_box(dt);
-                        },
-                        BatchSize::SmallInput,
-                    );
-                });
-
-                // Test multiple vertex additions with dimension-appropriate counts
-                let counts: &[usize] = if $dim <= 3 { &[2, 5, 10] } else { &[2, 4, 6] };
-                for &count in counts {
-                    group.bench_with_input(
-                        BenchmarkId::new("multiple_vertex_addition", count),
-                        &count,
-                        |b, &count| {
-                            b.iter_batched(
-                                || {
-                                    let dt = DelaunayTriangulation::<RobustKernel<f64>, (), (), $dim>::with_kernel(&RobustKernel::new(), &initial_vertices).unwrap();
-                                    let additional_points: Vec<Point<f64, $dim>> = generate_random_points_seeded(count, (-100.0, 100.0), seed).unwrap();
-                                    let additional_vertices: Vec<_> =
-                                        additional_points.iter().map(|p| vertex!(*p)).collect();
-                                    (dt, additional_vertices)
-                                },
-                                |(mut dt, additional_vertices)| {
-                                    for vertex in additional_vertices {
-                                        dt.insert(vertex).unwrap();
-                                    }
-                                    black_box(dt);
-                                },
-                                BatchSize::SmallInput,
-                            );
-                        },
-                    );
-                }
-
-                group.finish();
-            }
-        }
-    };
-}
-
-// Generate incremental construction benchmarks for dimensions 2-5
-generate_incremental_construction_benchmarks!(2);
-generate_incremental_construction_benchmarks!(3);
-generate_incremental_construction_benchmarks!(4);
-generate_incremental_construction_benchmarks!(5);
-
-/// Build Criterion configuration with optional environment variable overrides.
-///
-/// Supports:
-/// - `CRIT_SAMPLE_SIZE`: Number of samples per benchmark (default: Criterion's default)
-/// - `CRIT_MEASUREMENT_MS`: Measurement time in milliseconds (default: Criterion's default)
-/// - `CRIT_WARMUP_MS`: Warm-up time in milliseconds (default: Criterion's default)
-///
-/// This allows CI and local tuning without code changes.
-fn bench_config() -> Criterion {
-    use std::time::Duration;
-    init_tracing();
-    let mut c = Criterion::default();
-
-    if let Some(v) = std::env::var("CRIT_SAMPLE_SIZE")
-        .ok()
-        .and_then(|s| s.parse::<usize>().ok())
-    {
-        c = c.sample_size(v);
-    } else if std::env::var("CRIT_SAMPLE_SIZE").is_ok() {
-        bench_warn!("Failed to parse CRIT_SAMPLE_SIZE, using default");
-    }
-
-    if let Some(v) = std::env::var("CRIT_MEASUREMENT_MS")
-        .ok()
-        .and_then(|s| s.parse::<u64>().ok())
-    {
-        c = c.measurement_time(Duration::from_millis(v));
-    } else if std::env::var("CRIT_MEASUREMENT_MS").is_ok() {
-        bench_warn!("Failed to parse CRIT_MEASUREMENT_MS, using default");
-    }
-
-    if let Some(v) = std::env::var("CRIT_WARMUP_MS")
-        .ok()
-        .and_then(|s| s.parse::<u64>().ok())
-    {
-        c = c.warm_up_time(Duration::from_millis(v));
-    } else if std::env::var("CRIT_WARMUP_MS").is_ok() {
-        bench_warn!("Failed to parse CRIT_WARMUP_MS, using default");
-    }
-
-    c
-}
-
-criterion_group!(
-    name = benches;
-    config = bench_config();
-    targets =
-        // Core triangulation benchmarks (2D-5D)
-        benchmark_delaunay_triangulation_2d,
-        benchmark_delaunay_triangulation_3d,
-        benchmark_delaunay_triangulation_4d,
-        benchmark_delaunay_triangulation_5d,
-
-        // Memory usage benchmarks (2D-5D)
-        benchmark_memory_usage_2d,
-        benchmark_memory_usage_3d,
-        benchmark_memory_usage_4d,
-        benchmark_memory_usage_5d,
-
-        // Validation benchmarks (2D-5D)
-        benchmark_validation_methods_2d,
-        benchmark_validation_methods_3d,
-        benchmark_validation_methods_4d,
-        benchmark_validation_methods_5d,
-        benchmark_validation_components_2d,
-        benchmark_validation_components_3d,
-        benchmark_validation_components_4d,
-        benchmark_validation_components_5d,
-
-        // Incremental construction benchmarks (2D-5D)
-        benchmark_incremental_construction_2d,
-        benchmark_incremental_construction_3d,
-        benchmark_incremental_construction_4d,
-        benchmark_incremental_construction_5d
-);
-criterion_main!(benches);
diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index 46313e3c..08afed19 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -9,6 +9,7 @@
 //! 4. **Query latency analysis** (circumsphere tests, neighbor queries)
 //! 5. **Multi-dimensional scaling** (2D through 5D)
 //! 6. **Algorithmic bottleneck identification** (specific operation profiling)
+//! 7. **Validation layer diagnostics** (Level 1-3 vs Level 4 cost separation)
 //!
 //! ## Usage
 //!
@@ -52,7 +53,10 @@
 //! BENCH_SAMPLE_SIZE=5 BENCH_WARMUP_SECS=5 BENCH_PERCENTILE=90 cargo bench --profile perf --bench profiling_suite
 //! ```
 
-use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use criterion::measurement::WallTime;
+use criterion::{
+    BatchSize, BenchmarkGroup, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main,
+};
 use delaunay::core::collections::SmallBuffer;
 use delaunay::geometry::util::{
     generate_grid_points, generate_poisson_points, generate_random_points_seeded,
@@ -62,13 +66,14 @@ use delaunay::prelude::query::*;
 use delaunay::prelude::triangulation::DelaunayTriangulationBuilder;
 use delaunay::vertex;
 use num_traits::cast;
-use serde::{Serialize, de::DeserializeOwned};
+use std::env;
 use std::hint::black_box;
+use std::sync::Once;
 use std::time::{Duration, Instant};
 
 #[cfg(feature = "bench-logging")]
 fn init_tracing() {
-    static INIT: std::sync::Once = std::sync::Once::new();
+    static INIT: Once = Once::new();
     INIT.call_once(|| {
         let filter = tracing_subscriber::EnvFilter::try_from_default_env()
             .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
@@ -116,14 +121,13 @@ struct AllocationInfo {
 }
 
 #[cfg(not(feature = "count-allocations"))]
-fn measure<F: FnOnce()>(f: F) -> AllocationInfo {
+fn measure(f: impl FnOnce()) -> AllocationInfo {
     f();
     AllocationInfo::default()
 }
 
 #[cfg(not(feature = "count-allocations"))]
-fn print_count_allocations_banner_once() {
-    use std::sync::Once;
+fn print_alloc_banner_once() {
     static ONCE: Once = Once::new();
     ONCE.call_once(|| {
         bench_warn!("count-allocations feature not enabled; memory stats are placeholders.");
@@ -153,7 +157,7 @@ const PROFILING_COUNTS_DEVELOPMENT: &[usize] = &[
 /// Returns true for: "1", "true", "TRUE", "yes", "on" (case-insensitive)
 /// Returns false for anything else (including "0", "false", empty, or unset)
 fn is_dev_mode() -> bool {
-    let dev = std::env::var("PROFILING_DEV_MODE").ok();
+    let dev = env::var("PROFILING_DEV_MODE").ok();
     dev.as_deref().is_some_and(|s| {
         s == "1"
             || s.eq_ignore_ascii_case("true")
@@ -174,7 +178,7 @@ fn get_profiling_counts() -> &'static [usize] {
 /// Helper function to parse benchmark measurement time from environment
 /// Guards against zero/invalid values by ensuring minimum of 1 second
 fn bench_time(default_secs: u64) -> Duration {
-    let secs = std::env::var("BENCH_MEASUREMENT_TIME")
+    let secs = env::var("BENCH_MEASUREMENT_TIME")
         .ok()
         .and_then(|s| s.parse::<u64>().ok())
         .map_or_else(|| default_secs.max(1), |parsed| parsed.max(1));
@@ -200,7 +204,7 @@ impl PointDistribution {
 }
 
 /// Generate points according to the specified distribution
-fn generate_points_by_distribution<const D: usize>(
+fn gen_points<const D: usize>(
     count: usize,
     distribution: PointDistribution,
     seed: u64,
@@ -253,7 +257,7 @@ fn generate_points_by_distribution<const D: usize>(
 
 /// Comprehensive triangulation scaling analysis across dimensions and distributions
 #[expect(clippy::significant_drop_tightening, clippy::too_many_lines)]
-fn benchmark_triangulation_scaling(c: &mut Criterion) {
+fn bench_scaling(c: &mut Criterion) {
     let counts = get_profiling_counts();
     let distributions = [
         PointDistribution::Random,
@@ -268,8 +272,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
     for &count in counts {
         for &distribution in &distributions {
             // Pre-generate sample points to calculate actual count and avoid double-generation
-            let sample_points =
-                generate_points_by_distribution::<2>(count, distribution, DEFAULT_SEED);
+            let sample_points = gen_points::<2>(count, distribution, DEFAULT_SEED);
             let actual_count = sample_points.len();
             group.throughput(Throughput::Elements(actual_count as u64));
 
@@ -281,11 +284,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
                     b.iter_batched(
                         || {
                             // Reuse same generation logic to ensure consistent point count
-                            let points = generate_points_by_distribution::<2>(
-                                count,
-                                distribution,
-                                DEFAULT_SEED,
-                            );
+                            let points = gen_points::<2>(count, distribution, DEFAULT_SEED);
                             points.iter().map(|p| vertex!(*p)).collect::<Vec<_>>()
                         },
                         |vertices| {
@@ -320,8 +319,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
             }
 
             // Pre-generate sample points to calculate actual count and avoid double-generation
-            let sample_points =
-                generate_points_by_distribution::<3>(count, distribution, DEFAULT_SEED);
+            let sample_points = gen_points::<3>(count, distribution, DEFAULT_SEED);
             let actual_count = sample_points.len();
             group.throughput(Throughput::Elements(actual_count as u64));
 
@@ -332,11 +330,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
                 |b, &(count, distribution, _actual_count)| {
                     b.iter_batched(
                         || {
-                            let points = generate_points_by_distribution::<3>(
-                                count,
-                                distribution,
-                                DEFAULT_SEED,
-                            );
+                            let points = gen_points::<3>(count, distribution, DEFAULT_SEED);
                             points.iter().map(|p| vertex!(*p)).collect::<Vec<_>>()
                         },
                         |vertices| {
@@ -373,8 +367,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
     {
         for &distribution in &distributions {
             // Pre-generate sample points to calculate actual count and avoid double-generation
-            let sample_points =
-                generate_points_by_distribution::<4>(count, distribution, DEFAULT_SEED);
+            let sample_points = gen_points::<4>(count, distribution, DEFAULT_SEED);
             let actual_count = sample_points.len();
             group.throughput(Throughput::Elements(actual_count as u64));
 
@@ -385,11 +378,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
                 |b, &(count, distribution, _actual_count)| {
                     b.iter_batched(
                         || {
-                            let points = generate_points_by_distribution::<4>(
-                                count,
-                                distribution,
-                                DEFAULT_SEED,
-                            );
+                            let points = gen_points::<4>(count, distribution, DEFAULT_SEED);
                             points.iter().map(|p| vertex!(*p)).collect::<Vec<_>>()
                         },
                         |vertices| {
@@ -425,8 +414,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
     {
         for &distribution in &distributions {
             // Pre-generate sample points to calculate actual count and avoid double-generation
-            let sample_points =
-                generate_points_by_distribution::<5>(count, distribution, DEFAULT_SEED);
+            let sample_points = gen_points::<5>(count, distribution, DEFAULT_SEED);
             let actual_count = sample_points.len();
             group.throughput(Throughput::Elements(actual_count as u64));
 
@@ -437,11 +425,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) {
                 |b, &(count, distribution, _actual_count)| {
                     b.iter_batched(
                         || {
-                            let points = generate_points_by_distribution::<5>(
-                                count,
-                                distribution,
-                                DEFAULT_SEED,
-                            );
+                            let points = gen_points::<5>(count, distribution, DEFAULT_SEED);
                             points.iter().map(|p| vertex!(*p)).collect::<Vec<_>>()
                         },
                         |vertices| {
@@ -472,7 +456,7 @@ fn calculate_percentile(values: &mut [u64]) -> u64 {
     }
 
     // Parse percentile from environment, defaulting to 95
-    let percentile = std::env::var("BENCH_PERCENTILE")
+    let percentile = env::var("BENCH_PERCENTILE")
         .ok()
         .and_then(|s| s.parse::<usize>().ok())
         .map_or(95, |p| p.clamp(1, 100)); // Clamp to valid percentile range
@@ -525,12 +509,10 @@ fn print_alloc_summary(
 /// Generic helper to benchmark memory usage for a specific dimension D
 #[expect(clippy::cast_possible_wrap)]
 fn bench_memory_usage<const D: usize>(
-    group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
+    group: &mut BenchmarkGroup<'_, WallTime>,
     bench_id_prefix: &str,
     count: usize,
-) where
-    [f64; D]: Copy + DeserializeOwned + Serialize + Sized,
-{
+) {
     group.bench_with_input(
         BenchmarkId::new(bench_id_prefix, count),
         &count,
@@ -549,11 +531,8 @@ fn bench_memory_usage<const D: usize>(
                     let start_time = Instant::now();
 
                     let alloc_info = measure(|| {
-                        let points = generate_points_by_distribution::<D>(
-                            count,
-                            PointDistribution::Random,
-                            DEFAULT_SEED,
-                        );
+                        let points =
+                            gen_points::<D>(count, PointDistribution::Random, DEFAULT_SEED);
                         let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
                         actual_point_counts.push(points.len()); // Track actual count
                         if let Ok(dt) = DelaunayTriangulationBuilder::new(&vertices).build::<()>() {
@@ -624,7 +603,7 @@ fn bench_memory_usage<const D: usize>(
 /// Memory usage profiling across different scales and dimensions using allocation counter
 fn benchmark_memory_profiling(c: &mut Criterion) {
     #[cfg(not(feature = "count-allocations"))]
-    print_count_allocations_banner_once();
+    print_alloc_banner_once();
 
     let counts = if is_dev_mode() {
         &[1_000, 10_000][..]
@@ -682,11 +661,7 @@ fn benchmark_query_latency(c: &mut Criterion) {
             &count,
             |b, &count| {
                 // Setup: Create triangulation and query points
-                let points = generate_points_by_distribution::<3>(
-                    count,
-                    PointDistribution::Random,
-                    DEFAULT_SEED,
-                );
+                let points = gen_points::<3>(count, PointDistribution::Random, DEFAULT_SEED);
                 let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
                 let Ok(dt) = DelaunayTriangulationBuilder::new(&vertices).build::<()>() else {
                     // Construction hit a geometric degeneracy; skip this benchmark entry
@@ -696,11 +671,7 @@ fn benchmark_query_latency(c: &mut Criterion) {
                 let tds = dt.tds();
 
                 // Generate query points
-                let query_points = generate_points_by_distribution::<3>(
-                    100,
-                    PointDistribution::Random,
-                    QUERY_SEED,
-                );
+                let query_points = gen_points::<3>(100, PointDistribution::Random, QUERY_SEED);
 
                 // Precompute all valid simplex vertices outside the benchmark loop
                 let mut precomputed_simplices: Vec<
@@ -742,11 +713,8 @@ fn benchmark_query_latency(c: &mut Criterion) {
                             let query_point_obj = *query_point;
 
                             // Use the fastest circumsphere method (based on benchmark results)
-                            {
-                                use delaunay::geometry::predicates::insphere_lifted;
-                                let result = insphere_lifted(points_for_test, query_point_obj);
-                                query_results.push(result);
-                            }
+                            let result = insphere_lifted(points_for_test, query_point_obj);
+                            query_results.push(result);
 
                             // Limit total queries to prevent extremely long benchmarks
                             if query_results.len() >= MAX_QUERY_RESULTS {
@@ -768,12 +736,72 @@ fn benchmark_query_latency(c: &mut Criterion) {
     group.finish();
 }
 
+// ============================================================================
+// Validation Layer Diagnostics
+// ============================================================================
+/// Defines `$func_name`, benchmarking four validation calls on one `$dim`-D triangulation.
+macro_rules! benchmark_validation_components_dimension {
+    ($dim:literal, $func_name:ident, $count:expr) => {
+        fn $func_name(c: &mut Criterion) {
+            let points = gen_points::<$dim>($count, PointDistribution::Random, DEFAULT_SEED);
+            let vertices: Vec<_> = points.iter().map(|point| vertex!(*point)).collect();
+            let dt = DelaunayTriangulationBuilder::new(&vertices)
+                .build::<()>()
+                .unwrap_or_else(|err| {
+                    panic!(
+                        "failed to build {}D validation component benchmark triangulation: {err}",
+                        $dim
+                    );
+                });
+            // `dt` is built once, outside the timed closures, and shared by all benchmarks below.
+            let mut group = c.benchmark_group(format!("validation_components_{}d", $dim));
+            group.measurement_time(bench_time(15));
+            group.throughput(Throughput::Elements($count as u64));
+            // Each benchmark times one validation call and panics via `expect` if it fails.
+            group.bench_function("tds_is_valid", |b| {
+                b.iter(|| {
+                    black_box(dt.tds().is_valid())
+                        .expect("TDS validation should pass for benchmark triangulation");
+                });
+            });
+
+            group.bench_function("tri_is_valid", |b| {
+                b.iter(|| {
+                    black_box(dt.as_triangulation().is_valid())
+                        .expect("triangulation validation should pass for benchmark triangulation");
+                });
+            });
+
+            group.bench_function("is_valid_delaunay", |b| {
+                b.iter(|| {
+                    black_box(dt.is_valid())
+                        .expect("Delaunay validation should pass for benchmark triangulation");
+                });
+            });
+
+            group.bench_function("validate", |b| {
+                b.iter(|| {
+                    black_box(dt.validate())
+                        .expect("full validation should pass for benchmark triangulation");
+                });
+            });
+
+            group.finish();
+        }
+    };
+}
+// Instantiate for 2D-5D: 50 requested points in 2D/3D, 25 in 4D/5D.
+benchmark_validation_components_dimension!(2, benchmark_validation_components_2d, 50);
+benchmark_validation_components_dimension!(3, benchmark_validation_components_3d, 50);
+benchmark_validation_components_dimension!(4, benchmark_validation_components_4d, 25);
+benchmark_validation_components_dimension!(5, benchmark_validation_components_5d, 25);
+
 // ============================================================================
 // Algorithmic Bottleneck Identification
 // ============================================================================
 
 /// Profile specific algorithmic components to identify bottlenecks
-fn benchmark_algorithmic_bottlenecks(c: &mut Criterion) {
+fn bench_bottlenecks(c: &mut Criterion) {
     let counts = if is_dev_mode() {
         &[3_000][..]
     } else {
@@ -791,11 +819,8 @@ fn benchmark_algorithmic_bottlenecks(c: &mut Criterion) {
             |b, &count| {
                 b.iter_batched(
                     || {
-                        let points = generate_points_by_distribution::<3>(
-                            count,
-                            PointDistribution::Random,
-                            DEFAULT_SEED,
-                        );
+                        let points =
+                            gen_points::<3>(count, PointDistribution::Random, DEFAULT_SEED);
                         let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
                         DelaunayTriangulationBuilder::new(&vertices)
                             .build::<()>()
@@ -820,17 +845,17 @@ fn benchmark_algorithmic_bottlenecks(c: &mut Criterion) {
             |b, &count| {
                 b.iter_batched(
                     || {
-                        let points = generate_points_by_distribution::<3>(
-                            count,
-                            PointDistribution::Random,
-                            DEFAULT_SEED,
-                        );
+                        let points =
+                            gen_points::<3>(count, PointDistribution::Random, DEFAULT_SEED);
                         let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
-                        DelaunayTriangulationBuilder::new(&vertices).build::<()>().ok()
+                        DelaunayTriangulationBuilder::new(&vertices)
+                            .build::<()>()
+                            .ok()
                     },
                     |dt| {
                         if let Some(dt) = dt {
-                            let hull = delaunay::geometry::algorithms::convex_hull::ConvexHull::from_triangulation(dt.as_triangulation()).unwrap();
+                            let hull =
+                                ConvexHull::from_triangulation(dt.as_triangulation()).unwrap();
                             black_box(hull);
                         }
                     },
@@ -852,11 +877,11 @@ criterion_group!(
     config = {
         init_tracing();
         // Allow configuration via environment variables for CI stability
-        let sample_size = std::env::var("BENCH_SAMPLE_SIZE")
+        let sample_size = env::var("BENCH_SAMPLE_SIZE")
             .ok()
             .and_then(|v| v.parse().ok())
             .unwrap_or(10);
-        let warm_up_secs = std::env::var("BENCH_WARMUP_SECS")
+        let warm_up_secs = env::var("BENCH_WARMUP_SECS")
             .ok()
             .and_then(|v| v.parse().ok())
             .unwrap_or(10);
@@ -867,10 +892,14 @@ criterion_group!(
             .measurement_time(bench_time(60))
     };
     targets =
-        benchmark_triangulation_scaling,
+        bench_scaling,
         benchmark_memory_profiling,
         benchmark_query_latency,
-        benchmark_algorithmic_bottlenecks
+        benchmark_validation_components_2d,
+        benchmark_validation_components_3d,
+        benchmark_validation_components_4d,
+        benchmark_validation_components_5d,
+        bench_bottlenecks
 );
 
 criterion_main!(profiling_benches);
diff --git a/docs/code_organization.md b/docs/code_organization.md
index e6a6bcd1..fb577a99 100644
--- a/docs/code_organization.md
+++ b/docs/code_organization.md
@@ -65,7 +65,6 @@ delaunay/
 │   ├── ci_performance_suite.rs
 │   ├── circumsphere_containment.rs
 │   ├── large_scale_performance.rs
-│   ├── microbenchmarks.rs
 │   ├── profiling_suite.rs
 │   └── topology_guarantee_construction.rs
 ├── docs/
diff --git a/examples/convex_hull_3d_100_points.rs b/examples/convex_hull_3d_100_points.rs
index b924a805..76153eac 100644
--- a/examples/convex_hull_3d_100_points.rs
+++ b/examples/convex_hull_3d_100_points.rs
@@ -28,10 +28,14 @@
 //! - Validation results
 //! - Performance metrics
 
-use delaunay::geometry::util::generate_random_triangulation;
+use delaunay::prelude::generators::generate_random_triangulation;
 use delaunay::prelude::query::*;
+use delaunay::prelude::triangulation::flips::CellKey;
 use num_traits::cast::cast;
-use std::time::Instant;
+use std::cmp;
+use std::env;
+use std::mem;
+use std::time::{Duration, Instant};
 
 const SEED_CANDIDATES: &[u64] = &[1, 7, 11, 42, 99, 123, 666];
 
@@ -44,15 +48,12 @@ fn main() {
     // Use a fixed seed + bounds so that `just examples` is reproducible and robust.
     let n_points = 100;
     let bounds = (-3.0, 3.0);
-    let seed_override: Option<u64> =
-        std::env::var("DELAUNAY_EXAMPLE_SEED")
-            .ok()
-            .and_then(|value| {
-                value.parse().ok().or_else(|| {
-                    eprintln!("Invalid DELAUNAY_EXAMPLE_SEED={value:?}; using default seed list.");
-                    None
-                })
-            });
+    let seed_override: Option<u64> = env::var("DELAUNAY_EXAMPLE_SEED").ok().and_then(|value| {
+        value.parse().ok().or_else(|| {
+            eprintln!("Invalid DELAUNAY_EXAMPLE_SEED={value:?}; using default seed list.");
+            None
+        })
+    });
     let seed_candidates: Vec<u64> =
         seed_override.map_or_else(|| SEED_CANDIDATES.to_vec(), |seed| vec![seed]);
 
@@ -116,7 +117,7 @@ fn main() {
     analyze_triangulation(&dt);
 
     // Extract and analyze convex hull
-    extract_and_analyze_convex_hull(&dt);
+    analyze_hull(&dt);
 
     // Test point containment
     test_point_containment(&dt);
@@ -158,7 +159,7 @@ fn analyze_triangulation(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (),
 }
 
 /// Extract and analyze the convex hull from the triangulation
-fn extract_and_analyze_convex_hull(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (), 3>) {
+fn analyze_hull(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (), 3>) {
     println!("Convex Hull Extraction:");
     println!("=======================");
 
@@ -200,7 +201,7 @@ fn extract_and_analyze_convex_hull(dt: &DelaunayTriangulation<AdaptiveKernel<f64
     if hull.number_of_facets() > 0 {
         println!("\n  Facet Analysis:");
         let facets: Vec<_> = hull.facets().collect();
-        let sample_size = std::cmp::min(5, facets.len());
+        let sample_size = cmp::min(5, facets.len());
 
         for (i, facet_handle) in facets.iter().take(sample_size).enumerate() {
             // Create FacetView to access facet properties
@@ -260,30 +261,30 @@ fn test_point_containment(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), ()
     }
 
     let centroid_point = Point::new(centroid);
-    test_point_containment_single(&hull, &centroid_point, "Centroid", dt);
+    test_contains_point(&hull, &centroid_point, "Centroid", dt);
 
     // Test slightly offset from centroid (should still be inside)
     let near_centroid = Point::new([centroid[0] + 0.1, centroid[1] + 0.1, centroid[2] + 0.1]);
-    test_point_containment_single(&hull, &near_centroid, "Near centroid", dt);
+    test_contains_point(&hull, &near_centroid, "Near centroid", dt);
 
     // Test 2: Points clearly outside the convex hull
     println!("\n  Testing exterior points:");
 
     let far_point = Point::new([50.0, 50.0, 50.0]);
-    test_point_containment_single(&hull, &far_point, "Far exterior", dt);
+    test_contains_point(&hull, &far_point, "Far exterior", dt);
 
     let axis_point = Point::new([20.0, 0.0, 0.0]);
-    test_point_containment_single(&hull, &axis_point, "X-axis exterior", dt);
+    test_contains_point(&hull, &axis_point, "X-axis exterior", dt);
 
     let negative_point = Point::new([-20.0, -20.0, -20.0]);
-    test_point_containment_single(&hull, &negative_point, "Negative exterior", dt);
+    test_contains_point(&hull, &negative_point, "Negative exterior", dt);
 
     // Test 3: Sample triangulation vertices (should be on boundary or inside)
     println!("\n  Testing triangulation vertices:");
-    let sample_vertices = std::cmp::min(3, vertex_count);
+    let sample_vertices = cmp::min(3, vertex_count);
     for (i, (_, vertex)) in dt.tds().vertices().enumerate().take(sample_vertices) {
         let point: Point<f64, 3> = vertex.into();
-        test_point_containment_single(
+        test_contains_point(
             &hull,
             &point,
             &format!("Triangulation vertex {}", i + 1),
@@ -295,7 +296,7 @@ fn test_point_containment(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), ()
 }
 
 /// Test containment for a single point and display results
-fn test_point_containment_single(
+fn test_contains_point(
     hull: &ConvexHull<AdaptiveKernel<f64>, (), (), 3>,
     point: &Point<f64, 3>,
     description: &str,
@@ -443,8 +444,7 @@ fn performance_analysis(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (),
         .collect();
 
     let len_u32 = u32::try_from(extraction_times.len()).unwrap_or(1u32);
-    let avg_extraction_time: std::time::Duration =
-        extraction_times.iter().sum::<std::time::Duration>() / len_u32;
+    let avg_extraction_time: Duration = extraction_times.iter().sum::<Duration>() / len_u32;
     let min_extraction_time = *extraction_times.iter().min().unwrap();
     let max_extraction_time = *extraction_times.iter().max().unwrap();
 
@@ -465,8 +465,7 @@ fn performance_analysis(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (),
         .collect();
 
     let len_u32 = u32::try_from(containment_times.len()).unwrap_or(1u32);
-    let avg_containment_time: std::time::Duration =
-        containment_times.iter().sum::<std::time::Duration>() / len_u32;
+    let avg_containment_time: Duration = containment_times.iter().sum::<Duration>() / len_u32;
 
     println!("\n  Point Containment Queries (10 runs):");
     println!("    • Average time: {avg_containment_time:?}");
@@ -483,8 +482,7 @@ fn performance_analysis(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (),
         .collect();
 
     let len_u32 = u32::try_from(visibility_times.len()).unwrap_or(1u32);
-    let avg_visibility_time: std::time::Duration =
-        visibility_times.iter().sum::<std::time::Duration>() / len_u32;
+    let avg_visibility_time: Duration = visibility_times.iter().sum::<Duration>() / len_u32;
 
     println!("\n  Visible Facet Queries (5 runs):");
     println!("    • Average time: {avg_visibility_time:?}");
@@ -507,9 +505,9 @@ fn performance_analysis(dt: &DelaunayTriangulation<AdaptiveKernel<f64>, (), (),
     }
 
     // Memory usage estimation
-    let hull_size = std::mem::size_of::<ConvexHull<AdaptiveKernel<f64>, (), (), 3>>();
+    let hull_size = mem::size_of::<ConvexHull<AdaptiveKernel<f64>, (), (), 3>>();
     // Phase 3C: Facets are now lightweight (CellKey, u8) tuples
-    let facet_handle_size = std::mem::size_of::<(delaunay::core::CellKey, u8)>();
+    let facet_handle_size = mem::size_of::<(CellKey, u8)>();
     let estimated_hull_memory = hull_size + (facet_count * facet_handle_size);
 
     println!("\n  Memory Usage Estimation:");
diff --git a/examples/pachner_roundtrip_4d.rs b/examples/pachner_roundtrip_4d.rs
index b114645e..aa8cf042 100644
--- a/examples/pachner_roundtrip_4d.rs
+++ b/examples/pachner_roundtrip_4d.rs
@@ -14,10 +14,9 @@
 //! ```
 
 use ::uuid::Uuid;
-use delaunay::geometry::kernel::RobustKernel;
-use delaunay::prelude::triangulation::Vertex;
+use delaunay::prelude::geometry::RobustKernel;
 use delaunay::prelude::triangulation::flips::*;
-use delaunay::triangulation::delaunay::{ConstructionOptions, InsertionOrderStrategy};
+use delaunay::prelude::triangulation::{ConstructionOptions, InsertionOrderStrategy, Vertex};
 use std::time::Instant;
 
 type Dt4 = DelaunayTriangulation<RobustKernel<f64>, (), (), 4>;
diff --git a/justfile b/justfile
index 3ec98a81..a32552c9 100644
--- a/justfile
+++ b/justfile
@@ -442,7 +442,8 @@ profile toolchain="" code_ref="current":
             grep -E '^[[:space:]]*channel[[:space:]]*=' "$workdir/rust-toolchain.toml" \
                 | head -n 1 \
                 | cut -d '=' -f 2 \
-                | tr -d ' "'
+                | tr -d ' "' \
+                || true
         )"
     fi
 
@@ -479,14 +480,14 @@ profile toolchain="" code_ref="current":
         echo "- Requested toolchain: $requested_toolchain"
         echo "- rustc: $(rustup run "$requested_toolchain" rustc --version)"
         echo "- cargo: $(rustup run "$requested_toolchain" cargo --version)"
-        echo "- Cargo profile: cargo bench default"
+        echo "- Cargo profile: cargo bench --profile perf"
         echo "- Benchmark harness: ci_performance_suite"
     } > "$run_dir/profile_metadata.md"
 
     (
         cd "$workdir"
         CARGO_TARGET_DIR="$run_dir/target" \
-            rustup run "$requested_toolchain" cargo bench --bench ci_performance_suite \
+            rustup run "$requested_toolchain" cargo bench --profile perf --bench ci_performance_suite \
             2>&1 | tee "$run_dir/ci_performance_suite.log"
     )
 
diff --git a/scripts/README.md b/scripts/README.md
index e870176b..92f9ab9d 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -682,7 +682,8 @@ gh release create vX.Y.Z --notes-from-tag
 # 1. Run benchmarks directly (CI performance suite)
 cargo bench --profile perf --bench ci_performance_suite
 
-# Generate release performance summary with fresh perf-profile data
+# Generate release performance summary with fresh perf-profile public API
+# and circumsphere predicate data
 uv run benchmark-utils generate-summary --run-benchmarks --profile perf
 
 # 2. Generate new baseline
@@ -692,12 +693,16 @@ uv run benchmark-utils generate-baseline
 uv run benchmark-utils compare --baseline baseline-artifact/baseline_results.txt
 ```
 
-**CI Performance Suite**: The benchmark utilities now use `benches/ci_performance_suite.rs` for CI/CD-optimized performance testing:
+**CI Performance Suite**: The benchmark utilities use `benches/ci_performance_suite.rs` for CI/CD-optimized
+performance testing and as the primary generated performance-summary source:
 
 - **Dimensions**: 2D, 3D, 4D, and 5D triangulations.
 - **Point counts**: [10, 25, 50].
 - **Runtime**: ~5–10 minutes.
-- **Coverage**: Core triangulation performance across all supported dimensions.
+- **Coverage**: Public construction, hull, validation, insertion, boundary, and bistellar-flip workflows across supported dimensions.
+
+Circumsphere predicate benchmarks remain part of `generate-summary` as a
+dedicated subsection because they track `la-stack`-backed predicate performance.
 
 **Migration Notes**:
 
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index 87c4c7e1..c2902f5f 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -101,11 +101,40 @@
 # numbers are generated with the same ThinLTO/codegen-units settings.
 TRUSTED_BENCH_PROFILE = "perf"
 
+CI_PERFORMANCE_SUITE_GROUPS = {
+    "construction": (
+        "Construction",
+        "DelaunayTriangulation::new_with_options",
+    ),
+    "boundary_facets": (
+        "Boundary facets",
+        "DelaunayTriangulation::boundary_facets",
+    ),
+    "convex_hull": (
+        "Convex hull",
+        "ConvexHull::from_triangulation",
+    ),
+    "validation": (
+        "Validation",
+        "DelaunayTriangulation::validate",
+    ),
+    "incremental_insert": (
+        "Incremental insert",
+        "DelaunayTriangulation::insert",
+    ),
+    "bistellar_flips": (
+        "Bistellar flips",
+        "BistellarFlips",
+    ),
+}
+# Group keys in declaration order; each value above is (section label, public API name).
+CI_PERFORMANCE_SUITE_GROUP_ORDER = tuple(CI_PERFORMANCE_SUITE_GROUPS)
+
 # Development mode arguments - centralized to keep baseline generation and comparison in sync
 # Reduces samples for faster iteration during development (10x faster than full benchmarks)
 #
-# Note: These are Criterion CLI arguments. Alternatively, benchmarks can be configured via
-# environment variables (see benches/microbenchmarks.rs bench_config()):
+# Note: These are Criterion CLI arguments. Some benchmarks can also be configured via
+# environment variables documented in benches/README.md:
 #   CRIT_SAMPLE_SIZE=10 CRIT_MEASUREMENT_MS=2000 CRIT_WARMUP_MS=1000
 # The CLI arguments take precedence over env vars when both are present.
 DEV_MODE_BENCH_ARGS = [
@@ -119,6 +148,26 @@
 ]
 
 
+@dataclass(frozen=True)
+class CiPerformanceResult:
+    """Immutable record of one parsed ci_performance_suite Criterion estimate."""
+
+    # Benchmark identity: summary group key, full Criterion ID, and display labels.
+    group_key: str
+    benchmark_id: str
+    dimension: str
+    input_size: str
+    # Mean timing and its confidence-interval bounds, in nanoseconds.
+    mean_ns: float
+    low_ns: float
+    high_ns: float
+
+    @property
+    def variant(self) -> str:
+        """Classify the input geometry by whether the ID mentions "adversarial"."""
+        return "adversarial" if "adversarial" in self.benchmark_id else "well-conditioned"
+
+
 def _criterion_arg_value(args: list[str], flag: str) -> str:
     """Return the Criterion value that follows flag in args."""
     try:
@@ -174,7 +223,7 @@ def __init__(self, project_root: Path):
         self._baseline_fallback = project_root / "benches" / "baseline_results.txt"
         self.comparison_file = project_root / "benches" / "compare_results.txt"
 
-        # Path for storing circumsphere benchmark results
+        # Path for storing Criterion benchmark results
         self.circumsphere_results_dir = project_root / "target" / "criterion"
 
         # Storage for numerical accuracy data from benchmarks
@@ -196,7 +245,7 @@ def generate_summary(
 
         Args:
             output_path: Output file path (defaults to benches/PERFORMANCE_RESULTS.md)
-            run_benchmarks: Whether to run fresh circumsphere benchmarks
+            run_benchmarks: Whether to run fresh public API and circumsphere benchmarks
             generator_name: Name of the tool generating the summary (for attribution)
             cargo_profile: Optional Cargo profile for fresh benchmark runs.  When
                 ``run_benchmarks`` is True and no profile is specified, defaults
@@ -219,10 +268,11 @@ def generate_summary(
                 # comparable with baseline/compare output.
                 if cargo_profile is None:
                     cargo_profile = TRUSTED_BENCH_PROFILE
-                success, accuracy_data = self._run_circumsphere_benchmarks(cargo_profile=cargo_profile)
-                if success:
+                ci_success = self._run_ci_performance_suite(cargo_profile=cargo_profile)
+                circumsphere_success, accuracy_data = self._run_circumsphere_benchmarks(cargo_profile=cargo_profile)
+                if circumsphere_success:
                     self.numerical_accuracy_data = accuracy_data
-                else:
+                if not ci_success or not circumsphere_success:
                     print("⚠️ Benchmark run failed, using existing/fallback data")
 
             # Generate markdown content
@@ -295,7 +345,12 @@ def _generate_markdown_content(self, generator_name: str | None = None) -> str:
             ],
         )
 
-        # Add circumsphere performance results from actual benchmark data
+        # Add public API performance results from the CI suite first. This is
+        # the versioned benchmark contract used by baseline/comparison tooling.
+        lines.extend(self._get_ci_performance_suite_results())
+
+        # Add circumsphere predicate results as a focused subsection. These
+        # remain important because they exercise la-stack-backed predicates.
         lines.extend(self._get_circumsphere_performance_results())
 
         # Add baseline results if available
@@ -406,6 +461,38 @@ def _run_circumsphere_benchmarks(self, cargo_profile: str | None = None) -> tupl
             print(f"❌ Error running circumsphere benchmarks: {e}")
             return False, None
 
+    def _run_ci_performance_suite(self, cargo_profile: str | None = None) -> bool:
+        """
+        Refresh Criterion data by running the ci_performance_suite benchmark.
+
+        Args:
+            cargo_profile: Cargo profile passed via ``--profile``. ``None``
+                selects :data:`TRUSTED_BENCH_PROFILE`, keeping the summary on
+                the same profile as baseline and comparison runs.
+
+        Returns:
+            True when the cargo invocation finished without raising, else False.
+        """
+        try:
+            print("🔄 Running ci_performance_suite benchmarks...")
+
+            bench_profile = TRUSTED_BENCH_PROFILE if cargo_profile is None else cargo_profile
+            bench_target = ["--profile", bench_profile, "--bench", "ci_performance_suite"]
+
+            run_cargo_command(
+                ["bench", *bench_target, "--", *DEV_MODE_BENCH_ARGS],
+                cwd=self.project_root,
+                timeout=900,
+                capture_output=True,
+            )
+
+            print("✅ ci_performance_suite benchmarks completed successfully")
+            return True
+
+        except Exception as exc:
+            print(f"❌ Error running ci_performance_suite benchmarks: {exc}")
+            return False
+
     def _parse_numerical_accuracy_output(self, stdout: str) -> dict[str, str] | None:
         """
         Parse numerical accuracy data from circumsphere benchmark stdout.
@@ -787,6 +874,191 @@ def _get_fallback_circumsphere_data(self) -> list[CircumsphereTestCase]:
             ),
         ]
 
+    @staticmethod
+    def _format_duration_ns(time_ns: float) -> str:
+        """Render a nanosecond timing in the largest of s / ms / µs / ns that it reaches."""
+        # (threshold in ns, decimals shown, unit suffix), checked from largest to smallest.
+        scales = ((1_000_000_000, 3, "s"), (1_000_000, 3, "ms"), (1_000, 1, "µs"))
+        for threshold, decimals, suffix in scales:
+            if time_ns >= threshold:
+                return f"{time_ns / threshold:.{decimals}f} {suffix}"
+        # Below one microsecond: whole nanoseconds.
+        return f"{time_ns:.0f} ns"
+
+    @staticmethod
+    def _ci_suite_group_key(first_path_part: str) -> str | None:
+        """Resolve the leading Criterion path component to a summary group key, or None."""
+        # Construction and 4D flip benchmarks use path names that differ from their keys.
+        if first_path_part.startswith("tds_new_"):
+            return "construction"
+        if first_path_part == "bistellar_flips_4d":
+            return "bistellar_flips"
+        # Any other recognized component is spelled exactly like its group key.
+        return first_path_part if first_path_part in CI_PERFORMANCE_SUITE_GROUPS else None
+
+    @staticmethod
+    def _ci_suite_dimension(benchmark_id: str) -> str:
+        """Return "<N>D" for the first "<N>d" token in the benchmark ID, or "n/a" without one."""
+        # A token must follow the start or "_" and be followed by "_", "/", or the end,
+        # so digits embedded in a longer word are not mistaken for a dimension.
+        token = re.search(r"(?:^|_)(\d+)d(?:_|/|$)", benchmark_id)
+        return f"{token.group(1)}D" if token else "n/a"
+
+    @staticmethod
+    def _ci_suite_input_size(path_parts: tuple[str, ...]) -> str:
+        """Return the trailing path part when it is all digits, otherwise "roundtrip"."""
+        # Empty paths and non-numeric trailing parts both fall back to the "roundtrip" label.
+        last_part = path_parts[-1] if path_parts else ""
+        return last_part if last_part.isdigit() else "roundtrip"
+
+    @staticmethod
+    def _load_criterion_estimate(estimates_path: Path) -> tuple[float, float, float] | None:
+        """Return (mean, CI lower, CI upper) in ns from a Criterion estimates file, or None if unusable."""
+        try:
+            with estimates_path.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+
+            mean_data = data.get("mean", {})
+            mean_ns = float(mean_data["point_estimate"])
+            confidence_interval = mean_data.get("confidence_interval", {})
+            low_ns = float(confidence_interval.get("lower_bound", mean_ns))
+            high_ns = float(confidence_interval.get("upper_bound", mean_ns))
+            # The comparison is False for NaN, so NaN is rejected along with zero/negative means.
+            return (mean_ns, low_ns, high_ns) if mean_ns > 0 else None
+        except (OSError, AttributeError, KeyError, TypeError, ValueError, json.JSONDecodeError):
+            # AttributeError: valid JSON where an expected object is a list, null, or scalar.
+            return None
+
+    def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
+        """
+        Collect ci_performance_suite timings from the Criterion results directory.
+
+        Only ``base``/``new`` ``estimates.json`` files whose leading path part maps
+        to a known group are read; when both exist for one benchmark, ``new`` wins.
+        Results come back ordered by group, dimension, input size, then benchmark ID.
+        """
+        criterion_root = self.circumsphere_results_dir
+        if not criterion_root.exists():
+            return []
+
+        # Benchmark path parts -> (run kind, estimates file, group key).
+        selected: dict[tuple[str, ...], tuple[str, Path, str]] = {}
+        for candidate in sorted(criterion_root.glob("**/estimates.json")):
+            run_kind = candidate.parent.name
+            if run_kind not in {"base", "new"}:
+                continue
+
+            try:
+                # Drop the trailing "<run kind>/estimates.json" to get the benchmark path.
+                id_parts = candidate.relative_to(criterion_root).parts[:-2]
+            except ValueError:
+                continue
+
+            group_key = self._ci_suite_group_key(id_parts[0]) if id_parts else None
+            if group_key is None:
+                continue
+
+            previous = selected.get(id_parts)
+            if previous is None or (previous[0], run_kind) == ("base", "new"):
+                selected[id_parts] = (run_kind, candidate, group_key)
+
+        results = []
+        for id_parts, (_, chosen_path, group_key) in selected.items():
+            timing = self._load_criterion_estimate(chosen_path)
+            if timing is None:
+                continue
+
+            mean_ns, low_ns, high_ns = timing
+            benchmark_id = "/".join(id_parts)
+            results.append(
+                CiPerformanceResult(
+                    group_key=group_key,
+                    benchmark_id=benchmark_id,
+                    dimension=self._ci_suite_dimension(benchmark_id),
+                    input_size=self._ci_suite_input_size(id_parts),
+                    mean_ns=mean_ns,
+                    low_ns=low_ns,
+                    high_ns=high_ns,
+                ),
+            )
+
+        def numeric_or_last(text: str) -> int:
+            """Sort numeric labels by value and everything else after them."""
+            return int(text) if text.isdigit() else sys.maxsize
+
+        group_rank = {name: position for position, name in enumerate(CI_PERFORMANCE_SUITE_GROUP_ORDER)}
+        results.sort(
+            key=lambda item: (
+                group_rank.get(item.group_key, sys.maxsize),
+                numeric_or_last(item.dimension.removesuffix("D")),
+                numeric_or_last(item.input_size),
+                item.benchmark_id,
+            ),
+        )
+        return results
+
+    def _get_ci_performance_suite_results(self) -> list[str]:
+        """
+        Render the ci_performance_suite section of the performance summary.
+
+        Returns:
+            Markdown lines: a fixed heading and intro, then either one table per
+            benchmark group that has data or a hint on how to produce the data.
+        """
+        parsed = self._parse_ci_performance_suite_results()
+
+        lines = [
+            "### Public API Performance Contract (`ci_performance_suite`)",
+            "",
+            "This suite is the versioned benchmark contract for public Delaunay workflows.",
+            "It covers construction, hull extraction, validation, incremental insertion,",
+            "boundary traversal, and explicit bistellar flip roundtrips.",
+            "",
+        ]
+
+        if not parsed:
+            # Nothing parsed: emit the command that generates the Criterion data.
+            lines += [
+                "⚠️ No `ci_performance_suite` Criterion results available. Run:",
+                "```bash",
+                f"cargo bench --profile {TRUSTED_BENCH_PROFILE} --bench ci_performance_suite",
+                "```",
+                "",
+            ]
+            return lines
+
+        # Bucket rows per group; rows keep the order the parser returned them in.
+        grouped: dict[str, list[CiPerformanceResult]] = {}
+        for row in parsed:
+            grouped.setdefault(row.group_key, []).append(row)
+
+        for group_key in CI_PERFORMANCE_SUITE_GROUP_ORDER:
+            rows = grouped.get(group_key)
+            if not rows:
+                continue
+
+            group_label, public_api = CI_PERFORMANCE_SUITE_GROUPS[group_key]
+            lines += [
+                f"#### {group_label}",
+                "",
+                f"Public API: `{public_api}`",
+                "",
+                "| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |",
+                "|--------------|-----------|-------|---------|------|--------|",
+            ]
+
+            # One table row per benchmark, with timings rendered in readable units.
+            for row in rows:
+                mean, low, high = (self._format_duration_ns(ns) for ns in (row.mean_ns, row.low_ns, row.high_ns))
+                lines.append(
+                    f"| `{row.benchmark_id}` | {row.dimension} | {row.input_size} | {row.variant} | "
+                    f"{mean} | {low} - {high} |",
+                )
+
+            lines.append("")
+
+        return lines
+
     def _get_circumsphere_performance_results(self) -> list[str]:
         """
         Generate circumsphere containment performance results section with dynamic data.
@@ -799,7 +1071,7 @@ def _get_circumsphere_performance_results(self) -> list[str]:
 
         if not test_cases:
             return [
-                "### Circumsphere Performance Results",
+                "### Circumsphere Predicate Performance",
                 "",
                 f"#### Version {self.current_version} Results ({self.current_date})",
                 "",
@@ -811,7 +1083,10 @@ def _get_circumsphere_performance_results(self) -> list[str]:
             ]
 
         lines = [
-            "### Circumsphere Performance Results",
+            "### Circumsphere Predicate Performance",
+            "",
+            "This focused predicate suite tracks `la-stack`-backed circumsphere and",
+            "insphere query performance independently from full triangulation workflows.",
             "",
             f"#### Version {self.current_version} Results ({self.current_date})",
             "",
@@ -981,7 +1256,7 @@ def _get_dynamic_analysis_sections(self) -> list[str]:
         performance_ranking = self._analyze_performance_ranking(test_data)
 
         lines = [
-            "## Key Findings",
+            "## Circumsphere Predicate Analysis",
             "",
             "### Performance Ranking",
             "",
@@ -996,7 +1271,7 @@ def _get_dynamic_analysis_sections(self) -> list[str]:
 
         lines.extend(
             [
-                "## Recommendations",
+                "### Recommendations",
                 "",
             ],
         )
@@ -1009,7 +1284,7 @@ def _get_dynamic_analysis_sections(self) -> list[str]:
             lines.extend(
                 [
                     "",
-                    "## Conclusion",
+                    "### Conclusion",
                     "",
                     "All three methods are mathematically correct and produce valid results. Performance characteristics vary by dimension:",
                     "",
@@ -1117,7 +1392,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str,
             return []
 
         lines = [
-            "### Method Selection Guide",
+            "#### Method Selection Guide",
             "",
             "**All three methods are mathematically correct** (they produce valid insphere test results).",
             "Choose based on your specific requirements:",
@@ -1125,7 +1400,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str,
         ]
 
         # Add dimension-specific performance recommendations
-        lines.append("#### Performance Optimization by Dimension")
+        lines.append("##### Performance Optimization by Dimension")
         lines.append("")
 
         for method, _avg_time, desc in performance_ranking:
@@ -1136,7 +1411,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str,
         lines.extend(
             [
                 "",
-                "#### General Recommendations",
+                "##### General Recommendations",
                 "",
                 "**For maximum performance**: Choose the method that performs best in your target dimension (see above)",
                 "",
@@ -1146,7 +1421,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str,
                 "**For algorithm transparency**: `insphere_distance` explicitly calculates the circumcenter,",
                 "making it excellent for educational purposes, debugging, and algorithm validation",
                 "",
-                "#### Performance Comparison",
+                "##### Performance Comparison",
                 "",
                 "Average performance across all non-boundary test cases:",
                 "",
@@ -1235,6 +1510,11 @@ def _get_static_sections(self) -> list[str]:
             "",
             "## Benchmark Structure",
             "",
+            "The `ci_performance_suite.rs` benchmark is the primary regression and",
+            "release-summary suite. It emits a versioned `api_benchmark_manifest` and",
+            "covers public construction, hull, validation, insertion, boundary, and",
+            "bistellar-flip workflows across supported dimensions.",
+            "",
             "The `circumsphere_containment.rs` benchmark includes:",
             "",
             "- **Random queries**: Batch processing performance with 1000 random test points",
@@ -1260,7 +1540,7 @@ def _get_update_instructions(self) -> list[str]:
             "# Generate performance summary with current data",
             "uv run benchmark-utils generate-summary",
             "",
-            "# Run fresh perf-profile benchmarks and generate summary (includes numerical accuracy)",
+            "# Run fresh perf-profile public API and circumsphere benchmarks",
             f"uv run benchmark-utils generate-summary --run-benchmarks --profile {TRUSTED_BENCH_PROFILE}",
             "",
             "# Generate baseline results for regression testing",
@@ -3068,7 +3348,11 @@ def _add_performance_summary_subcommands(subparsers: "argparse._SubParsersAction
     """Add performance summary generation subcommands."""
     perf_summary_parser = subparsers.add_parser("generate-summary", help="Generate performance summary markdown")
     perf_summary_parser.add_argument("--output", type=Path, help="Output file path (defaults to benches/PERFORMANCE_RESULTS.md)")
-    perf_summary_parser.add_argument("--run-benchmarks", action="store_true", help="Run fresh circumsphere benchmarks before generating summary")
+    perf_summary_parser.add_argument(
+        "--run-benchmarks",
+        action="store_true",
+        help="Run fresh ci_performance_suite and circumsphere benchmarks before generating summary",
+    )
     perf_summary_parser.add_argument(
         "--profile",
         default=TRUSTED_BENCH_PROFILE,
diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py
index e0037337..28a448ed 100644
--- a/scripts/tests/test_benchmark_utils.py
+++ b/scripts/tests/test_benchmark_utils.py
@@ -2161,11 +2161,50 @@ def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_c
             assert "## Performance Results Summary" in content
 
             # Check static content sections
-            assert "## Key Findings" in content
+            assert "### Public API Performance Contract (`ci_performance_suite`)" in content
+            assert "## Circumsphere Predicate Analysis" in content
             assert "### Performance Ranking" in content
-            assert "## Recommendations" in content
+            assert "### Recommendations" in content
             assert "## Performance Data Updates" in content
 
+    def test_get_ci_performance_suite_results(self):
+        """Test that Criterion estimates written under target/criterion appear in the ci_performance_suite summary."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+
+            def write_estimate(path_parts, mean_ns):
+                estimates_dir = project_root / "target" / "criterion" / Path(*path_parts) / "base"
+                estimates_dir.mkdir(parents=True)
+                estimates = {
+                    "mean": {
+                        "point_estimate": mean_ns,
+                        "confidence_interval": {
+                            "lower_bound": mean_ns * 0.9,  # synthetic interval: mean - 10%
+                            "upper_bound": mean_ns * 1.1,  # synthetic interval: mean + 10%
+                        },
+                    },
+                }
+                (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
+
+            write_estimate(("tds_new_2d", "tds_new", "10"), 120_000.0)  # three-part benchmark ID
+            write_estimate(("boundary_facets", "boundary_facets_3d_adversarial", "50"), 7_500.0)
+            write_estimate(("bistellar_flips_4d", "k2_roundtrip"), 950.0)  # two-part benchmark ID
+
+            generator = PerformanceSummaryGenerator(project_root)
+            lines = generator._get_ci_performance_suite_results()
+            content = "\n".join(lines)
+
+            assert "### Public API Performance Contract (`ci_performance_suite`)" in content
+            assert "#### Construction" in content
+            assert "Public API: `DelaunayTriangulation::new_with_options`" in content
+            assert "`tds_new_2d/tds_new/10`" in content
+            assert "well-conditioned" in content  # not in the inputs; presumably the variant label for non-adversarial IDs
+            assert "#### Boundary facets" in content
+            assert "`boundary_facets/boundary_facets_3d_adversarial/50`" in content
+            assert "adversarial" in content
+            assert "#### Bistellar flips" in content
+            assert "`bistellar_flips_4d/k2_roundtrip`" in content
+
     def test_get_circumsphere_performance_results(self):
         """Test getting circumsphere performance results."""
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -2175,7 +2214,7 @@ def test_get_circumsphere_performance_results(self):
             lines = generator._get_circumsphere_performance_results()
             content = "\n".join(lines)
 
-            assert "### Circumsphere Performance Results" in content
+            assert "### Circumsphere Predicate Performance" in content
             # Should contain fallback performance data when no criterion results exist
             assert "Basic 3D" in content or "Version unknown" in content
 
@@ -2345,6 +2384,60 @@ def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys):
             captured = capsys.readouterr()
             assert "Error running circumsphere benchmarks" in captured.out
 
+    @patch("benchmark_utils.run_cargo_command")
+    def test_run_ci_performance_suite_success(self, mock_cargo):
+        """Test that the CI suite runner benches with TRUSTED_BENCH_PROFILE by default and reports success."""
+        mock_cargo.return_value = Mock(stdout="")
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+            generator = PerformanceSummaryGenerator(project_root)
+
+            success = generator._run_ci_performance_suite()
+
+            assert success is True
+            mock_cargo.assert_called_once()
+            args = mock_cargo.call_args.args[0]  # first positional argument passed to run_cargo_command
+            assert args[:5] == [  # only the leading arguments are pinned; anything after them is unchecked
+                "bench",
+                "--profile",
+                TRUSTED_BENCH_PROFILE,
+                "--bench",
+                "ci_performance_suite",
+            ]
+
+    @patch("benchmark_utils.run_cargo_command")
+    def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo):
+        """Test that an explicit cargo_profile is forwarded to `cargo bench --profile`."""
+        mock_cargo.return_value = Mock(stdout="")
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+            generator = PerformanceSummaryGenerator(project_root)
+
+            requested_profile = "release"  # presumably differs from TRUSTED_BENCH_PROFILE so the override is observable
+            success = generator._run_ci_performance_suite(cargo_profile=requested_profile)
+
+            assert success is True
+            mock_cargo.assert_called_once()
+            args = mock_cargo.call_args.args[0]  # first positional argument passed to run_cargo_command
+            assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "ci_performance_suite"]
+
+    @patch("benchmark_utils.run_cargo_command")
+    def test_run_ci_performance_suite_failure(self, mock_cargo, capsys):
+        """Test that a cargo failure yields False and an error message on stdout instead of raising."""
+        mock_cargo.side_effect = Exception("Benchmark failed")  # simulate cargo raising mid-run
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+            generator = PerformanceSummaryGenerator(project_root)
+
+            success = generator._run_ci_performance_suite()
+
+            assert success is False
+            captured = capsys.readouterr()
+            assert "Error running ci_performance_suite benchmarks" in captured.out
+
     @patch("benchmark_utils.run_git_command")
     def test_generate_summary_success(self, mock_git, capsys):
         """Test successful generation of performance summary."""
@@ -2370,8 +2463,10 @@ def test_generate_summary_success(self, mock_git, capsys):
             assert "Generated performance summary" in captured.out
 
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
-    def test_generate_summary_with_benchmarks(self, mock_run_benchmarks):
+    @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite")
+    def test_generate_summary_with_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks):
         """Test generating summary with fresh benchmark run."""
+        mock_run_ci_suite.return_value = True
         mock_run_benchmarks.return_value = (True, None)
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -2385,12 +2480,15 @@ def test_generate_summary_with_benchmarks(self, mock_run_benchmarks):
             assert success is True
             # When run_benchmarks=True without an explicit profile, generate_summary
             # must default to TRUSTED_BENCH_PROFILE.
+            mock_run_ci_suite.assert_called_once_with(cargo_profile=TRUSTED_BENCH_PROFILE)
             mock_run_benchmarks.assert_called_once_with(cargo_profile=TRUSTED_BENCH_PROFILE)
             assert output_file.exists()
 
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
-    def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_benchmarks):
+    @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite")
+    def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks):
         """Test generating a summary with fresh benchmarks under a specific Cargo profile."""
+        mock_run_ci_suite.return_value = True
         mock_run_benchmarks.return_value = (True, None)
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -2403,12 +2501,15 @@ def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_benc
             success = generator.generate_summary(output_path=output_file, run_benchmarks=True, cargo_profile=requested_profile)
 
             assert success is True
+            mock_run_ci_suite.assert_called_once_with(cargo_profile=requested_profile)
             mock_run_benchmarks.assert_called_once_with(cargo_profile=requested_profile)
             assert output_file.exists()
 
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
-    def test_generate_summary_benchmark_failure_continues(self, mock_run_benchmarks, capsys):
+    @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite")
+    def test_generate_summary_benchmark_failure_continues(self, mock_run_ci_suite, mock_run_benchmarks, capsys):
         """Test that summary generation continues even if benchmark run fails."""
+        mock_run_ci_suite.return_value = False
         mock_run_benchmarks.return_value = (False, None)
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -2592,7 +2693,8 @@ def test_full_generation_workflow_integration(self):
                 assert "Single Query Performance (3D)" in content
                 assert "Triangulation Data Structure Performance" in content
                 assert "Performance Status: Good" in content
-                assert "Key Findings" in content
+                assert "Public API Performance Contract" in content
+                assert "Circumsphere Predicate Analysis" in content
                 assert "Performance Ranking" in content
                 assert "Recommendations" in content
                 assert "Performance Data Updates" in content
diff --git a/src/lib.rs b/src/lib.rs
index ef5238fa..0e3d37a4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -47,6 +47,7 @@
 //! | Build a triangulation, insert/remove vertices | `use delaunay::prelude::triangulation::*` |
 //! | Read-only queries, traversal, convex hull | `use delaunay::prelude::query::*` |
 //! | Geometry helpers, predicates, points | `use delaunay::prelude::geometry::*` |
+//! | Random points / triangulations for examples and tests | `use delaunay::prelude::generators::*` |
 //! | Bistellar flips (Pachner moves) | `use delaunay::prelude::triangulation::flips::*` |
 //! | Delaunay repair and flip-based Level 4 validation | `use delaunay::prelude::triangulation::repair::*` |
 //! | Delaunayize workflow (repair + flip) | `use delaunay::prelude::triangulation::delaunayize::*` |
@@ -987,10 +988,6 @@ pub mod prelude {
     /// This is useful if you want a smaller import surface than `delaunay::prelude::*`,
     /// while still having access to the key public APIs typically used in docs/tests/examples/benches.
     ///
-    /// Note: `query` currently also re-exports a few helpers commonly used in
-    /// docs/tests/examples/benches (e.g., random generators). If this grows over time, it may be
-    /// split into more focused modules (e.g., `prelude::generators`).
-    ///
     /// Includes:
     /// - Topology traversal: [`DelaunayTriangulation::edges`], [`DelaunayTriangulation::incident_edges`],
     ///   [`DelaunayTriangulation::cell_neighbors`]
@@ -998,7 +995,6 @@ pub mod prelude {
     /// - Zero-allocation geometry accessors: [`DelaunayTriangulation::vertex_coords`],
     ///   [`DelaunayTriangulation::cell_vertices`]
     /// - Convex hull extraction: [`ConvexHull::from_triangulation`]
-    /// - Test/example helpers: [`generate_random_triangulation`], [`generate_random_points_seeded`]
     pub mod query {
         // Core read-only traversal / adjacency
         pub use crate::core::adjacency::{AdjacencyIndex, AdjacencyIndexBuildError};
@@ -1024,7 +1020,8 @@ pub mod prelude {
         // Read-only algorithms
         pub use crate::geometry::algorithms::convex_hull::ConvexHull;
 
-        // Convenience generators (commonly used in docs/tests/examples/benches)
+        // Convenience generators kept for compatibility with existing docs/tests/examples/benches.
+        // Prefer prelude::generators for new code that only needs fixture data.
         pub use crate::geometry::util::{
             generate_random_points_seeded, generate_random_triangulation,
         };
@@ -1035,6 +1032,33 @@ pub mod prelude {
         // Convenience macro (commonly used in docs/tests/examples) without importing full `prelude::*`.
         pub use crate::vertex;
     }
+
+    /// Focused exports for generating fixture data in doctests, integration tests,
+    /// examples, and benchmarks.
+    ///
+    /// This module is intentionally separate from [`prelude::query`](crate::prelude::query)
+    /// so read-only traversal imports do not need to imply random data generation.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use delaunay::prelude::generators::generate_random_points_seeded;
+    /// use delaunay::prelude::geometry::Point;
+    ///
+    /// let points: Vec<Point<f64, 3>> =
+    ///     generate_random_points_seeded(4, (0.0, 1.0), 42).unwrap();
+    ///
+    /// assert_eq!(points.len(), 4);
+    /// ```
+    pub mod generators {
+        pub use crate::geometry::util::{
+            RandomPointGenerationError, generate_grid_points, generate_poisson_points,
+            generate_random_points, generate_random_points_in_ball,
+            generate_random_points_in_ball_seeded, generate_random_points_periodic,
+            generate_random_points_seeded, generate_random_triangulation,
+            generate_random_triangulation_with_topology_guarantee,
+        };
+    }
     /// Topology validation & analysis utilities.
     pub mod topology {
         /// Topology validation utilities.
@@ -1057,7 +1081,7 @@ pub mod prelude {
 /// Traits are checked at compile time, so this function is only used for
 /// testing.
 #[must_use]
-pub const fn is_normal<T: Sized + Send + Sync + Unpin>() -> bool {
+pub const fn is_normal<T: Send + Sync + Unpin>() -> bool {
     true
 }
 
@@ -1086,9 +1110,13 @@ mod tests {
             RepairQueueOrder, TopologyGuarantee, verify_delaunay_for_triangulation,
             verify_delaunay_via_flip_predicates, vertex,
         },
+        prelude::*,
         triangulation::delaunay::DelaunayTriangulation,
     };
 
+    #[cfg(feature = "count-allocations")]
+    use allocation_counter::measure;
+
     // =============================================================================
     // TYPE SAFETY TESTS
     // =============================================================================
@@ -1117,9 +1145,7 @@ mod tests {
     }
 
     #[test]
-    fn test_prelude_collections_exports() {
-        use crate::prelude::*;
-
+    fn prelude_collections_exports() {
         // Test that we can use the collections from the prelude
         let mut map: FastHashMap<u64, usize> = FastHashMap::default();
         map.insert(123, 456);
@@ -1147,7 +1173,7 @@ mod tests {
     }
 
     #[test]
-    fn test_prelude_triangulation_repair_exports() {
+    fn prelude_repair_exports() {
         let vertices = vec![
             vertex!([0.0, 0.0]),
             vertex!([1.0, 0.0]),
@@ -1181,9 +1207,7 @@ mod tests {
     }
 
     #[test]
-    fn test_prelude_quality_exports() {
-        use crate::prelude::*;
-
+    fn prelude_quality_exports() {
         // Test that quality functions are accessible from prelude
         let vertices = vec![
             vertex!([0.0, 0.0]),
@@ -1206,8 +1230,6 @@ mod tests {
 
     #[test]
     fn test_prelude_kernel_exports() {
-        use crate::prelude::*;
-
         // Test that kernel types and predicates are accessible from prelude
         let fast_kernel = FastKernel::<f64>::new();
         let robust_kernel = RobustKernel::<f64>::new();
@@ -1252,8 +1274,6 @@ mod tests {
 
     #[test]
     fn test_prelude_core_types() {
-        use crate::prelude::*;
-
         // Test that core types are accessible and work from prelude
         // Point construction
         let p1 = Point::new([0.0, 0.0, 0.0]);
@@ -1292,8 +1312,6 @@ mod tests {
 
     #[test]
     fn test_prelude_point_location() {
-        use crate::prelude::*;
-
         // Test that point location algorithms are accessible
         let vertices = vec![
             vertex!([0.0, 0.0]),
@@ -1326,8 +1344,6 @@ mod tests {
 
     #[test]
     fn test_prelude_geometry_types() {
-        use crate::prelude::*;
-
         // Test Point with Coordinate trait
         let p = Point::new([1.0_f64, 2.0_f64, 3.0_f64]);
         assert!((p.coords()[0] - 1.0_f64).abs() < f64::EPSILON);
@@ -1353,8 +1369,6 @@ mod tests {
 
     #[test]
     fn test_prelude_convex_hull() {
-        use crate::prelude::*;
-
         // Test that convex hull operations are accessible
         let vertices = vec![
             vertex!([0.0, 0.0, 0.0]),
@@ -1390,9 +1404,7 @@ mod tests {
-    /// Run these with `cargo test allocation_counting --features count-allocations`
+    /// Run these with `cargo test alloc_counting --features count-allocations`
     #[cfg(feature = "count-allocations")]
     #[test]
-    fn test_basic_allocation_counting() {
-        use allocation_counter::measure;
-
+    fn basic_alloc_counting() {
         // Test a trivial operation that should not allocate
         let result = measure(|| {
             let x = 1 + 1;
@@ -1428,9 +1440,7 @@ mod tests {
 
     #[cfg(feature = "count-allocations")]
     #[test]
-    fn test_allocation_counting_with_allocating_operation() {
-        use allocation_counter::measure;
-
+    fn alloc_counting_with_vec() {
         // Test an operation that does allocate memory
         let result = measure(|| {
             let _vec: Vec<i32> = vec![1, 2, 3, 4, 5];
diff --git a/tests/prelude_exports.rs b/tests/prelude_exports.rs
new file mode 100644
index 00000000..824e7c42
--- /dev/null
+++ b/tests/prelude_exports.rs
@@ -0,0 +1,38 @@
+//! Public prelude smoke tests.
+//!
+//! These tests intentionally use focused preludes instead of module-internal
+//! paths so doctests, integration tests, examples, and benchmarks have a small
+//! import contract to copy from.
+
+use delaunay::prelude::generators::generate_random_points_seeded;
+use delaunay::prelude::geometry::{AdaptiveKernel, Point};
+use delaunay::prelude::query::ConvexHull;
+use delaunay::prelude::triangulation::flips::{BistellarFlips, TopologyGuarantee};
+use delaunay::prelude::triangulation::{
+    ConstructionOptions, DelaunayTriangulation, InsertionOrderStrategy, Vertex,
+};
+use delaunay::vertex;
+
+const fn assert_bistellar_flips(_: &impl BistellarFlips<AdaptiveKernel<f64>, (), (), 3>) {}
+
+#[test]
+fn preludes_cover_bench_apis() {
+    let _generated_points: Vec<Point<f64, 2>> =
+        generate_random_points_seeded(3, (0.0, 1.0), 42).unwrap();
+
+    let vertices: Vec<Vertex<f64, (), 3>> = vec![
+        vertex!([0.0, 0.0, 0.0]),
+        vertex!([1.0, 0.0, 0.0]),
+        vertex!([0.0, 1.0, 0.0]),
+        vertex!([0.0, 0.0, 1.0]),
+    ];
+    let options =
+        ConstructionOptions::default().with_insertion_order(InsertionOrderStrategy::Input);
+    let dt = DelaunayTriangulation::new_with_options(&vertices, options).unwrap();
+
+    assert_eq!(dt.topology_guarantee(), TopologyGuarantee::PLManifold);
+    assert!(dt.boundary_facets().count() > 0);
+    assert!(ConvexHull::from_triangulation(dt.as_triangulation()).is_ok());
+    assert!(dt.validate().is_ok());
+    assert_bistellar_flips(&dt); // compiles only if `dt` implements `BistellarFlips`
+}

From dad0588743d878a72872338779c876aef3aba8d6 Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Mon, 27 Apr 2026 16:41:39 -0700
Subject: [PATCH 3/8] Changed: Enhance profiling suite and CI with dynamic
 benchmark configuration

- The `ci_performance_suite` now dynamically generates benchmark IDs,
  reducing maintenance overhead and improving consistency. It also leverages
  pre-computed adversarial seeds for stable and reproducible performance
  measurements across various operations and dimensions.
- The `profiling_suite` gains support for adversarial point distributions
  in validation benchmarks, using a robust seed search to ensure valid
  triangulations for complex cases.
- Profiling environment metadata capture is refactored into a reusable
  script, simplifying workflow definitions in GitHub Actions.
- The `justfile` separates `SlotMap` backend linting into a dedicated
  `just check-storage-backends` command, providing a focused compatibility
  canary.
- The `just profile` command's cleanup logic is made more robust, ensuring
  temporary worktree removal in all scenarios.
---
 .github/workflows/profiling-benchmarks.yml |  78 +----
 benches/ci_performance_suite.rs            | 359 +++++++++++++++------
 benches/profiling_suite.rs                 |  99 +++++-
 docs/dev/commands.md                       |  11 +-
 justfile                                   |  25 +-
 scripts/benchmark_utils.py                 |   2 +-
 scripts/ci/capture_profiling_metadata.sh   |  45 +++
 7 files changed, 422 insertions(+), 197 deletions(-)
 create mode 100755 scripts/ci/capture_profiling_metadata.sh

diff --git a/.github/workflows/profiling-benchmarks.yml b/.github/workflows/profiling-benchmarks.yml
index 824477fe..7103c22b 100644
--- a/.github/workflows/profiling-benchmarks.yml
+++ b/.github/workflows/profiling-benchmarks.yml
@@ -113,44 +113,7 @@ jobs:
       - name: Capture profiling environment metadata
         env:
           BENCH_FILTER_VALUE: ${{ github.event.inputs.benchmark_filter || '' }}
-        run: |
-          set -euo pipefail
-
-          mkdir -p profiling-results
-
-          declared_toolchain="$(
-            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null \
-              | head -n 1 \
-              | cut -d '=' -f 2 \
-              | tr -d ' "' \
-              || true
-          )"
-          rust_version="$(
-            grep -E '^rust-version[[:space:]]*=' Cargo.toml 2>/dev/null \
-              | head -n 1 \
-              | cut -d '=' -f 2 \
-              | tr -d ' "' \
-              || true
-          )"
-          profiling_mode="production"
-          if [[ "${PROFILING_DEV_MODE:-}" == "1" ]]; then
-            profiling_mode="development"
-          fi
-
-          {
-            echo "# Profiling Environment"
-            echo
-            echo "- Code ref: ${GITHUB_REF_NAME}"
-            echo "- Commit: $(git rev-parse HEAD)"
-            echo "- Declared rust-toolchain.toml channel: ${declared_toolchain}"
-            echo "- Cargo.toml rust-version: ${rust_version}"
-            echo "- rustc: $(rustc --version)"
-            echo "- cargo: $(cargo --version)"
-            echo "- Cargo profile: perf"
-            echo "- Benchmark filter: ${BENCH_FILTER_VALUE:-All benchmarks}"
-            echo "- Profiling mode: ${profiling_mode}"
-            echo "- Runner: ${RUNNER_OS}"
-          } > profiling-results/environment_metadata.md
+        run: ./scripts/ci/capture_profiling_metadata.sh
 
       - name: Build profiling suite
         run: |
@@ -314,40 +277,11 @@ jobs:
           cargo test --test allocation_api --features count-allocations --verbose
 
       - name: Capture memory profiling environment metadata
-        run: |
-          set -euo pipefail
-
-          mkdir -p profiling-results
-
-          declared_toolchain="$(
-            grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null \
-              | head -n 1 \
-              | cut -d '=' -f 2 \
-              | tr -d ' "' \
-              || true
-          )"
-          rust_version="$(
-            grep -E '^rust-version[[:space:]]*=' Cargo.toml 2>/dev/null \
-              | head -n 1 \
-              | cut -d '=' -f 2 \
-              | tr -d ' "' \
-              || true
-          )"
-
-          {
-            echo "# Memory Profiling Environment"
-            echo
-            echo "- Code ref: ${GITHUB_REF_NAME}"
-            echo "- Commit: $(git rev-parse HEAD)"
-            echo "- Declared rust-toolchain.toml channel: ${declared_toolchain}"
-            echo "- Cargo.toml rust-version: ${rust_version}"
-            echo "- rustc: $(rustc --version)"
-            echo "- cargo: $(cargo --version)"
-            echo "- Cargo profile: perf"
-            echo "- Benchmark filter: memory_profiling"
-            echo "- Profiling mode: development"
-            echo "- Runner: ${RUNNER_OS}"
-          } > profiling-results/environment_metadata.md
+        env:
+          PROFILE_METADATA_TITLE: Memory Profiling Environment
+          PROFILE_METADATA_FILTER: memory_profiling
+          PROFILE_METADATA_MODE: development
+        run: ./scripts/ci/capture_profiling_metadata.sh
 
       - name: Run memory scaling benchmarks
         env:
diff --git a/benches/ci_performance_suite.rs b/benches/ci_performance_suite.rs
index cafc9750..3f3ee791 100644
--- a/benches/ci_performance_suite.rs
+++ b/benches/ci_performance_suite.rs
@@ -31,6 +31,7 @@ use criterion::measurement::WallTime;
 use criterion::{
     BatchSize, BenchmarkGroup, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main,
 };
+use delaunay::geometry::util::simplex_volume;
 use delaunay::prelude::generators::generate_random_points_seeded;
 use delaunay::prelude::geometry::{AdaptiveKernel, Coordinate, Point, RobustKernel};
 use delaunay::prelude::query::ConvexHull;
@@ -81,56 +82,117 @@ struct ApiBenchmarkEntry {
     group: &'static str,
     public_api: &'static str,
     dimensions: &'static str,
-    benchmark_ids: &'static str,
+    benchmark_ids: String,
     note: &'static str,
 }
 
 static API_BENCHMARK_MANIFEST: Once = Once::new();
 
-const API_BENCHMARK_ENTRIES: &[ApiBenchmarkEntry] = &[
-    ApiBenchmarkEntry {
-        group: "construction",
-        public_api: "DelaunayTriangulation::new_with_options",
-        dimensions: "2,3,4,5",
-        benchmark_ids: "tds_new_2d/{tds_new,tds_new_adversarial}/{10,25,50};tds_new_3d/{tds_new,tds_new_adversarial}/{10,25,50};tds_new_4d/{tds_new,tds_new_adversarial}/{10,25,50};tds_new_5d/{tds_new,tds_new_adversarial}/{10,25}",
-        note: "construct_from_seeded_vertices_and_adversarial_large_coordinate_inputs",
-    },
-    ApiBenchmarkEntry {
-        group: "boundary_facets",
-        public_api: "DelaunayTriangulation::boundary_facets",
-        dimensions: "2,3,4,5",
-        benchmark_ids: "boundary_facets/{boundary_facets_2d,boundary_facets_2d_adversarial}/50;boundary_facets/{boundary_facets_3d,boundary_facets_3d_adversarial}/50;boundary_facets/{boundary_facets_4d,boundary_facets_4d_adversarial}/50;boundary_facets/{boundary_facets_5d,boundary_facets_5d_adversarial}/25",
-        note: "iterate_boundary_facets_on_well_conditioned_and_adversarial_inputs",
-    },
-    ApiBenchmarkEntry {
-        group: "convex_hull",
-        public_api: "ConvexHull::from_triangulation",
-        dimensions: "2,3,4,5",
-        benchmark_ids: "convex_hull/{from_triangulation_2d,from_triangulation_2d_adversarial}/50;convex_hull/{from_triangulation_3d,from_triangulation_3d_adversarial}/50;convex_hull/{from_triangulation_4d,from_triangulation_4d_adversarial}/50;convex_hull/{from_triangulation_5d,from_triangulation_5d_adversarial}/25",
-        note: "extract_hull_from_well_conditioned_and_adversarial_triangulations",
-    },
-    ApiBenchmarkEntry {
-        group: "validation",
-        public_api: "DelaunayTriangulation::validate",
-        dimensions: "3,4,5",
-        benchmark_ids: "validation/{validate_3d,validate_3d_adversarial}/50;validation/{validate_4d,validate_4d_adversarial}/50;validation/{validate_5d,validate_5d_adversarial}/25",
-        note: "levels_1_through_4_on_well_conditioned_and_adversarial_inputs",
-    },
-    ApiBenchmarkEntry {
-        group: "incremental_insert",
-        public_api: "DelaunayTriangulation::insert",
-        dimensions: "2,3,4,5",
-        benchmark_ids: "incremental_insert/{insert_2d,insert_2d_adversarial}/10;incremental_insert/{insert_3d,insert_3d_adversarial}/10;incremental_insert/{insert_4d,insert_4d_adversarial}/6;incremental_insert/{insert_5d,insert_5d_adversarial}/4",
-        note: "insert_batches_into_prebuilt_well_conditioned_and_adversarial_triangulations",
-    },
-    ApiBenchmarkEntry {
-        group: "bistellar_flips",
-        public_api: "BistellarFlips::{flip_k1_insert,flip_k1_remove,flip_k2,flip_k2_inverse_from_edge,flip_k3,flip_k3_inverse_from_triangle}",
-        dimensions: "4",
-        benchmark_ids: "bistellar_flips_4d/k1_roundtrip;bistellar_flips_4d/k2_roundtrip;bistellar_flips_4d/k3_roundtrip",
-        note: "stable_pl_manifold_roundtrips",
-    },
-];
+/// Formats `counts` as a comma-separated decimal list with no spaces.
+///
+/// An empty slice produces an empty string.
+fn count_list(counts: &[usize]) -> String {
+    let rendered: Vec<String> = counts.iter().map(ToString::to_string).collect();
+    rendered.join(",")
+}
+
+fn construction_benchmark_ids() -> String {
+    [
+        format!(
+            "tds_new_2d/{{tds_new,tds_new_adversarial}}/{{{}}}",
+            count_list(COUNTS)
+        ),
+        format!(
+            "tds_new_3d/{{tds_new,tds_new_adversarial}}/{{{}}}",
+            count_list(COUNTS)
+        ),
+        format!(
+            "tds_new_4d/{{tds_new,tds_new_adversarial}}/{{{}}}",
+            count_list(COUNTS)
+        ),
+        format!(
+            "tds_new_5d/{{tds_new,tds_new_adversarial}}/{{{}}}",
+            count_list(COUNTS_5D)
+        ),
+    ]
+    .join(";")
+}
+
+fn operation_benchmark_ids(group: &str, prefix: &str) -> String {
+    [
+        format!("{group}/{{{prefix}_2d,{prefix}_2d_adversarial}}/{OPERATION_COUNT}"),
+        format!("{group}/{{{prefix}_3d,{prefix}_3d_adversarial}}/{OPERATION_COUNT}"),
+        format!("{group}/{{{prefix}_4d,{prefix}_4d_adversarial}}/{OPERATION_COUNT}"),
+        format!("{group}/{{{prefix}_5d,{prefix}_5d_adversarial}}/{OPERATION_COUNT_5D}"),
+    ]
+    .join(";")
+}
+
+fn validation_benchmark_ids() -> String {
+    [
+        format!("validation/{{validate_3d,validate_3d_adversarial}}/{OPERATION_COUNT}"),
+        format!("validation/{{validate_4d,validate_4d_adversarial}}/{OPERATION_COUNT}"),
+        format!("validation/{{validate_5d,validate_5d_adversarial}}/{OPERATION_COUNT_5D}"),
+    ]
+    .join(";")
+}
+
+fn insert_benchmark_ids() -> String {
+    [
+        format!("incremental_insert/{{insert_2d,insert_2d_adversarial}}/{INSERT_COUNT}"),
+        format!("incremental_insert/{{insert_3d,insert_3d_adversarial}}/{INSERT_COUNT}"),
+        format!("incremental_insert/{{insert_4d,insert_4d_adversarial}}/{INSERT_COUNT_4D}"),
+        format!("incremental_insert/{{insert_5d,insert_5d_adversarial}}/{INSERT_COUNT_5D}"),
+    ]
+    .join(";")
+}
+
+fn api_benchmark_entries() -> Vec<ApiBenchmarkEntry> {
+    vec![
+        ApiBenchmarkEntry {
+            group: "construction",
+            public_api: "DelaunayTriangulation::new_with_options",
+            dimensions: "2,3,4,5",
+            benchmark_ids: construction_benchmark_ids(),
+            note: "construct_from_seeded_vertices_and_adversarial_large_coordinate_inputs",
+        },
+        ApiBenchmarkEntry {
+            group: "boundary_facets",
+            public_api: "DelaunayTriangulation::boundary_facets",
+            dimensions: "2,3,4,5",
+            benchmark_ids: operation_benchmark_ids("boundary_facets", "boundary_facets"),
+            note: "iterate_boundary_facets_on_well_conditioned_and_adversarial_inputs",
+        },
+        ApiBenchmarkEntry {
+            group: "convex_hull",
+            public_api: "ConvexHull::from_triangulation",
+            dimensions: "2,3,4,5",
+            benchmark_ids: operation_benchmark_ids("convex_hull", "from_triangulation"),
+            note: "extract_hull_from_well_conditioned_and_adversarial_triangulations",
+        },
+        ApiBenchmarkEntry {
+            group: "validation",
+            public_api: "DelaunayTriangulation::validate",
+            dimensions: "3,4,5",
+            benchmark_ids: validation_benchmark_ids(),
+            note: "levels_1_through_4_on_well_conditioned_and_adversarial_inputs",
+        },
+        ApiBenchmarkEntry {
+            group: "incremental_insert",
+            public_api: "DelaunayTriangulation::insert",
+            dimensions: "2,3,4,5",
+            benchmark_ids: insert_benchmark_ids(),
+            note: "insert_batches_into_prebuilt_well_conditioned_and_adversarial_triangulations",
+        },
+        ApiBenchmarkEntry {
+            group: "bistellar_flips",
+            public_api: "BistellarFlips::{flip_k1_insert,flip_k1_remove,flip_k2,flip_k2_inverse_from_edge,flip_k3,flip_k3_inverse_from_triangle}",
+            dimensions: "4",
+            benchmark_ids: "bistellar_flips_4d/k1_roundtrip;bistellar_flips_4d/k2_roundtrip;bistellar_flips_4d/k3_roundtrip".to_string(),
+            note: "stable_pl_manifold_roundtrips",
+        },
+    ]
+}
 
 /// Stable 4D PL-manifold configuration used for explicit bistellar flips.
 const STABLE_POINTS_4D: &[[f64; 4]] = &[
@@ -179,6 +241,24 @@ const KNOWN_SEEDS: &[(usize, usize, u64)] = &[
     (5, 25, 816),
 ];
 
+const KNOWN_ADV_SEEDS: &[(usize, usize, u64)] = &[
+    // 2D (each entry: dimension, point count, seed; matched by `known_adv_seed`)
+    (2, 10, 2_779_097_209),
+    (2, 25, 2_779_097_224),
+    (2, 50, 2_779_097_249),
+    // 3D
+    (3, 10, 2_779_098_586),
+    (3, 25, 2_779_098_601),
+    (3, 50, 2_779_098_627),
+    // 4D
+    (4, 10, 2_779_104_247),
+    (4, 25, 2_779_104_262),
+    (4, 50, 2_779_104_287),
+    // 5D
+    (5, 10, 2_779_109_908),
+    (5, 25, 2_779_109_924),
+];
+
 fn known_seed(dim: usize, count: usize) -> Option<u64> {
     KNOWN_SEEDS
         .iter()
@@ -186,13 +266,20 @@ fn known_seed(dim: usize, count: usize) -> Option<u64> {
         .map(|&(_, _, seed)| seed)
 }
 
+/// Returns the first `KNOWN_ADV_SEEDS` seed recorded for `(dim, count)`, if any.
+fn known_adv_seed(dim: usize, count: usize) -> Option<u64> {
+    KNOWN_ADV_SEEDS
+        .iter()
+        .find_map(|&(d, n, seed)| (d == dim && n == count).then_some(seed))
+}
+
 fn print_manifest_once() {
     API_BENCHMARK_MANIFEST.call_once(|| {
         println!(
             "api_benchmark_manifest crate=delaunay version={} benchmark=ci_performance_suite schema=1",
             env!("CARGO_PKG_VERSION")
         );
-        for entry in API_BENCHMARK_ENTRIES {
+        for entry in api_benchmark_entries() {
             println!(
                 "api_benchmark group={} public_api={} dimensions={} benchmark_ids={} note={}",
                 entry.group, entry.public_api, entry.dimensions, entry.benchmark_ids, entry.note
@@ -267,7 +354,10 @@ fn prepare_inserts<const D: usize>(
     count: usize,
     dataset: Dataset,
 ) -> Vec<Vertex<f64, (), D>> {
-    let seed = dim_seed.wrapping_add(0x5151_5151);
+    let mut seed = dim_seed.wrapping_add(0x5151_5151);
+    if matches!(dataset, Dataset::Adversarial) {
+        seed ^= 0xA5A5_A5A5;
+    }
     let points = match dataset {
         Dataset::WellConditioned => {
             generate_random_points_seeded::<f64, D>(count, (-50.0, 50.0), seed)
@@ -306,11 +396,42 @@ fn find_seed_vertices<const D: usize>(
     None
 }
 
+/// Generates points via `generate_adv_points`; `Some` only if a triangulation builds.
+fn stable_adv_points<const D: usize>(
+    seed: u64,
+    count: usize,
+    attempts: NonZeroUsize,
+) -> SeedSearchResult<D> {
+    let points = generate_adv_points::<D>(count, seed);
+    let vertices: Vec<_> = points.iter().map(|point| vertex!(*point)).collect();
+    let policy = RetryPolicy::Shuffled {
+        attempts,
+        base_seed: Some(seed),
+    };
+    let options = ConstructionOptions::default().with_retry_policy(policy);
+    let outcome = BenchTriangulation::<D>::new_with_options(&vertices, options);
+    outcome.is_ok().then_some((seed, points, vertices))
+}
+
 fn prepare_adv_data<const D: usize>(
     dim_seed: u64,
     count: usize,
     attempts: NonZeroUsize,
 ) -> (u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>) {
+    if !discover_seeds_enabled()
+        && let Some(seed) = known_adv_seed(D, count)
+    {
+        if let Some(result) = stable_adv_points::<D>(seed, count, attempts) {
+            return result;
+        }
+        warn!(
+            known_seed = seed,
+            dim = D,
+            count,
+            "known adversarial seed failed, falling back to runtime search"
+        );
+    }
+
     let start_seed = dim_seed
         .wrapping_mul(17)
         .wrapping_add(count as u64)
@@ -319,15 +440,11 @@ fn prepare_adv_data<const D: usize>(
 
     for offset in 0..search_limit {
         let candidate_seed = start_seed.wrapping_add(offset as u64);
-        let points = generate_adv_points::<D>(count, candidate_seed);
-        let vertices = points.iter().map(|p| vertex!(*p)).collect::<Vec<_>>();
-        let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled {
-            attempts,
-            base_seed: Some(candidate_seed),
-        });
-
-        if BenchTriangulation::<D>::new_with_options(&vertices, options).is_ok() {
-            return (candidate_seed, points, vertices);
+        if let Some(result) = stable_adv_points::<D>(candidate_seed, count, attempts) {
+            if discover_seeds_enabled() {
+                println!("ADV_SEED {D} {count} {candidate_seed}");
+            }
+            return result;
         }
     }
 
@@ -409,12 +526,36 @@ fn cell_centroid_4d(dt: &FlipTriangulation4, cell_key: CellKey) -> [f64; 4] {
     coords
 }
 
-fn roundtrip_k1_4d(dt: &mut FlipTriangulation4) {
-    let cell_key = dt
-        .cells()
-        .next()
+/// Collects the point of each vertex of `cell_key`, in `cell.vertices()` order.
+fn cell_points_4d(dt: &FlipTriangulation4, cell_key: CellKey) -> Vec<Point<f64, 4>> {
+    let tds = dt.tds();
+    let cell = tds
+        .get_cell(cell_key)
+        .expect("cell key should exist in benchmark triangulation");
+    cell.vertices()
+        .iter()
+        .map(|&vertex_key| {
+            let vertex = tds
+                .get_vertex_by_key(vertex_key)
+                .expect("vertex key should exist in benchmark triangulation");
+            *vertex.point()
+        })
+        .collect()
+}
+
+fn largest_volume_cell_4d(dt: &FlipTriangulation4) -> CellKey {
+    dt.cells()
+        .filter_map(|(cell_key, _)| {
+            simplex_volume(&cell_points_4d(dt, cell_key))
+                .ok()
+                .map(|volume| (cell_key, volume))
+        })
+        .max_by(|(_, left), (_, right)| left.total_cmp(right))
         .map(|(cell_key, _)| cell_key)
-        .expect("benchmark triangulation should have cells");
+        .expect("stable 4D benchmark triangulation should have a non-degenerate cell")
+}
+
+fn roundtrip_k1_4d(dt: &mut FlipTriangulation4, cell_key: CellKey) {
     let centroid = cell_centroid_4d(dt, cell_key);
     let new_vertex = vertex!(centroid);
     let new_uuid = new_vertex.uuid();
@@ -609,35 +750,45 @@ macro_rules! benchmark_tds_new_dimension {
                     .collect();
 
                 for &count in counts {
-                    let bench_id =
-                        format!("tds_new_{}d/tds_new/{}", stringify!($dim), count);
+                    let bench_id = format!("tds_new_{}d/tds_new/{}", stringify!($dim), count);
+                    let adv_bench_id =
+                        format!("tds_new_{}d/tds_new_adversarial/{}", stringify!($dim), count);
 
-                    if !filters.is_empty() && !filters.iter().any(|filter| bench_id.contains(filter)) {
-                        continue;
+                    if !filters.is_empty()
+                        && filters.iter().any(|filter| adv_bench_id.contains(filter))
+                    {
+                        let attempts =
+                            NonZeroUsize::new(8).expect("retry attempts must be non-zero");
+                        let _ = prepare_adv_data::<$dim>($seed, count, attempts);
+                        return;
                     }
 
-                    let seed = ($seed as u64).wrapping_add(count as u64);
-                    let limit = seed_search_limit();
-                    let attempts =
-                        NonZeroUsize::new(6).expect("retry attempts must be non-zero");
-
-                    if let Some((candidate_seed, _, _)) =
-                        find_seed_vertices::<$dim>(seed, count, bounds, limit, attempts)
+                    if filters.is_empty()
+                        || filters.iter().any(|filter| bench_id.contains(filter))
                     {
-                        println!(
-                            "seed_search_found dim={} count={} seed={}",
-                            $dim, count, candidate_seed
+                        let seed = ($seed as u64).wrapping_add(count as u64);
+                        let limit = seed_search_limit();
+                        let attempts =
+                            NonZeroUsize::new(6).expect("retry attempts must be non-zero");
+
+                        if let Some((candidate_seed, _, _)) =
+                            find_seed_vertices::<$dim>(seed, count, bounds, limit, attempts)
+                        {
+                            println!(
+                                "seed_search_found dim={} count={} seed={}",
+                                $dim, count, candidate_seed
+                            );
+                            return;
+                        }
+
+                        panic!(
+                            "seed_search_failed dim={} count={} start_seed={} limit={}",
+                            $dim,
+                            count,
+                            seed,
+                            limit
                         );
-                        return;
                     }
-
-                    panic!(
-                        "seed_search_failed dim={} count={} start_seed={} limit={}",
-                        $dim,
-                        count,
-                        seed,
-                        limit
-                    );
                 }
 
                 // No filter matched this benchmark function; do nothing.
@@ -864,6 +1015,9 @@ fn bench_insert_case<const D: usize>(
 
 fn benchmark_boundary_facets(c: &mut Criterion) {
     print_manifest_once();
+    if discover_seeds_enabled() {
+        return;
+    }
     let mut group = c.benchmark_group("boundary_facets");
     group.sample_size(25);
 
@@ -940,6 +1094,9 @@ fn benchmark_boundary_facets(c: &mut Criterion) {
 
 fn benchmark_convex_hull(c: &mut Criterion) {
     print_manifest_once();
+    if discover_seeds_enabled() {
+        return;
+    }
     let mut group = c.benchmark_group("convex_hull");
     group.sample_size(20);
 
@@ -1016,6 +1173,9 @@ fn benchmark_convex_hull(c: &mut Criterion) {
 
 fn benchmark_validation(c: &mut Criterion) {
     print_manifest_once();
+    if discover_seeds_enabled() {
+        return;
+    }
     let mut group = c.benchmark_group("validation");
     group.sample_size(15);
 
@@ -1075,6 +1235,9 @@ fn benchmark_validation(c: &mut Criterion) {
 
 fn benchmark_insert(c: &mut Criterion) {
     print_manifest_once();
+    if discover_seeds_enabled() {
+        return;
+    }
     let mut group = c.benchmark_group("incremental_insert");
     group.sample_size(15);
 
@@ -1167,15 +1330,21 @@ fn benchmark_insert(c: &mut Criterion) {
 
 fn benchmark_bistellar_flips(c: &mut Criterion) {
     print_manifest_once();
+    if discover_seeds_enabled() {
+        return;
+    }
     let mut group = c.benchmark_group("bistellar_flips_4d");
     group.sample_size(10);
     let base_dt = build_flip_dt_4d();
+    let k1_cell = largest_volume_cell_4d(&base_dt);
+    let k2_facet = flippable_k2_facet_4d(&base_dt);
+    let k3_ridge = flippable_k3_ridge_4d(&base_dt);
 
     group.bench_function("k1_roundtrip", |b| {
         b.iter_batched(
             || base_dt.clone(),
             |mut dt| {
-                roundtrip_k1_4d(&mut dt);
+                roundtrip_k1_4d(&mut dt, k1_cell);
                 black_box(dt);
             },
             BatchSize::LargeInput,
@@ -1184,13 +1353,9 @@ fn benchmark_bistellar_flips(c: &mut Criterion) {
 
     group.bench_function("k2_roundtrip", |b| {
         b.iter_batched(
-            || {
-                let dt = base_dt.clone();
-                let facet = flippable_k2_facet_4d(&dt);
-                (dt, facet)
-            },
-            |(mut dt, facet)| {
-                roundtrip_k2_4d(&mut dt, facet);
+            || base_dt.clone(),
+            |mut dt| {
+                roundtrip_k2_4d(&mut dt, k2_facet);
                 black_box(dt);
             },
             BatchSize::LargeInput,
@@ -1199,13 +1364,9 @@ fn benchmark_bistellar_flips(c: &mut Criterion) {
 
     group.bench_function("k3_roundtrip", |b| {
         b.iter_batched(
-            || {
-                let dt = base_dt.clone();
-                let ridge = flippable_k3_ridge_4d(&dt);
-                (dt, ridge)
-            },
-            |(mut dt, ridge)| {
-                roundtrip_k3_4d(&mut dt, ridge);
+            || base_dt.clone(),
+            |mut dt| {
+                roundtrip_k3_4d(&mut dt, k3_ridge);
                 black_box(dt);
             },
             BatchSize::LargeInput,
diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index 08afed19..be835182 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -58,16 +58,20 @@ use criterion::{
     BatchSize, BenchmarkGroup, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main,
 };
 use delaunay::core::collections::SmallBuffer;
+use delaunay::geometry::traits::coordinate::Coordinate;
 use delaunay::geometry::util::{
     generate_grid_points, generate_poisson_points, generate_random_points_seeded,
     safe_usize_to_scalar,
 };
 use delaunay::prelude::query::*;
-use delaunay::prelude::triangulation::DelaunayTriangulationBuilder;
+use delaunay::prelude::triangulation::{
+    ConstructionOptions, DelaunayTriangulationBuilder, RetryPolicy,
+};
 use delaunay::vertex;
 use num_traits::cast;
 use std::env;
 use std::hint::black_box;
+use std::num::NonZeroUsize;
 use std::sync::Once;
 use std::time::{Duration, Instant};
 
@@ -104,6 +108,7 @@ const QUERY_RESULTS_BUFFER_SIZE: usize = 1024; // For bounded query result colle
 const DEFAULT_SEED: u64 = 42;
 const QUERY_SEED: u64 = 123;
 const MAX_QUERY_RESULTS: usize = 1_000;
+const VALIDATION_SEED_SEARCH_LIMIT: u64 = 64;
 
 // Memory allocation counting support
 #[cfg(feature = "count-allocations")]
@@ -191,6 +196,7 @@ enum PointDistribution {
     Random,
     Grid,
     PoissonDisk,
+    Adversarial,
 }
 
 impl PointDistribution {
@@ -199,6 +205,7 @@ impl PointDistribution {
             Self::Random => "random",
             Self::Grid => "grid",
             Self::PoissonDisk => "poisson",
+            Self::Adversarial => "adversarial",
         }
     }
 }
@@ -212,6 +219,28 @@ fn gen_points<const D: usize>(
     match distribution {
         PointDistribution::Random => generate_random_points_seeded(count, (-100.0, 100.0), seed)
             .expect("random point generation failed"),
+        PointDistribution::Adversarial => generate_random_points_seeded::<f64, D>(
+            count,
+            (-1.0, 1.0),
+            seed ^ 0xA5A5_A5A5_A5A5_A5A5,
+        )
+        .expect("adversarial base point generation failed")
+        .iter()
+        .enumerate()
+        .map(|(index, point)| {
+            let index = u32::try_from(index).expect("benchmark point index should fit in u32");
+            let mut coords = [0.0_f64; D];
+            for (axis, coord) in coords.iter_mut().enumerate() {
+                let axis_number = u32::try_from(axis + 1).expect("axis should fit in u32");
+                let base: f64 = point.coords()[axis];
+                let cluster_offset = f64::from(index % 7) * 1.0e-3;
+                let axis_offset = f64::from(axis_number) * 0.25;
+                let perturbation = f64::from((index + axis_number) % 11) * 1.0e-6;
+                *coord = base.mul_add(1.0e3, 1.0e9 + axis_offset + cluster_offset + perturbation);
+            }
+            Point::new(coords)
+        })
+        .collect(),
         PointDistribution::Grid => {
             // Calculate points per dimension to get approximately `count` points total
             let count_f64 = safe_usize_to_scalar::<f64>(count).unwrap_or(2.0);
@@ -528,13 +557,12 @@ fn bench_memory_usage<const D: usize>(
                     SmallBuffer::new();
 
                 for _ in 0..iters {
+                    let points = gen_points::<D>(count, PointDistribution::Random, DEFAULT_SEED);
+                    let pts_len = points.len();
+                    let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
                     let start_time = Instant::now();
 
                     let alloc_info = measure(|| {
-                        let points =
-                            gen_points::<D>(count, PointDistribution::Random, DEFAULT_SEED);
-                        let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
-                        actual_point_counts.push(points.len()); // Track actual count
                         if let Ok(dt) = DelaunayTriangulationBuilder::new(&vertices).build::<()>() {
                             black_box(dt);
                         }
@@ -542,6 +570,7 @@ fn bench_memory_usage<const D: usize>(
 
                     total_time += start_time.elapsed();
                     allocation_infos.push(alloc_info);
+                    actual_point_counts.push(pts_len);
                 }
 
                 // Report memory usage summary if available
@@ -743,18 +772,54 @@ fn benchmark_query_latency(c: &mut Criterion) {
 macro_rules! benchmark_validation_components_dimension {
     ($dim:literal, $func_name:ident, $count:expr) => {
         fn $func_name(c: &mut Criterion) {
-            let points = gen_points::<$dim>($count, PointDistribution::Random, DEFAULT_SEED);
-            let vertices: Vec<_> = points.iter().map(|point| vertex!(*point)).collect();
-            let dt = DelaunayTriangulationBuilder::new(&vertices)
-                .build::<()>()
-                .unwrap_or_else(|err| {
+            let is_adversarial = stringify!($func_name).ends_with("_adversarial");
+            let distribution = if is_adversarial {
+                PointDistribution::Adversarial
+            } else {
+                PointDistribution::Random
+            };
+            let suffix = if is_adversarial { "_adversarial" } else { "" };
+            let mut last_error = None;
+            let dt = (0..VALIDATION_SEED_SEARCH_LIMIT)
+                .find_map(|offset| {
+                    let seed = DEFAULT_SEED.wrapping_add(offset);
+                    let points = gen_points::<$dim>($count, distribution, seed);
+                    let vertices: Vec<_> = points.iter().map(|point| vertex!(*point)).collect();
+                    let builder = DelaunayTriangulationBuilder::new(&vertices);
+                    let builder = if is_adversarial {
+                        let attempts =
+                            NonZeroUsize::new(8).expect("retry attempts must be non-zero");
+                        builder.construction_options(
+                            ConstructionOptions::default().with_retry_policy(
+                                RetryPolicy::Shuffled {
+                                    attempts,
+                                    base_seed: Some(seed),
+                                },
+                            ),
+                        )
+                    } else {
+                        builder
+                    };
+
+                    match builder.build::<()>() {
+                        Ok(dt) => Some(dt),
+                        Err(err) => {
+                            last_error = Some(format!("{err}"));
+                            None
+                        }
+                    }
+                })
+                .unwrap_or_else(|| {
                     panic!(
-                        "failed to build {}D validation component benchmark triangulation: {err}",
-                        $dim
+                        "failed to build {}D validation component benchmark triangulation \
+                         after {} seeds (last error: {})",
+                        $dim,
+                        VALIDATION_SEED_SEARCH_LIMIT,
+                        last_error.unwrap_or_else(|| "none".to_string())
                     );
                 });
 
-            let mut group = c.benchmark_group(format!("validation_components_{}d", $dim));
+            let mut group = c.benchmark_group(format!("validation_components_{}d{}", $dim, suffix));
             group.measurement_time(bench_time(15));
             group.throughput(Throughput::Elements($count as u64));
 
@@ -795,6 +860,10 @@ benchmark_validation_components_dimension!(2, benchmark_validation_components_2d
 benchmark_validation_components_dimension!(3, benchmark_validation_components_3d, 50);
 benchmark_validation_components_dimension!(4, benchmark_validation_components_4d, 25);
 benchmark_validation_components_dimension!(5, benchmark_validation_components_5d, 25);
+benchmark_validation_components_dimension!(2, benchmark_validation_components_2d_adversarial, 50);
+benchmark_validation_components_dimension!(3, benchmark_validation_components_3d_adversarial, 50);
+benchmark_validation_components_dimension!(4, benchmark_validation_components_4d_adversarial, 25);
+benchmark_validation_components_dimension!(5, benchmark_validation_components_5d_adversarial, 25);
 
 // ============================================================================
 // Algorithmic Bottleneck Identification
@@ -899,6 +968,10 @@ criterion_group!(
         benchmark_validation_components_3d,
         benchmark_validation_components_4d,
         benchmark_validation_components_5d,
+        benchmark_validation_components_2d_adversarial,
+        benchmark_validation_components_3d_adversarial,
+        benchmark_validation_components_4d_adversarial,
+        benchmark_validation_components_5d_adversarial,
         bench_bottlenecks
 );
 
diff --git a/docs/dev/commands.md b/docs/dev/commands.md
index d6adb4ac..48454e2e 100644
--- a/docs/dev/commands.md
+++ b/docs/dev/commands.md
@@ -103,6 +103,14 @@ just check
 `just check` is the non-mutating lint/validator bundle. It does not run tests,
 examples, or benchmarks.
 
+`just check` validates the default DenseSlotMap backend plus all feature
+combinations. The legacy SlotMap backend is kept as an optional compatibility
+canary; run it explicitly with:
+
+```bash
+just check-storage-backends
+```
+
 ---
 
 ## Documentation Validation
@@ -301,7 +309,8 @@ CI enforces:
 - tests
 
 Rust warnings are denied by the manifest lint policy and Clippy warnings are
-denied by the `just clippy` invocations. Keep any intentional warning-level
+denied by the `just clippy` invocations. `just check-storage-backends` separately
+checks the SlotMap backend with `--no-default-features`. Keep any intentional warning-level
 exceptions explicit in `Cargo.toml`.
 
 Agents must ensure changes pass CI locally before proposing patches.
diff --git a/justfile b/justfile
index a32552c9..d6c1b1f7 100644
--- a/justfile
+++ b/justfile
@@ -174,6 +174,11 @@ changelog-update: changelog
 check: lint
     @echo "✅ Checks complete!"
 
+# Optional SlotMap compatibility canary. DenseSlotMap is the default production
+# backend; run this when changing storage abstractions or before releases.
+check-storage-backends:
+    cargo clippy --workspace --all-targets --no-default-features -- -D warnings -W clippy::pedantic -W clippy::nursery -W clippy::cargo
+
 # CI simulation: comprehensive validation (matches .github/workflows/ci.yml)
 # Runs: checks + test workflow + examples
 ci: check test examples
@@ -197,9 +202,6 @@ clean:
 
 # Code quality and formatting
 clippy:
-    # SlotMap backend (disabled default DenseSlotMap)
-    cargo clippy --workspace --all-targets --no-default-features -- -D warnings -W clippy::pedantic -W clippy::nursery -W clippy::cargo
-
     # DenseSlotMap backend (default)
     cargo clippy --workspace --all-targets -- -D warnings -W clippy::pedantic -W clippy::nursery -W clippy::cargo
 
@@ -290,6 +292,7 @@ help-workflows:
     @echo "Larger/optional workflows:"
     @echo "  just ci-slow             # CI + slow tests (100+ vertices)"
     @echo "  just ci-baseline         # CI + save performance baseline"
+    @echo "  just check-storage-backends # Optional SlotMap compatibility canary"
     @echo "  just coverage            # Generate coverage report (HTML)"
     @echo "  just semgrep             # Run repository-owned Semgrep rules"
     @echo "  just compare-storage       # Compare SlotMap vs DenseSlotMap (~4-6 hours)"
@@ -415,6 +418,13 @@ profile toolchain="" code_ref="current":
     workdir="$repo_root"
     cleanup_worktree=0
 
+    cleanup() {
+        if [[ "$cleanup_worktree" -eq 1 ]]; then
+            git worktree remove --force "$workdir" >/dev/null 2>&1 || true
+            rm -rf "$(dirname "$workdir")"
+        fi
+    }
+
     if [[ "$requested_ref" == "current" && -n "$requested_toolchain" ]]; then
         if [[ ! "$requested_toolchain" =~ ^([0-9]+(\.[0-9]+){0,2}|stable|beta|nightly)([-+].*)?$ ]]; then
             requested_ref="$requested_toolchain"
@@ -426,17 +436,10 @@ profile toolchain="" code_ref="current":
         tmp_parent="$(mktemp -d "${TMPDIR:-/tmp}/delaunay-profile.XXXXXX")"
         workdir="$tmp_parent/worktree"
         cleanup_worktree=1
+        trap cleanup EXIT
         git worktree add --detach "$workdir" "$requested_ref"
     fi
 
-    cleanup() {
-        if [[ "$cleanup_worktree" -eq 1 ]]; then
-            git worktree remove --force "$workdir" >/dev/null 2>&1 || true
-            rm -rf "$(dirname "$workdir")"
-        fi
-    }
-    trap cleanup EXIT
-
     if [[ -z "$requested_toolchain" ]]; then
         requested_toolchain="$(
             grep -E '^[[:space:]]*channel[[:space:]]*=' "$workdir/rust-toolchain.toml" \
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index c2902f5f..bcb7fdbb 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -890,7 +890,7 @@ def _ci_suite_group_key(first_path_part: str) -> str | None:
         """Map a Criterion path prefix to a ci_performance_suite group key."""
         if first_path_part.startswith("tds_new_"):
             return "construction"
-        if first_path_part == "bistellar_flips_4d":
+        if first_path_part.startswith("bistellar_flips"):
             return "bistellar_flips"
         if first_path_part in CI_PERFORMANCE_SUITE_GROUPS:
             return first_path_part
diff --git a/scripts/ci/capture_profiling_metadata.sh b/scripts/ci/capture_profiling_metadata.sh
new file mode 100755
index 00000000..0faff056
--- /dev/null
+++ b/scripts/ci/capture_profiling_metadata.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# Write profiling-results/environment_metadata.md, a Markdown summary of the
+# code ref, commit, toolchain, and benchmark settings used for a profiling run.
+#
+# Optional environment variables (each may be unset or empty):
+#   PROFILE_METADATA_MODE    profiling mode label; when empty it is derived
+#                            from PROFILING_DEV_MODE ("1" => development,
+#                            anything else => production)
+#   BENCH_FILTER_VALUE       benchmark filter label; falls back to
+#                            PROFILE_METADATA_FILTER, then "All benchmarks"
+#   PROFILE_METADATA_TITLE   heading text (default: "Profiling Environment")
+#   GITHUB_REF_NAME          code ref label (default: current git ref name)
+#   RUNNER_OS                runner label (default: output of `uname -s`)
+set -euo pipefail
+
+mkdir -p profiling-results
+
+# First `channel = ...` entry of rust-toolchain.toml; left empty when the file
+# or key is missing (`|| true` keeps pipefail from aborting the script).
+declared_toolchain="$(
+	grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null |
+		head -n 1 |
+		cut -d '=' -f 2 |
+		tr -d ' "' ||
+		true
+)"
+# First `rust-version = ...` entry of Cargo.toml, with the same empty-on-miss behavior.
+rust_version="$(
+	grep -E '^rust-version[[:space:]]*=' Cargo.toml 2>/dev/null |
+		head -n 1 |
+		cut -d '=' -f 2 |
+		tr -d ' "' ||
+		true
+)"
+
+# An explicit PROFILE_METADATA_MODE wins; otherwise PROFILING_DEV_MODE decides.
+profiling_mode="${PROFILE_METADATA_MODE:-}"
+if [[ -z "$profiling_mode" ]]; then
+	profiling_mode="production"
+	if [[ "${PROFILING_DEV_MODE:-}" == "1" ]]; then
+		profiling_mode="development"
+	fi
+fi
+
+benchmark_filter="${BENCH_FILTER_VALUE:-${PROFILE_METADATA_FILTER:-All benchmarks}}"
+metadata_title="${PROFILE_METADATA_TITLE:-Profiling Environment}"
+# Resolve these before the output block: under `set -u` an unset variable would
+# otherwise abort the script after the redirect has already truncated the file.
+code_ref="${GITHUB_REF_NAME:-$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo unknown)}"
+runner_os="${RUNNER_OS:-$(uname -s)}"
+
+{
+	echo "# ${metadata_title}"
+	echo
+	echo "- Code ref: ${code_ref}"
+	echo "- Commit: $(git rev-parse HEAD)"
+	echo "- Declared rust-toolchain.toml channel: ${declared_toolchain}"
+	echo "- Cargo.toml rust-version: ${rust_version}"
+	echo "- rustc: $(rustc --version)"
+	echo "- cargo: $(cargo --version)"
+	echo "- Cargo profile: perf"
+	echo "- Benchmark filter: ${benchmark_filter}"
+	echo "- Profiling mode: ${profiling_mode}"
+	echo "- Runner: ${runner_os}"
+} >profiling-results/environment_metadata.md

From bf8f985771b5db3e291d365d08864278ec6c1a03 Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Mon, 27 Apr 2026 19:34:09 -0700
Subject: [PATCH 4/8] Changed: Update `uv` to `0.11.8` and refine benchmark
 suite tooling

- Pin the `uv` Python package manager to version `0.11.8` in the CI,
  benchmark, and baseline-generation workflows (the latter two previously
  installed `latest`) for consistent environments.
- Make internal benchmark logging conditional on the `bench-logging` feature
  in `ci_performance_suite`, reducing default log verbosity.
- Enhance memory profiling in `profiling_suite` with configurable percentile
  calculation for allocation summaries, improving analysis flexibility.
- Improve error handling and reporting in Python benchmark execution scripts
  for greater reliability and clearer failure feedback.
---
 .github/workflows/benchmarks.yml        |  3 +-
 .github/workflows/ci.yml                |  2 +-
 .github/workflows/generate-baseline.yml |  3 +-
 benches/ci_performance_suite.rs         | 87 ++++++++++++-------------
 benches/profiling_suite.rs              | 47 +++++++++----
 scripts/benchmark_utils.py              | 15 ++++-
 scripts/tests/test_benchmark_utils.py   | 22 ++++++-
 7 files changed, 110 insertions(+), 69 deletions(-)

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 35ba34d4..51afba6a 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -42,6 +42,7 @@ concurrency:
 env:
   CARGO_TERM_COLOR: always
   RUST_BACKTRACE: 1
+  UV_VERSION: "0.11.8"
   BENCHMARK_TIMEOUT: 1800 # 30 min; pre-computed seeds + reduced 5D counts keep runtime well under this
   DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT: 256 # fallback only; ci_performance_suite uses pre-computed seeds
 
@@ -64,7 +65,7 @@ jobs:
       - name: Install uv (Python package manager)
         uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
         with:
-          version: "latest"
+          version: ${{ env.UV_VERSION }}
 
       - name: Verify uv installation
         run: uv --version
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6e50c50a..241f1f2d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,7 +25,7 @@ env:
   MARKDOWNLINT_VERSION: "0.47.0"
   SHFMT_VERSION: "3.12.0"
   TYPOS_VERSION: "1.43.4"
-  UV_VERSION: "0.9.21"
+  UV_VERSION: "0.11.8"
 
 jobs:
   build:
diff --git a/.github/workflows/generate-baseline.yml b/.github/workflows/generate-baseline.yml
index a2884b2f..7b49d2bc 100644
--- a/.github/workflows/generate-baseline.yml
+++ b/.github/workflows/generate-baseline.yml
@@ -25,6 +25,7 @@ permissions:
 env:
   CARGO_TERM_COLOR: always
   RUST_BACKTRACE: 1
+  UV_VERSION: "0.11.8"
   # Seed search limit for both old (pre-v0.8) and current env var names.
   # Old tags read DELAUNAY_BENCH_SEED_SEARCH_LIMIT; current code reads
   # DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT. Setting both ensures backward
@@ -54,7 +55,7 @@ jobs:
       - name: Install uv (Python package manager)
         uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
         with:
-          version: "latest"
+          version: ${{ env.UV_VERSION }}
 
       - name: Verify uv installation
         run: uv --version
diff --git a/benches/ci_performance_suite.rs b/benches/ci_performance_suite.rs
index 3f3ee791..a06adaf2 100644
--- a/benches/ci_performance_suite.rs
+++ b/benches/ci_performance_suite.rs
@@ -43,7 +43,8 @@ use delaunay::prelude::triangulation::{
 };
 use delaunay::vertex;
 use std::{env, hint::black_box, num::NonZeroUsize, sync::Once};
-use tracing::{error, warn};
+#[cfg(feature = "bench-logging")]
+use tracing::warn;
 
 /// Default point counts for 2D–4D benchmarks.
 const COUNTS: &[usize] = &[10, 25, 50];
@@ -297,16 +298,27 @@ fn prepare_data<const D: usize>(
     attempts: NonZeroUsize,
 ) -> (u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>) {
     // Fast path: use the pre-computed seed (single verification construction)
-    if let Some(seed) = known_seed(D, count) {
-        if let Some(result) = find_seed_vertices::<D>(seed, count, bounds, 1, attempts) {
-            return result;
+    match known_seed(D, count).map(|seed| {
+        (
+            seed,
+            find_seed_vertices::<D>(seed, count, bounds, 1, attempts),
+        )
+    }) {
+        Some((_seed, Some(result))) => return result,
+        Some((seed, None)) => {
+            #[cfg(not(feature = "bench-logging"))]
+            let _ = seed;
+            #[cfg(feature = "bench-logging")]
+            {
+                warn!(
+                    known_seed = seed,
+                    dim = D,
+                    count,
+                    "known seed failed, falling back to runtime search"
+                );
+            }
         }
-        warn!(
-            known_seed = seed,
-            dim = D,
-            count,
-            "known seed failed, falling back to runtime search"
-        );
+        None => {}
     }
 
     // Slow fallback: runtime search from the base seed
@@ -418,18 +430,26 @@ fn prepare_adv_data<const D: usize>(
     count: usize,
     attempts: NonZeroUsize,
 ) -> (u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>) {
-    if !discover_seeds_enabled()
-        && let Some(seed) = known_adv_seed(D, count)
-    {
-        if let Some(result) = stable_adv_points::<D>(seed, count, attempts) {
-            return result;
+    if !discover_seeds_enabled() {
+        match known_adv_seed(D, count)
+            .map(|seed| (seed, stable_adv_points::<D>(seed, count, attempts)))
+        {
+            Some((_seed, Some(result))) => return result,
+            Some((seed, None)) => {
+                #[cfg(not(feature = "bench-logging"))]
+                let _ = seed;
+                #[cfg(feature = "bench-logging")]
+                {
+                    warn!(
+                        known_seed = seed,
+                        dim = D,
+                        count,
+                        "known adversarial seed failed, falling back to runtime search"
+                    );
+                }
+            }
+            None => {}
         }
-        warn!(
-            known_seed = seed,
-            dim = D,
-            count,
-            "known adversarial seed failed, falling back to runtime search"
-        );
     }
 
     let start_seed = dim_seed
@@ -698,10 +718,6 @@ fn roundtrip_k3_4d(dt: &mut FlipTriangulation4, ridge: RidgeHandle) {
         .expect("k=3 inverse should succeed after k=3 flip");
 }
 
-fn bench_logging_enabled() -> bool {
-    env::var("DELAUNAY_BENCH_LOG").is_ok_and(|value| value != "0")
-}
-
 fn discover_seeds_enabled() -> bool {
     env::var("DELAUNAY_BENCH_DISCOVER_SEEDS").is_ok_and(|value| value != "0")
 }
@@ -836,17 +852,6 @@ macro_rules! benchmark_tds_new_dimension {
                             }
                             Err(err) => {
                                 let error = format!("{err:?}");
-                                if bench_logging_enabled() {
-                                    error!(
-                                        dim = $dim,
-                                        count,
-                                        seed,
-                                        bounds = ?bounds,
-                                        sample_points = ?sample_points,
-                                        error = %error,
-                                        "DelaunayTriangulation::new failed"
-                                    );
-                                }
                                 panic!(
                                     "DelaunayTriangulation::new failed for {}D: {error}; dim={}; count={}; seed={}; bounds={:?}; sample_points={sample_points:?}",
                                     $dim,
@@ -886,16 +891,6 @@ macro_rules! benchmark_tds_new_dimension {
                                 }
                                 Err(err) => {
                                     let error = format!("{err:?}");
-                                    if bench_logging_enabled() {
-                                        error!(
-                                            dim = $dim,
-                                            count,
-                                            seed,
-                                            sample_points = ?sample_points,
-                                            error = %error,
-                                            "adversarial DelaunayTriangulation::new failed"
-                                        );
-                                    }
                                     panic!(
                                         "adversarial DelaunayTriangulation::new failed for {}D: {error}; dim={}; count={}; seed={}; sample_points={sample_points:?}",
                                         $dim,
diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index be835182..31d9ba9a 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -477,19 +477,34 @@ fn bench_scaling(c: &mut Criterion) {
 // Memory Usage Profiling
 // ============================================================================
 
-/// Calculate percentile from a slice of values using nearest-rank method
-/// Supports configurable percentile via environment variable `BENCH_PERCENTILE` (default: 95)
-fn calculate_percentile(values: &mut [u64]) -> u64 {
+/// Read the memory summary percentile from `BENCH_PERCENTILE` (default: 95).
+fn configured_percentile() -> usize {
+    env::var("BENCH_PERCENTILE")
+        .ok()
+        .and_then(|s| s.parse::<usize>().ok())
+        .map_or(95, |p| p.clamp(1, 100))
+}
+
+/// Format a percentile as an ordinal label for the memory summary.
+fn percentile_label(percentile: usize) -> String {
+    let suffix = match percentile % 100 {
+        11..=13 => "th",
+        _ => match percentile % 10 {
+            1 => "st",
+            2 => "nd",
+            3 => "rd",
+            _ => "th",
+        },
+    };
+    format!("{percentile}{suffix}")
+}
+
+/// Calculate percentile from a slice of values using nearest-rank method.
+fn calculate_percentile(values: &mut [u64], percentile: usize) -> u64 {
     if values.is_empty() {
         return 0;
     }
 
-    // Parse percentile from environment, defaulting to 95
-    let percentile = env::var("BENCH_PERCENTILE")
-        .ok()
-        .and_then(|s| s.parse::<usize>().ok())
-        .map_or(95, |p| p.clamp(1, 100)); // Clamp to valid percentile range
-
     values.sort_unstable();
     let n = values.len();
     // nearest-rank: ceil(p/100 * n), clamped to [1, n]
@@ -506,7 +521,8 @@ fn print_alloc_summary(
     info: &AllocationInfo,
     description: &str,
     actual_point_count: usize,
-    percentile_95: u64,
+    percentile: usize,
+    percentile_value: u64,
 ) {
     println!("\n=== Memory Allocation Summary for {description} ({actual_point_count} points) ===");
     println!("Total allocations: {}", info.count_total);
@@ -520,9 +536,10 @@ fn print_alloc_summary(
         info.bytes_max as f64 / (1024.0 * 1024.0)
     );
     println!(
-        "95th percentile bytes: {} ({:.2} MB)",
-        percentile_95,
-        percentile_95 as f64 / (1024.0 * 1024.0)
+        "{} percentile bytes: {} ({:.2} MB)",
+        percentile_label(percentile),
+        percentile_value,
+        percentile_value as f64 / (1024.0 * 1024.0)
     );
     if actual_point_count > 0 {
         println!(
@@ -613,12 +630,14 @@ fn bench_memory_usage<const D: usize>(
                     // Calculate percentile of bytes_max (configurable via BENCH_PERCENTILE, default 95th)
                     let mut bytes_max_values: Vec<u64> =
                         allocation_infos.iter().map(|i| i.bytes_max).collect();
-                    let percentile_value = calculate_percentile(&mut bytes_max_values);
+                    let percentile = configured_percentile();
+                    let percentile_value = calculate_percentile(&mut bytes_max_values, percentile);
 
                     print_alloc_summary(
                         &avg_info,
                         &format!("{D}D Triangulation"),
                         avg_actual_count,
+                        percentile,
                         percentile_value,
                     );
                 }
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index bcb7fdbb..cc455b7a 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -479,17 +479,26 @@ def _run_ci_performance_suite(self, cargo_profile: str | None = None) -> bool:
             profile = cargo_profile if cargo_profile is not None else TRUSTED_BENCH_PROFILE
             cargo_args = ["bench", "--profile", profile, "--bench", "ci_performance_suite", "--", *DEV_MODE_BENCH_ARGS]
 
-            run_cargo_command(
+            result = run_cargo_command(
                 cargo_args,
                 cwd=self.project_root,
                 timeout=900,
                 capture_output=True,
             )
+            if result.returncode != 0:
+                print(f"❌ Error running ci_performance_suite benchmarks: cargo exited with status {result.returncode}")
+                return False
 
             print("✅ ci_performance_suite benchmarks completed successfully")
             return True
 
-        except Exception as e:
+        except ExecutableNotFoundError as e:
+            print(f"❌ Error running ci_performance_suite benchmarks: {e}")
+            return False
+        except subprocess.TimeoutExpired as e:
+            print(f"❌ Error running ci_performance_suite benchmarks: {e}")
+            return False
+        except OSError as e:
             print(f"❌ Error running ci_performance_suite benchmarks: {e}")
             return False
 
@@ -899,7 +908,7 @@ def _ci_suite_group_key(first_path_part: str) -> str | None:
     @staticmethod
     def _ci_suite_dimension(benchmark_id: str) -> str:
         """Extract the dimension label from a ci_performance_suite benchmark ID."""
-        match = re.search(r"(?:^|_)(\d+)d(?:_|/|$)", benchmark_id)
+        match = re.search(r"(?:^|_|/)(\d+)d(?:_|/|$)", benchmark_id)
         if match:
             return f"{match.group(1)}D"
         return "n/a"
diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py
index 28a448ed..4a20c89f 100644
--- a/scripts/tests/test_benchmark_utils.py
+++ b/scripts/tests/test_benchmark_utils.py
@@ -2201,6 +2201,7 @@ def write_estimate(path_parts, mean_ns):
             assert "well-conditioned" in content
             assert "#### Boundary facets" in content
             assert "`boundary_facets/boundary_facets_3d_adversarial/50`" in content
+            assert "| `boundary_facets/boundary_facets_3d_adversarial/50` | 3D | 50 | adversarial |" in content
             assert "adversarial" in content
             assert "#### Bistellar flips" in content
             assert "`bistellar_flips_4d/k2_roundtrip`" in content
@@ -2387,7 +2388,7 @@ def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys):
     @patch("benchmark_utils.run_cargo_command")
     def test_run_ci_performance_suite_success(self, mock_cargo):
         """Test running the public API CI performance suite successfully."""
-        mock_cargo.return_value = Mock(stdout="")
+        mock_cargo.return_value = Mock(returncode=0, stdout="")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2409,7 +2410,7 @@ def test_run_ci_performance_suite_success(self, mock_cargo):
     @patch("benchmark_utils.run_cargo_command")
     def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo):
         """Test running the public API CI performance suite with an explicit profile."""
-        mock_cargo.return_value = Mock(stdout="")
+        mock_cargo.return_value = Mock(returncode=0, stdout="")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2423,10 +2424,25 @@ def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo)
             args = mock_cargo.call_args.args[0]
             assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "ci_performance_suite"]
 
+    @patch("benchmark_utils.run_cargo_command")
+    def test_run_ci_performance_suite_nonzero_exit(self, mock_cargo, capsys):
+        """Test handling ci_performance_suite nonzero process exits."""
+        mock_cargo.return_value = Mock(returncode=101, stdout="", stderr="benchmark failed")
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+            generator = PerformanceSummaryGenerator(project_root)
+
+            success = generator._run_ci_performance_suite()
+
+            assert success is False
+            captured = capsys.readouterr()
+            assert "cargo exited with status 101" in captured.out
+
     @patch("benchmark_utils.run_cargo_command")
     def test_run_ci_performance_suite_failure(self, mock_cargo, capsys):
         """Test handling ci_performance_suite benchmark failures."""
-        mock_cargo.side_effect = Exception("Benchmark failed")
+        mock_cargo.side_effect = OSError("Benchmark failed")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)

From d06e2a61e8efa54fd5b264c296bfaeaaca29be67 Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Mon, 27 Apr 2026 22:19:33 -0700
Subject: [PATCH 5/8] Changed: Introduce explicit benchmark IDs and refine
 memory profiling

Adopt explicit `benchmark_id` fields in Python scripts to enable
more robust identification, storage, and comparison of benchmark
results. This is critical for handling complex benchmark suites,
such as `ci_performance_suite`, which may not always have a simple
`points`-based dimension, or which may have multiple benchmarks
for the same points/dimension combination. Baseline files and
markdown reports now support these expanded IDs.

Additionally, internal memory profiling in `profiling_suite.rs`
is made conditional on the `count-allocations` feature. When the
feature is not enabled, allocation tracking structures and logic
are omitted or reduced to no-ops, reducing compilation overhead
and clarifying the intent of memory statistics.
---
 benches/profiling_suite.rs             | 143 +++++++++++------------
 scripts/benchmark_models.py            |  55 +++++++--
 scripts/benchmark_utils.py             | 151 ++++++++++++++++++++-----
 scripts/tests/test_benchmark_models.py |  21 ++++
 scripts/tests/test_benchmark_utils.py  | 124 ++++++++++++++++++++
 5 files changed, 382 insertions(+), 112 deletions(-)

diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index 31d9ba9a..21851d0b 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -88,17 +88,6 @@ fn init_tracing() {
 #[cfg(not(feature = "bench-logging"))]
 const fn init_tracing() {}
 
-#[cfg(not(feature = "count-allocations"))]
-macro_rules! bench_warn {
-    ($($arg:tt)*) => {{
-        #[cfg(feature = "bench-logging")]
-        {
-            init_tracing();
-            tracing::warn!($($arg)*);
-        }
-    }};
-}
-
 // SmallBuffer size constants for different use cases
 const BENCHMARK_ITERATION_BUFFER_SIZE: usize = 8; // For tracking allocation info across benchmark iterations
 const SIMPLEX_VERTICES_BUFFER_SIZE: usize = 4; // 3D simplex = 4 vertices
@@ -116,26 +105,19 @@ use allocation_counter::{AllocationInfo, measure};
 
 #[cfg(not(feature = "count-allocations"))]
 #[derive(Debug, Default)]
-struct AllocationInfo {
-    count_total: u64,
-    count_current: i64,
-    count_max: u64,
-    bytes_total: u64,
-    bytes_current: i64,
-    bytes_max: u64,
-}
+struct AllocationInfo;
 
 #[cfg(not(feature = "count-allocations"))]
 fn measure(f: impl FnOnce()) -> AllocationInfo {
     f();
-    AllocationInfo::default()
+    AllocationInfo
 }
 
 #[cfg(not(feature = "count-allocations"))]
 fn print_alloc_banner_once() {
     static ONCE: Once = Once::new();
     ONCE.call_once(|| {
-        bench_warn!("count-allocations feature not enabled; memory stats are placeholders.");
+        println!("allocation stats unavailable: count-allocations feature disabled");
     });
 }
 
@@ -478,6 +460,7 @@ fn bench_scaling(c: &mut Criterion) {
 // ============================================================================
 
 /// Read the memory summary percentile from `BENCH_PERCENTILE` (default: 95).
+#[cfg(feature = "count-allocations")]
 fn configured_percentile() -> usize {
     env::var("BENCH_PERCENTILE")
         .ok()
@@ -486,6 +469,7 @@ fn configured_percentile() -> usize {
 }
 
 /// Format a percentile as an ordinal label for the memory summary.
+#[cfg(feature = "count-allocations")]
 fn percentile_label(percentile: usize) -> String {
     let suffix = match percentile % 100 {
         11..=13 => "th",
@@ -500,6 +484,7 @@ fn percentile_label(percentile: usize) -> String {
 }
 
 /// Calculate percentile from a slice of values using nearest-rank method.
+#[cfg(feature = "count-allocations")]
 fn calculate_percentile(values: &mut [u64], percentile: usize) -> u64 {
     if values.is_empty() {
         return 0;
@@ -516,6 +501,7 @@ fn calculate_percentile(values: &mut [u64], percentile: usize) -> u64 {
 }
 
 /// Print memory allocation summary
+#[cfg(feature = "count-allocations")]
 #[expect(clippy::cast_precision_loss)]
 fn print_alloc_summary(
     info: &AllocationInfo,
@@ -553,7 +539,7 @@ fn print_alloc_summary(
 }
 
 /// Generic helper to benchmark memory usage for a specific dimension D
-#[expect(clippy::cast_possible_wrap)]
+#[cfg_attr(feature = "count-allocations", expect(clippy::cast_possible_wrap))]
 fn bench_memory_usage<const D: usize>(
     group: &mut BenchmarkGroup<'_, WallTime>,
     bench_id_prefix: &str,
@@ -590,56 +576,71 @@ fn bench_memory_usage<const D: usize>(
                     actual_point_counts.push(pts_len);
                 }
 
-                // Report memory usage summary if available
-                if !allocation_infos.is_empty() {
-                    // Safe cast for division - allocation_infos.len() is guaranteed to be small and non-zero
-                    let divisor_unsigned = allocation_infos.len() as u64;
-                    let divisor_signed = allocation_infos.len() as i64;
-                    let avg_info = AllocationInfo {
-                        count_total: allocation_infos.iter().map(|i| i.count_total).sum::<u64>()
-                            / divisor_unsigned,
-                        count_current: allocation_infos
-                            .iter()
-                            .map(|i| i.count_current)
-                            .sum::<i64>()
-                            / divisor_signed,
-                        count_max: allocation_infos
-                            .iter()
-                            .map(|i| i.count_max)
-                            .max()
-                            .unwrap_or(0),
-                        bytes_total: allocation_infos.iter().map(|i| i.bytes_total).sum::<u64>()
-                            / divisor_unsigned,
-                        bytes_current: allocation_infos
-                            .iter()
-                            .map(|i| i.bytes_current)
-                            .sum::<i64>()
-                            / divisor_signed,
-                        bytes_max: allocation_infos
-                            .iter()
-                            .map(|i| i.bytes_max)
-                            .max()
-                            .unwrap_or(0),
-                    };
-                    let avg_actual_count = if actual_point_counts.is_empty() {
-                        0
-                    } else {
-                        actual_point_counts.iter().sum::<usize>() / actual_point_counts.len()
-                    };
+                #[cfg(not(feature = "count-allocations"))]
+                {
+                    print_alloc_banner_once();
+                }
 
-                    // Calculate percentile of bytes_max (configurable via BENCH_PERCENTILE, default 95th)
-                    let mut bytes_max_values: Vec<u64> =
-                        allocation_infos.iter().map(|i| i.bytes_max).collect();
-                    let percentile = configured_percentile();
-                    let percentile_value = calculate_percentile(&mut bytes_max_values, percentile);
-
-                    print_alloc_summary(
-                        &avg_info,
-                        &format!("{D}D Triangulation"),
-                        avg_actual_count,
-                        percentile,
-                        percentile_value,
-                    );
+                #[cfg(feature = "count-allocations")]
+                {
+                    // Report memory usage summary if available
+                    if !allocation_infos.is_empty() {
+                        // Safe cast for division - allocation_infos.len() is guaranteed to be small and non-zero
+                        let divisor_unsigned = allocation_infos.len() as u64;
+                        let divisor_signed = allocation_infos.len() as i64;
+                        let avg_info = AllocationInfo {
+                            count_total: allocation_infos
+                                .iter()
+                                .map(|i| i.count_total)
+                                .sum::<u64>()
+                                / divisor_unsigned,
+                            count_current: allocation_infos
+                                .iter()
+                                .map(|i| i.count_current)
+                                .sum::<i64>()
+                                / divisor_signed,
+                            count_max: allocation_infos
+                                .iter()
+                                .map(|i| i.count_max)
+                                .max()
+                                .unwrap_or(0),
+                            bytes_total: allocation_infos
+                                .iter()
+                                .map(|i| i.bytes_total)
+                                .sum::<u64>()
+                                / divisor_unsigned,
+                            bytes_current: allocation_infos
+                                .iter()
+                                .map(|i| i.bytes_current)
+                                .sum::<i64>()
+                                / divisor_signed,
+                            bytes_max: allocation_infos
+                                .iter()
+                                .map(|i| i.bytes_max)
+                                .max()
+                                .unwrap_or(0),
+                        };
+                        let avg_actual_count = if actual_point_counts.is_empty() {
+                            0
+                        } else {
+                            actual_point_counts.iter().sum::<usize>() / actual_point_counts.len()
+                        };
+
+                        // Calculate percentile of bytes_max (configurable via BENCH_PERCENTILE, default 95th)
+                        let mut bytes_max_values: Vec<u64> =
+                            allocation_infos.iter().map(|i| i.bytes_max).collect();
+                        let percentile = configured_percentile();
+                        let percentile_value =
+                            calculate_percentile(&mut bytes_max_values, percentile);
+
+                        print_alloc_summary(
+                            &avg_info,
+                            &format!("{D}D Triangulation"),
+                            avg_actual_count,
+                            percentile,
+                            percentile_value,
+                        );
+                    }
                 }
 
                 total_time
diff --git a/scripts/benchmark_models.py b/scripts/benchmark_models.py
index 98039ca6..e9dbe888 100644
--- a/scripts/benchmark_models.py
+++ b/scripts/benchmark_models.py
@@ -24,6 +24,12 @@ class BenchmarkData:
     throughput_mean: float | None = None
     throughput_high: float | None = None
     throughput_unit: str | None = None
+    benchmark_id: str = ""
+
+    @property
+    def comparison_key(self) -> str:
+        """Return the stable key used for baseline/regression matching."""
+        return self.benchmark_id or f"{self.points}_{self.dimension}"
 
     def with_timing(self, low: float, mean: float, high: float, unit: str) -> "BenchmarkData":
         """Set timing data (fluent interface)."""
@@ -45,8 +51,10 @@ def to_baseline_format(self) -> str:
         """Convert to baseline file format."""
         lines = [
             f"=== {self.points} Points ({self.dimension}) ===",
-            f"Time: [{self.time_low}, {self.time_mean}, {self.time_high}] {self.time_unit}",
         ]
+        if self.benchmark_id:
+            lines.append(f"Benchmark ID: {self.benchmark_id}")
+        lines.append(f"Time: [{self.time_low}, {self.time_mean}, {self.time_high}] {self.time_unit}")
 
         if self.throughput_low is not None and self.throughput_mean is not None and self.throughput_high is not None and self.throughput_unit:
             lines.append(f"Throughput: [{self.throughput_low}, {self.throughput_mean}, {self.throughput_high}] {self.throughput_unit}")
@@ -179,6 +187,15 @@ def parse_time_data(benchmark: BenchmarkData, line: str) -> bool:
     return False
 
 
+def _parse_benchmark_id_data(benchmark: BenchmarkData, line: str) -> bool:
+    """Parse optional baseline benchmark identifier metadata."""
+    match = re.match(r"^Benchmark ID:\s*(.+)$", line.strip())
+    if match:
+        benchmark.benchmark_id = match.group(1).strip()
+        return True
+    return False
+
+
 def parse_throughput_data(benchmark: BenchmarkData, line: str) -> bool:
     """
     Parse throughput data lines to extract throughput information.
@@ -235,6 +252,9 @@ def extract_benchmark_data(baseline_content: str) -> list[BenchmarkData]:
             continue
 
         if current_benchmark:
+            if _parse_benchmark_id_data(current_benchmark, line):
+                continue
+
             # Try to parse time data
             if parse_time_data(current_benchmark, line):
                 continue
@@ -332,15 +352,23 @@ def _dim_key(d: str) -> tuple[int, str]:
 
     for dimension in sorted(by_dimension.keys(), key=_dim_key):
         dim_benchmarks = sorted(by_dimension[dimension], key=lambda b: b.points)
-
-        lines.extend(
-            [
-                f"### {dimension} Triangulation Performance",
-                "",
-                "| Points | Time (mean) | Throughput (mean) | Scaling |",
-                "|--------|-------------|-------------------|----------|",
-            ],
-        )
+        include_benchmark_id = any(bench.benchmark_id for bench in dim_benchmarks)
+
+        lines.extend([f"### {dimension} Triangulation Performance", ""])
+        if include_benchmark_id:
+            lines.extend(
+                [
+                    "| Benchmark ID | Points | Time (mean) | Throughput (mean) | Scaling |",
+                    "|--------------|--------|-------------|-------------------|----------|",
+                ],
+            )
+        else:
+            lines.extend(
+                [
+                    "| Points | Time (mean) | Throughput (mean) | Scaling |",
+                    "|--------|-------------|-------------------|----------|",
+                ],
+            )
 
         # Calculate scaling relative to smallest benchmark
         first_nonzero = next((b for b in dim_benchmarks if b.time_mean and b.time_mean > 0), None)
@@ -362,7 +390,12 @@ def _dim_key(d: str) -> tuple[int, str]:
             else:
                 scaling_str = "N/A"
 
-            lines.append(f"| {bench.points} | {time_str} | {throughput_str} | {scaling_str} |")
+            if include_benchmark_id:
+                lines.append(
+                    f"| `{bench.comparison_key}` | {bench.points} | {time_str} | {throughput_str} | {scaling_str} |",
+                )
+            else:
+                lines.append(f"| {bench.points} | {time_str} | {throughput_str} | {scaling_str} |")
 
         lines.append("")  # Empty line between tables
 
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index cc455b7a..84be00ea 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -1570,7 +1570,7 @@ class CriterionParser:
     """Parse Criterion benchmark output and JSON data."""
 
     @staticmethod
-    def parse_estimates_json(estimates_path: Path, points: int, dimension: str) -> BenchmarkData | None:
+    def parse_estimates_json(estimates_path: Path, points: int | None, dimension: str) -> BenchmarkData | None:
         """
         Parse Criterion estimates.json file to extract benchmark data.
 
@@ -1599,27 +1599,98 @@ def parse_estimates_json(estimates_path: Path, points: int, dimension: str) -> B
             low_us = low_ns / 1000
             high_us = high_ns / 1000
 
-            # Calculate throughput in Kelem/s
-            # Throughput = points / time_in_seconds
-            # For time in microseconds: throughput = points * 1,000,000 / time_us
-            # For Kelem/s: throughput_kelem = (points * 1,000,000 / time_us) / 1000 = points * 1000 / time_us
-            # Guard against division by zero for very fast benchmarks
-            eps = 1e-9  # µs - minimum time to prevent division by zero
-            thrpt_mean = points * 1000 / max(mean_us, eps)
-            thrpt_low = points * 1000 / max(high_us, eps)  # Lower time = higher throughput
-            thrpt_high = points * 1000 / max(low_us, eps)  # Higher time = lower throughput
-
-            return (
-                BenchmarkData(points, dimension)
-                # Baseline timing values are rounded to 2 decimal places for consistency
-                # This standardizes storage format and avoids spurious precision differences
-                .with_timing(round(low_us, 2), round(mean_us, 2), round(high_us, 2), "µs")
-                .with_throughput(round(thrpt_low, 3), round(thrpt_mean, 3), round(thrpt_high, 3), "Kelem/s")
-            )
+            benchmark = BenchmarkData(points or 0, dimension).with_timing(round(low_us, 2), round(mean_us, 2), round(high_us, 2), "µs")
+
+            if points is not None:
+                # Calculate throughput in Kelem/s
+                # Throughput = points / time_in_seconds
+                # For time in microseconds: throughput = points * 1,000,000 / time_us
+                # For Kelem/s: throughput_kelem = (points * 1,000,000 / time_us) / 1000 = points * 1000 / time_us
+                # Guard against division by zero for very fast benchmarks
+                eps = 1e-9  # µs - minimum time to prevent division by zero
+                thrpt_mean = points * 1000 / max(mean_us, eps)
+                thrpt_low = points * 1000 / max(high_us, eps)  # Lower time = higher throughput
+                thrpt_high = points * 1000 / max(low_us, eps)  # Higher time = lower throughput
+                benchmark.with_throughput(round(thrpt_low, 3), round(thrpt_mean, 3), round(thrpt_high, 3), "Kelem/s")
+
+            return benchmark
 
         except (FileNotFoundError, json.JSONDecodeError, KeyError, ZeroDivisionError, ValueError):
             return None
 
+    @staticmethod
+    def _ci_suite_group_key(first_path_part: str) -> str | None:
+        """Map a Criterion path prefix to a ci_performance_suite group key."""
+        if first_path_part.startswith("tds_new_"):
+            return "construction"
+        if first_path_part.startswith("bistellar_flips"):
+            return "bistellar_flips"
+        if first_path_part in CI_PERFORMANCE_SUITE_GROUPS:
+            return first_path_part
+        return None
+
+    @staticmethod
+    def _ci_suite_dimension(benchmark_id: str) -> str:
+        """Extract the dimension label from a ci_performance_suite benchmark ID."""
+        match = re.search(r"(?:^|_|/)(\d+)d(?:_|/|$)", benchmark_id)
+        if match:
+            return f"{match.group(1)}D"
+        return "n/a"
+
+    @staticmethod
+    def _ci_suite_input_points(path_parts: tuple[str, ...]) -> int | None:
+        """Extract the numeric input size when the Criterion ID has one."""
+        if path_parts and path_parts[-1].isdigit():
+            return int(path_parts[-1])
+        return None
+
+    @staticmethod
+    def _process_ci_performance_suite_results(criterion_dir: Path) -> list[BenchmarkData]:
+        """Discover ci_performance_suite Criterion results with expanded benchmark IDs."""
+        estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {}
+
+        for estimates_path in sorted(criterion_dir.glob("**/estimates.json")):
+            if estimates_path.parent.name not in {"base", "new"}:
+                continue
+
+            try:
+                path_parts = estimates_path.relative_to(criterion_dir).parts[:-2]
+            except ValueError:
+                continue
+
+            if not path_parts or CriterionParser._ci_suite_group_key(path_parts[0]) is None:
+                continue
+
+            existing = estimates_by_id.get(path_parts)
+            if existing is None or (existing[0] == "base" and estimates_path.parent.name == "new"):
+                estimates_by_id[path_parts] = (estimates_path.parent.name, estimates_path)
+
+        results: list[BenchmarkData] = []
+        for path_parts, (_, estimates_path) in estimates_by_id.items():
+            benchmark_id = "/".join(path_parts)
+            dimension = CriterionParser._ci_suite_dimension(benchmark_id)
+            if dimension == "n/a":
+                continue
+
+            points = CriterionParser._ci_suite_input_points(path_parts)
+            benchmark_data = CriterionParser.parse_estimates_json(estimates_path, points, dimension)
+            if benchmark_data is None:
+                continue
+
+            benchmark_data.benchmark_id = benchmark_id
+            results.append(benchmark_data)
+
+        group_order = {group: index for index, group in enumerate(CI_PERFORMANCE_SUITE_GROUP_ORDER)}
+        results.sort(
+            key=lambda result: (
+                group_order.get(CriterionParser._ci_suite_group_key(result.benchmark_id.split("/", 1)[0]) or "", sys.maxsize),
+                int(result.dimension.removesuffix("D")) if result.dimension.removesuffix("D").isdigit() else sys.maxsize,
+                result.points,
+                result.benchmark_id,
+            ),
+        )
+        return results
+
     @staticmethod
     def _extract_dimension_from_dir(dim_dir: Path) -> str | None:
         """Extract dimension string from directory name (e.g., '2d' -> '2')."""
@@ -1660,7 +1731,7 @@ def _process_point_directory(point_dir: Path, dim: str) -> BenchmarkData | None:
     def _process_fallback_discovery(criterion_dir: Path) -> list[BenchmarkData]:
         """Recursively discover estimates.json files when structured search fails."""
         results = []
-        seen: set[tuple[int, str]] = set()
+        seen: set[str] = set()
 
         for estimates_file in criterion_dir.rglob("estimates.json"):
             parent_name = estimates_file.parent.name
@@ -1679,7 +1750,7 @@ def _process_fallback_discovery(criterion_dir: Path) -> list[BenchmarkData]:
 
             points = int(points_dir.name)
             dimension = f"{dim_match.group(1)}D"
-            key = (points, dimension)
+            key = f"{points}_{dimension}"
 
             # Prefer "new" over "base" when duplicates exist
             if key in seen and parent_name == "base":
@@ -1709,6 +1780,10 @@ def find_criterion_results(target_dir: Path) -> list[BenchmarkData]:
         if not criterion_dir.exists():
             return results
 
+        results = CriterionParser._process_ci_performance_suite_results(criterion_dir)
+        if results:
+            return results
+
         # Look for benchmark results in *d directories (group names can change)
         for dim_dir in sorted(p for p in criterion_dir.iterdir() if p.is_dir() and re.search(r"\d+[dD]$", p.name)):
             dim = CriterionParser._extract_dimension_from_dir(dim_dir)
@@ -1975,10 +2050,18 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData
             if match:
                 points = int(match.group(1))
                 dimension = f"{match.group(2)}D"
+                benchmark_id = ""
+                next_line_index = i + 1
+
+                if next_line_index < len(lines):
+                    id_match = re.match(r"Benchmark ID:\s*(.+)", lines[next_line_index].strip())
+                    if id_match:
+                        benchmark_id = id_match.group(1).strip()
+                        next_line_index += 1
 
                 # Parse time line
-                if i + 1 < len(lines):
-                    time_line = lines[i + 1].strip()
+                if next_line_index < len(lines):
+                    time_line = lines[next_line_index].strip()
                     time_match = re.match(r"Time: \[([0-9.]+), ([0-9.]+), ([0-9.]+)\] (.+)", time_line)
                     if time_match:
                         time_low = float(time_match.group(1))
@@ -1990,8 +2073,8 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData
                         throughput_low = throughput_mean = throughput_high = None
                         throughput_unit = None
 
-                        if i + 2 < len(lines):
-                            thrpt_line = lines[i + 2].strip()
+                        if next_line_index + 1 < len(lines):
+                            thrpt_line = lines[next_line_index + 1].strip()
                             thrpt_match = re.match(r"Throughput: \[([0-9.]+), ([0-9.]+), ([0-9.]+)\] (.+)", thrpt_line)
                             if thrpt_match:
                                 throughput_low = float(thrpt_match.group(1))
@@ -1999,8 +2082,7 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData
                                 throughput_high = float(thrpt_match.group(3))
                                 throughput_unit = thrpt_match.group(4)
 
-                        key = f"{points}_{dimension}"
-                        benchmark = BenchmarkData(points, dimension).with_timing(time_low, time_mean, time_high, time_unit)
+                        benchmark = BenchmarkData(points, dimension, benchmark_id=benchmark_id).with_timing(time_low, time_mean, time_high, time_unit)
                         if throughput_mean is not None and throughput_low is not None and throughput_high is not None and throughput_unit is not None:
                             benchmark.with_throughput(
                                 throughput_low,
@@ -2011,13 +2093,13 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData
                         else:
                             logger.debug(
                                 "Missing throughput data for %s: low=%s mean=%s high=%s unit=%s",
-                                key,
+                                benchmark.comparison_key,
                                 throughput_low,
                                 throughput_mean,
                                 throughput_high,
                                 throughput_unit,
                             )
-                        results[key] = benchmark
+                        results[benchmark.comparison_key] = benchmark
 
             i += 1
 
@@ -2160,14 +2242,21 @@ def _write_comparison_header(self, f, metadata: dict[str, str], hardware_report:
             f.write(f"{sampling_warning}\n\n")
         f.write(hardware_report)
 
+    @staticmethod
+    def _matching_baseline(current: BenchmarkData, baseline_results: dict[str, BenchmarkData]) -> BenchmarkData | None:
+        """Return the matching baseline entry, using legacy keys only for legacy current IDs."""
+        baseline_benchmark = baseline_results.get(current.comparison_key)
+        if baseline_benchmark is not None or current.benchmark_id:
+            return baseline_benchmark
+        return baseline_results.get(f"{current.points}_{current.dimension}")
+
     def _write_performance_comparison(self, f: TextIO, current_results: list[BenchmarkData], baseline_results: dict[str, BenchmarkData]) -> bool:
         """Write performance comparison section and return whether average regression exceeds threshold."""
         time_changes = []  # Track all time changes for average calculation
         individual_regressions = 0
 
         for current_benchmark in current_results:
-            key = f"{current_benchmark.points}_{current_benchmark.dimension}"
-            baseline_benchmark = baseline_results.get(key)
+            baseline_benchmark = self._matching_baseline(current_benchmark, baseline_results)
 
             self._write_benchmark_header(f, current_benchmark)
             self._write_current_benchmark_data(f, current_benchmark)
@@ -2258,6 +2347,8 @@ def _write_performance_comparison(self, f: TextIO, current_results: list[Benchma
     def _write_benchmark_header(self, f, benchmark: BenchmarkData) -> None:
         """Write benchmark section header."""
         f.write(f"=== {benchmark.points} Points ({benchmark.dimension}) ===\n")
+        if benchmark.benchmark_id:
+            f.write(f"Benchmark ID: {benchmark.benchmark_id}\n")
 
     def _write_current_benchmark_data(self, f, benchmark: BenchmarkData) -> None:
         """Write current benchmark data."""
diff --git a/scripts/tests/test_benchmark_models.py b/scripts/tests/test_benchmark_models.py
index ed5107b9..0faabb5e 100644
--- a/scripts/tests/test_benchmark_models.py
+++ b/scripts/tests/test_benchmark_models.py
@@ -288,6 +288,27 @@ def test_format_benchmark_tables(self):
         assert "| 5000 |" in markdown_content  # Should contain the 5000 point row
         assert "4.5x" in markdown_content  # Scaling: 500/110 ≈ 4.5
 
+    def test_format_benchmark_tables_includes_benchmark_ids(self):
+        """Test expanded benchmark IDs are shown in baseline summary tables."""
+        benchmarks = [
+            BenchmarkData(50, "3D", benchmark_id="boundary_facets/boundary_facets_3d/50")
+            .with_timing(9.0, 10.0, 11.0, "µs")
+            .with_throughput(4.545, 5.0, 5.556, "Kelem/s"),
+            BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing(
+                19.0,
+                20.0,
+                21.0,
+                "µs",
+            ),
+        ]
+
+        lines = format_benchmark_tables(benchmarks)
+        markdown_content = "\n".join(lines)
+
+        assert "| Benchmark ID | Points | Time (mean) | Throughput (mean) | Scaling |" in markdown_content
+        assert "| `boundary_facets/boundary_facets_3d/50` | 50 | 10.00 µs | 5.00 Kelem/s | 1.0x |" in markdown_content
+        assert "| `validation/validate_3d/50` | 50 | 20.00 µs | N/A | 2.0x |" in markdown_content
+
     def test_format_time_value(self):
         """Test formatting time values with appropriate precision."""
         # Test zero and negative values (should return N/A)
diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py
index 4a20c89f..72d89a46 100644
--- a/scripts/tests/test_benchmark_utils.py
+++ b/scripts/tests/test_benchmark_utils.py
@@ -118,6 +118,16 @@ def test_parse_estimates_json_valid_data(self, sample_estimates_data):
         finally:
             estimates_path.unlink()
 
+    def test_benchmark_data_positional_timing_compatibility(self):
+        """Test legacy positional construction still maps the third argument to time_low."""
+        benchmark = BenchmarkData(1000, "2D", 1.0, 2.0, 3.0, "µs")
+
+        assert benchmark.time_low == 1.0
+        assert benchmark.time_mean == 2.0
+        assert benchmark.time_high == 3.0
+        assert benchmark.time_unit == "µs"
+        assert benchmark.benchmark_id == ""
+
     def test_parse_estimates_json_zero_mean(self):
         """Test parsing estimates.json with zero mean time."""
         estimates_data = {"mean": {"point_estimate": 0.0, "confidence_interval": {"lower_bound": 0.0, "upper_bound": 0.0}}}
@@ -248,6 +258,46 @@ def test_ci_performance_suite_patterns(self):
         actual_order = [(b.dimension, b.points) for b in ci_suite_results]
         assert actual_order == expected_order
 
+    def test_find_criterion_results_preserves_ci_suite_ids(self):
+        """Test ci_performance_suite results keep expanded Criterion benchmark IDs."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            target_dir = Path(temp_dir) / "target"
+
+            def write_estimate(path_parts, mean_ns):
+                estimates_dir = target_dir / "criterion" / Path(*path_parts) / "base"
+                estimates_dir.mkdir(parents=True)
+                estimates = {
+                    "mean": {
+                        "point_estimate": mean_ns,
+                        "confidence_interval": {
+                            "lower_bound": mean_ns * 0.9,
+                            "upper_bound": mean_ns * 1.1,
+                        },
+                    },
+                }
+                (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
+
+            write_estimate(("boundary_facets", "boundary_facets_3d", "50"), 10_000.0)
+            write_estimate(("validation", "validate_3d", "50"), 20_000.0)
+            write_estimate(("boundary_facets", "boundary_facets_3d_adversarial", "50"), 30_000.0)
+            write_estimate(("bistellar_flips_4d", "k2_roundtrip"), 40_000.0)
+
+            results = CriterionParser.find_criterion_results(target_dir)
+
+            assert [result.comparison_key for result in results] == [
+                "boundary_facets/boundary_facets_3d/50",
+                "boundary_facets/boundary_facets_3d_adversarial/50",
+                "validation/validate_3d/50",
+                "bistellar_flips_4d/k2_roundtrip",
+            ]
+            sized_results = [result for result in results if result.comparison_key != "bistellar_flips_4d/k2_roundtrip"]
+            assert {(result.points, result.dimension) for result in sized_results} == {(50, "3D")}
+
+            roundtrip = next(result for result in results if result.comparison_key == "bistellar_flips_4d/k2_roundtrip")
+            assert roundtrip.points == 0
+            assert roundtrip.dimension == "4D"
+            assert roundtrip.throughput_mean is None
+
 
 class TestPerformanceComparator:
     """Test cases for PerformanceComparator class."""
@@ -301,6 +351,80 @@ def test_parse_baseline_file(self, comparator, sample_baseline_content):
         assert bench_2d_1000.time_mean == 110.0
         assert bench_2d_1000.throughput_mean == 9.091
 
+    def test_parse_baseline_file_with_benchmark_ids(self, comparator):
+        """Test parsing expanded ci_performance_suite baseline identifiers."""
+        baseline_content = """Date: 2023-06-15 10:30:00 PDT
+Git commit: abc123def456
+
+=== 50 Points (3D) ===
+Benchmark ID: boundary_facets/boundary_facets_3d/50
+Time: [9.0, 10.0, 11.0] µs
+Throughput: [4.545, 5.0, 5.556] Kelem/s
+
+=== 50 Points (3D) ===
+Benchmark ID: validation/validate_3d/50
+Time: [19.0, 20.0, 21.0] µs
+Throughput: [2.381, 2.5, 2.632] Kelem/s
+"""
+
+        results = comparator._parse_baseline_file(baseline_content)
+
+        assert set(results) == {
+            "boundary_facets/boundary_facets_3d/50",
+            "validation/validate_3d/50",
+        }
+        assert results["boundary_facets/boundary_facets_3d/50"].time_mean == 10.0
+        assert results["validation/validate_3d/50"].time_mean == 20.0
+
+    def test_write_performance_comparison_matches_benchmark_ids(self, comparator):
+        """Test comparison uses expanded benchmark IDs instead of point/dimension collisions."""
+        current_results = [
+            BenchmarkData(50, "3D", benchmark_id="boundary_facets/boundary_facets_3d/50").with_timing(9.0, 10.0, 11.0, "µs"),
+            BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing(19.0, 20.0, 21.0, "µs"),
+        ]
+        baseline_results = {
+            "boundary_facets/boundary_facets_3d/50": BenchmarkData(
+                50,
+                "3D",
+                benchmark_id="boundary_facets/boundary_facets_3d/50",
+            ).with_timing(9.0, 10.0, 11.0, "µs"),
+            "validation/validate_3d/50": BenchmarkData(
+                50,
+                "3D",
+                benchmark_id="validation/validate_3d/50",
+            ).with_timing(38.0, 40.0, 42.0, "µs"),
+        }
+
+        output = StringIO()
+        comparator._write_performance_comparison(output, current_results, baseline_results)
+        content = output.getvalue()
+
+        assert "Benchmark ID: boundary_facets/boundary_facets_3d/50" in content
+        assert "Benchmark ID: validation/validate_3d/50" in content
+        assert "OK: Time change +0.0%" in content
+        assert "IMPROVEMENT: Time decreased by 50.0%" in content
+
+    def test_write_performance_comparison_no_legacy_fallback_for_benchmark_id(self, comparator):
+        """Test expanded IDs do not compare against unrelated collapsed legacy baselines."""
+        current_results = [
+            BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing(
+                19.0,
+                20.0,
+                21.0,
+                "µs",
+            ),
+        ]
+        baseline_results = {
+            "50_3D": BenchmarkData(50, "3D").with_timing(38.0, 40.0, 42.0, "µs"),
+        }
+
+        output = StringIO()
+        comparator._write_performance_comparison(output, current_results, baseline_results)
+        content = output.getvalue()
+
+        assert "Baseline: N/A (no matching entry)" in content
+        assert "IMPROVEMENT: Time decreased by 50.0%" not in content
+
     def test_write_time_comparison_no_regression(self, comparator):
         """Test time comparison writing with no regression."""
         current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs")

From a88f788b565eb25d5fda82a4a4fdf800659737e2 Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Mon, 27 Apr 2026 23:37:37 -0700
Subject: [PATCH 6/8] Changed: Support unsized benchmarks and refine memory
 profiling in tooling

Update benchmark scripts to correctly model and report workloads that
do not operate on a numeric input size (e.g., bistellar flips). This
involves allowing benchmark data to represent an absent point count
and adapting parsing, sorting, and reporting logic. Memory profiling
in `profiling_suite.rs` is also refined: allocation statistics are
now gated on both `count-allocations` and `bench-logging` features,
and reporting logic is restructured for clarity and robustness across
benchmark samples. Utility functions were also extracted in the Python
benchmark scripts.
---
 benches/profiling_suite.rs             | 163 +++++++++++++------------
 scripts/benchmark_models.py            |  39 ++++--
 scripts/benchmark_utils.py             |  90 ++++++--------
 scripts/tests/test_benchmark_models.py |  36 +++++-
 scripts/tests/test_benchmark_utils.py  | 124 +++++++++++--------
 5 files changed, 258 insertions(+), 194 deletions(-)

diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index 21851d0b..a50c69cc 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -50,7 +50,7 @@
 //!
 //! Example with custom configuration:
 //! ```bash
-//! BENCH_SAMPLE_SIZE=5 BENCH_WARMUP_SECS=5 BENCH_PERCENTILE=90 cargo bench --profile perf --bench profiling_suite
+//! BENCH_SAMPLE_SIZE=10 BENCH_WARMUP_SECS=5 BENCH_PERCENTILE=90 cargo bench --profile perf --bench profiling_suite
 //! ```
 
 use criterion::measurement::WallTime;
@@ -89,6 +89,7 @@ fn init_tracing() {
 const fn init_tracing() {}
 
 // SmallBuffer size constants for different use cases
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
 const BENCHMARK_ITERATION_BUFFER_SIZE: usize = 8; // For tracking allocation info across benchmark iterations
 const SIMPLEX_VERTICES_BUFFER_SIZE: usize = 4; // 3D simplex = 4 vertices
 const QUERY_RESULTS_BUFFER_SIZE: usize = 1024; // For bounded query result collections (max 1000 in code)
@@ -460,7 +461,7 @@ fn bench_scaling(c: &mut Criterion) {
 // ============================================================================
 
 /// Read the memory summary percentile from `BENCH_PERCENTILE` (default: 95).
-#[cfg(feature = "count-allocations")]
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
 fn configured_percentile() -> usize {
     env::var("BENCH_PERCENTILE")
         .ok()
@@ -469,7 +470,7 @@ fn configured_percentile() -> usize {
 }
 
 /// Format a percentile as an ordinal label for the memory summary.
-#[cfg(feature = "count-allocations")]
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
 fn percentile_label(percentile: usize) -> String {
     let suffix = match percentile % 100 {
         11..=13 => "th",
@@ -484,7 +485,7 @@ fn percentile_label(percentile: usize) -> String {
 }
 
 /// Calculate percentile from a slice of values using nearest-rank method.
-#[cfg(feature = "count-allocations")]
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
 fn calculate_percentile(values: &mut [u64], percentile: usize) -> u64 {
     if values.is_empty() {
         return 0;
@@ -501,7 +502,7 @@ fn calculate_percentile(values: &mut [u64], percentile: usize) -> u64 {
 }
 
 /// Print memory allocation summary
-#[cfg(feature = "count-allocations")]
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
 #[expect(clippy::cast_precision_loss)]
 fn print_alloc_summary(
     info: &AllocationInfo,
@@ -538,29 +539,86 @@ fn print_alloc_summary(
     println!("=====================================\n");
 }
 
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
+#[expect(clippy::cast_possible_wrap)]
+fn print_alloc_summary_from_samples<const D: usize>(
+    allocation_infos: &SmallBuffer<AllocationInfo, BENCHMARK_ITERATION_BUFFER_SIZE>,
+    actual_point_counts: &SmallBuffer<usize, BENCHMARK_ITERATION_BUFFER_SIZE>,
+) {
+    if allocation_infos.is_empty() {
+        return;
+    }
+
+    // Safe cast for division: Criterion sample buffers here are small and non-empty.
+    let divisor_unsigned = allocation_infos.len() as u64;
+    let divisor_signed = allocation_infos.len() as i64;
+    let avg_info = AllocationInfo {
+        count_total: allocation_infos.iter().map(|i| i.count_total).sum::<u64>() / divisor_unsigned,
+        count_current: allocation_infos
+            .iter()
+            .map(|i| i.count_current)
+            .sum::<i64>()
+            / divisor_signed,
+        count_max: allocation_infos
+            .iter()
+            .map(|i| i.count_max)
+            .max()
+            .unwrap_or(0),
+        bytes_total: allocation_infos.iter().map(|i| i.bytes_total).sum::<u64>() / divisor_unsigned,
+        bytes_current: allocation_infos
+            .iter()
+            .map(|i| i.bytes_current)
+            .sum::<i64>()
+            / divisor_signed,
+        bytes_max: allocation_infos
+            .iter()
+            .map(|i| i.bytes_max)
+            .max()
+            .unwrap_or(0),
+    };
+    let avg_actual_count = if actual_point_counts.is_empty() {
+        0
+    } else {
+        actual_point_counts.iter().sum::<usize>() / actual_point_counts.len()
+    };
+
+    let mut bytes_max_values: Vec<u64> = allocation_infos.iter().map(|i| i.bytes_max).collect();
+    let percentile = configured_percentile();
+    let percentile_value = calculate_percentile(&mut bytes_max_values, percentile);
+
+    print_alloc_summary(
+        &avg_info,
+        &format!("{D}D Triangulation"),
+        avg_actual_count,
+        percentile,
+        percentile_value,
+    );
+}
+
 /// Generic helper to benchmark memory usage for a specific dimension D
-#[cfg_attr(feature = "count-allocations", expect(clippy::cast_possible_wrap))]
 fn bench_memory_usage<const D: usize>(
     group: &mut BenchmarkGroup<'_, WallTime>,
     bench_id_prefix: &str,
     count: usize,
 ) {
+    #[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
+    let mut allocation_infos: SmallBuffer<AllocationInfo, BENCHMARK_ITERATION_BUFFER_SIZE> =
+        SmallBuffer::new();
+
+    #[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
+    let mut actual_point_counts: SmallBuffer<usize, BENCHMARK_ITERATION_BUFFER_SIZE> =
+        SmallBuffer::new();
+
     group.bench_with_input(
         BenchmarkId::new(bench_id_prefix, count),
         &count,
         |b, &count| {
             b.iter_custom(|iters| {
                 let mut total_time = Duration::new(0, 0);
-                let mut allocation_infos: SmallBuffer<
-                    AllocationInfo,
-                    BENCHMARK_ITERATION_BUFFER_SIZE,
-                > = SmallBuffer::new();
-
-                let mut actual_point_counts: SmallBuffer<usize, BENCHMARK_ITERATION_BUFFER_SIZE> =
-                    SmallBuffer::new();
 
                 for _ in 0..iters {
                     let points = gen_points::<D>(count, PointDistribution::Random, DEFAULT_SEED);
+                    #[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
                     let pts_len = points.len();
                     let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect();
                     let start_time = Instant::now();
@@ -572,81 +630,24 @@ fn bench_memory_usage<const D: usize>(
                     });
 
                     total_time += start_time.elapsed();
-                    allocation_infos.push(alloc_info);
-                    actual_point_counts.push(pts_len);
-                }
-
-                #[cfg(not(feature = "count-allocations"))]
-                {
-                    print_alloc_banner_once();
-                }
 
-                #[cfg(feature = "count-allocations")]
-                {
-                    // Report memory usage summary if available
-                    if !allocation_infos.is_empty() {
-                        // Safe cast for division - allocation_infos.len() is guaranteed to be small and non-zero
-                        let divisor_unsigned = allocation_infos.len() as u64;
-                        let divisor_signed = allocation_infos.len() as i64;
-                        let avg_info = AllocationInfo {
-                            count_total: allocation_infos
-                                .iter()
-                                .map(|i| i.count_total)
-                                .sum::<u64>()
-                                / divisor_unsigned,
-                            count_current: allocation_infos
-                                .iter()
-                                .map(|i| i.count_current)
-                                .sum::<i64>()
-                                / divisor_signed,
-                            count_max: allocation_infos
-                                .iter()
-                                .map(|i| i.count_max)
-                                .max()
-                                .unwrap_or(0),
-                            bytes_total: allocation_infos
-                                .iter()
-                                .map(|i| i.bytes_total)
-                                .sum::<u64>()
-                                / divisor_unsigned,
-                            bytes_current: allocation_infos
-                                .iter()
-                                .map(|i| i.bytes_current)
-                                .sum::<i64>()
-                                / divisor_signed,
-                            bytes_max: allocation_infos
-                                .iter()
-                                .map(|i| i.bytes_max)
-                                .max()
-                                .unwrap_or(0),
-                        };
-                        let avg_actual_count = if actual_point_counts.is_empty() {
-                            0
-                        } else {
-                            actual_point_counts.iter().sum::<usize>() / actual_point_counts.len()
-                        };
-
-                        // Calculate percentile of bytes_max (configurable via BENCH_PERCENTILE, default 95th)
-                        let mut bytes_max_values: Vec<u64> =
-                            allocation_infos.iter().map(|i| i.bytes_max).collect();
-                        let percentile = configured_percentile();
-                        let percentile_value =
-                            calculate_percentile(&mut bytes_max_values, percentile);
-
-                        print_alloc_summary(
-                            &avg_info,
-                            &format!("{D}D Triangulation"),
-                            avg_actual_count,
-                            percentile,
-                            percentile_value,
-                        );
+                    #[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
+                    {
+                        allocation_infos.push(alloc_info);
+                        actual_point_counts.push(pts_len);
                     }
+
+                    #[cfg(not(all(feature = "count-allocations", feature = "bench-logging")))]
+                    let _ = alloc_info;
                 }
 
                 total_time
             });
         },
     );
+
+    #[cfg(all(feature = "count-allocations", feature = "bench-logging"))]
+    print_alloc_summary_from_samples::<D>(&allocation_infos, &actual_point_counts);
 }
 
 /// Memory usage profiling across different scales and dimensions using allocation counter
@@ -969,7 +970,7 @@ criterion_group!(
         let sample_size = env::var("BENCH_SAMPLE_SIZE")
             .ok()
             .and_then(|v| v.parse().ok())
-            .unwrap_or(10);
+            .map_or(10, |size: usize| size.max(10));
         let warm_up_secs = env::var("BENCH_WARMUP_SECS")
             .ok()
             .and_then(|v| v.parse().ok())
diff --git a/scripts/benchmark_models.py b/scripts/benchmark_models.py
index e9dbe888..fc70c017 100644
--- a/scripts/benchmark_models.py
+++ b/scripts/benchmark_models.py
@@ -14,7 +14,7 @@
 class BenchmarkData:
     """Represents benchmark data for a single test case."""
 
-    points: int
+    points: int | None
     dimension: str
     time_low: float = 0.0
     time_mean: float = 0.0
@@ -29,7 +29,19 @@ class BenchmarkData:
     @property
     def comparison_key(self) -> str:
         """Return the stable key used for baseline/regression matching."""
-        return self.benchmark_id or f"{self.points}_{self.dimension}"
+        points_key = self.points if self.points is not None else "unsized"
+        return self.benchmark_id or f"{points_key}_{self.dimension}"
+
+    @property
+    def points_label(self) -> str:
+        """Return a display label for the benchmark input size."""
+        return str(self.points) if self.points is not None else "n/a"
+
+    def header_line(self) -> str:
+        """Return the baseline/comparison section header for this benchmark."""
+        if self.points is None:
+            return f"=== Unsized Workload ({self.dimension}) ==="
+        return f"=== {self.points} Points ({self.dimension}) ==="
 
     def with_timing(self, low: float, mean: float, high: float, unit: str) -> "BenchmarkData":
         """Set timing data (fluent interface)."""
@@ -50,7 +62,7 @@ def with_throughput(self, low: float, mean: float, high: float, unit: str) -> "B
     def to_baseline_format(self) -> str:
         """Convert to baseline file format."""
         lines = [
-            f"=== {self.points} Points ({self.dimension}) ===",
+            self.header_line(),
         ]
         if self.benchmark_id:
             lines.append(f"Benchmark ID: {self.benchmark_id}")
@@ -142,10 +154,10 @@ def parse_benchmark_header(line: str) -> BenchmarkData | None:
     Returns:
         BenchmarkData object or None if no match
     """
-    # Match pattern like "=== 1000 Points (2D) ==="
-    match = re.match(r"^=== (\d+) Points \((.+)\) ===$", line.strip())
+    # Match patterns like "=== 1000 Points (2D) ===" or "=== Unsized Workload (4D) ==="
+    match = re.match(r"^=== (?:(\d+) Points|Unsized Workload) \((.+)\) ===$", line.strip())
     if match:
-        points = int(match.group(1))
+        points = int(match.group(1)) if match.group(1) is not None else None
         dimension = match.group(2)
         return BenchmarkData(points=points, dimension=dimension)
     return None
@@ -351,7 +363,10 @@ def _dim_key(d: str) -> tuple[int, str]:
         return (int(m.group(1)) if m else 1_000_000, d)
 
     for dimension in sorted(by_dimension.keys(), key=_dim_key):
-        dim_benchmarks = sorted(by_dimension[dimension], key=lambda b: b.points)
+        dim_benchmarks = sorted(
+            by_dimension[dimension],
+            key=lambda b: (b.points is None, b.points or 0, b.comparison_key),
+        )
         include_benchmark_id = any(bench.benchmark_id for bench in dim_benchmarks)
 
         lines.extend([f"### {dimension} Triangulation Performance", ""])
@@ -370,8 +385,10 @@ def _dim_key(d: str) -> tuple[int, str]:
                 ],
             )
 
-        # Calculate scaling relative to smallest benchmark
-        first_nonzero = next((b for b in dim_benchmarks if b.time_mean and b.time_mean > 0), None)
+        # Calculate scaling relative to the smallest numeric workload only for
+        # legacy homogeneous tables. Expanded benchmark IDs mix different API
+        # surfaces, so a single per-dimension scaling baseline is misleading.
+        first_nonzero = None if include_benchmark_id else next((b for b in dim_benchmarks if b.time_mean and b.time_mean > 0), None)
         baseline_time = first_nonzero.time_mean if first_nonzero else None
 
         for bench in dim_benchmarks:
@@ -392,10 +409,10 @@ def _dim_key(d: str) -> tuple[int, str]:
 
             if include_benchmark_id:
                 lines.append(
-                    f"| `{bench.comparison_key}` | {bench.points} | {time_str} | {throughput_str} | {scaling_str} |",
+                    f"| `{bench.comparison_key}` | {bench.points_label} | {time_str} | {throughput_str} | {scaling_str} |",
                 )
             else:
-                lines.append(f"| {bench.points} | {time_str} | {throughput_str} | {scaling_str} |")
+                lines.append(f"| {bench.points_label} | {time_str} | {throughput_str} | {scaling_str} |")
 
         lines.append("")  # Empty line between tables
 
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index 84be00ea..a8928c9f 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -130,6 +130,26 @@
 
 CI_PERFORMANCE_SUITE_GROUP_ORDER = tuple(CI_PERFORMANCE_SUITE_GROUPS)
 
+
+def ci_suite_group_key(first_path_part: str) -> str | None:
+    """Map a Criterion path prefix to a ci_performance_suite group key."""
+    if first_path_part.startswith("tds_new_"):
+        return "construction"
+    if first_path_part.startswith("bistellar_flips"):
+        return "bistellar_flips"
+    if first_path_part in CI_PERFORMANCE_SUITE_GROUPS:
+        return first_path_part
+    return None
+
+
+def ci_suite_dimension(benchmark_id: str) -> str:
+    """Extract the dimension label from a ci_performance_suite benchmark ID."""
+    match = re.search(r"(?:^|_|/)(\d+)d(?:_|/|$)", benchmark_id)
+    if match:
+        return f"{match.group(1)}D"
+    return "n/a"
+
+
 # Development mode arguments - centralized to keep baseline generation and comparison in sync
 # Reduces samples for faster iteration during development (10x faster than full benchmarks)
 #
@@ -894,25 +914,6 @@ def _format_duration_ns(time_ns: float) -> str:
             return f"{time_ns / 1_000:.1f} µs"
         return f"{time_ns:.0f} ns"
 
-    @staticmethod
-    def _ci_suite_group_key(first_path_part: str) -> str | None:
-        """Map a Criterion path prefix to a ci_performance_suite group key."""
-        if first_path_part.startswith("tds_new_"):
-            return "construction"
-        if first_path_part.startswith("bistellar_flips"):
-            return "bistellar_flips"
-        if first_path_part in CI_PERFORMANCE_SUITE_GROUPS:
-            return first_path_part
-        return None
-
-    @staticmethod
-    def _ci_suite_dimension(benchmark_id: str) -> str:
-        """Extract the dimension label from a ci_performance_suite benchmark ID."""
-        match = re.search(r"(?:^|_|/)(\d+)d(?:_|/|$)", benchmark_id)
-        if match:
-            return f"{match.group(1)}D"
-        return "n/a"
-
     @staticmethod
     def _ci_suite_input_size(path_parts: tuple[str, ...]) -> str:
         """Extract a human-readable input size from Criterion benchmark path parts."""
@@ -963,7 +964,7 @@ def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
             if not path_parts:
                 continue
 
-            group_key = self._ci_suite_group_key(path_parts[0])
+            group_key = ci_suite_group_key(path_parts[0])
             if group_key is None:
                 continue
 
@@ -978,7 +979,7 @@ def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
                 continue
 
             benchmark_id = "/".join(path_parts)
-            group_key = self._ci_suite_group_key(path_parts[0])
+            group_key = ci_suite_group_key(path_parts[0])
             if group_key is None:
                 continue
 
@@ -987,7 +988,7 @@ def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
                 CiPerformanceResult(
                     group_key=group_key,
                     benchmark_id=benchmark_id,
-                    dimension=self._ci_suite_dimension(benchmark_id),
+                    dimension=ci_suite_dimension(benchmark_id),
                     input_size=self._ci_suite_input_size(path_parts),
                     mean_ns=mean_ns,
                     low_ns=low_ns,
@@ -1599,7 +1600,7 @@ def parse_estimates_json(estimates_path: Path, points: int | None, dimension: st
             low_us = low_ns / 1000
             high_us = high_ns / 1000
 
-            benchmark = BenchmarkData(points or 0, dimension).with_timing(round(low_us, 2), round(mean_us, 2), round(high_us, 2), "µs")
+            benchmark = BenchmarkData(points, dimension).with_timing(round(low_us, 2), round(mean_us, 2), round(high_us, 2), "µs")
 
             if points is not None:
                 # Calculate throughput in Kelem/s
@@ -1618,25 +1619,6 @@ def parse_estimates_json(estimates_path: Path, points: int | None, dimension: st
         except (FileNotFoundError, json.JSONDecodeError, KeyError, ZeroDivisionError, ValueError):
             return None
 
-    @staticmethod
-    def _ci_suite_group_key(first_path_part: str) -> str | None:
-        """Map a Criterion path prefix to a ci_performance_suite group key."""
-        if first_path_part.startswith("tds_new_"):
-            return "construction"
-        if first_path_part.startswith("bistellar_flips"):
-            return "bistellar_flips"
-        if first_path_part in CI_PERFORMANCE_SUITE_GROUPS:
-            return first_path_part
-        return None
-
-    @staticmethod
-    def _ci_suite_dimension(benchmark_id: str) -> str:
-        """Extract the dimension label from a ci_performance_suite benchmark ID."""
-        match = re.search(r"(?:^|_|/)(\d+)d(?:_|/|$)", benchmark_id)
-        if match:
-            return f"{match.group(1)}D"
-        return "n/a"
-
     @staticmethod
     def _ci_suite_input_points(path_parts: tuple[str, ...]) -> int | None:
         """Extract the numeric input size when the Criterion ID has one."""
@@ -1658,7 +1640,7 @@ def _process_ci_performance_suite_results(criterion_dir: Path) -> list[Benchmark
             except ValueError:
                 continue
 
-            if not path_parts or CriterionParser._ci_suite_group_key(path_parts[0]) is None:
+            if not path_parts or ci_suite_group_key(path_parts[0]) is None:
                 continue
 
             existing = estimates_by_id.get(path_parts)
@@ -1668,7 +1650,7 @@ def _process_ci_performance_suite_results(criterion_dir: Path) -> list[Benchmark
         results: list[BenchmarkData] = []
         for path_parts, (_, estimates_path) in estimates_by_id.items():
             benchmark_id = "/".join(path_parts)
-            dimension = CriterionParser._ci_suite_dimension(benchmark_id)
+            dimension = ci_suite_dimension(benchmark_id)
             if dimension == "n/a":
                 continue
 
@@ -1683,9 +1665,10 @@ def _process_ci_performance_suite_results(criterion_dir: Path) -> list[Benchmark
         group_order = {group: index for index, group in enumerate(CI_PERFORMANCE_SUITE_GROUP_ORDER)}
         results.sort(
             key=lambda result: (
-                group_order.get(CriterionParser._ci_suite_group_key(result.benchmark_id.split("/", 1)[0]) or "", sys.maxsize),
+                group_order.get(ci_suite_group_key(result.benchmark_id.split("/", 1)[0]) or "", sys.maxsize),
                 int(result.dimension.removesuffix("D")) if result.dimension.removesuffix("D").isdigit() else sys.maxsize,
-                result.points,
+                result.points is None,
+                result.points or 0,
                 result.benchmark_id,
             ),
         )
@@ -1802,8 +1785,9 @@ def find_criterion_results(target_dir: Path) -> list[BenchmarkData]:
         if not results:
             results = CriterionParser._process_fallback_discovery(criterion_dir)
 
-        # Sort by dimension, then by point count
-        results.sort(key=lambda x: (int(x.dimension.rstrip("D")), x.points))
+        # Sort by dimension, then by point count. Unsized benchmarks sort after
+        # numeric workloads within the same dimension.
+        results.sort(key=lambda x: (int(x.dimension.rstrip("D")), x.points is None, x.points or 0))
         return results
 
 
@@ -2046,9 +2030,9 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData
             line = lines[i].strip()
 
             # Look for benchmark sections
-            match = re.match(r"=== (\d+) Points \((\d+)D\) ===", line)
+            match = re.match(r"=== (?:(\d+) Points|Unsized Workload) \((\d+)D\) ===", line)
             if match:
-                points = int(match.group(1))
+                points = int(match.group(1)) if match.group(1) is not None else None
                 dimension = f"{match.group(2)}D"
                 benchmark_id = ""
                 next_line_index = i + 1
@@ -2248,6 +2232,8 @@ def _matching_baseline(current: BenchmarkData, baseline_results: dict[str, Bench
         baseline_benchmark = baseline_results.get(current.comparison_key)
         if baseline_benchmark is not None or current.benchmark_id:
             return baseline_benchmark
+        if current.points is None:
+            return None
         return baseline_results.get(f"{current.points}_{current.dimension}")
 
     def _write_performance_comparison(self, f: TextIO, current_results: list[BenchmarkData], baseline_results: dict[str, BenchmarkData]) -> bool:
@@ -2346,7 +2332,7 @@ def _write_performance_comparison(self, f: TextIO, current_results: list[Benchma
 
     def _write_benchmark_header(self, f, benchmark: BenchmarkData) -> None:
         """Write benchmark section header."""
-        f.write(f"=== {benchmark.points} Points ({benchmark.dimension}) ===\n")
+        f.write(f"{benchmark.header_line()}\n")
         if benchmark.benchmark_id:
             f.write(f"Benchmark ID: {benchmark.benchmark_id}\n")
 
@@ -3115,7 +3101,7 @@ def _parse_baseline_metadata(baseline_content: str) -> dict[str, str]:
 
 def _sorted_benchmark_list(results: Mapping[str, "BenchmarkData"]) -> list["BenchmarkData"]:
     """Return benchmarks sorted by (dimension, point count) for stable output."""
-    return sorted(results.values(), key=lambda b: (int(b.dimension.rstrip("D")), b.points))
+    return sorted(results.values(), key=lambda b: (int(b.dimension.rstrip("D")), b.points is None, b.points or 0))
 
 
 def _find_downloaded_baseline_file(download_dir: Path) -> Path:
diff --git a/scripts/tests/test_benchmark_models.py b/scripts/tests/test_benchmark_models.py
index 0faabb5e..7f6cdf1c 100644
--- a/scripts/tests/test_benchmark_models.py
+++ b/scripts/tests/test_benchmark_models.py
@@ -74,6 +74,16 @@ def test_to_baseline_format_with_timing_and_throughput(self):
 """
         assert result == expected
 
+    def test_to_baseline_format_with_unsized_workload(self):
+        """Test baseline format output for workloads without numeric input size."""
+        data = BenchmarkData(None, "4D", benchmark_id="bistellar_flips_4d/k2_roundtrip").with_timing(0.8, 0.95, 1.1, "µs")
+
+        result = data.to_baseline_format()
+
+        assert "=== Unsized Workload (4D) ===" in result
+        assert "Benchmark ID: bistellar_flips_4d/k2_roundtrip" in result
+        assert "0 Points" not in result
+
 
 class TestCircumspherePerformanceData:
     """Test cases for CircumspherePerformanceData class."""
@@ -225,6 +235,11 @@ def test_parse_benchmark_header(self):
         assert result.points == 1000
         assert result.dimension == "2D"
 
+        result = parse_benchmark_header("=== Unsized Workload (4D) ===")
+        assert result is not None
+        assert result.points is None
+        assert result.dimension == "4D"
+
         # Invalid header
         result = parse_benchmark_header("Invalid header")
         assert result is None
@@ -306,8 +321,25 @@ def test_format_benchmark_tables_includes_benchmark_ids(self):
         markdown_content = "\n".join(lines)
 
         assert "| Benchmark ID | Points | Time (mean) | Throughput (mean) | Scaling |" in markdown_content
-        assert "| `boundary_facets/boundary_facets_3d/50` | 50 | 10.00 µs | 5.00 Kelem/s | 1.0x |" in markdown_content
-        assert "| `validation/validate_3d/50` | 50 | 20.00 µs | N/A | 2.0x |" in markdown_content
+        assert "| `boundary_facets/boundary_facets_3d/50` | 50 | 10.00 µs | 5.00 Kelem/s | N/A |" in markdown_content
+        assert "| `validation/validate_3d/50` | 50 | 20.00 µs | N/A | N/A |" in markdown_content
+
+    def test_format_benchmark_tables_renders_unsized_points(self):
+        """Test unsized workloads render without fake numeric point counts."""
+        benchmarks = [
+            BenchmarkData(None, "4D", benchmark_id="bistellar_flips_4d/k2_roundtrip").with_timing(
+                0.8,
+                0.95,
+                1.1,
+                "µs",
+            ),
+        ]
+
+        lines = format_benchmark_tables(benchmarks)
+        markdown_content = "\n".join(lines)
+
+        assert "| `bistellar_flips_4d/k2_roundtrip` | n/a | 0.950 µs | N/A | N/A |" in markdown_content
+        assert "0 Points" not in markdown_content
 
     def test_format_time_value(self):
         """Test formatting time values with appropriate precision."""
diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py
index 72d89a46..4d6005f9 100644
--- a/scripts/tests/test_benchmark_utils.py
+++ b/scripts/tests/test_benchmark_utils.py
@@ -47,6 +47,27 @@
 )
 
 THRESHOLD_PERCENT = f"{DEFAULT_REGRESSION_THRESHOLD:.1f}%"
+PUBLIC_API_TITLE = "### Public API Performance Contract (`ci_performance_suite`)"
+CIRCUMSPHERE_TITLE = "## Circumsphere Predicate Analysis"
+PERFORMANCE_RANKING_TITLE = "### Performance Ranking"
+RECOMMENDATIONS_TITLE = "### Recommendations"
+PERFORMANCE_UPDATES_TITLE = "## Performance Data Updates"
+
+
+def write_estimate(target_dir: Path, path_parts, mean_ns):
+    """Write a minimal Criterion estimates.json fixture."""
+    estimates_dir = target_dir / "criterion" / Path(*path_parts) / "base"
+    estimates_dir.mkdir(parents=True)
+    estimates = {
+        "mean": {
+            "point_estimate": mean_ns,
+            "confidence_interval": {
+                "lower_bound": mean_ns * 0.9,
+                "upper_bound": mean_ns * 1.1,
+            },
+        },
+    }
+    (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
 
 
 def compute_average_time_change(current_results, baseline_results):
@@ -128,6 +149,24 @@ def test_benchmark_data_positional_timing_compatibility(self):
         assert benchmark.time_unit == "µs"
         assert benchmark.benchmark_id == ""
 
+    def test_parse_estimates_json_preserves_unsized_workload(self, sample_estimates_data):
+        """Test Criterion estimates without numeric input size do not get fake throughput."""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+            json.dump(sample_estimates_data, f)
+            f.flush()
+            estimates_path = Path(f.name)
+
+        try:
+            result = CriterionParser.parse_estimates_json(estimates_path, None, "4D")
+
+            assert result is not None
+            assert result.points is None
+            assert result.dimension == "4D"
+            assert result.throughput_mean is None
+            assert "0 Points" not in result.to_baseline_format()
+        finally:
+            estimates_path.unlink()
+
     def test_parse_estimates_json_zero_mean(self):
         """Test parsing estimates.json with zero mean time."""
         estimates_data = {"mean": {"point_estimate": 0.0, "confidence_interval": {"lower_bound": 0.0, "upper_bound": 0.0}}}
@@ -263,24 +302,10 @@ def test_find_criterion_results_preserves_ci_suite_ids(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             target_dir = Path(temp_dir) / "target"
 
-            def write_estimate(path_parts, mean_ns):
-                estimates_dir = target_dir / "criterion" / Path(*path_parts) / "base"
-                estimates_dir.mkdir(parents=True)
-                estimates = {
-                    "mean": {
-                        "point_estimate": mean_ns,
-                        "confidence_interval": {
-                            "lower_bound": mean_ns * 0.9,
-                            "upper_bound": mean_ns * 1.1,
-                        },
-                    },
-                }
-                (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
-
-            write_estimate(("boundary_facets", "boundary_facets_3d", "50"), 10_000.0)
-            write_estimate(("validation", "validate_3d", "50"), 20_000.0)
-            write_estimate(("boundary_facets", "boundary_facets_3d_adversarial", "50"), 30_000.0)
-            write_estimate(("bistellar_flips_4d", "k2_roundtrip"), 40_000.0)
+            write_estimate(target_dir, ("boundary_facets", "boundary_facets_3d", "50"), 10_000.0)
+            write_estimate(target_dir, ("validation", "validate_3d", "50"), 20_000.0)
+            write_estimate(target_dir, ("boundary_facets", "boundary_facets_3d_adversarial", "50"), 30_000.0)
+            write_estimate(target_dir, ("bistellar_flips_4d", "k2_roundtrip"), 40_000.0)
 
             results = CriterionParser.find_criterion_results(target_dir)
 
@@ -294,7 +319,7 @@ def write_estimate(path_parts, mean_ns):
             assert {(result.points, result.dimension) for result in sized_results} == {(50, "3D")}
 
             roundtrip = next(result for result in results if result.comparison_key == "bistellar_flips_4d/k2_roundtrip")
-            assert roundtrip.points == 0
+            assert roundtrip.points is None
             assert roundtrip.dimension == "4D"
             assert roundtrip.throughput_mean is None
 
@@ -376,6 +401,23 @@ def test_parse_baseline_file_with_benchmark_ids(self, comparator):
         assert results["boundary_facets/boundary_facets_3d/50"].time_mean == 10.0
         assert results["validation/validate_3d/50"].time_mean == 20.0
 
+    def test_parse_baseline_file_with_unsized_benchmark_id(self, comparator):
+        """Test parsing expanded CI benchmarks without numeric input sizes."""
+        baseline_content = """Date: 2023-06-15 10:30:00 PDT
+Git commit: abc123def456
+
+=== Unsized Workload (4D) ===
+Benchmark ID: bistellar_flips_4d/k2_roundtrip
+Time: [0.8, 0.95, 1.1] µs
+"""
+
+        results = comparator._parse_baseline_file(baseline_content)
+
+        benchmark = results["bistellar_flips_4d/k2_roundtrip"]
+        assert benchmark.points is None
+        assert benchmark.dimension == "4D"
+        assert benchmark.throughput_mean is None
+
     def test_write_performance_comparison_matches_benchmark_ids(self, comparator):
         """Test comparison uses expanded benchmark IDs instead of point/dimension collisions."""
         current_results = [
@@ -2285,40 +2327,26 @@ def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_c
             assert "## Performance Results Summary" in content
 
             # Check static content sections
-            assert "### Public API Performance Contract (`ci_performance_suite`)" in content
-            assert "## Circumsphere Predicate Analysis" in content
-            assert "### Performance Ranking" in content
-            assert "### Recommendations" in content
-            assert "## Performance Data Updates" in content
+            assert PUBLIC_API_TITLE in content
+            assert CIRCUMSPHERE_TITLE in content
+            assert PERFORMANCE_RANKING_TITLE in content
+            assert RECOMMENDATIONS_TITLE in content
+            assert PERFORMANCE_UPDATES_TITLE in content
 
     def test_get_ci_performance_suite_results(self):
         """Test public API summary generation from ci_performance_suite Criterion data."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
 
-            def write_estimate(path_parts, mean_ns):
-                estimates_dir = project_root / "target" / "criterion" / Path(*path_parts) / "base"
-                estimates_dir.mkdir(parents=True)
-                estimates = {
-                    "mean": {
-                        "point_estimate": mean_ns,
-                        "confidence_interval": {
-                            "lower_bound": mean_ns * 0.9,
-                            "upper_bound": mean_ns * 1.1,
-                        },
-                    },
-                }
-                (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
-
-            write_estimate(("tds_new_2d", "tds_new", "10"), 120_000.0)
-            write_estimate(("boundary_facets", "boundary_facets_3d_adversarial", "50"), 7_500.0)
-            write_estimate(("bistellar_flips_4d", "k2_roundtrip"), 950.0)
+            write_estimate(project_root / "target", ("tds_new_2d", "tds_new", "10"), 120_000.0)
+            write_estimate(project_root / "target", ("boundary_facets", "boundary_facets_3d_adversarial", "50"), 7_500.0)
+            write_estimate(project_root / "target", ("bistellar_flips_4d", "k2_roundtrip"), 950.0)
 
             generator = PerformanceSummaryGenerator(project_root)
             lines = generator._get_ci_performance_suite_results()
             content = "\n".join(lines)
 
-            assert "### Public API Performance Contract (`ci_performance_suite`)" in content
+            assert PUBLIC_API_TITLE in content
             assert "#### Construction" in content
             assert "Public API: `DelaunayTriangulation::new_with_options`" in content
             assert "`tds_new_2d/tds_new/10`" in content
@@ -2352,7 +2380,7 @@ def test_get_update_instructions(self):
             lines = generator._get_update_instructions()
             content = "\n".join(lines)
 
-            assert "## Performance Data Updates" in content
+            assert PERFORMANCE_UPDATES_TITLE in content
             assert "uv run benchmark-utils generate-baseline" in content
             assert "uv run benchmark-utils generate-summary" in content
             assert "PerformanceSummaryGenerator" in content
@@ -2833,11 +2861,11 @@ def test_full_generation_workflow_integration(self):
                 assert "Single Query Performance (3D)" in content
                 assert "Triangulation Data Structure Performance" in content
                 assert "Performance Status: Good" in content
-                assert "Public API Performance Contract" in content
-                assert "Circumsphere Predicate Analysis" in content
-                assert "Performance Ranking" in content
-                assert "Recommendations" in content
-                assert "Performance Data Updates" in content
+                assert PUBLIC_API_TITLE.removeprefix("### ") in content
+                assert CIRCUMSPHERE_TITLE.removeprefix("## ") in content
+                assert PERFORMANCE_RANKING_TITLE.removeprefix("### ") in content
+                assert RECOMMENDATIONS_TITLE.removeprefix("### ") in content
+                assert PERFORMANCE_UPDATES_TITLE.removeprefix("## ") in content
 
     def test_dimension_sorting_numeric_order(self):
         """Test that dimensions are sorted numerically, not lexically."""

From c3019579cb4936710ec7d8ec9240d51213f38f2b Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Tue, 28 Apr 2026 08:53:38 -0700
Subject: [PATCH 7/8] Changed: Filter stale CI performance benchmark results

Introduces a manifest system to accurately track and filter benchmark
results from the `ci_performance_suite`. This prevents incorrect
performance comparisons due to stale Criterion output directories.

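Additionally, this change enforces explicit benchmark IDs for unsized
workloads to ensure stable comparison keys, improves local profiling
metadata capture, and refines conditional allocation logging by printing
the "allocation stats unavailable" banner only when the `bench-logging`
feature is enabled.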
---
 benches/profiling_suite.rs               |  1 +
 docs/dev/commands.md                     |  7 +-
 scripts/benchmark_models.py              |  8 ++-
 scripts/benchmark_utils.py               | 83 ++++++++++++++++++++++--
 scripts/ci/capture_profiling_metadata.sh |  4 +-
 scripts/tests/test_benchmark_models.py   |  7 ++
 scripts/tests/test_benchmark_utils.py    | 60 ++++++++++++++++-
 7 files changed, 158 insertions(+), 12 deletions(-)

diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index a50c69cc..194c3785 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -118,6 +118,7 @@ fn measure(f: impl FnOnce()) -> AllocationInfo {
 fn print_alloc_banner_once() {
     static ONCE: Once = Once::new();
     ONCE.call_once(|| {
+        #[cfg(feature = "bench-logging")]
         println!("allocation stats unavailable: count-allocations feature disabled");
     });
 }
diff --git a/docs/dev/commands.md b/docs/dev/commands.md
index 48454e2e..80f9503e 100644
--- a/docs/dev/commands.md
+++ b/docs/dev/commands.md
@@ -103,9 +103,10 @@ just check
 `just check` is the non-mutating lint/validator bundle. It does not run tests,
 examples, or benchmarks.
 
-`just check` validates the default DenseSlotMap backend plus all feature
-combinations. The legacy SlotMap backend is kept as an optional compatibility
-canary; run it explicitly with:
+`just check` runs the default DenseSlotMap backend checks and an
+`--all-features` pass. The justfile runs Clippy for the default feature set and
+for `--all-features`; the legacy SlotMap backend is kept as an optional
+compatibility canary. Run that canary explicitly with:
 
 ```bash
 just check-storage-backends
diff --git a/scripts/benchmark_models.py b/scripts/benchmark_models.py
index fc70c017..ebec8197 100644
--- a/scripts/benchmark_models.py
+++ b/scripts/benchmark_models.py
@@ -29,8 +29,12 @@ class BenchmarkData:
     @property
     def comparison_key(self) -> str:
         """Return the stable key used for baseline/regression matching."""
-        points_key = self.points if self.points is not None else "unsized"
-        return self.benchmark_id or f"{points_key}_{self.dimension}"
+        if self.benchmark_id:
+            return self.benchmark_id
+        if self.points is None:
+            msg = "Unsized benchmarks require benchmark_id for comparison matching"
+            raise ValueError(msg)
+        return f"{self.points}_{self.dimension}"
 
     @property
     def points_label(self) -> str:
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index a8928c9f..dbf709ad 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -24,6 +24,7 @@
 from collections.abc import Mapping
 from dataclasses import dataclass
 from datetime import UTC, datetime
+from itertools import product
 from pathlib import Path
 from shutil import copy2 as copyfile  # NOTE: Use copy2 (metadata-preserving) under the 'copyfile' alias for tests/patching convenience.
 from typing import TYPE_CHECKING, TextIO
@@ -129,6 +130,7 @@
 }
 
 CI_PERFORMANCE_SUITE_GROUP_ORDER = tuple(CI_PERFORMANCE_SUITE_GROUPS)
+_CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE = "ci_performance_suite_manifest_ids.txt"
 
 
 def ci_suite_group_key(first_path_part: str) -> str | None:
@@ -150,6 +152,65 @@ def ci_suite_dimension(benchmark_id: str) -> str:
     return "n/a"
 
 
+def _expand_ci_benchmark_id_pattern(pattern: str) -> set[str]:
+    """Expand the simple brace patterns emitted by ci_performance_suite."""
+    segments = []
+    for segment in pattern.split("/"):
+        if segment.startswith("{") and segment.endswith("}"):
+            segments.append([option for option in segment[1:-1].split(",") if option])
+        else:
+            segments.append([segment])
+    return {"/".join(parts) for parts in product(*segments)}
+
+
+def _parse_ci_performance_manifest_ids(stdout: str) -> set[str]:
+    """Parse benchmark IDs from ci_performance_suite manifest stdout lines."""
+    manifest_ids: set[str] = set()
+    for line in stdout.splitlines():
+        if not line.startswith("api_benchmark "):
+            continue
+        fields = dict(token.split("=", 1) for token in line.split()[1:] if "=" in token)
+        benchmark_ids = fields.get("benchmark_ids", "")
+        for pattern in benchmark_ids.split(";"):
+            if pattern:
+                manifest_ids.update(_expand_ci_benchmark_id_pattern(pattern))
+    return manifest_ids
+
+
+def _ci_performance_manifest_ids_path(criterion_dir: Path) -> Path:
+    """Return the sidecar manifest path used to filter ci_performance_suite results."""
+    return criterion_dir / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE
+
+
+def _write_ci_performance_manifest_ids(project_root: Path, stdout: str) -> None:
+    """Persist the runtime ci_performance_suite manifest beside Criterion results."""
+    if not isinstance(stdout, str):
+        return
+    criterion_dir = project_root / "target" / "criterion"
+    manifest_path = _ci_performance_manifest_ids_path(criterion_dir)
+    manifest_ids = _parse_ci_performance_manifest_ids(stdout)
+    if not manifest_ids:
+        manifest_path.unlink(missing_ok=True)
+        return
+    criterion_dir.mkdir(parents=True, exist_ok=True)
+    manifest_path.write_text(
+        "\n".join(sorted(manifest_ids)) + "\n",
+        encoding="utf-8",
+    )
+
+
+def _load_ci_performance_manifest_ids(criterion_dir: Path) -> set[str] | None:
+    """Load ci_performance_suite benchmark IDs when a runtime manifest exists."""
+    manifest_path = _ci_performance_manifest_ids_path(criterion_dir)
+    if not manifest_path.exists():
+        return None
+    try:
+        manifest_ids = {line.strip() for line in manifest_path.read_text(encoding="utf-8").splitlines() if line.strip()}
+    except OSError:
+        return None
+    return manifest_ids or None
+
+
 # Development mode arguments - centralized to keep baseline generation and comparison in sync
 # Reduces samples for faster iteration during development (10x faster than full benchmarks)
 #
@@ -504,11 +565,13 @@ def _run_ci_performance_suite(self, cargo_profile: str | None = None) -> bool:
                 cwd=self.project_root,
                 timeout=900,
                 capture_output=True,
+                check=False,
             )
             if result.returncode != 0:
                 print(f"❌ Error running ci_performance_suite benchmarks: cargo exited with status {result.returncode}")
                 return False
 
+            _write_ci_performance_manifest_ids(self.project_root, result.stdout)
             print("✅ ci_performance_suite benchmarks completed successfully")
             return True
 
@@ -951,6 +1014,7 @@ def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
         if not criterion_dir.exists():
             return []
 
+        manifest_ids = _load_ci_performance_manifest_ids(criterion_dir)
         estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {}
         for estimates_path in sorted(criterion_dir.glob("**/estimates.json")):
             if estimates_path.parent.name not in {"base", "new"}:
@@ -964,6 +1028,10 @@ def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
             if not path_parts:
                 continue
 
+            benchmark_id = "/".join(path_parts)
+            if manifest_ids is not None and benchmark_id not in manifest_ids:
+                continue
+
             group_key = ci_suite_group_key(path_parts[0])
             if group_key is None:
                 continue
@@ -1629,6 +1697,7 @@ def _ci_suite_input_points(path_parts: tuple[str, ...]) -> int | None:
     @staticmethod
     def _process_ci_performance_suite_results(criterion_dir: Path) -> list[BenchmarkData]:
         """Discover ci_performance_suite Criterion results with expanded benchmark IDs."""
+        manifest_ids = _load_ci_performance_manifest_ids(criterion_dir)
         estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {}
 
         for estimates_path in sorted(criterion_dir.glob("**/estimates.json")):
@@ -1643,6 +1712,10 @@ def _process_ci_performance_suite_results(criterion_dir: Path) -> list[Benchmark
             if not path_parts or ci_suite_group_key(path_parts[0]) is None:
                 continue
 
+            benchmark_id = "/".join(path_parts)
+            if manifest_ids is not None and benchmark_id not in manifest_ids:
+                continue
+
             existing = estimates_by_id.get(path_parts)
             if existing is None or (existing[0] == "base" and estimates_path.parent.name == "new"):
                 estimates_by_id[path_parts] = (estimates_path.parent.name, estimates_path)
@@ -1822,7 +1895,7 @@ def generate_baseline(self, dev_mode: bool = False, output_file: Path | None = N
 
             # Run fresh benchmark - using secure subprocess wrapper
             if dev_mode:
-                run_cargo_command(
+                result = run_cargo_command(
                     [
                         "bench",
                         "--profile",
@@ -1837,12 +1910,13 @@ def generate_baseline(self, dev_mode: bool = False, output_file: Path | None = N
                     capture_output=True,
                 )
             else:
-                run_cargo_command(
+                result = run_cargo_command(
                     ["bench", "--profile", TRUSTED_BENCH_PROFILE, "--bench", "ci_performance_suite"],
                     cwd=self.project_root,
                     timeout=bench_timeout,
                     capture_output=True,
                 )
+            _write_ci_performance_manifest_ids(self.project_root, result.stdout)
 
             # Parse Criterion results
             target_dir = self.project_root / "target"
@@ -1956,7 +2030,7 @@ def compare_with_baseline(
         try:
             # Run fresh benchmark - using secure subprocess wrapper
             if dev_mode:
-                run_cargo_command(
+                result = run_cargo_command(
                     [
                         "bench",
                         "--profile",
@@ -1971,12 +2045,13 @@ def compare_with_baseline(
                     capture_output=True,
                 )
             else:
-                run_cargo_command(
+                result = run_cargo_command(
                     ["bench", "--profile", TRUSTED_BENCH_PROFILE, "--bench", "ci_performance_suite"],
                     cwd=self.project_root,
                     timeout=bench_timeout,
                     capture_output=True,
                 )
+            _write_ci_performance_manifest_ids(self.project_root, result.stdout)
 
             # Parse current results
             target_dir = self.project_root / "target"
diff --git a/scripts/ci/capture_profiling_metadata.sh b/scripts/ci/capture_profiling_metadata.sh
index 0faff056..c67d5d6f 100755
--- a/scripts/ci/capture_profiling_metadata.sh
+++ b/scripts/ci/capture_profiling_metadata.sh
@@ -32,7 +32,7 @@ metadata_title="${PROFILE_METADATA_TITLE:-Profiling Environment}"
 {
 	echo "# ${metadata_title}"
 	echo
-	echo "- Code ref: ${GITHUB_REF_NAME}"
+	echo "- Code ref: ${GITHUB_REF_NAME:-local}"
 	echo "- Commit: $(git rev-parse HEAD)"
 	echo "- Declared rust-toolchain.toml channel: ${declared_toolchain}"
 	echo "- Cargo.toml rust-version: ${rust_version}"
@@ -41,5 +41,5 @@ metadata_title="${PROFILE_METADATA_TITLE:-Profiling Environment}"
 	echo "- Cargo profile: perf"
 	echo "- Benchmark filter: ${benchmark_filter}"
 	echo "- Profiling mode: ${profiling_mode}"
-	echo "- Runner: ${RUNNER_OS}"
+	echo "- Runner: ${RUNNER_OS:-$(uname -s)}"
 } >profiling-results/environment_metadata.md
diff --git a/scripts/tests/test_benchmark_models.py b/scripts/tests/test_benchmark_models.py
index 7f6cdf1c..3930392a 100644
--- a/scripts/tests/test_benchmark_models.py
+++ b/scripts/tests/test_benchmark_models.py
@@ -84,6 +84,13 @@ def test_to_baseline_format_with_unsized_workload(self):
         assert "Benchmark ID: bistellar_flips_4d/k2_roundtrip" in result
         assert "0 Points" not in result
 
+    def test_unsized_comparison_key_requires_benchmark_id(self):
+        """Test unsized workloads cannot silently collide on comparison keys."""
+        data = BenchmarkData(None, "4D")
+
+        with pytest.raises(ValueError, match="Unsized benchmarks require benchmark_id"):
+            _ = data.comparison_key
+
 
 class TestCircumspherePerformanceData:
     """Test cases for CircumspherePerformanceData class."""
diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py
index 4d6005f9..ceb648fb 100644
--- a/scripts/tests/test_benchmark_utils.py
+++ b/scripts/tests/test_benchmark_utils.py
@@ -30,6 +30,7 @@
     CircumsphereTestCase,
 )
 from benchmark_utils import (
+    _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE,
     DEFAULT_REGRESSION_THRESHOLD,
     DEV_MODE_BENCH_ARGS,
     TRUSTED_BENCH_PROFILE,
@@ -40,6 +41,7 @@
     PerformanceSummaryGenerator,
     ProjectRootNotFoundError,
     WorkflowHelper,
+    _expand_ci_benchmark_id_pattern,
     configure_logging,
     create_argument_parser,
     find_project_root,
@@ -70,6 +72,16 @@ def write_estimate(target_dir: Path, path_parts, mean_ns):
     (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
 
 
+def write_ci_performance_manifest(target_dir: Path, benchmark_ids: list[str]):
+    """Write the ci_performance_suite runtime manifest sidecar."""
+    criterion_dir = target_dir / "criterion"
+    criterion_dir.mkdir(parents=True, exist_ok=True)
+    (criterion_dir / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE).write_text(
+        "\n".join(benchmark_ids) + "\n",
+        encoding="utf-8",
+    )
+
+
 def compute_average_time_change(current_results, baseline_results):
     """Replicate PerformanceComparator's geometric mean logic for tests."""
     time_changes = []
@@ -297,6 +309,17 @@ def test_ci_performance_suite_patterns(self):
         actual_order = [(b.dimension, b.points) for b in ci_suite_results]
         assert actual_order == expected_order
 
+    def test_ci_benchmark_id_pattern_expands_braced_segments(self):
+        """Test ci_performance_suite manifest brace patterns expand to concrete IDs."""
+        result = _expand_ci_benchmark_id_pattern("tds_new_2d/{tds_new,tds_new_adversarial}/{10,25}")
+
+        assert result == {
+            "tds_new_2d/tds_new/10",
+            "tds_new_2d/tds_new/25",
+            "tds_new_2d/tds_new_adversarial/10",
+            "tds_new_2d/tds_new_adversarial/25",
+        }
+
     def test_find_criterion_results_preserves_ci_suite_ids(self):
         """Test ci_performance_suite results keep expanded Criterion benchmark IDs."""
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -323,6 +346,29 @@ def test_find_criterion_results_preserves_ci_suite_ids(self):
             assert roundtrip.dimension == "4D"
             assert roundtrip.throughput_mean is None
 
+    def test_find_criterion_results_filters_stale_ci_suite_ids_with_manifest(self):
+        """Test ci_performance_suite parsing ignores stale Criterion files outside the manifest."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            target_dir = Path(temp_dir) / "target"
+
+            write_estimate(target_dir, ("boundary_facets", "boundary_facets_3d", "50"), 10_000.0)
+            write_estimate(target_dir, ("validation", "validate_3d", "50"), 20_000.0)
+            write_estimate(target_dir, ("boundary_facets", "old_boundary_facets_3d", "50"), 30_000.0)
+            write_ci_performance_manifest(
+                target_dir,
+                [
+                    "boundary_facets/boundary_facets_3d/50",
+                    "validation/validate_3d/50",
+                ],
+            )
+
+            results = CriterionParser.find_criterion_results(target_dir)
+
+            assert [result.comparison_key for result in results] == [
+                "boundary_facets/boundary_facets_3d/50",
+                "validation/validate_3d/50",
+            ]
+
 
 class TestPerformanceComparator:
     """Test cases for PerformanceComparator class."""
@@ -2540,7 +2586,13 @@ def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys):
     @patch("benchmark_utils.run_cargo_command")
     def test_run_ci_performance_suite_success(self, mock_cargo):
         """Test running the public API CI performance suite successfully."""
-        mock_cargo.return_value = Mock(returncode=0, stdout="")
+        mock_cargo.return_value = Mock(
+            returncode=0,
+            stdout=(
+                "api_benchmark group=boundary_facets public_api=DelaunayTriangulation::boundary_facets "
+                "dimensions=3 benchmark_ids=boundary_facets/boundary_facets_3d/50 note=test\n"
+            ),
+        )
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2558,6 +2610,8 @@ def test_run_ci_performance_suite_success(self, mock_cargo):
                 "--bench",
                 "ci_performance_suite",
             ]
+            manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE
+            assert manifest_path.read_text(encoding="utf-8") == "boundary_facets/boundary_facets_3d/50\n"
 
     @patch("benchmark_utils.run_cargo_command")
     def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo):
@@ -2566,6 +2620,9 @@ def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo)
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
+            stale_manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE
+            stale_manifest_path.parent.mkdir(parents=True)
+            stale_manifest_path.write_text("stale/benchmark/id\n", encoding="utf-8")
             generator = PerformanceSummaryGenerator(project_root)
 
             requested_profile = "release"
@@ -2575,6 +2632,7 @@ def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo)
             mock_cargo.assert_called_once()
             args = mock_cargo.call_args.args[0]
             assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "ci_performance_suite"]
+            assert not stale_manifest_path.exists()
 
     @patch("benchmark_utils.run_cargo_command")
     def test_run_ci_performance_suite_nonzero_exit(self, mock_cargo, capsys):

From 70703fb1b555b42186001c55877f3ad2d14bdf3a Mon Sep 17 00:00:00 2001
From: Adam Getchell <adam@adamgetchell.org>
Date: Tue, 28 Apr 2026 10:06:05 -0700
Subject: [PATCH 8/8] Changed: Refine Python-based performance and profiling
 tooling

This commit introduces comprehensive improvements to the Python scripts for
benchmarking, profiling, and hardware analysis. Key changes focus on improving
code robustness, type safety, and overall maintainability.

- **New Development Guidelines:** Adds `docs/dev/python.md` to formalize best
  practices for Python automation, guiding developers on rigorous typing,
  structured subprocess mocking, and precise exception handling.
- **Stricter Linting and Typing:** Updates `pyproject.toml` to enable additional
  Ruff rules for type annotation coverage (`ANN201`, `ANN202`, `ANN204`) and
  McCabe complexity (`C90`, with `max-complexity = 10`), and to re-enable
  `BLE001` (blind exception catch), applying these quality checks across all
  Python scripts.
- **Robust Exception Handling:** Replaces broad `except Exception` clauses with
  specific, recoverable error families (`_RECOVERABLE_CLI_ERRORS`) within
  core benchmark, hardware, and comparison utilities, preventing silent
  failures and improving debuggability.
- **Typed Subprocess Mocks:** Updates test fixtures to return fully typed
  `subprocess.CompletedProcess` objects, aligning with new mocking guidance
  and enhancing test reliability for command-line interactions.
- **Internal Refactorings:** Extracts and clarifies helper functions in
  `benchmark_utils.py`, `hardware_utils.py`, and `postprocess_changelog.py`
  to improve code readability and reusability.
- **Dev Mode Benchmark Options:** `_run_ci_performance_suite` now applies the
  reduced Criterion sampling arguments (`DEV_MODE_BENCH_ARGS`) only when
  `use_dev_mode=True`, for faster local development feedback; full sampling
  is the default.
- **Benchmark Config Clarification:** Adds a note to `profiling_suite.rs`
  clarifying Criterion's `BENCH_SAMPLE_SIZE` clamping behavior.
---
 benches/profiling_suite.rs                    |   3 +-
 docs/dev/python.md                            |  86 ++++
 pyproject.toml                                |   9 +-
 scripts/benchmark_utils.py                    | 229 +++++----
 scripts/compare_storage_backends.py           |  29 +-
 scripts/hardware_utils.py                     |  65 +--
 scripts/postprocess_changelog.py              |  75 +--
 scripts/tests/conftest.py                     |  23 +-
 scripts/tests/test_benchmark_models.py        |  62 +--
 scripts/tests/test_benchmark_utils.py         | 469 ++++++++++--------
 .../tests/test_compare_storage_backends.py    |  44 +-
 scripts/tests/test_hardware_utils.py          |  85 ++--
 scripts/tests/test_subprocess_utils.py        |  60 +--
 scripts/tests/test_tag_release.py             |   2 +-
 14 files changed, 701 insertions(+), 540 deletions(-)
 create mode 100644 docs/dev/python.md

diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs
index 194c3785..2a635cf5 100644
--- a/benches/profiling_suite.rs
+++ b/benches/profiling_suite.rs
@@ -45,7 +45,8 @@
 //! - `PROFILING_DEV_MODE`: Set to "1", "true", "yes", or "on" for reduced scale (faster iteration)
 //! - `BENCH_MEASUREMENT_TIME`: Override measurement time in seconds (minimum: 1, guards against invalid values)
 //! - `BENCH_PERCENTILE`: Configure percentile for memory analysis (1-100, default: 95)
-//! - `BENCH_SAMPLE_SIZE`: Override Criterion sample size (default: 10)
+//! - `BENCH_SAMPLE_SIZE`: Override Criterion sample size (default: 10; values below 10 are clamped to 10, so
+//!   `BENCH_SAMPLE_SIZE=5` still runs 10 samples)
 //! - `BENCH_WARMUP_SECS`: Override Criterion warm-up time in seconds (default: 10)
 //!
 //! Example with custom configuration:
diff --git a/docs/dev/python.md b/docs/dev/python.md
new file mode 100644
index 00000000..7b803142
--- /dev/null
+++ b/docs/dev/python.md
@@ -0,0 +1,86 @@
+# Python Development Guidelines
+
+Guidance for Python automation under `scripts/`.
+
+The Rust library is the primary product, but the Python benchmark, changelog,
+hardware, and release utilities are part of the trusted development workflow.
+Keep them typed and predictable so failures are visible in CI instead of being
+hidden behind loose mocks or broad exception handling.
+
+---
+
+## Validation
+
+Run the Python validators through the repository toolchain:
+
+```bash
+uv run ruff check scripts/
+uv run ty check scripts/ --error all
+uv run pytest scripts/tests
+```
+
+`ty check scripts/ --error all` is the type-checking authority. Prefer reducing
+untyped surfaces in code and tests over adding more `ty` configuration.
+
+`just check` also runs Python formatting checks, Ruff, and `ty` as part of the
+normal repository validation bundle.
+
+---
+
+## Typing
+
+- Add return annotations to functions and methods.
+- Prefer concrete standard-library types over `Any`, bare `dict`, or bare
+  `Mock` when the shape is known.
+- Keep helper signatures precise enough that `ty` can validate the call sites.
+- Avoid growing type-checker configuration unless a demonstrated false positive
+  cannot be solved cleanly in code.
+
+---
+
+## Subprocess Mocks
+
+When mocking command wrappers such as `run_git_command()`,
+`run_cargo_command()`, or `run_safe_command()`, prefer real typed subprocess
+results:
+
+```python
+import subprocess
+
+
+def completed_process(stdout: str = "", *, returncode: int = 0) -> subprocess.CompletedProcess[str]:
+    """Return a typed subprocess result for command-wrapper mocks."""
+    return subprocess.CompletedProcess(args=[], returncode=returncode, stdout=stdout, stderr="")
+```
+
+Use that helper instead of ad-hoc mocks such as:
+
+```python
+mock_result = Mock()
+mock_result.stdout = "..."
+mock_result.returncode = 0
+```
+
+Structured results make tests closer to production behavior and give `ty` real
+attributes to check.
+
+---
+
+## Exceptions
+
+- Catch specific recoverable error families in production code. Avoid
+  `except Exception`.
+- In tests, raise concrete exceptions that match the production recovery path
+  (`OSError`, `RuntimeError`, `subprocess.CalledProcessError`,
+  `subprocess.TimeoutExpired`, etc.).
+- Do not use raw `Exception` in mocks just to force a fallback branch; doing so
+  weakens the contract that the production code is meant to enforce.
+
+---
+
+## Test Helpers
+
+Put reusable typed test helpers near the top of the test module or in
+`scripts/tests/conftest.py` when they are shared. Prefer one helper that returns
+the real structured type over repeating partially configured mocks throughout a
+file.
diff --git a/pyproject.toml b/pyproject.toml
index e8533ed3..a07be6f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,6 +67,10 @@ select = [
     "E",
     "F",
     "W",
+    "ANN201",
+    "ANN202",
+    "ANN204",
+    "C90",
     "I",
     "N",
     "UP",
@@ -138,7 +142,7 @@ ignore = [
     "PLR2004", # Magic value used in comparison - acceptable for CLI constants and thresholds
     "FBT001", # Boolean-typed positional argument - appropriate for CLI flag arguments
     "FBT002", # Boolean default positional argument - standard CLI pattern
-    "BLE001", # Do not catch blind exception - intentional defensive programming for CLI robustness
+    # "BLE001" - Re-enabled: broad exception catches must name recoverable error families
     # "S603" - Re-enabled: subprocess call: check for execution of untrusted input - now using secure subprocess wrappers
     # "S607" - Re-enabled: Starting a process with a partial executable path - now using full paths
     "T201", # print found - appropriate for CLI output and user feedback
@@ -161,6 +165,9 @@ ignore = [
 # docstrings for each pytest case while still checking production scripts.
 "**/tests/test_*.py" = [ "S101", "SLF001", "D101", "D102", "D103" ]
 
+[tool.ruff.lint.mccabe]
+max-complexity = 10
+
 # Import sorting and organization configuration
 [tool.ruff.lint.isort]
 known-first-party = [
diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py
index dbf709ad..4a10e097 100755
--- a/scripts/benchmark_utils.py
+++ b/scripts/benchmark_utils.py
@@ -31,7 +31,7 @@
 from urllib.parse import urlparse
 from uuid import uuid4
 
-from packaging.version import Version
+from packaging.version import InvalidVersion, Version
 
 logger = logging.getLogger(__name__)
 
@@ -98,6 +98,17 @@
             run_safe_command,
         )
 
+_RECOVERABLE_CLI_ERRORS: tuple[type[BaseException], ...] = (
+    ExecutableNotFoundError,
+    ProjectRootNotFoundError,
+    OSError,
+    RuntimeError,
+    TypeError,
+    ValueError,
+    KeyError,
+    subprocess.SubprocessError,
+)
+
 # Trusted benchmark commands use this Cargo profile so local, CI, and release
 # numbers are generated with the same ThinLTO/codegen-units settings.
 TRUSTED_BENCH_PROFILE = "perf"
@@ -185,13 +196,14 @@ def _ci_performance_manifest_ids_path(criterion_dir: Path) -> Path:
 def _write_ci_performance_manifest_ids(project_root: Path, stdout: str) -> None:
     """Persist the runtime ci_performance_suite manifest beside Criterion results."""
     if not isinstance(stdout, str):
-        return
+        msg = "ci_performance_suite completed but stdout was not text; cannot extract api_benchmark manifest"
+        raise TypeError(msg)
     criterion_dir = project_root / "target" / "criterion"
     manifest_path = _ci_performance_manifest_ids_path(criterion_dir)
     manifest_ids = _parse_ci_performance_manifest_ids(stdout)
     if not manifest_ids:
-        manifest_path.unlink(missing_ok=True)
-        return
+        msg = f"ci_performance_suite completed but emitted no api_benchmark manifest in stdout: {stdout!r}"
+        raise RuntimeError(msg)
     criterion_dir.mkdir(parents=True, exist_ok=True)
     manifest_path.write_text(
         "\n".join(sorted(manifest_ids)) + "\n",
@@ -211,6 +223,34 @@ def _load_ci_performance_manifest_ids(criterion_dir: Path) -> set[str] | None:
     return manifest_ids or None
 
 
+def _collect_ci_suite_estimates(criterion_dir: Path) -> list[tuple[tuple[str, ...], Path]]:
+    """Collect deduplicated ci_performance_suite estimates, preferring new over base."""
+    manifest_ids = _load_ci_performance_manifest_ids(criterion_dir)
+    estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {}
+
+    for estimates_path in sorted(criterion_dir.glob("**/estimates.json")):
+        if estimates_path.parent.name not in {"base", "new"}:
+            continue
+
+        try:
+            path_parts = estimates_path.relative_to(criterion_dir).parts[:-2]
+        except ValueError:
+            continue
+
+        if not path_parts or ci_suite_group_key(path_parts[0]) is None:
+            continue
+
+        benchmark_id = "/".join(path_parts)
+        if manifest_ids is not None and benchmark_id not in manifest_ids:
+            continue
+
+        existing = estimates_by_id.get(path_parts)
+        if existing is None or (existing[0] == "base" and estimates_path.parent.name == "new"):
+            estimates_by_id[path_parts] = (estimates_path.parent.name, estimates_path)
+
+    return [(path_parts, estimates_path) for path_parts, (_, estimates_path) in estimates_by_id.items()]
+
+
 # Development mode arguments - centralized to keep baseline generation and comparison in sync
 # Reduces samples for faster iteration during development (10x faster than full benchmarks)
 #
@@ -296,7 +336,7 @@ def _sampling_metadata(dev_mode: bool) -> dict[str, str]:
 class PerformanceSummaryGenerator:
     """Generate performance summary markdown from benchmark results."""
 
-    def __init__(self, project_root: Path):
+    def __init__(self, project_root: Path) -> None:
         """Initialize with project root directory."""
         self.project_root = project_root
         # Prefer CI artifact location; fall back to benches/ for local runs
@@ -366,7 +406,7 @@ def generate_summary(
             print(f"📊 Generated performance summary: {output_path}")
             return True
 
-        except Exception as e:
+        except _RECOVERABLE_CLI_ERRORS as e:
             print(f"❌ Failed to generate performance summary: {e}", file=sys.stderr)
             return False
 
@@ -399,7 +439,7 @@ def _generate_markdown_content(self, generator_name: str | None = None) -> str:
             commit_hash = get_git_commit_hash(cwd=self.project_root)
             if commit_hash and commit_hash != "unknown":
                 lines.append(f"**Git Commit**: {commit_hash}")
-        except Exception as e:
+        except _RECOVERABLE_CLI_ERRORS as e:
             logger.debug("Could not get git commit hash: %s", e)
 
         # Add hardware information
@@ -414,7 +454,7 @@ def _generate_markdown_content(self, generator_name: str | None = None) -> str:
                     f"**Rust**: {hw_info['RUST']}",
                 ],
             )
-        except Exception as e:
+        except _RECOVERABLE_CLI_ERRORS as e:
             logger.debug("Could not get hardware info: %s", e)
             lines.append("**Hardware**: Unknown")
 
@@ -470,7 +510,7 @@ def _get_current_version(self) -> str:
             if result.startswith("v"):
                 return result[1:]  # Remove 'v' prefix
             return "unknown"
-        except Exception:
+        except _RECOVERABLE_CLI_ERRORS:
             # Fallback: try to get any recent tag
             try:
                 cp = run_git_command(["tag", "-l", "--sort=-version:refname"], cwd=self.project_root)
@@ -481,7 +521,7 @@ def _get_current_version(self) -> str:
                         if tag.startswith("v") and len(tag) > 1:
                             return tag[1:]
                 return "unknown"
-            except Exception:
+            except _RECOVERABLE_CLI_ERRORS:
                 return "unknown"
 
     def _get_version_date(self) -> str:
@@ -502,7 +542,7 @@ def _get_version_date(self) -> str:
 
             # Fallback to current date
             return datetime.now(UTC).strftime("%Y-%m-%d")
-        except Exception:
+        except _RECOVERABLE_CLI_ERRORS:
             return datetime.now(UTC).strftime("%Y-%m-%d")
 
     def _run_circumsphere_benchmarks(self, cargo_profile: str | None = None) -> tuple[bool, dict[str, str] | None]:
@@ -538,11 +578,11 @@ def _run_circumsphere_benchmarks(self, cargo_profile: str | None = None) -> tupl
             print("✅ Circumsphere benchmarks completed successfully")
             return True, numerical_accuracy_data
 
-        except Exception as e:
+        except _RECOVERABLE_CLI_ERRORS as e:
             print(f"❌ Error running circumsphere benchmarks: {e}")
             return False, None
 
-    def _run_ci_performance_suite(self, cargo_profile: str | None = None) -> bool:
+    def _run_ci_performance_suite(self, cargo_profile: str | None = None, *, use_dev_mode: bool = False) -> bool:
         """
         Run the public API CI performance suite to generate fresh Criterion data.
 
@@ -550,6 +590,9 @@ def _run_ci_performance_suite(self, cargo_profile: str | None = None) -> bool:
             cargo_profile: Cargo profile for the fresh run. Defaults to
                 :data:`TRUSTED_BENCH_PROFILE` so summary, baseline, and
                 comparison measurements use the same optimized profile.
+            use_dev_mode: When true, pass reduced Criterion sampling arguments
+                for local development feedback. Full sampling is used by
+                default.
 
         Returns:
             True if the benchmark completed successfully, False otherwise.
@@ -558,7 +601,9 @@ def _run_ci_performance_suite(self, cargo_profile: str | None = None) -> bool:
             print("🔄 Running ci_performance_suite benchmarks...")
 
             profile = cargo_profile if cargo_profile is not None else TRUSTED_BENCH_PROFILE
-            cargo_args = ["bench", "--profile", profile, "--bench", "ci_performance_suite", "--", *DEV_MODE_BENCH_ARGS]
+            cargo_args = ["bench", "--profile", profile, "--bench", "ci_performance_suite"]
+            if use_dev_mode:
+                cargo_args.extend(["--", *DEV_MODE_BENCH_ARGS])
 
             result = run_cargo_command(
                 cargo_args,
@@ -623,7 +668,7 @@ def _parse_numerical_accuracy_output(self, stdout: str) -> dict[str, str] | None
 
             return accuracy_data or None
 
-        except Exception:
+        except (IndexError, TypeError, ValueError):
             return None
 
     def _get_numerical_accuracy_analysis(self) -> list[str]:
@@ -835,7 +880,7 @@ def _parse_single_method_result(self, criterion_path: Path, method_name: str) ->
                 mean_ns = estimates["mean"]["point_estimate"]
                 return CircumspherePerformanceData(method=method_name, time_ns=mean_ns)
 
-            except Exception as e:
+            except (OSError, KeyError, TypeError, ValueError, json.JSONDecodeError) as e:
                 print(f"⚠️ Could not parse {estimates_file}: {e}")
 
         return None
@@ -1014,34 +1059,8 @@ def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]:
         if not criterion_dir.exists():
             return []
 
-        manifest_ids = _load_ci_performance_manifest_ids(criterion_dir)
-        estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {}
-        for estimates_path in sorted(criterion_dir.glob("**/estimates.json")):
-            if estimates_path.parent.name not in {"base", "new"}:
-                continue
-
-            try:
-                path_parts = estimates_path.relative_to(criterion_dir).parts[:-2]
-            except ValueError:
-                continue
-
-            if not path_parts:
-                continue
-
-            benchmark_id = "/".join(path_parts)
-            if manifest_ids is not None and benchmark_id not in manifest_ids:
-                continue
-
-            group_key = ci_suite_group_key(path_parts[0])
-            if group_key is None:
-                continue
-
-            existing = estimates_by_id.get(path_parts)
-            if existing is None or (existing[0] == "base" and estimates_path.parent.name == "new"):
-                estimates_by_id[path_parts] = (estimates_path.parent.name, estimates_path)
-
         results = []
-        for path_parts, (_, estimates_path) in estimates_by_id.items():
+        for path_parts, estimates_path in _collect_ci_suite_estimates(criterion_dir):
             estimates = self._load_criterion_estimate(estimates_path)
             if estimates is None:
                 continue
@@ -1264,7 +1283,7 @@ def _parse_baseline_results(self) -> list[str]:
             if benchmarks:
                 lines.extend(format_benchmark_tables(benchmarks))
 
-        except Exception as e:
+        except (OSError, TypeError, ValueError, KeyError) as e:
             lines.extend(
                 [
                     "### Baseline Results",
@@ -1311,7 +1330,7 @@ def _parse_comparison_results(self) -> list[str]:
                     ],
                 )
 
-        except Exception:
+        except OSError:
             lines.extend(
                 [
                     "### Comparison Results",
@@ -1388,23 +1407,13 @@ def _get_dynamic_analysis_sections(self) -> list[str]:
 
         return lines
 
-    def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) -> list[tuple[str, float, str]]:
-        """
-        Analyze performance data to generate dynamic rankings.
-
-        Args:
-            test_data: List of CircumsphereTestCase objects
-
-        Returns:
-            List of tuples (method_name, average_performance, description)
-        """
+    @staticmethod
+    def _collect_method_performance(test_data: list[CircumsphereTestCase]) -> tuple[dict[str, list[float]], dict[str, list[str]]]:
+        """Collect per-method timings and dimension wins, excluding trivial boundary cases."""
         method_totals: dict[str, list[float]] = {"insphere": [], "insphere_distance": [], "insphere_lifted": []}
         method_wins: dict[str, list[str]] = {"insphere": [], "insphere_distance": [], "insphere_lifted": []}
 
-        # Collect performance data from non-boundary test cases only
-        # Boundary cases are trivial outliers with early-exit optimizations
         for test_case in test_data:
-            # Skip boundary vertex cases as they're trivial outliers (3-4ns)
             if test_case.is_boundary_case:
                 continue
 
@@ -1415,6 +1424,36 @@ def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) ->
             for method_name, perf_data in test_case.methods.items():
                 method_totals[method_name].append(perf_data.time_ns)
 
+        return method_totals, method_wins
+
+    @staticmethod
+    def _ranking_description(method: str, avg_time: float, fastest_time: float, method_wins: dict[str, list[str]]) -> str:
+        """Describe relative method performance for the dynamic ranking table."""
+        if avg_time == float("inf"):
+            return "No benchmark data available"
+
+        slowdown = (avg_time / fastest_time) if fastest_time > 0 and fastest_time != float("inf") else 1
+        wins = method_wins.get(method, [])
+        if not wins:
+            return f"~{slowdown:.1f}x slower than fastest on average"
+
+        dims_text = ", ".join(sorted(set(wins)))
+        if slowdown > 1.01:
+            return f"(best in {dims_text}) - ~{slowdown:.1f}x average vs fastest"
+        return f"(best in {dims_text}) - Best average performance"
+
+    def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) -> list[tuple[str, float, str]]:
+        """
+        Analyze performance data to generate dynamic rankings.
+
+        Args:
+            test_data: List of CircumsphereTestCase objects
+
+        Returns:
+            List of tuples (method_name, average_performance, description)
+        """
+        method_totals, method_wins = self._collect_method_performance(test_data)
+
         # Calculate averages and determine ranking
         method_averages = {}
         for method, times in method_totals.items():
@@ -1426,33 +1465,12 @@ def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) ->
         # Sort by performance (lowest time first)
         sorted_methods = sorted(method_averages.items(), key=lambda x: x[1])
 
-        # Generate descriptions with relative performance and dimension wins
         rankings = []
         if sorted_methods:
             fastest_time = sorted_methods[0][1]
 
             for method, avg_time in sorted_methods:
-                # Handle missing data (float("inf") from no samples)
-                if avg_time == float("inf"):
-                    desc = "No benchmark data available"
-                    rankings.append((method, avg_time, desc))
-                    continue
-
-                slowdown = (avg_time / fastest_time) if fastest_time > 0 and fastest_time != float("inf") else 1
-
-                # Generate description based on actual wins by dimension
-                wins = method_wins.get(method, [])
-                if wins:
-                    dims_text = ", ".join(sorted(set(wins)))
-                    desc = (
-                        f"(best in {dims_text}) - ~{slowdown:.1f}x average vs fastest"
-                        if slowdown > 1.01
-                        else f"(best in {dims_text}) - Best average performance"
-                    )
-                else:
-                    desc = f"~{slowdown:.1f}x slower than fastest on average"
-
-                rankings.append((method, avg_time, desc))
+                rankings.append((method, avg_time, self._ranking_description(method, avg_time, fastest_time, method_wins)))
 
         return rankings
 
@@ -1697,31 +1715,8 @@ def _ci_suite_input_points(path_parts: tuple[str, ...]) -> int | None:
     @staticmethod
     def _process_ci_performance_suite_results(criterion_dir: Path) -> list[BenchmarkData]:
         """Discover ci_performance_suite Criterion results with expanded benchmark IDs."""
-        manifest_ids = _load_ci_performance_manifest_ids(criterion_dir)
-        estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {}
-
-        for estimates_path in sorted(criterion_dir.glob("**/estimates.json")):
-            if estimates_path.parent.name not in {"base", "new"}:
-                continue
-
-            try:
-                path_parts = estimates_path.relative_to(criterion_dir).parts[:-2]
-            except ValueError:
-                continue
-
-            if not path_parts or ci_suite_group_key(path_parts[0]) is None:
-                continue
-
-            benchmark_id = "/".join(path_parts)
-            if manifest_ids is not None and benchmark_id not in manifest_ids:
-                continue
-
-            existing = estimates_by_id.get(path_parts)
-            if existing is None or (existing[0] == "base" and estimates_path.parent.name == "new"):
-                estimates_by_id[path_parts] = (estimates_path.parent.name, estimates_path)
-
         results: list[BenchmarkData] = []
-        for path_parts, (_, estimates_path) in estimates_by_id.items():
+        for path_parts, estimates_path in _collect_ci_suite_estimates(criterion_dir):
             benchmark_id = "/".join(path_parts)
             dimension = ci_suite_dimension(benchmark_id)
             if dimension == "n/a":
@@ -1867,7 +1862,7 @@ def find_criterion_results(target_dir: Path) -> list[BenchmarkData]:
 class BaselineGenerator:
     """Generate performance baselines from benchmark data."""
 
-    def __init__(self, project_root: Path, tag: str | None = None):
+    def __init__(self, project_root: Path, tag: str | None = None) -> None:
         """Initialize baseline generation for a project root and optional tag."""
         self.project_root = project_root
         self.hardware = HardwareInfo()
@@ -1948,7 +1943,7 @@ def generate_baseline(self, dev_mode: bool = False, output_file: Path | None = N
                 print("=== end stdout ===\n", file=sys.stderr)
             logger.exception("Error in generate_baseline")
             return False
-        except Exception:
+        except _RECOVERABLE_CLI_ERRORS:
             logger.exception("Error in generate_baseline")
             return False
 
@@ -1962,7 +1957,7 @@ def _write_baseline_file(self, benchmark_results: list[BenchmarkData], output_fi
         try:
             # Use secure subprocess wrapper for git command
             git_commit = get_git_commit_hash(cwd=self.project_root)
-        except Exception:
+        except _RECOVERABLE_CLI_ERRORS:
             git_commit = "unknown"
 
         hardware_info = self.hardware.format_hardware_info(cwd=self.project_root)
@@ -1990,7 +1985,7 @@ def _write_baseline_file(self, benchmark_results: list[BenchmarkData], output_fi
 class PerformanceComparator:
     """Compare current performance against baseline."""
 
-    def __init__(self, project_root: Path):
+    def __init__(self, project_root: Path) -> None:
         """Initialize comparison state for benchmark results under a project root."""
         self.project_root = project_root
         self.hardware = HardwareInfo()
@@ -2090,7 +2085,7 @@ def compare_with_baseline(
             self._write_error_file(output_file, "Benchmark execution error", str(e))
             logger.exception("Error in compare_with_baseline")
             return False, False
-        except Exception as e:
+        except _RECOVERABLE_CLI_ERRORS as e:
             self._write_error_file(output_file, "Benchmark execution error", str(e))
             logger.exception("Error in compare_with_baseline")
             return False, False
@@ -2213,7 +2208,7 @@ def _prepare_comparison_metadata(self, baseline_content: str) -> dict[str, str]:
 
         try:
             git_commit = get_git_commit_hash(cwd=self.project_root)
-        except Exception:
+        except _RECOVERABLE_CLI_ERRORS:
             git_commit = "unknown"
 
         # Parse baseline metadata
@@ -2513,7 +2508,7 @@ def _write_error_file(self, output_file: Path, error_title: str, error_detail: s
                 f.write(f"Details: {error_detail}\n\n")
                 f.write("This error prevented the benchmark comparison from completing successfully.\n")
                 f.write("Please check the CI logs for more information.\n")
-        except Exception:
+        except OSError:
             logger.exception("Failed to write error file")
 
 
@@ -2592,7 +2587,7 @@ def create_metadata(tag_name: str, output_dir: Path) -> bool:
             print(f"📦 Created metadata file: {metadata_file}")
             return True
 
-        except Exception as e:
+        except (OSError, TypeError, ValueError) as e:
             print(f"❌ Failed to create metadata: {e}", file=sys.stderr)
             return False
 
@@ -2628,7 +2623,7 @@ def display_baseline_summary(baseline_file: Path) -> bool:
 
             return True
 
-        except Exception as e:
+        except OSError as e:
             print(f"❌ Failed to display baseline summary: {e}", file=sys.stderr)
             return False
 
@@ -2798,7 +2793,7 @@ def _version_key(p: Path) -> tuple[int, Version | str, str]:
                     version = Version(version_str)
                     # Valid version: priority 1 (sorts first when reversed)
                     return (1, version, p.name)
-                except Exception as e:
+                except InvalidVersion as e:
                     # Invalid version format, treat as non-semver
                     logger.debug("Invalid version format in %s: %s", p.name, e)
             # Fallback: put non-matching names last (priority 0, sorts after valid versions when reversed)
@@ -2934,7 +2929,7 @@ def determine_benchmark_skip(baseline_commit: str, current_commit: str) -> tuple
 
         except subprocess.CalledProcessError:
             return False, "baseline_commit_not_found"
-        except Exception:
+        except _RECOVERABLE_CLI_ERRORS:
             return False, "error_checking_changes"
 
     @staticmethod
@@ -3001,7 +2996,7 @@ def run_regression_test(baseline_path: Path, bench_timeout: int = 1800, dev_mode
             print("✅ No significant performance regressions detected")
             return True
 
-        except Exception as e:
+        except _RECOVERABLE_CLI_ERRORS as e:
             print(f"❌ Error running regression test: {e}", file=sys.stderr)
             return False
 
@@ -3793,7 +3788,7 @@ def execute_command(args: argparse.Namespace, project_root: Path) -> None:
         return
 
 
-def main():
+def main() -> None:
     """Command-line interface for benchmark utilities."""
     parser = create_argument_parser()
     args = parser.parse_args()
diff --git a/scripts/compare_storage_backends.py b/scripts/compare_storage_backends.py
index 40fed144..9b8207f6 100644
--- a/scripts/compare_storage_backends.py
+++ b/scripts/compare_storage_backends.py
@@ -29,6 +29,7 @@
 import logging
 import re
 import shutil
+import subprocess
 import sys
 from datetime import UTC, datetime
 from json import loads
@@ -36,18 +37,28 @@
 
 try:
     from benchmark_utils import TRUSTED_BENCH_PROFILE  # type: ignore[import-not-found]
-    from subprocess_utils import find_project_root, run_cargo_command  # type: ignore[import-not-found]
+    from subprocess_utils import ExecutableNotFoundError, find_project_root, run_cargo_command  # type: ignore[import-not-found]
 except ModuleNotFoundError:
     from scripts.benchmark_utils import TRUSTED_BENCH_PROFILE  # type: ignore[no-redef,import-not-found]
-    from scripts.subprocess_utils import find_project_root, run_cargo_command  # type: ignore[no-redef,import-not-found]
+    from scripts.subprocess_utils import ExecutableNotFoundError, find_project_root, run_cargo_command  # type: ignore[no-redef,import-not-found]
 
 logger = logging.getLogger(__name__)
 
+_RECOVERABLE_COMPARISON_ERRORS: tuple[type[BaseException], ...] = (
+    ExecutableNotFoundError,
+    OSError,
+    RuntimeError,
+    TypeError,
+    ValueError,
+    KeyError,
+    subprocess.SubprocessError,
+)
+
 
 class StorageBackendComparator:
     """Compare performance between SlotMap and DenseSlotMap storage backends."""
 
-    def __init__(self, project_root: Path):
+    def __init__(self, project_root: Path) -> None:
         """Initialize with project root directory."""
         self.project_root = project_root
         self.criterion_dir = project_root / "target" / "criterion"
@@ -117,7 +128,7 @@ def run_comparison(
             print(f"\n✅ Comparison report saved: {output_path}")
             return True
 
-        except Exception as e:
+        except _RECOVERABLE_COMPARISON_ERRORS as e:
             print(f"❌ Comparison failed: {e}", file=sys.stderr)
             logger.exception("Comparison failed")
             return False
@@ -199,7 +210,7 @@ def _run_benchmark(self, benchmark_name: str, use_dense_slotmap: bool, dev_mode:
 
             return results
 
-        except Exception:
+        except _RECOVERABLE_COMPARISON_ERRORS:
             logger.exception("Benchmark execution failed")
             return None
 
@@ -247,10 +258,10 @@ def _parse_criterion_output(self, output: str) -> dict:
                         )
                         json_found = True
                         logger.debug("Parsed JSON for benchmark: %s", name)
-                    except Exception as e:
+                    except (OSError, KeyError, TypeError, ValueError) as e:
                         logger.debug("Failed to parse JSON from %s: %s", path, e)
                         continue
-        except Exception:
+        except OSError:
             logger.debug("JSON parsing failed, falling back to regex")
 
         # Fallback to stdout regex parsing if no JSON found
@@ -446,7 +457,7 @@ def _generate_comparison_report(
         return "\n".join(lines)
 
 
-def main():
+def main() -> None:
     """Main entry point for storage backend comparison."""
     parser = argparse.ArgumentParser(
         description="Compare SlotMap vs DenseSlotMap storage backend performance",
@@ -510,7 +521,7 @@ def main():
 
         sys.exit(0 if success else 1)
 
-    except Exception as e:
+    except _RECOVERABLE_COMPARISON_ERRORS as e:
         print(f"❌ Error: {e}", file=sys.stderr)
         logger.exception("Fatal error")
         sys.exit(1)
diff --git a/scripts/hardware_utils.py b/scripts/hardware_utils.py
index 9be2244e..cfc22348 100755
--- a/scripts/hardware_utils.py
+++ b/scripts/hardware_utils.py
@@ -37,7 +37,7 @@
 class HardwareInfo:
     """Cross-platform hardware information detection."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         """Initialize cached platform identifiers for hardware probes."""
         self.os_type = platform.system()
         self.machine = platform.machine()
@@ -118,33 +118,29 @@ def _get_linux_cpu_model(self) -> str:
 
         return "Unknown"
 
-    def _get_linux_cpu_cores(self) -> str:
-        """
-        Get CPU core count on Linux.
-
-        Returns:
-            CPU core count or "Unknown"
-        """
-        if not shutil.which("lscpu"):
-            # Fallback: parse physical core count from /proc/cpuinfo
-            try:
-                physical_cores: set[tuple[str, str]] = set()
-                with open("/proc/cpuinfo", encoding="utf-8") as f:
-                    physical_id = core_id = None
-                    for line in f:
-                        if line.startswith("physical id"):
-                            physical_id = line.split(":", 1)[1].strip()
-                        elif line.startswith("core id"):
-                            core_id = line.split(":", 1)[1].strip()
-                        if physical_id is not None and core_id is not None:
-                            physical_cores.add((physical_id, core_id))
-                            physical_id = core_id = None
-                if physical_cores:
-                    return str(len(physical_cores))
-            except (FileNotFoundError, PermissionError, ValueError):
-                return "Unknown"
+    def _get_linux_cpu_cores_from_proc(self) -> str:
+        """Parse physical CPU cores from /proc/cpuinfo when lscpu is unavailable."""
+        try:
+            physical_cores: set[tuple[str, str]] = set()
+            with open("/proc/cpuinfo", encoding="utf-8") as f:
+                physical_id = core_id = None
+                for line in f:
+                    if line.startswith("physical id"):
+                        physical_id = line.split(":", 1)[1].strip()
+                    elif line.startswith("core id"):
+                        core_id = line.split(":", 1)[1].strip()
+                    if physical_id is not None and core_id is not None:
+                        physical_cores.add((physical_id, core_id))
+                        physical_id = core_id = None
+            if physical_cores:
+                return str(len(physical_cores))
+        except (FileNotFoundError, PermissionError, ValueError):
             return "Unknown"
 
+        return "Unknown"
+
+    def _get_linux_cpu_cores_from_lscpu(self) -> str:
+        """Parse physical CPU cores from lscpu output."""
         try:
             lscpu_output = self._run_command(["lscpu"])
             cores_per_socket = None
@@ -159,10 +155,21 @@ def _get_linux_cpu_cores(self) -> str:
             if cores_per_socket is not None and sockets is not None:
                 return str(cores_per_socket * sockets)
         except (subprocess.CalledProcessError, ValueError, IndexError):
-            pass
+            return "Unknown"
 
         return "Unknown"
 
+    def _get_linux_cpu_cores(self) -> str:
+        """
+        Get CPU core count on Linux.
+
+        Returns:
+            CPU core count or "Unknown"
+        """
+        if not shutil.which("lscpu"):
+            return self._get_linux_cpu_cores_from_proc()
+        return self._get_linux_cpu_cores_from_lscpu()
+
     def _get_linux_cpu_threads(self) -> str:
         """
         Get CPU thread count on Linux.
@@ -353,7 +360,7 @@ def get_rust_info(self, cwd: Path | None = None) -> tuple[str, str]:
                         break
         except subprocess.CalledProcessError as e:
             logger.debug("rustc command failed: %s", e)
-        except Exception as e:
+        except (OSError, subprocess.SubprocessError) as e:
             logger.debug("Failed to get Rust info: %s", e)
 
         return rust_version, rust_target
@@ -638,7 +645,7 @@ def _extract_memory_value(memory_str: str) -> float | None:
         return None
 
 
-def main():
+def main() -> None:
     """Command-line interface for hardware utilities."""
     parser = argparse.ArgumentParser(description="Cross-platform hardware information detection and comparison")
     parser.add_argument("command", choices=["info", "kv", "compare"], help="Command to run")
diff --git a/scripts/postprocess_changelog.py b/scripts/postprocess_changelog.py
index 8fd91726..1ee4ee3b 100644
--- a/scripts/postprocess_changelog.py
+++ b/scripts/postprocess_changelog.py
@@ -453,6 +453,42 @@ def _process_code_fence(line: str, result: list[str], in_code_block: bool) -> tu
     return True, in_code_block
 
 
+def _update_entry_summary(line: str, current_entry_summary: str | None) -> str | None:
+    """Track the active changelog entry summary for squash-body cleanup; commit-linked bullets set it, h1-h3 headings clear it."""
+    if line.startswith("- ") and _COMMIT_LINK_RE.search(line):
+        return _plain_summary(line)
+    if line.startswith(("### ", "## ", "# ")):
+        return None
+    return current_entry_summary
+
+
+def _should_skip_duplicate_heading(
+    line: str,
+    result: list[str],
+    current_entry_summary: str | None,
+    is_isolated_body_heading: bool,
+) -> tuple[bool, bool]:
+    """Return (skip, drop_next_blank) for a duplicate squash heading; the blank is dropped only if *result* ends blank."""
+    if is_isolated_body_heading and _is_duplicate_squash_heading(line, current_entry_summary):
+        return True, bool(result and not result[-1].strip())
+    return False, False
+
+
+def _normalize_body_line(line: str, lines: list[str], idx: int, result: list[str], current_entry_summary: str | None) -> str:
+    """Apply markdown hygiene transforms to a non-code line; may first append a blank line to *result*."""
+    is_isolated_body_heading = _is_isolated_body_heading(lines, idx)
+    line = _deindent_orphan(line, lines, idx)
+    line = _normalize_indented_heading(line)
+
+    if is_isolated_body_heading:
+        line = _normalize_squash_heading(line, nested=current_entry_summary is not None)
+
+    if _needs_blank_before(line.lstrip(), result):
+        result.append("")
+
+    return _reflow_line(line) if len(line) > MAX_LINE_WIDTH else line
+
+
 def postprocess(path: Path) -> None:
     """Read *path*, apply hygiene fixes, and write it back."""
     text = path.read_text(encoding="utf-8")
@@ -486,44 +522,25 @@ def postprocess(path: Path) -> None:
         # --- MD030: normalise spaces after list marker ---
         line = _LIST_MARKER_SPACE_RE.sub(r"\1 ", line)
 
-        if line.startswith("- ") and _COMMIT_LINK_RE.search(line):
-            current_entry_summary = _plain_summary(line)
-        elif line.startswith(("### ", "## ", "# ")):
-            current_entry_summary = None
-
+        current_entry_summary = _update_entry_summary(line, current_entry_summary)
         is_isolated_body_heading = _is_isolated_body_heading(lines, idx)
 
         # --- GitHub squash bodies: collapse duplicate pseudo-headings ---
-        if is_isolated_body_heading and _is_duplicate_squash_heading(line, current_entry_summary):
-            drop_next_blank = bool(result and not result[-1].strip())
+        should_skip, next_drop_blank = _should_skip_duplicate_heading(
+            line,
+            result,
+            current_entry_summary,
+            is_isolated_body_heading,
+        )
+        if should_skip:
+            drop_next_blank = next_drop_blank
             continue
         if drop_next_blank and not line.strip():
             drop_next_blank = False
             continue
         drop_next_blank = False
 
-        # --- MD007: de-indent orphaned body list items ---
-        line = _deindent_orphan(line, lines, idx)
-        stripped = line.lstrip()
-
-        # --- MD023: headings must start at the beginning of the line ---
-        line = _normalize_indented_heading(line)
-        stripped = line.lstrip()
-
-        # --- GitHub squash bodies: render pseudo-headings as prose ---
-        if is_isolated_body_heading:
-            line = _normalize_squash_heading(line, nested=current_entry_summary is not None)
-        stripped = line.lstrip()
-
-        # --- MD032: blank line before a list item that follows prose ---
-        if _needs_blank_before(stripped, result):
-            result.append("")
-
-        # --- reflow long lines ---
-        if len(line) > MAX_LINE_WIDTH:
-            result.append(_reflow_line(line))
-        else:
-            result.append(line)
+        result.append(_normalize_body_line(line, lines, idx, result, current_entry_summary))
 
     # 1. Reassemble and strip trailing blank lines.
     text = "\n".join(result)
diff --git a/scripts/tests/conftest.py b/scripts/tests/conftest.py
index 35af23eb..3a102336 100644
--- a/scripts/tests/conftest.py
+++ b/scripts/tests/conftest.py
@@ -5,10 +5,11 @@
 """
 
 import os
+import subprocess
 import sys
-from contextlib import contextmanager
+from collections.abc import Callable, Iterator
+from contextlib import AbstractContextManager, contextmanager
 from pathlib import Path
-from unittest.mock import Mock
 
 import pytest
 
@@ -20,7 +21,7 @@
 
 
 @pytest.fixture
-def temp_chdir():
+def temp_chdir() -> Callable[[os.PathLike | str], AbstractContextManager[None]]:
     """
     Pytest fixture for temporarily changing working directory.
 
@@ -36,7 +37,7 @@ def test_something(temp_chdir):
     """
 
     @contextmanager
-    def _temp_chdir_context(path: os.PathLike | str):
+    def _temp_chdir_context(path: os.PathLike | str) -> Iterator[None]:
         """Context manager for temporarily changing working directory."""
         original_cwd = Path.cwd()
         target = Path(path)
@@ -52,11 +53,11 @@ def _temp_chdir_context(path: os.PathLike | str):
 
 
 @pytest.fixture
-def mock_git_command_result():
+def mock_git_command_result() -> Callable[[str], subprocess.CompletedProcess[str]]:
     """
     Pytest fixture for creating mock CompletedProcess objects for git commands.
 
-    Returns a function that creates a mock object with the specified stdout output.
+    Returns a function that creates a CompletedProcess with the specified stdout output.
     This standardizes git command mocking across all test files.
 
     Usage:
@@ -65,12 +66,8 @@ def test_something(mock_git_command_result):
             # mock_result.stdout.strip() will return "v0.4.2"
     """
 
-    def _create_mock_result(output: str) -> Mock:
-        """Create a mock CompletedProcess object for git commands."""
-        mock_result = Mock()
-        mock_result.stdout = output  # mimic CompletedProcess.stdout (str)
-        mock_result.returncode = 0
-        mock_result.args = ["git"]
-        return mock_result
+    def _create_mock_result(output: str) -> subprocess.CompletedProcess[str]:
+        """Create a typed CompletedProcess object for git commands."""
+        return subprocess.CompletedProcess(args=["git"], returncode=0, stdout=output, stderr="")
 
     return _create_mock_result
diff --git a/scripts/tests/test_benchmark_models.py b/scripts/tests/test_benchmark_models.py
index 3930392a..5b8c529c 100644
--- a/scripts/tests/test_benchmark_models.py
+++ b/scripts/tests/test_benchmark_models.py
@@ -27,7 +27,7 @@
 class TestBenchmarkData:
     """Test cases for BenchmarkData class."""
 
-    def test_init(self):
+    def test_init(self) -> None:
         """Test BenchmarkData initialization."""
         data = BenchmarkData(points=1000, dimension="2D")
         assert data.points == 1000
@@ -35,7 +35,7 @@ def test_init(self):
         assert data.time_mean == 0.0
         assert data.throughput_mean is None
 
-    def test_with_timing_fluent_interface(self):
+    def test_with_timing_fluent_interface(self) -> None:
         """Test fluent interface for setting timing data."""
         data = BenchmarkData(1000, "3D").with_timing(100.0, 110.0, 120.0, "µs")
 
@@ -44,7 +44,7 @@ def test_with_timing_fluent_interface(self):
         assert data.time_high == 120.0
         assert data.time_unit == "µs"
 
-    def test_with_throughput_fluent_interface(self):
+    def test_with_throughput_fluent_interface(self) -> None:
         """Test fluent interface for setting throughput data."""
         data = BenchmarkData(1000, "2D").with_throughput(800.0, 900.0, 1000.0, "Kelem/s")
 
@@ -53,7 +53,7 @@ def test_with_throughput_fluent_interface(self):
         assert data.throughput_high == 1000.0
         assert data.throughput_unit == "Kelem/s"
 
-    def test_to_baseline_format_with_timing_only(self):
+    def test_to_baseline_format_with_timing_only(self) -> None:
         """Test baseline format output with timing data only."""
         data = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs")
 
@@ -63,7 +63,7 @@ def test_to_baseline_format_with_timing_only(self):
 """
         assert result == expected
 
-    def test_to_baseline_format_with_timing_and_throughput(self):
+    def test_to_baseline_format_with_timing_and_throughput(self) -> None:
         """Test baseline format output with both timing and throughput data."""
         data = BenchmarkData(1000, "3D").with_timing(100.0, 110.0, 120.0, "µs").with_throughput(800.0, 900.0, 1000.0, "Kelem/s")
 
@@ -74,7 +74,7 @@ def test_to_baseline_format_with_timing_and_throughput(self):
 """
         assert result == expected
 
-    def test_to_baseline_format_with_unsized_workload(self):
+    def test_to_baseline_format_with_unsized_workload(self) -> None:
         """Test baseline format output for workloads without numeric input size."""
         data = BenchmarkData(None, "4D", benchmark_id="bistellar_flips_4d/k2_roundtrip").with_timing(0.8, 0.95, 1.1, "µs")
 
@@ -84,7 +84,7 @@ def test_to_baseline_format_with_unsized_workload(self):
         assert "Benchmark ID: bistellar_flips_4d/k2_roundtrip" in result
         assert "0 Points" not in result
 
-    def test_unsized_comparison_key_requires_benchmark_id(self):
+    def test_unsized_comparison_key_requires_benchmark_id(self) -> None:
         """Test unsized workloads cannot silently collide on comparison keys."""
         data = BenchmarkData(None, "4D")
 
@@ -95,7 +95,7 @@ def test_unsized_comparison_key_requires_benchmark_id(self):
 class TestCircumspherePerformanceData:
     """Test cases for CircumspherePerformanceData class."""
 
-    def test_init(self):
+    def test_init(self) -> None:
         """Test CircumspherePerformanceData initialization."""
         data = CircumspherePerformanceData(method="insphere", time_ns=1000.0)
         assert data.method == "insphere"
@@ -107,7 +107,7 @@ def test_init(self):
 class TestCircumsphereTestCase:
     """Test cases for CircumsphereTestCase class."""
 
-    def test_init_and_get_winner(self):
+    def test_init_and_get_winner(self) -> None:
         """Test CircumsphereTestCase initialization and winner detection."""
         methods = {
             "insphere": CircumspherePerformanceData("insphere", 1000.0),
@@ -120,7 +120,7 @@ def test_init_and_get_winner(self):
         assert test_case.dimension == "3D"
         assert test_case.get_winner() == "insphere_lifted"  # Lowest time
 
-    def test_get_relative_performance(self):
+    def test_get_relative_performance(self) -> None:
         """Test relative performance calculation."""
         methods = {
             "insphere": CircumspherePerformanceData("insphere", 1000.0),
@@ -134,12 +134,12 @@ def test_get_relative_performance(self):
         assert test_case.get_relative_performance("insphere") == pytest.approx(1.25)  # 1000/800
         assert test_case.get_relative_performance("insphere_distance") == pytest.approx(1.5)  # 1200/800
 
-    def test_get_winner_empty_methods(self):
+    def test_get_winner_empty_methods(self) -> None:
         """Test get_winner with empty methods dict."""
         test_case = CircumsphereTestCase("test_empty", "3D", {})
         assert test_case.get_winner() is None
 
-    def test_get_relative_performance_nonexistent_method(self):
+    def test_get_relative_performance_nonexistent_method(self) -> None:
         """Test get_relative_performance with non-existent method returns 0.0."""
         methods = {
             "insphere": CircumspherePerformanceData("insphere", 1000.0),
@@ -149,7 +149,7 @@ def test_get_relative_performance_nonexistent_method(self):
         # Should return 0.0 for non-existent method
         assert test_case.get_relative_performance("nonexistent_method") == pytest.approx(0.0)
 
-    def test_version_comparison_data_division_by_zero_edge_case(self):
+    def test_version_comparison_data_division_by_zero_edge_case(self) -> None:
         """Test VersionComparisonData handles edge case gracefully."""
         # This doesn't raise an exception but demonstrates pytest usage for edge case testing
         comparison = VersionComparisonData(
@@ -169,7 +169,7 @@ def test_version_comparison_data_division_by_zero_edge_case(self):
 class TestVersionComparisonData:
     """Test cases for VersionComparisonData class."""
 
-    def test_improvement_calculation(self):
+    def test_improvement_calculation(self) -> None:
         """Test improvement percentage calculation."""
         comparison = VersionComparisonData(
             test_case="Basic 3D",
@@ -184,7 +184,7 @@ def test_improvement_calculation(self):
         expected_improvement = ((808.0 - 805.0) / 808.0) * 100
         assert comparison.improvement_pct == pytest.approx(expected_improvement, abs=0.001)
 
-    def test_zero_old_value(self):
+    def test_zero_old_value(self) -> None:
         """Test improvement calculation with zero old value."""
         comparison = VersionComparisonData(
             test_case="Basic 3D",
@@ -202,7 +202,7 @@ def test_zero_old_value(self):
 class TestParsingFunctions:
     """Test cases for parsing functions."""
 
-    def test_extract_benchmark_data(self):
+    def test_extract_benchmark_data(self) -> None:
         """Test extracting benchmark data from baseline content."""
         baseline_content = """Date: 2024-01-15 10:30:00 UTC
 Git commit: abc123def456
@@ -234,7 +234,7 @@ def test_extract_benchmark_data(self):
         assert second.dimension == "3D"
         assert second.time_mean == 550.0
 
-    def test_parse_benchmark_header(self):
+    def test_parse_benchmark_header(self) -> None:
         """Test parsing benchmark header lines."""
         # Valid header
         result = parse_benchmark_header("=== 1000 Points (2D) ===")
@@ -251,7 +251,7 @@ def test_parse_benchmark_header(self):
         result = parse_benchmark_header("Invalid header")
         assert result is None
 
-    def test_parse_time_data(self):
+    def test_parse_time_data(self) -> None:
         """Test parsing time data lines."""
         benchmark = BenchmarkData(1000, "2D")
 
@@ -266,7 +266,7 @@ def test_parse_time_data(self):
         success = parse_time_data(benchmark2, "Invalid time data")
         assert success is False
 
-    def test_parse_throughput_data(self):
+    def test_parse_throughput_data(self) -> None:
         """Test parsing throughput data lines."""
         benchmark = BenchmarkData(1000, "2D")
 
@@ -285,7 +285,7 @@ def test_parse_throughput_data(self):
 class TestFormattingFunctions:
     """Test cases for formatting functions."""
 
-    def test_format_benchmark_tables(self):
+    def test_format_benchmark_tables(self) -> None:
         """Test formatting benchmark data as markdown tables."""
         # Create test benchmarks
         benchmarks = [
@@ -310,7 +310,7 @@ def test_format_benchmark_tables(self):
         assert "| 5000 |" in markdown_content  # Should contain the 5000 point row
         assert "4.5x" in markdown_content  # Scaling: 500/110 ≈ 4.5
 
-    def test_format_benchmark_tables_includes_benchmark_ids(self):
+    def test_format_benchmark_tables_includes_benchmark_ids(self) -> None:
         """Test expanded benchmark IDs are shown in baseline summary tables."""
         benchmarks = [
             BenchmarkData(50, "3D", benchmark_id="boundary_facets/boundary_facets_3d/50")
@@ -331,7 +331,7 @@ def test_format_benchmark_tables_includes_benchmark_ids(self):
         assert "| `boundary_facets/boundary_facets_3d/50` | 50 | 10.00 µs | 5.00 Kelem/s | N/A |" in markdown_content
         assert "| `validation/validate_3d/50` | 50 | 20.00 µs | N/A | N/A |" in markdown_content
 
-    def test_format_benchmark_tables_renders_unsized_points(self):
+    def test_format_benchmark_tables_renders_unsized_points(self) -> None:
         """Test unsized workloads render without fake numeric point counts."""
         benchmarks = [
             BenchmarkData(None, "4D", benchmark_id="bistellar_flips_4d/k2_roundtrip").with_timing(
@@ -348,7 +348,7 @@ def test_format_benchmark_tables_renders_unsized_points(self):
         assert "| `bistellar_flips_4d/k2_roundtrip` | n/a | 0.950 µs | N/A | N/A |" in markdown_content
         assert "0 Points" not in markdown_content
 
-    def test_format_time_value(self):
+    def test_format_time_value(self) -> None:
         """Test formatting time values with appropriate precision."""
         # Test zero and negative values (should return N/A)
         assert format_time_value(0.0, "µs") == "N/A"
@@ -361,7 +361,7 @@ def test_format_time_value(self):
         assert format_time_value(2500.0, "ms") == "2.5000 s"  # Converts to s
         assert format_time_value(50000.0, "ms") == "50.0000 s"  # Large values convert to s
 
-    def test_format_throughput_value(self):
+    def test_format_throughput_value(self) -> None:
         """Test formatting throughput values with appropriate precision."""
         # Test different value ranges
         assert format_throughput_value(0.5, "Kelem/s") == "0.500 Kelem/s"
@@ -372,7 +372,7 @@ def test_format_throughput_value(self):
         assert format_throughput_value(None, "Kelem/s") == "N/A"
         assert format_throughput_value(110.0, None) == "N/A"
 
-    def test_format_time_value_with_unit_aliases(self):
+    def test_format_time_value_with_unit_aliases(self) -> None:
         """Test time value formatting with microsecond unit aliases."""
         # Test microsecond alias normalization
         assert format_time_value(500.0, "us") == "500.00 µs"  # us -> µs
@@ -383,7 +383,7 @@ def test_format_time_value_with_unit_aliases(self):
         assert format_time_value(1500.0, "us") == "1.500 ms"  # us -> µs -> ms conversion
         assert format_time_value(2500.0, "μs") == "2.500 ms"  # μs -> µs -> ms conversion
 
-    def test_parse_time_data_with_scientific_notation(self):
+    def test_parse_time_data_with_scientific_notation(self) -> None:
         """Test parsing time data with scientific notation and flexible formatting."""
         benchmark = BenchmarkData(1000, "3D")
 
@@ -406,7 +406,7 @@ def test_parse_time_data_with_scientific_notation(self):
         assert benchmark3.time_mean == 110.0
         assert benchmark3.time_unit == "µs"
 
-    def test_parse_throughput_data_with_scientific_notation(self):
+    def test_parse_throughput_data_with_scientific_notation(self) -> None:
         """Test parsing throughput data with scientific notation and flexible formatting."""
         benchmark = BenchmarkData(1000, "2D")
 
@@ -423,7 +423,7 @@ def test_parse_throughput_data_with_scientific_notation(self):
         assert benchmark2.throughput_mean == 9090.9
         assert benchmark2.throughput_unit == "Kelem/s"
 
-    def test_format_benchmark_tables_dimension_sorting(self):
+    def test_format_benchmark_tables_dimension_sorting(self) -> None:
         """Test that dimensions are sorted numerically rather than lexically."""
         # Create benchmarks with dimensions that would sort incorrectly lexically
         benchmarks = [
@@ -445,7 +445,7 @@ def test_format_benchmark_tables_dimension_sorting(self):
         # Verify they appear in numeric order: 1D < 2D < 3D < 10D
         assert pos_1d < pos_2d < pos_3d < pos_10d
 
-    def test_format_benchmark_tables_mixed_dimension_formats(self):
+    def test_format_benchmark_tables_mixed_dimension_formats(self) -> None:
         """Test dimension sorting with mixed formats and edge cases."""
         benchmarks = [
             BenchmarkData(1000, "2D").with_timing(50.0, 55.0, 60.0, "µs"),
@@ -466,7 +466,7 @@ def test_format_benchmark_tables_mixed_dimension_formats(self):
         # Numeric dimensions should come first (1d, 2D, 3D), then non-numeric (custom_format)
         assert pos_1d < pos_2d < pos_3d < pos_custom
 
-    def test_format_benchmark_tables_scaling_baseline_with_zero_first_entry(self):
+    def test_format_benchmark_tables_scaling_baseline_with_zero_first_entry(self) -> None:
         """Test scaling baseline calculation when first entry has zero/empty time.
 
         This tests the fix for the issue where using 1.0 as fallback when the
@@ -493,7 +493,7 @@ def test_format_benchmark_tables_scaling_baseline_with_zero_first_entry(self):
         # Should not contain inflated scaling that would result from 1.0 fallback
         assert "500.0x" not in markdown_content  # This would be 500/1.0 if bug existed
 
-    def test_format_benchmark_tables_scaling_baseline_all_zero_times(self):
+    def test_format_benchmark_tables_scaling_baseline_all_zero_times(self) -> None:
         """Test scaling baseline calculation when all entries have zero/empty time."""
         benchmarks = [
             BenchmarkData(1000, "2D").with_timing(0.0, 0.0, 0.0, "µs"),
diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py
index ceb648fb..a1edbfd8 100644
--- a/scripts/tests/test_benchmark_utils.py
+++ b/scripts/tests/test_benchmark_utils.py
@@ -20,6 +20,7 @@
 import time
 from io import StringIO
 from pathlib import Path
+from typing import Any
 from unittest.mock import Mock, patch
 
 import pytest
@@ -49,6 +50,10 @@
 )
 
 THRESHOLD_PERCENT = f"{DEFAULT_REGRESSION_THRESHOLD:.1f}%"
+CI_MANIFEST_STDOUT = (
+    "api_benchmark group=boundary_facets public_api=DelaunayTriangulation::boundary_facets "
+    "dimensions=3 benchmark_ids=boundary_facets/boundary_facets_3d/50 note=test\n"
+)
 PUBLIC_API_TITLE = "### Public API Performance Contract (`ci_performance_suite`)"
 CIRCUMSPHERE_TITLE = "## Circumsphere Predicate Analysis"
 PERFORMANCE_RANKING_TITLE = "### Performance Ranking"
@@ -56,7 +61,18 @@
 PERFORMANCE_UPDATES_TITLE = "## Performance Data Updates"
 
 
-def write_estimate(target_dir: Path, path_parts, mean_ns):
+def completed_process(
+    stdout: str = "",
+    *,
+    returncode: int = 0,
+    stderr: str = "",
+    args: list[str] | None = None,
+) -> subprocess.CompletedProcess[str]:
+    """Return a typed subprocess result for command-wrapper mocks; a missing or empty ``args`` becomes ``[]``."""
+    return subprocess.CompletedProcess(args=args or [], returncode=returncode, stdout=stdout, stderr=stderr)
+
+
+def write_estimate(target_dir: Path, path_parts, mean_ns) -> None:
     """Write a minimal Criterion estimates.json fixture."""
     estimates_dir = target_dir / "criterion" / Path(*path_parts) / "base"
     estimates_dir.mkdir(parents=True)
@@ -72,7 +88,7 @@ def write_estimate(target_dir: Path, path_parts, mean_ns):
     (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8")
 
 
-def write_ci_performance_manifest(target_dir: Path, benchmark_ids: list[str]):
+def write_ci_performance_manifest(target_dir: Path, benchmark_ids: list[str]) -> None:
     """Write the ci_performance_suite runtime manifest sidecar."""
     criterion_dir = target_dir / "criterion"
     criterion_dir.mkdir(parents=True, exist_ok=True)
@@ -82,7 +98,7 @@ def write_ci_performance_manifest(target_dir: Path, benchmark_ids: list[str]):
     )
 
 
-def compute_average_time_change(current_results, baseline_results):
+def compute_average_time_change(current_results, baseline_results) -> float:
     """Replicate PerformanceComparator's geometric mean logic for tests."""
     time_changes = []
     for current in current_results:
@@ -106,7 +122,7 @@ def compute_average_time_change(current_results, baseline_results):
 
 
 @pytest.fixture
-def sample_estimates_data():
+def sample_estimates_data() -> dict[str, object]:
     """Fixture for common estimates.json test data."""
     return {
         "mean": {
@@ -117,7 +133,7 @@ def sample_estimates_data():
 
 
 @pytest.fixture
-def sample_benchmark_data():
+def sample_benchmark_data() -> dict[str, BenchmarkData]:
     """Fixture for common BenchmarkData test objects."""
     return {
         "2d_1000": BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs"),
@@ -129,7 +145,7 @@ def sample_benchmark_data():
 class TestCriterionParser:
     """Test cases for CriterionParser class."""
 
-    def test_parse_estimates_json_valid_data(self, sample_estimates_data):
+    def test_parse_estimates_json_valid_data(self, sample_estimates_data) -> None:
         """Test parsing valid estimates.json data."""
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(sample_estimates_data, f)
@@ -151,7 +167,7 @@ def test_parse_estimates_json_valid_data(self, sample_estimates_data):
         finally:
             estimates_path.unlink()
 
-    def test_benchmark_data_positional_timing_compatibility(self):
+    def test_benchmark_data_positional_timing_compatibility(self) -> None:
         """Test legacy positional construction still maps the third argument to time_low."""
         benchmark = BenchmarkData(1000, "2D", 1.0, 2.0, 3.0, "µs")
 
@@ -161,7 +177,7 @@ def test_benchmark_data_positional_timing_compatibility(self):
         assert benchmark.time_unit == "µs"
         assert benchmark.benchmark_id == ""
 
-    def test_parse_estimates_json_preserves_unsized_workload(self, sample_estimates_data):
+    def test_parse_estimates_json_preserves_unsized_workload(self, sample_estimates_data) -> None:
         """Test Criterion estimates without numeric input size do not get fake throughput."""
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(sample_estimates_data, f)
@@ -179,7 +195,7 @@ def test_parse_estimates_json_preserves_unsized_workload(self, sample_estimates_
         finally:
             estimates_path.unlink()
 
-    def test_parse_estimates_json_zero_mean(self):
+    def test_parse_estimates_json_zero_mean(self) -> None:
         """Test parsing estimates.json with zero mean time."""
         estimates_data = {"mean": {"point_estimate": 0.0, "confidence_interval": {"lower_bound": 0.0, "upper_bound": 0.0}}}
 
@@ -194,7 +210,7 @@ def test_parse_estimates_json_zero_mean(self):
         finally:
             estimates_path.unlink()
 
-    def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(self):
+    def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(self) -> None:
         """Test division by zero protection for very fast benchmarks with near-zero confidence intervals."""
         estimates_data = {
             "mean": {
@@ -231,12 +247,12 @@ def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(se
         finally:
             estimates_path.unlink()
 
-    def test_parse_estimates_json_invalid_file(self):
+    def test_parse_estimates_json_invalid_file(self) -> None:
         """Test parsing non-existent estimates.json file."""
         result = CriterionParser.parse_estimates_json(Path("nonexistent.json"), 1000, "2D")
         assert result is None
 
-    def test_parse_estimates_json_malformed_json(self):
+    def test_parse_estimates_json_malformed_json(self) -> None:
         """Test parsing malformed JSON file."""
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             f.write("{ invalid json")
@@ -251,7 +267,7 @@ def test_parse_estimates_json_malformed_json(self):
 
     @patch("benchmark_utils.Path.exists")
     @patch("benchmark_utils.Path.iterdir")
-    def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists):  # noqa: ARG002
+    def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists) -> None:  # noqa: ARG002
         """Test finding criterion results when criterion directory doesn't exist."""
         mock_exists.return_value = False
 
@@ -260,7 +276,7 @@ def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists
 
         assert results == []
 
-    def test_find_criterion_results_sorting(self):
+    def test_find_criterion_results_sorting(self) -> None:
         """Test that results are sorted by dimension and points."""
         # Create test data that would be unsorted initially
         test_results = [
@@ -283,7 +299,7 @@ def test_find_criterion_results_sorting(self):
         assert test_results[3].dimension == "4D"
         assert test_results[3].points == 1000
 
-    def test_ci_performance_suite_patterns(self):
+    def test_ci_performance_suite_patterns(self) -> None:
         """Test CI performance suite benchmark patterns (2D, 3D, 4D, 5D with 10, 25, 50 points)."""
         # Test data representing CI performance suite dimensions and point counts
         ci_suite_results = [
@@ -309,7 +325,7 @@ def test_ci_performance_suite_patterns(self):
         actual_order = [(b.dimension, b.points) for b in ci_suite_results]
         assert actual_order == expected_order
 
-    def test_ci_benchmark_id_pattern_expands_braced_segments(self):
+    def test_ci_benchmark_id_pattern_expands_braced_segments(self) -> None:
         """Test ci_performance_suite manifest brace patterns expand to concrete IDs."""
         result = _expand_ci_benchmark_id_pattern("tds_new_2d/{tds_new,tds_new_adversarial}/{10,25}")
 
@@ -320,7 +336,7 @@ def test_ci_benchmark_id_pattern_expands_braced_segments(self):
             "tds_new_2d/tds_new_adversarial/25",
         }
 
-    def test_find_criterion_results_preserves_ci_suite_ids(self):
+    def test_find_criterion_results_preserves_ci_suite_ids(self) -> None:
         """Test ci_performance_suite results keep expanded Criterion benchmark IDs."""
         with tempfile.TemporaryDirectory() as temp_dir:
             target_dir = Path(temp_dir) / "target"
@@ -346,7 +362,7 @@ def test_find_criterion_results_preserves_ci_suite_ids(self):
             assert roundtrip.dimension == "4D"
             assert roundtrip.throughput_mean is None
 
-    def test_find_criterion_results_filters_stale_ci_suite_ids_with_manifest(self):
+    def test_find_criterion_results_filters_stale_ci_suite_ids_with_manifest(self) -> None:
         """Test ci_performance_suite parsing ignores stale Criterion files outside the manifest."""
         with tempfile.TemporaryDirectory() as temp_dir:
             target_dir = Path(temp_dir) / "target"
@@ -374,13 +390,13 @@ class TestPerformanceComparator:
     """Test cases for PerformanceComparator class."""
 
     @pytest.fixture
-    def comparator(self):
+    def comparator(self) -> PerformanceComparator:
         """Fixture for PerformanceComparator instance."""
         project_root = Path("/fake/project")
         return PerformanceComparator(project_root)
 
     @pytest.fixture
-    def sample_baseline_content(self):
+    def sample_baseline_content(self) -> str:
         """Fixture for sample baseline content."""
         return """Date: 2023-06-15 10:30:00 PDT
 Git commit: abc123def456
@@ -406,7 +422,7 @@ def sample_baseline_content(self):
 Throughput: [4.167, 4.545, 5.0] Kelem/s
 """
 
-    def test_parse_baseline_file(self, comparator, sample_baseline_content):
+    def test_parse_baseline_file(self, comparator, sample_baseline_content) -> None:
         """Test parsing baseline file content."""
         results = comparator._parse_baseline_file(sample_baseline_content)
 
@@ -422,7 +438,7 @@ def test_parse_baseline_file(self, comparator, sample_baseline_content):
         assert bench_2d_1000.time_mean == 110.0
         assert bench_2d_1000.throughput_mean == 9.091
 
-    def test_parse_baseline_file_with_benchmark_ids(self, comparator):
+    def test_parse_baseline_file_with_benchmark_ids(self, comparator) -> None:
         """Test parsing expanded ci_performance_suite baseline identifiers."""
         baseline_content = """Date: 2023-06-15 10:30:00 PDT
 Git commit: abc123def456
@@ -447,7 +463,7 @@ def test_parse_baseline_file_with_benchmark_ids(self, comparator):
         assert results["boundary_facets/boundary_facets_3d/50"].time_mean == 10.0
         assert results["validation/validate_3d/50"].time_mean == 20.0
 
-    def test_parse_baseline_file_with_unsized_benchmark_id(self, comparator):
+    def test_parse_baseline_file_with_unsized_benchmark_id(self, comparator) -> None:
         """Test parsing expanded CI benchmarks without numeric input sizes."""
         baseline_content = """Date: 2023-06-15 10:30:00 PDT
 Git commit: abc123def456
@@ -464,7 +480,7 @@ def test_parse_baseline_file_with_unsized_benchmark_id(self, comparator):
         assert benchmark.dimension == "4D"
         assert benchmark.throughput_mean is None
 
-    def test_write_performance_comparison_matches_benchmark_ids(self, comparator):
+    def test_write_performance_comparison_matches_benchmark_ids(self, comparator) -> None:
         """Test comparison uses expanded benchmark IDs instead of point/dimension collisions."""
         current_results = [
             BenchmarkData(50, "3D", benchmark_id="boundary_facets/boundary_facets_3d/50").with_timing(9.0, 10.0, 11.0, "µs"),
@@ -492,7 +508,7 @@ def test_write_performance_comparison_matches_benchmark_ids(self, comparator):
         assert "OK: Time change +0.0%" in content
         assert "IMPROVEMENT: Time decreased by 50.0%" in content
 
-    def test_write_performance_comparison_no_legacy_fallback_for_benchmark_id(self, comparator):
+    def test_write_performance_comparison_no_legacy_fallback_for_benchmark_id(self, comparator) -> None:
         """Test expanded IDs do not compare against unrelated collapsed legacy baselines."""
         current_results = [
             BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing(
@@ -513,7 +529,7 @@ def test_write_performance_comparison_no_legacy_fallback_for_benchmark_id(self,
         assert "Baseline: N/A (no matching entry)" in content
         assert "IMPROVEMENT: Time decreased by 50.0%" not in content
 
-    def test_write_time_comparison_no_regression(self, comparator):
+    def test_write_time_comparison_no_regression(self, comparator) -> None:
         """Test time comparison writing with no regression."""
         current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs")
         baseline = BenchmarkData(1000, "2D").with_timing(95.0, 105.0, 115.0, "µs")
@@ -529,7 +545,7 @@ def test_write_time_comparison_no_regression(self, comparator):
         assert "4.8%" in result
         assert "✅ OK: Time change +4.8% within acceptable range" in result
 
-    def test_write_time_comparison_with_regression(self, comparator):
+    def test_write_time_comparison_with_regression(self, comparator) -> None:
         """Test time comparison writing with regression."""
         current = BenchmarkData(1000, "2D").with_timing(100.0, 115.0, 130.0, "µs")
         baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs")
@@ -545,7 +561,7 @@ def test_write_time_comparison_with_regression(self, comparator):
         assert "15.0%" in result
         assert "⚠️  REGRESSION" in result
 
-    def test_write_time_comparison_with_improvement(self, comparator):
+    def test_write_time_comparison_with_improvement(self, comparator) -> None:
         """Test time comparison writing with significant improvement."""
         current = BenchmarkData(1000, "2D").with_timing(80.0, 90.0, 100.0, "µs")
         baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs")
@@ -561,7 +577,7 @@ def test_write_time_comparison_with_improvement(self, comparator):
         assert "10.0%" in result
         assert "✅ IMPROVEMENT: Time decreased by 10.0% (faster performance)" in result
 
-    def test_write_time_comparison_zero_baseline(self, comparator):
+    def test_write_time_comparison_zero_baseline(self, comparator) -> None:
         """Test time comparison with zero baseline time."""
         current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs")
         baseline = BenchmarkData(1000, "2D").with_timing(0.0, 0.0, 0.0, "µs")
@@ -577,7 +593,7 @@ def test_write_time_comparison_zero_baseline(self, comparator):
 
     @pytest.mark.parametrize("dev_mode", [False, True])
     @patch("benchmark_utils.run_cargo_command")
-    def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode):
+    def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode) -> None:
         """Test that PerformanceComparator invokes cargo without --quiet flag (removed for better error visibility)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = Path(temp_dir)
@@ -592,10 +608,7 @@ def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode):
             baseline_file.write_text(baseline_content)
 
             # Mock successful cargo command
-            mock_result = Mock()
-            mock_result.returncode = 0
-            mock_result.stdout = ""
-            mock_cargo.return_value = mock_result
+            mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT)
 
             comparator = PerformanceComparator(temp_path)
             comparator.compare_with_baseline(baseline_file, dev_mode=dev_mode)
@@ -611,7 +624,7 @@ def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode):
             # And output is captured
             assert mock_cargo.call_args.kwargs.get("capture_output") is True
 
-    def test_write_performance_comparison_no_average_regression(self, comparator):
+    def test_write_performance_comparison_no_average_regression(self, comparator) -> None:
         """Test performance comparison with individual regressions but no average regression."""
         # Create current results with mixed performance changes
         current_results = [
@@ -644,7 +657,7 @@ def test_write_performance_comparison_no_average_regression(self, comparator):
         assert re.search(r"Average time change:\s*-?0\.0%", result)
         assert "✅ OVERALL OK" in result
 
-    def test_write_performance_comparison_with_average_regression(self, comparator):
+    def test_write_performance_comparison_with_average_regression(self, comparator) -> None:
         """Test performance comparison with average regression exceeding threshold."""
         # Create current results with overall performance degradation
         current_results = [
@@ -677,7 +690,7 @@ def test_write_performance_comparison_with_average_regression(self, comparator):
         assert "Average time change: 11.0%" in result
         assert "🚨 OVERALL REGRESSION" in result
 
-    def test_write_performance_comparison_with_average_improvement(self, comparator):
+    def test_write_performance_comparison_with_average_improvement(self, comparator) -> None:
         """Test performance comparison with significant average improvement."""
         # Create current results with overall performance improvement
         current_results = [
@@ -712,7 +725,7 @@ def test_write_performance_comparison_with_average_improvement(self, comparator)
         assert expected_average_line in result
         assert "✅ OVERALL OK" in result
 
-    def test_write_performance_comparison_missing_baseline(self, comparator):
+    def test_write_performance_comparison_missing_baseline(self, comparator) -> None:
         """Test performance comparison when some baselines are missing."""
         current_results = [
             BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
@@ -734,7 +747,7 @@ def test_write_performance_comparison_missing_baseline(self, comparator):
         assert "Total benchmarks compared: 1" in result
         assert "3000 Points (2D)" in result  # Should still show the benchmark without baseline
 
-    def test_write_performance_comparison_no_benchmarks(self, comparator):
+    def test_write_performance_comparison_no_benchmarks(self, comparator) -> None:
         """Test performance comparison with no benchmarks."""
         output = StringIO()
         regression_found = comparator._write_performance_comparison(output, [], {})
@@ -744,7 +757,7 @@ def test_write_performance_comparison_no_benchmarks(self, comparator):
 
     @patch("benchmark_utils.get_git_commit_hash")
     @patch("benchmark_utils.datetime")
-    def test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator, sample_baseline_content):
+    def test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator, sample_baseline_content) -> None:
         """Test preparation of comparison metadata."""
         # Mock current datetime
         mock_now = Mock()
@@ -762,15 +775,15 @@ def test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator,
         assert metadata["baseline_commit"] == "abc123def456"
 
     @patch("benchmark_utils.get_git_commit_hash")
-    def test_prepare_comparison_metadata_git_failure(self, mock_git, comparator, sample_baseline_content):
+    def test_prepare_comparison_metadata_git_failure(self, mock_git, comparator, sample_baseline_content) -> None:
         """Test metadata preparation when git command fails."""
-        mock_git.side_effect = Exception("Git not available")
+        mock_git.side_effect = RuntimeError("Git not available")
 
         metadata = comparator._prepare_comparison_metadata(sample_baseline_content)
 
         assert metadata["current_commit"] == "unknown"
 
-    def test_regression_threshold_configuration(self, comparator):
+    def test_regression_threshold_configuration(self, comparator) -> None:
         """Test that regression threshold can be configured."""
         # Test default threshold
         assert comparator.regression_threshold == DEFAULT_REGRESSION_THRESHOLD
@@ -788,7 +801,7 @@ def test_regression_threshold_configuration(self, comparator):
         assert time_change == pytest.approx(7.0, abs=0.001)  # Use pytest.approx for floating-point comparison
         assert not is_regression
 
-    def test_write_error_file_baseline_not_found(self, comparator):
+    def test_write_error_file_baseline_not_found(self, comparator) -> None:
         """Test writing error file when baseline is not found."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_file = Path(temp_dir) / "error_results.txt"
@@ -803,7 +816,7 @@ def test_write_error_file_baseline_not_found(self, comparator):
             assert str(baseline_file) in content
             assert "This error prevented the benchmark comparison from completing successfully" in content
 
-    def test_write_error_file_benchmark_error(self, comparator):
+    def test_write_error_file_benchmark_error(self, comparator) -> None:
         """Test writing error file when benchmark execution fails."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_file = Path(temp_dir) / "error_results.txt"
@@ -817,7 +830,7 @@ def test_write_error_file_benchmark_error(self, comparator):
             assert error_message in content
             assert "Please check the CI logs for more information" in content
 
-    def test_write_error_file_creates_parent_directory(self, comparator):
+    def test_write_error_file_creates_parent_directory(self, comparator) -> None:
         """Test that _write_error_file creates parent directory if it doesn't exist."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_file = Path(temp_dir) / "nested" / "path" / "error_results.txt"
@@ -829,7 +842,7 @@ def test_write_error_file_creates_parent_directory(self, comparator):
             content = output_file.read_text()
             assert "❌ Error: Test error" in content
 
-    def test_write_error_file_handles_write_failure(self, comparator):
+    def test_write_error_file_handles_write_failure(self, comparator) -> None:
         """Test that _write_error_file handles write failures gracefully."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_file = Path(temp_dir) / "error_results.txt"
@@ -842,7 +855,7 @@ def test_write_error_file_handles_write_failure(self, comparator):
             # File should not exist due to write failure
             assert not output_file.exists()
 
-    def test_sampling_warning_reports_dev_full_mismatch(self):
+    def test_sampling_warning_reports_dev_full_mismatch(self) -> None:
         """Test that comparison warns when baseline and current sampling modes differ."""
         with tempfile.TemporaryDirectory() as temp_dir:
             comparator = PerformanceComparator(Path(temp_dir))
@@ -863,7 +876,7 @@ def test_sampling_warning_reports_dev_full_mismatch(self):
             assert "Criterion measurement time: baseline=2, current=criterion-default" in warning
             assert "Criterion warm-up time: baseline=1, current=criterion-default" in warning
 
-    def test_sampling_warning_reports_missing_baseline_metadata(self, comparator, sample_baseline_content):
+    def test_sampling_warning_reports_missing_baseline_metadata(self, comparator, sample_baseline_content) -> None:
         """Test that legacy baselines without sampling metadata produce a warning."""
         warning = comparator._sampling_warning(sample_baseline_content, dev_mode=False)
 
@@ -886,9 +899,9 @@ def _sample_benchmark_results() -> list[BenchmarkData]:
     @patch("benchmark_utils.get_git_commit_hash", return_value="abc123")
     @patch("benchmark_utils.CriterionParser.find_criterion_results")
     @patch("benchmark_utils.run_cargo_command")
-    def test_generate_baseline_uses_perf_profile(self, mock_cargo, mock_find_results, mock_git):
+    def test_generate_baseline_uses_perf_profile(self, mock_cargo, mock_find_results, mock_git) -> None:
         """Test that full baseline generation benchmarks with the trusted Cargo profile."""
-        mock_cargo.return_value = Mock(stdout="")
+        mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT)
         mock_find_results.return_value = self._sample_benchmark_results()
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -912,9 +925,9 @@ def test_generate_baseline_uses_perf_profile(self, mock_cargo, mock_find_results
     @patch("benchmark_utils.get_git_commit_hash", return_value="abc123")
     @patch("benchmark_utils.CriterionParser.find_criterion_results")
     @patch("benchmark_utils.run_cargo_command")
-    def test_generate_baseline_dev_mode_keeps_perf_profile(self, mock_cargo, mock_find_results, mock_git):
+    def test_generate_baseline_dev_mode_keeps_perf_profile(self, mock_cargo, mock_find_results, mock_git) -> None:
         """Test that dev baseline mode reduces Criterion settings without changing Cargo profile."""
-        mock_cargo.return_value = Mock(stdout="")
+        mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT)
         mock_find_results.return_value = self._sample_benchmark_results()
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -943,12 +956,12 @@ class TestIntegrationScenarios:
     """Integration test scenarios for real-world use cases."""
 
     @pytest.fixture
-    def comparator(self):
+    def comparator(self) -> PerformanceComparator:
         """Fixture for PerformanceComparator instance."""
         project_root = Path("/fake/project")
         return PerformanceComparator(project_root)
 
-    def test_realistic_mixed_performance_scenario(self, comparator):
+    def test_realistic_mixed_performance_scenario(self, comparator) -> None:
         """Test a realistic scenario with mixed performance changes."""
         # Simulate a realistic benchmark run with various performance changes
         current_results = [
@@ -987,7 +1000,7 @@ def test_realistic_mixed_performance_scenario(self, comparator):
         assert expected_average_line in result
         assert "✅ OVERALL OK" in result
 
-    def test_gradual_performance_degradation_scenario(self, comparator):
+    def test_gradual_performance_degradation_scenario(self, comparator) -> None:
         """Test scenario where performance gradually degrades across all benchmarks."""
         # Simulate gradual performance degradation that individually isn't alarming
         # but collectively indicates a problem
@@ -1021,7 +1034,7 @@ def test_gradual_performance_degradation_scenario(self, comparator):
         assert "Average time change: 9.0%" in result
         assert "🚨 OVERALL REGRESSION" in result
 
-    def test_noisy_benchmarks_scenario(self, comparator):
+    def test_noisy_benchmarks_scenario(self, comparator) -> None:
         """Test scenario with noisy benchmarks that have high individual variance."""
         # Simulate noisy benchmarks where individual results vary significantly
         # but overall trend is acceptable
@@ -1062,12 +1075,12 @@ class TestEdgeCases:
     """Test edge cases and error conditions."""
 
     @pytest.fixture
-    def comparator(self):
+    def comparator(self) -> PerformanceComparator:
         """Fixture for PerformanceComparator instance."""
         project_root = Path("/fake/project")
         return PerformanceComparator(project_root)
 
-    def test_empty_current_results(self, comparator):
+    def test_empty_current_results(self, comparator) -> None:
         """Test comparison with empty current results."""
         baseline_results = {
             "1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
@@ -1079,7 +1092,7 @@ def test_empty_current_results(self, comparator):
         assert not regression_found
         assert "SUMMARY" not in output.getvalue()
 
-    def test_empty_baseline_results(self, comparator):
+    def test_empty_baseline_results(self, comparator) -> None:
         """Test comparison with empty baseline results."""
         current_results = [
             BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
@@ -1093,7 +1106,7 @@ def test_empty_baseline_results(self, comparator):
         assert "1000 Points (2D)" in result
         assert "SUMMARY" not in result
 
-    def test_all_zero_baseline_times(self, comparator):
+    def test_all_zero_baseline_times(self, comparator) -> None:
         """Test comparison when all baseline times are zero."""
         current_results = [
             BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
@@ -1113,7 +1126,7 @@ def test_all_zero_baseline_times(self, comparator):
         assert "N/A (baseline mean is 0)" in result
         assert "SUMMARY" not in result  # No valid comparisons
 
-    def test_mixed_valid_invalid_baselines(self, comparator):
+    def test_mixed_valid_invalid_baselines(self, comparator) -> None:
         """Test comparison with mix of valid and invalid baseline data."""
         current_results = [
             BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
@@ -1141,14 +1154,14 @@ class TestWorkflowHelper:
     """Test cases for WorkflowHelper class."""
 
     @patch.dict(os.environ, {"GITHUB_REF": "refs/tags/v1.2.3"}, clear=False)
-    def test_determine_tag_name_from_github_ref(self):
+    def test_determine_tag_name_from_github_ref(self) -> None:
         """Test tag name determination from GITHUB_REF with tag."""
         tag_name = WorkflowHelper.determine_tag_name()
         assert tag_name == "v1.2.3"
 
     @patch.dict(os.environ, {"GITHUB_REF": "refs/heads/main"}, clear=False)
     @patch("benchmark_utils.datetime")
-    def test_determine_tag_name_generated(self, mock_datetime):
+    def test_determine_tag_name_generated(self, mock_datetime) -> None:
         """Test tag name generation when not from a tag push."""
         # Mock datetime
         mock_now = Mock()
@@ -1159,7 +1172,7 @@ def test_determine_tag_name_generated(self, mock_datetime):
         assert tag_name == "manual-20231215-143000"
 
     @patch.dict(os.environ, {"GITHUB_REF": "refs/tags/v2.0.0"}, clear=False)
-    def test_determine_tag_name_with_github_output(self):
+    def test_determine_tag_name_with_github_output(self) -> None:
         """Test tag name determination with GITHUB_OUTPUT file."""
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
             output_file = f.name
@@ -1176,7 +1189,7 @@ def test_determine_tag_name_with_github_output(self):
         finally:
             Path(output_file).unlink(missing_ok=True)
 
-    def test_create_metadata_success(self):
+    def test_create_metadata_success(self) -> None:
         """Test successful metadata creation."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_dir = Path(temp_dir)
@@ -1210,7 +1223,7 @@ def test_create_metadata_success(self):
             # Check ISO format timestamp
             assert metadata["generated_at"].endswith("Z")
 
-    def test_create_metadata_with_safe_env_vars(self):
+    def test_create_metadata_with_safe_env_vars(self) -> None:
         """Test metadata creation with SAFE_ prefixed environment variables."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_dir = Path(temp_dir)
@@ -1239,7 +1252,7 @@ def test_create_metadata_with_safe_env_vars(self):
             assert metadata["commit"] == "def456abc789"
             assert metadata["workflow_run_id"] == "987654321"
 
-    def test_create_metadata_missing_env_vars(self):
+    def test_create_metadata_missing_env_vars(self) -> None:
         """Test metadata creation with missing environment variables."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_dir = Path(temp_dir)
@@ -1260,7 +1273,7 @@ def test_create_metadata_missing_env_vars(self):
             assert metadata["runner_os"] == "unknown"
             assert metadata["runner_arch"] == "unknown"
 
-    def test_create_metadata_directory_creation(self):
+    def test_create_metadata_directory_creation(self) -> None:
         """Test that metadata creation creates directory if it doesn't exist."""
         with tempfile.TemporaryDirectory() as temp_dir:
             output_dir = Path(temp_dir) / "nested" / "path"
@@ -1270,7 +1283,7 @@ def test_create_metadata_directory_creation(self):
             assert output_dir.exists()
             assert (output_dir / "metadata.json").exists()
 
-    def test_display_baseline_summary_success(self, capsys):
+    def test_display_baseline_summary_success(self, capsys) -> None:
         """Test successful baseline summary display."""
         baseline_content = """Date: 2023-12-15 14:30:00 UTC
 Git commit: abc123def456
@@ -1307,7 +1320,7 @@ def test_display_baseline_summary_success(self, capsys):
         finally:
             baseline_file.unlink()
 
-    def test_display_baseline_summary_nonexistent_file(self, capsys):
+    def test_display_baseline_summary_nonexistent_file(self, capsys) -> None:
         """Test baseline summary with non-existent file."""
         baseline_file = Path("/nonexistent/file.txt")
 
@@ -1318,7 +1331,7 @@ def test_display_baseline_summary_nonexistent_file(self, capsys):
         captured = capsys.readouterr()
         assert "❌ Baseline file not found" in captured.err
 
-    def test_display_baseline_summary_long_file(self, capsys):
+    def test_display_baseline_summary_long_file(self, capsys) -> None:
         """Test baseline summary with file longer than 10 lines."""
         baseline_content = "\n".join([f"Line {i}" for i in range(20)])
 
@@ -1337,17 +1350,17 @@ def test_display_baseline_summary_long_file(self, capsys):
         finally:
             baseline_file.unlink()
 
-    def test_sanitize_artifact_name_basic(self):
+    def test_sanitize_artifact_name_basic(self) -> None:
         """Test basic artifact name sanitization."""
         artifact_name = WorkflowHelper.sanitize_artifact_name("v1.2.3")
         assert artifact_name == "performance-baseline-v1_2_3"
 
-    def test_sanitize_artifact_name_with_special_chars(self):
+    def test_sanitize_artifact_name_with_special_chars(self) -> None:
         """Test artifact name sanitization with special characters."""
         artifact_name = WorkflowHelper.sanitize_artifact_name("manual-2023/12/15-14:30:00")
         assert artifact_name == "performance-baseline-manual-2023_12_15-14_30_00"
 
-    def test_sanitize_artifact_name_with_github_output(self):
+    def test_sanitize_artifact_name_with_github_output(self) -> None:
         """Test artifact name sanitization with GITHUB_OUTPUT file."""
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
             output_file = f.name
@@ -1372,12 +1385,12 @@ def test_sanitize_artifact_name_with_github_output(self):
             ("v1.0.0+build.123", "performance-baseline-v1_0_0_build_123"),
         ],
     )
-    def test_sanitize_artifact_name_edge_cases(self, input_tag, expected_output):
+    def test_sanitize_artifact_name_edge_cases(self, input_tag, expected_output) -> None:
         """Test artifact name sanitization with edge cases."""
         result = WorkflowHelper.sanitize_artifact_name(input_tag)
         assert result == expected_output
 
-    def test_sanitize_artifact_name_special_characters(self):
+    def test_sanitize_artifact_name_special_characters(self) -> None:
         """Test that special characters are properly replaced in artifact names."""
         special_chars_input = "@#$%^&*()[]{}|\\<>?"
         result = WorkflowHelper.sanitize_artifact_name(special_chars_input)
@@ -1388,7 +1401,7 @@ def test_sanitize_artifact_name_special_characters(self):
 class TestBenchmarkRegressionHelper:
     """Test cases for BenchmarkRegressionHelper class."""
 
-    def test_prepare_baseline_success(self, capsys):
+    def test_prepare_baseline_success(self, capsys) -> None:
         """Test successful baseline preparation."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1429,7 +1442,7 @@ def test_prepare_baseline_success(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_prepare_baseline_copy_error_handling(self, capsys):
+    def test_prepare_baseline_copy_error_handling(self, capsys) -> None:
         """Test error handling when copying baseline file fails."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1470,7 +1483,7 @@ def test_prepare_baseline_copy_error_handling(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_prepare_baseline_read_summary_error_handling(self, capsys):
+    def test_prepare_baseline_read_summary_error_handling(self, capsys) -> None:
         """Test graceful error handling when baseline summary cannot be read."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1496,7 +1509,7 @@ def test_prepare_baseline_read_summary_error_handling(self, capsys):
                 # Mock Path.open method to fail for read operations on baseline_results.txt
                 original_path_open = Path.open
 
-                def mock_path_open(self, mode="r", *args, **kwargs):
+                def mock_path_open(self, mode="r", *args, **kwargs) -> Any:
                     if self.name == "baseline_results.txt" and "r" in mode:
                         msg = "Read permission denied"
                         raise OSError(msg)
@@ -1526,7 +1539,7 @@ def mock_path_open(self, mode="r", *args, **kwargs):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_prepare_baseline_missing_file(self, capsys):
+    def test_prepare_baseline_missing_file(self, capsys) -> None:
         """Test baseline preparation when baseline file is missing."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1553,7 +1566,7 @@ def test_prepare_baseline_missing_file(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_set_no_baseline_status(self, capsys):
+    def test_set_no_baseline_status(self, capsys) -> None:
         """Test setting no baseline status."""
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
             env_path = env_file.name
@@ -1575,7 +1588,7 @@ def test_set_no_baseline_status(self, capsys):
         finally:
             Path(env_path).unlink(missing_ok=True)
 
-    def test_extract_baseline_commit_from_baseline_file(self):
+    def test_extract_baseline_commit_from_baseline_file(self) -> None:
         """Test extracting commit SHA from baseline_results.txt."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1605,7 +1618,7 @@ def test_extract_baseline_commit_from_baseline_file(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_extract_baseline_commit_from_metadata(self):
+    def test_extract_baseline_commit_from_metadata(self) -> None:
         """Test extracting commit SHA from metadata.json when baseline file fails."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1633,7 +1646,7 @@ def test_extract_baseline_commit_from_metadata(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_extract_baseline_commit_unknown(self):
+    def test_extract_baseline_commit_unknown(self) -> None:
         """Test extracting commit SHA when no valid SHA is found."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -1656,14 +1669,14 @@ def test_extract_baseline_commit_unknown(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_determine_benchmark_skip_unknown_baseline(self):
+    def test_determine_benchmark_skip_unknown_baseline(self) -> None:
         """Test skip determination with unknown baseline commit."""
         should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("unknown", "def4567")
 
         assert not should_skip
         assert reason == "unknown_baseline"
 
-    def test_determine_benchmark_skip_same_commit(self):
+    def test_determine_benchmark_skip_same_commit(self) -> None:
         """Test skip determination with same commit."""
         should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "abc1234")
 
@@ -1671,7 +1684,7 @@ def test_determine_benchmark_skip_same_commit(self):
         assert reason == "same_commit"
 
     @patch("benchmark_utils.run_git_command")
-    def test_determine_benchmark_skip_baseline_not_found(self, mock_git):
+    def test_determine_benchmark_skip_baseline_not_found(self, mock_git) -> None:
         """Test skip determination when baseline commit not found in history."""
         # Simulate git cat-file failing
         mock_git.side_effect = subprocess.CalledProcessError(1, "git")
@@ -1682,12 +1695,12 @@ def test_determine_benchmark_skip_baseline_not_found(self, mock_git):
         assert reason == "baseline_commit_not_found"
 
     @patch("benchmark_utils.run_git_command")
-    def test_determine_benchmark_skip_no_changes(self, mock_git):
+    def test_determine_benchmark_skip_no_changes(self, mock_git) -> None:
         """Test skip determination when no relevant changes found."""
         # Mock successful git commands
         mock_git.side_effect = [
-            Mock(returncode=0),  # git cat-file succeeds
-            Mock(returncode=0, stdout="docs/README.md\n.github/workflows/other.yml\n", stderr=""),  # git diff
+            completed_process(),  # git cat-file succeeds
+            completed_process("docs/README.md\n.github/workflows/other.yml\n"),  # git diff
         ]
 
         should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567")
@@ -1696,12 +1709,12 @@ def test_determine_benchmark_skip_no_changes(self, mock_git):
         assert reason == "no_relevant_changes"
 
     @patch("benchmark_utils.run_git_command")
-    def test_determine_benchmark_skip_changes_detected(self, mock_git):
+    def test_determine_benchmark_skip_changes_detected(self, mock_git) -> None:
         """Test skip determination when relevant changes are detected."""
         # Mock successful git commands
         mock_git.side_effect = [
-            Mock(returncode=0),  # git cat-file succeeds
-            Mock(returncode=0, stdout="src/core/mod.rs\nbenches/performance.rs\n", stderr=""),  # git diff
+            completed_process(),  # git cat-file succeeds
+            completed_process("src/core/mod.rs\nbenches/performance.rs\n"),  # git diff
         ]
 
         should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567")
@@ -1709,14 +1722,14 @@ def test_determine_benchmark_skip_changes_detected(self, mock_git):
         assert not should_skip
         assert reason == "changes_detected"
 
-    def test_display_skip_message(self, capsys):
+    def test_display_skip_message(self, capsys) -> None:
         """Test displaying skip messages."""
         BenchmarkRegressionHelper.display_skip_message("same_commit", "abc1234")
 
         captured = capsys.readouterr()
         assert "🔍 Current commit matches baseline (abc1234)" in captured.out
 
-    def test_display_no_baseline_message(self, capsys):
+    def test_display_no_baseline_message(self, capsys) -> None:
         """Test displaying no baseline message."""
         BenchmarkRegressionHelper.display_no_baseline_message()
 
@@ -1724,7 +1737,7 @@ def test_display_no_baseline_message(self, capsys):
         assert "⚠️ No performance baseline available" in captured.out
         assert "💡 To enable performance regression testing:" in captured.out
 
-    def test_run_regression_test_success(self, capsys):
+    def test_run_regression_test_success(self, capsys) -> None:
         """Test successful regression test run."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_file = Path(temp_dir) / "baseline.txt"
@@ -1743,7 +1756,7 @@ def test_run_regression_test_success(self, capsys):
                 captured = capsys.readouterr()
                 assert "🚀 Running performance regression test" in captured.out
 
-    def test_run_regression_test_dev_mode(self, capsys):
+    def test_run_regression_test_dev_mode(self, capsys) -> None:
         """Test regression test run with dev mode enabled."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_file = Path(temp_dir) / "baseline.txt"
@@ -1762,7 +1775,7 @@ def test_run_regression_test_dev_mode(self, capsys):
                 captured = capsys.readouterr()
                 assert "dev mode (10x faster)" in captured.out
 
-    def test_run_regression_test_failure(self):
+    def test_run_regression_test_failure(self) -> None:
         """Test regression test run failure."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_file = Path(temp_dir) / "baseline.txt"
@@ -1777,7 +1790,7 @@ def test_run_regression_test_failure(self):
 
                 assert not success
 
-    def test_run_regression_test_custom_timeout(self, capsys):
+    def test_run_regression_test_custom_timeout(self, capsys) -> None:
         """Test regression test run with custom bench_timeout parameter."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_file = Path(temp_dir) / "baseline.txt"
@@ -1796,7 +1809,7 @@ def test_run_regression_test_custom_timeout(self, capsys):
                 captured = capsys.readouterr()
                 assert "🚀 Running performance regression test" in captured.out
 
-    def test_display_results_file_exists(self, capsys):
+    def test_display_results_file_exists(self, capsys) -> None:
         """Test displaying results when file exists."""
         with tempfile.TemporaryDirectory() as temp_dir:
             results_file = Path(temp_dir) / "results.txt"
@@ -1809,7 +1822,7 @@ def test_display_results_file_exists(self, capsys):
             assert "=== Performance Regression Test Results ===" in captured.out
             assert "All tests passed" in captured.out
 
-    def test_display_results_file_missing(self, capsys):
+    def test_display_results_file_missing(self, capsys) -> None:
         """Test displaying results when file is missing."""
         missing_file = Path("/nonexistent/results.txt")
 
@@ -1818,7 +1831,7 @@ def test_display_results_file_missing(self, capsys):
         captured = capsys.readouterr()
         assert "⚠️ No comparison results file found" in captured.out
 
-    def test_generate_summary_with_regression(self, temp_chdir, capsys):
+    def test_generate_summary_with_regression(self, temp_chdir, capsys) -> None:
         """Test generating summary when regression is detected."""
         with tempfile.TemporaryDirectory() as temp_dir:
             results_file = Path(temp_dir) / "benches" / "compare_results.txt"
@@ -1843,7 +1856,7 @@ def test_generate_summary_with_regression(self, temp_chdir, capsys):
                 assert "Baseline source: artifact" in captured.out
                 assert "Result: ⚠️ Performance regressions detected" in captured.out
 
-    def test_generate_summary_skip_same_commit(self, capsys):
+    def test_generate_summary_skip_same_commit(self, capsys) -> None:
         """Test generating summary when benchmarks skipped due to same commit."""
         env_vars = {
             "BASELINE_SOURCE": "artifact",
@@ -1859,7 +1872,7 @@ def test_generate_summary_skip_same_commit(self, capsys):
             captured = capsys.readouterr()
             assert "Result: ⏭️ Benchmarks skipped (same commit as baseline)" in captured.out
 
-    def test_generate_summary_no_baseline(self, capsys):
+    def test_generate_summary_no_baseline(self, capsys) -> None:
         """Test generating summary when no baseline available."""
         env_vars = {
             "BASELINE_EXISTS": "false",
@@ -1872,7 +1885,7 @@ def test_generate_summary_no_baseline(self, capsys):
             captured = capsys.readouterr()
             assert "Result: ⏭️ Benchmarks skipped (no baseline available)" in captured.out
 
-    def test_generate_summary_sets_regression_environment_variable(self, temp_chdir, capsys):
+    def test_generate_summary_sets_regression_environment_variable(self, temp_chdir, capsys) -> None:
         """Test that generate_summary sets BENCHMARK_REGRESSION_DETECTED environment variable when regressions are found."""
         with tempfile.TemporaryDirectory() as temp_dir:
             results_file = Path(temp_dir) / "benches" / "compare_results.txt"
@@ -1898,7 +1911,7 @@ def test_generate_summary_sets_regression_environment_variable(self, temp_chdir,
                 captured = capsys.readouterr()
                 assert "Exported BENCHMARK_REGRESSION_DETECTED=true for downstream CI steps" in captured.out
 
-    def test_generate_summary_github_env_export(self, temp_chdir):
+    def test_generate_summary_github_env_export(self, temp_chdir) -> None:
         """Test that BENCHMARK_REGRESSION_DETECTED is also exported to GITHUB_ENV when available."""
         with tempfile.TemporaryDirectory() as temp_dir:
             results_file = Path(temp_dir) / "benches" / "compare_results.txt"
@@ -1920,7 +1933,7 @@ def test_generate_summary_github_env_export(self, temp_chdir):
                 github_env_content = github_env_file.read_text()
                 assert "BENCHMARK_REGRESSION_DETECTED=true" in github_env_content
 
-    def test_generate_summary_with_error_file(self, temp_chdir, capsys):
+    def test_generate_summary_with_error_file(self, temp_chdir, capsys) -> None:
         """Test generating summary when comparison failed with error file."""
         with tempfile.TemporaryDirectory() as temp_dir:
             results_file = Path(temp_dir) / "benches" / "compare_results.txt"
@@ -1960,7 +1973,7 @@ def test_generate_summary_with_error_file(self, temp_chdir, capsys):
 class TestProjectRootHandling:
     """Test cases for find_project_root functionality."""
 
-    def test_find_project_root_success(self, temp_chdir):
+    def test_find_project_root_success(self, temp_chdir) -> None:
         """Test finding project root when Cargo.toml exists."""
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = Path(temp_dir)
@@ -1978,7 +1991,7 @@ def test_find_project_root_success(self, temp_chdir):
                 # Resolve both paths to handle symlinks (macOS /var -> /private/var)
                 assert result.resolve() == temp_path.resolve()
 
-    def test_find_project_root_not_found(self, temp_chdir):
+    def test_find_project_root_not_found(self, temp_chdir) -> None:
         """Test finding project root when Cargo.toml doesn't exist."""
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = Path(temp_dir)
@@ -2005,7 +2018,7 @@ class TestTimeoutHandling:
             ),
         ],
     )
-    def test_timeout_parameter_passed(self, component_class, method_name, setup_func):
+    def test_timeout_parameter_passed(self, component_class, method_name, setup_func) -> None:
         """Test that benchmark components accept and use timeout parameter."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2038,7 +2051,7 @@ def test_timeout_parameter_passed(self, component_class, method_name, setup_func
                 assert mock_cargo.call_count >= 1
                 assert any(call.kwargs.get("timeout") == 120 for call in mock_cargo.call_args_list)
 
-    def test_timeout_error_handling_baseline_generator(self, capsys):
+    def test_timeout_error_handling_baseline_generator(self, capsys) -> None:
         """Test proper error handling when benchmark times out in BaselineGenerator."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2056,7 +2069,7 @@ def test_timeout_error_handling_baseline_generator(self, capsys):
                 assert "timed out after 1800 seconds" in captured.err
                 assert "Consider increasing --bench-timeout" in captured.err
 
-    def test_timeout_error_handling_performance_comparator(self, capsys):
+    def test_timeout_error_handling_performance_comparator(self, capsys) -> None:
         """Test proper error handling when benchmark times out in PerformanceComparator."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2086,7 +2099,7 @@ def test_timeout_error_handling_performance_comparator(self, capsys):
                 assert "cargo bench" in error_content  # Command from exception
                 assert "timeout after 1800 seconds" in error_content  # Explicit timeout value
 
-    def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir):
+    def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir) -> None:
         """Test that CLI validates bench_timeout is positive via main()."""
         # Create a temporary project with Cargo.toml to satisfy find_project_root
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -2115,7 +2128,7 @@ def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir):
                 assert hasattr(args, "validate_bench_timeout")
                 assert args.validate_bench_timeout
 
-    def test_parser_accepts_verbose_flag(self):
+    def test_parser_accepts_verbose_flag(self) -> None:
         """Test that the CLI parser accepts the shared verbose logging flag."""
         parser = create_argument_parser()
         args = parser.parse_args(["--verbose", "generate-summary"])
@@ -2123,7 +2136,7 @@ def test_parser_accepts_verbose_flag(self):
         assert args.verbose
         assert args.command == "generate-summary"
 
-    def test_configure_logging_uses_debug_when_verbose(self):
+    def test_configure_logging_uses_debug_when_verbose(self) -> None:
         """Test that verbose mode configures debug-level CLI logging."""
         with patch("benchmark_utils.logging.basicConfig") as mock_basic_config:
             configure_logging(verbose=True)
@@ -2133,7 +2146,7 @@ def test_configure_logging_uses_debug_when_verbose(self):
             format="%(levelname)s: %(message)s",
         )
 
-    def test_configure_logging_defaults_to_info(self):
+    def test_configure_logging_defaults_to_info(self) -> None:
         """Test that non-verbose mode configures info-level CLI logging."""
         with patch("benchmark_utils.logging.basicConfig") as mock_basic_config:
             configure_logging(verbose=False)
@@ -2147,7 +2160,7 @@ def test_configure_logging_defaults_to_info(self):
 class TestPerformanceSummaryGenerator:
     """Test cases for PerformanceSummaryGenerator class."""
 
-    def test_init(self):
+    def test_init(self) -> None:
         """Test PerformanceSummaryGenerator initialization."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2161,7 +2174,7 @@ def test_init(self):
             assert isinstance(generator.current_version, str)
             assert isinstance(generator.current_date, str)
 
-    def test_generate_summary_parser_defaults_to_trusted_profile(self):
+    def test_generate_summary_parser_defaults_to_trusted_profile(self) -> None:
         """Test that fresh summary benchmarks default to the trusted Cargo profile."""
         parser = create_argument_parser()
         args = parser.parse_args(["generate-summary", "--run-benchmarks"])
@@ -2169,11 +2182,9 @@ def test_generate_summary_parser_defaults_to_trusted_profile(self):
         assert args.profile == TRUSTED_BENCH_PROFILE
 
     @patch("benchmark_utils.run_git_command")
-    def test_get_current_version_with_tag(self, mock_git_command):
+    def test_get_current_version_with_tag(self, mock_git_command) -> None:
         """Test getting current version from git tags."""
-        mock_result = Mock()
-        mock_result.stdout.strip.return_value = "v1.2.3"
-        mock_git_command.return_value = mock_result
+        mock_git_command.return_value = completed_process("v1.2.3\n")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2184,14 +2195,13 @@ def test_get_current_version_with_tag(self, mock_git_command):
             mock_git_command.assert_called_with(["describe", "--tags", "--abbrev=0", "--match=v*"], cwd=project_root)
 
     @patch("benchmark_utils.run_git_command")
-    def test_get_current_version_fallback(self, mock_git_command):
+    def test_get_current_version_fallback(self, mock_git_command) -> None:
         """Test fallback version detection when describe fails."""
         # First call (describe) fails, second call (tag -l) succeeds
-        mock_result = Mock()
-        mock_result.stdout.strip.return_value = "v0.1.0\nv0.2.0"
+        mock_result = completed_process("v0.1.0\nv0.2.0")
 
         # The second call is made within the exception handler
-        def side_effect(*args, **kwargs):
+        def side_effect(*args, **kwargs) -> subprocess.CompletedProcess[str]:
             if "describe" in args[0]:
                 raise subprocess.CalledProcessError(1, "git describe", "describe failed")
             return mock_result
@@ -2206,9 +2216,9 @@ def side_effect(*args, **kwargs):
             assert version == "0.1.0"
 
     @patch("benchmark_utils.run_git_command")
-    def test_get_current_version_no_tags(self, mock_git_command):
+    def test_get_current_version_no_tags(self, mock_git_command) -> None:
         """Test version detection when no tags are found."""
-        mock_git_command.side_effect = Exception("No tags found")
+        mock_git_command.side_effect = RuntimeError("No tags found")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2219,11 +2229,9 @@ def test_get_current_version_no_tags(self, mock_git_command):
 
     @patch("benchmark_utils.run_git_command")
     @patch("benchmark_utils.datetime")
-    def test_get_version_date_with_tag(self, mock_datetime, mock_git_command):  # noqa: ARG002
+    def test_get_version_date_with_tag(self, mock_datetime, mock_git_command) -> None:  # noqa: ARG002
         """Test getting version date from git tag."""
-        mock_result = Mock()
-        mock_result.stdout.strip.return_value = "2024-01-15"
-        mock_git_command.return_value = mock_result
+        mock_git_command.return_value = completed_process("2024-01-15\n")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2236,9 +2244,9 @@ def test_get_version_date_with_tag(self, mock_datetime, mock_git_command):  # no
 
     @patch("benchmark_utils.run_git_command")
     @patch("benchmark_utils.datetime")
-    def test_get_version_date_fallback(self, mock_datetime, mock_git_command):
+    def test_get_version_date_fallback(self, mock_datetime, mock_git_command) -> None:
         """Test version date fallback to current date."""
-        mock_git_command.side_effect = Exception("Git command failed")
+        mock_git_command.side_effect = RuntimeError("Git command failed")
         mock_now = Mock()
         mock_now.strftime.return_value = "2024-01-15"
         mock_datetime.now.return_value = mock_now
@@ -2252,7 +2260,7 @@ def test_get_version_date_fallback(self, mock_datetime, mock_git_command):
             assert date == "2024-01-15"
             mock_now.strftime.assert_called_with("%Y-%m-%d")
 
-    def test_parse_baseline_results_nonexistent_file(self):
+    def test_parse_baseline_results_nonexistent_file(self) -> None:
         """Test parsing baseline results when file doesn't exist."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2264,7 +2272,7 @@ def test_parse_baseline_results_nonexistent_file(self):
             assert "### Baseline Results" in content
             assert "Error parsing baseline results" in content
 
-    def test_parse_baseline_results_with_data(self):
+    def test_parse_baseline_results_with_data(self) -> None:
         """Test parsing baseline results with actual data."""
         baseline_content = """Date: 2024-01-15 10:30:00 UTC
 Git commit: abc123def456
@@ -2301,7 +2309,7 @@ def test_parse_baseline_results_with_data(self):
             assert "### 3D Triangulation Performance" in markdown_content
             assert "| Points | Time (mean) | Throughput (mean) | Scaling |" in markdown_content
 
-    def test_parse_comparison_results_with_regression(self):
+    def test_parse_comparison_results_with_regression(self) -> None:
         """Test parsing comparison results that show regression."""
         comparison_content = """Performance Comparison Results
 ⚠️  REGRESSION: Time increased by 15.2% (slower performance)
@@ -2324,7 +2332,7 @@ def test_parse_comparison_results_with_regression(self):
             assert "REGRESSION: Time increased by 15.2%" in markdown_content
             assert "IMPROVEMENT: Time decreased by 8.5%" in markdown_content
 
-    def test_parse_comparison_results_no_regression(self):
+    def test_parse_comparison_results_no_regression(self) -> None:
         """Test parsing comparison results with no regression."""
         comparison_content = """Performance Comparison Results
 ✅ OK: Time change +2.1% within acceptable range
@@ -2349,10 +2357,10 @@ def test_parse_comparison_results_no_regression(self):
     @patch("benchmark_utils.get_git_commit_hash")
     @patch("benchmark_utils.run_git_command")
     @patch("benchmark_utils.datetime")
-    def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_commit):
+    def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_commit) -> None:
         """Test generating complete markdown content."""
         # Avoid calling actual git in __init__ helpers
-        mock_run_git.side_effect = Exception("git unavailable in test")
+        mock_run_git.side_effect = RuntimeError("git unavailable in test")
         mock_git_commit.return_value = "abc123def456"
         mock_now = Mock()
         mock_now.strftime.return_value = "2024-01-15 10:30:00 UTC"
@@ -2379,7 +2387,7 @@ def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_c
             assert RECOMMENDATIONS_TITLE in content
             assert PERFORMANCE_UPDATES_TITLE in content
 
-    def test_get_ci_performance_suite_results(self):
+    def test_get_ci_performance_suite_results(self) -> None:
         """Test public API summary generation from ci_performance_suite Criterion data."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2404,7 +2412,7 @@ def test_get_ci_performance_suite_results(self):
             assert "#### Bistellar flips" in content
             assert "`bistellar_flips_4d/k2_roundtrip`" in content
 
-    def test_get_circumsphere_performance_results(self):
+    def test_get_circumsphere_performance_results(self) -> None:
         """Test getting circumsphere performance results."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2417,7 +2425,7 @@ def test_get_circumsphere_performance_results(self):
             # Should contain fallback performance data when no criterion results exist
             assert "Basic 3D" in content or "Version unknown" in content
 
-    def test_get_update_instructions(self):
+    def test_get_update_instructions(self) -> None:
         """Test getting performance data update instructions."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2431,7 +2439,7 @@ def test_get_update_instructions(self):
             assert "uv run benchmark-utils generate-summary" in content
             assert "PerformanceSummaryGenerator" in content
 
-    def test_parse_numerical_accuracy_output_success(self):
+    def test_parse_numerical_accuracy_output_success(self) -> None:
         """Test parsing numerical accuracy output successfully."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2454,7 +2462,7 @@ def test_parse_numerical_accuracy_output_success(self):
             assert result["distance_lifted"] == "20.3%"
             assert result["all_agree"] == "0.8%"
 
-    def test_parse_numerical_accuracy_output_no_data(self):
+    def test_parse_numerical_accuracy_output_no_data(self) -> None:
         """Test parsing numerical accuracy output with no relevant data."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2468,7 +2476,7 @@ def test_parse_numerical_accuracy_output_no_data(self):
 
             assert result is None
 
-    def test_parse_numerical_accuracy_output_malformed(self):
+    def test_parse_numerical_accuracy_output_malformed(self) -> None:
         """Test parsing numerical accuracy output with malformed data."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2483,9 +2491,9 @@ def test_parse_numerical_accuracy_output_malformed(self):
             assert result is None
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_circumsphere_benchmarks_success(self, mock_cargo):
+    def test_run_circumsphere_benchmarks_success(self, mock_cargo) -> None:
         """Test running circumsphere benchmarks successfully."""
-        mock_cargo.return_value = Mock(stdout="")
+        mock_cargo.return_value = completed_process()
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2509,9 +2517,9 @@ def test_run_circumsphere_benchmarks_success(self, mock_cargo):
             ]
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_circumsphere_benchmarks_uses_requested_cargo_profile(self, mock_cargo):
+    def test_run_circumsphere_benchmarks_uses_requested_cargo_profile(self, mock_cargo) -> None:
         """Test running circumsphere benchmarks with an explicit Cargo profile."""
-        mock_cargo.return_value = Mock(stdout="")
+        mock_cargo.return_value = completed_process()
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2527,17 +2535,18 @@ def test_run_circumsphere_benchmarks_uses_requested_cargo_profile(self, mock_car
             assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "circumsphere_containment"]
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo):
+    def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo) -> None:
         """Test running circumsphere benchmarks with numerical accuracy data."""
         # Mock cargo command to return output with numerical accuracy data
-        mock_result = Mock()
-        mock_result.stdout = """Running benchmarks...
+        mock_result = completed_process(
+            """Running benchmarks...
 Method Comparisons (1000 total tests):
   insphere vs insphere_distance:  820/1000 (82.0%)
   insphere vs insphere_lifted:  5/1000 (0.5%)
   insphere_distance vs insphere_lifted:  180/1000 (18.0%)
   All three methods agree:  2/1000 (0.2%)
-Benchmark completed."""
+Benchmark completed.""",
+        )
         mock_cargo.return_value = mock_result
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -2566,9 +2575,9 @@ def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo):
             ]
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys):
+    def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys) -> None:
         """Test handling circumsphere benchmark failures."""
-        mock_cargo.side_effect = Exception("Benchmark failed")
+        mock_cargo.side_effect = RuntimeError("Benchmark failed")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2584,15 +2593,9 @@ def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys):
             assert "Error running circumsphere benchmarks" in captured.out
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_ci_performance_suite_success(self, mock_cargo):
+    def test_run_ci_performance_suite_success(self, mock_cargo) -> None:
         """Test running the public API CI performance suite successfully."""
-        mock_cargo.return_value = Mock(
-            returncode=0,
-            stdout=(
-                "api_benchmark group=boundary_facets public_api=DelaunayTriangulation::boundary_facets "
-                "dimensions=3 benchmark_ids=boundary_facets/boundary_facets_3d/50 note=test\n"
-            ),
-        )
+        mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT)
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2610,19 +2613,17 @@ def test_run_ci_performance_suite_success(self, mock_cargo):
                 "--bench",
                 "ci_performance_suite",
             ]
+            assert "--" not in args
             manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE
             assert manifest_path.read_text(encoding="utf-8") == "boundary_facets/boundary_facets_3d/50\n"
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo):
+    def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo) -> None:
         """Test running the public API CI performance suite with an explicit profile."""
-        mock_cargo.return_value = Mock(returncode=0, stdout="")
+        mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT)
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
-            stale_manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE
-            stale_manifest_path.parent.mkdir(parents=True)
-            stale_manifest_path.write_text("stale/benchmark/id\n", encoding="utf-8")
             generator = PerformanceSummaryGenerator(project_root)
 
             requested_profile = "release"
@@ -2632,12 +2633,46 @@ def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo)
             mock_cargo.assert_called_once()
             args = mock_cargo.call_args.args[0]
             assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "ci_performance_suite"]
-            assert not stale_manifest_path.exists()
+            assert "--" not in args
+
+    @patch("benchmark_utils.run_cargo_command")
+    def test_run_ci_performance_suite_dev_mode_uses_reduced_sampling(self, mock_cargo) -> None:
+        """Test dev mode appends reduced Criterion sampling args explicitly."""
+        mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT)
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+            generator = PerformanceSummaryGenerator(project_root)
+
+            success = generator._run_ci_performance_suite(use_dev_mode=True)
+
+            assert success is True
+            args = mock_cargo.call_args.args[0]
+            assert "--" in args
+            for arg in DEV_MODE_BENCH_ARGS:
+                assert arg in args
+
+    @patch("benchmark_utils.run_cargo_command")
+    def test_run_ci_performance_suite_requires_manifest(self, mock_cargo) -> None:
+        """Test successful ci_performance_suite runs must emit the manifest."""
+        mock_cargo.return_value = completed_process()
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            project_root = Path(temp_dir)
+            stale_manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE
+            stale_manifest_path.parent.mkdir(parents=True)
+            stale_manifest_path.write_text("stale/benchmark/id\n", encoding="utf-8")
+            generator = PerformanceSummaryGenerator(project_root)
+
+            with pytest.raises(RuntimeError, match="emitted no api_benchmark manifest"):
+                generator._run_ci_performance_suite()
+
+            assert stale_manifest_path.read_text(encoding="utf-8") == "stale/benchmark/id\n"
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_ci_performance_suite_nonzero_exit(self, mock_cargo, capsys):
+    def test_run_ci_performance_suite_nonzero_exit(self, mock_cargo, capsys) -> None:
         """Test handling ci_performance_suite nonzero process exits."""
-        mock_cargo.return_value = Mock(returncode=101, stdout="", stderr="benchmark failed")
+        mock_cargo.return_value = completed_process(returncode=101, stderr="benchmark failed")
 
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2650,7 +2685,7 @@ def test_run_ci_performance_suite_nonzero_exit(self, mock_cargo, capsys):
             assert "cargo exited with status 101" in captured.out
 
     @patch("benchmark_utils.run_cargo_command")
-    def test_run_ci_performance_suite_failure(self, mock_cargo, capsys):
+    def test_run_ci_performance_suite_failure(self, mock_cargo, capsys) -> None:
         """Test handling ci_performance_suite benchmark failures."""
         mock_cargo.side_effect = OSError("Benchmark failed")
 
@@ -2665,9 +2700,9 @@ def test_run_ci_performance_suite_failure(self, mock_cargo, capsys):
             assert "Error running ci_performance_suite benchmarks" in captured.out
 
     @patch("benchmark_utils.run_git_command")
-    def test_generate_summary_success(self, mock_git, capsys):
+    def test_generate_summary_success(self, mock_git, capsys) -> None:
         """Test successful generation of performance summary."""
-        mock_git.side_effect = Exception("git unavailable in test")
+        mock_git.side_effect = RuntimeError("git unavailable in test")
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
             generator = PerformanceSummaryGenerator(project_root)
@@ -2690,7 +2725,7 @@ def test_generate_summary_success(self, mock_git, capsys):
 
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite")
-    def test_generate_summary_with_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks):
+    def test_generate_summary_with_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks) -> None:
         """Test generating summary with fresh benchmark run."""
         mock_run_ci_suite.return_value = True
         mock_run_benchmarks.return_value = (True, None)
@@ -2712,7 +2747,7 @@ def test_generate_summary_with_benchmarks(self, mock_run_ci_suite, mock_run_benc
 
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite")
-    def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks):
+    def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks) -> None:
         """Test generating a summary with fresh benchmarks under a specific Cargo profile."""
         mock_run_ci_suite.return_value = True
         mock_run_benchmarks.return_value = (True, None)
@@ -2733,7 +2768,7 @@ def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_ci_s
 
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
     @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite")
-    def test_generate_summary_benchmark_failure_continues(self, mock_run_ci_suite, mock_run_benchmarks, capsys):
+    def test_generate_summary_benchmark_failure_continues(self, mock_run_ci_suite, mock_run_benchmarks, capsys) -> None:
         """Test that summary generation continues even if benchmark run fails."""
         mock_run_ci_suite.return_value = False
         mock_run_benchmarks.return_value = (False, None)
@@ -2753,7 +2788,7 @@ def test_generate_summary_benchmark_failure_continues(self, mock_run_ci_suite, m
             captured = capsys.readouterr()
             assert "Benchmark run failed" in captured.out
 
-    def test_generate_summary_exception_handling(self, capsys):
+    def test_generate_summary_exception_handling(self, capsys) -> None:
         """Test exception handling in generate_summary."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2769,7 +2804,7 @@ def test_generate_summary_exception_handling(self, capsys):
             captured = capsys.readouterr()
             assert "Failed to generate performance summary" in captured.err
 
-    def test_get_static_content(self):
+    def test_get_static_content(self) -> None:
         """Test getting static content sections."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2782,7 +2817,7 @@ def test_get_static_content(self):
             assert "## Implementation Notes" in content
             assert "## Benchmark Structure" in content
 
-    def test_empty_benchmark_results_edge_case(self):
+    def test_empty_benchmark_results_edge_case(self) -> None:
         """Test handling of empty benchmark results (edge case)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2792,7 +2827,7 @@ def test_empty_benchmark_results_edge_case(self):
             results = generator._parse_circumsphere_benchmark_results()
             assert len(results) > 0
 
-    def test_malformed_estimates_json_edge_case(self):
+    def test_malformed_estimates_json_edge_case(self) -> None:
         """Test handling of malformed estimates.json files (edge case)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2810,7 +2845,7 @@ def test_malformed_estimates_json_edge_case(self):
             results = generator._parse_circumsphere_benchmark_results()
             assert len(results) > 0
 
-    def test_missing_git_info_edge_case(self):
+    def test_missing_git_info_edge_case(self) -> None:
         """Test handling when git information is not available (edge case)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2820,8 +2855,8 @@ def test_missing_git_info_edge_case(self):
                 patch("benchmark_utils.run_git_command") as mock_git,
                 patch("benchmark_utils.get_git_commit_hash") as mock_commit,
             ):
-                mock_git.side_effect = Exception("Git not available")
-                mock_commit.side_effect = Exception("Git not available")
+                mock_git.side_effect = RuntimeError("Git not available")
+                mock_commit.side_effect = RuntimeError("Git not available")
 
                 generator = PerformanceSummaryGenerator(project_root)
                 success = generator.generate_summary(output_file)
@@ -2832,7 +2867,7 @@ def test_missing_git_info_edge_case(self):
                 content = output_file.read_text()
                 assert "Version unknown" in content
 
-    def test_baseline_fallback_behavior_edge_case(self):
+    def test_baseline_fallback_behavior_edge_case(self) -> None:
         """Test baseline file fallback from primary to secondary location (edge case)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2878,7 +2913,7 @@ def test_baseline_fallback_behavior_edge_case(self):
                 # Performance data "1000 Points (3D)" would come from benchmark parsing,
                 # not baseline parsing. The important test is that the fallback file is read.
 
-    def test_full_generation_workflow_integration(self):
+    def test_full_generation_workflow_integration(self) -> None:
         """Test complete summary generation workflow (integration test)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2925,7 +2960,7 @@ def test_full_generation_workflow_integration(self):
                 assert RECOMMENDATIONS_TITLE.removeprefix("### ") in content
                 assert PERFORMANCE_UPDATES_TITLE.removeprefix("## ") in content
 
-    def test_dimension_sorting_numeric_order(self):
+    def test_dimension_sorting_numeric_order(self) -> None:
         """Test that dimensions are sorted numerically, not lexically."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -2961,7 +2996,7 @@ def test_dimension_sorting_numeric_order(self):
                 assert "Test9" in content  # 9D test case
                 assert "Test10" in content  # 10D test case
 
-    def test_hardware_metadata_parsing_with_cores(self):
+    def test_hardware_metadata_parsing_with_cores(self) -> None:
         """Test that hardware metadata parsing includes cores and guards against IndexError."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -3008,7 +3043,7 @@ def test_hardware_metadata_parsing_with_cores(self):
             assert "Apple M4 Max" in content
             assert "(" not in content.split("Apple M4 Max")[1].split("\n")[0] if "Apple M4 Max" in content else True
 
-    def test_dev_mode_args_consistency(self):
+    def test_dev_mode_args_consistency(self) -> None:
         """Test that DEV_MODE_BENCH_ARGS is used consistently."""
         # Verify the constant exists and has expected structure
         assert isinstance(DEV_MODE_BENCH_ARGS, list)
@@ -3020,7 +3055,7 @@ def test_dev_mode_args_consistency(self):
         # with pairs of argument name and value
         assert len(DEV_MODE_BENCH_ARGS) >= 6  # At least 3 arg-value pairs
 
-    def test_numerical_accuracy_phrasing_flexibility(self):
+    def test_numerical_accuracy_phrasing_flexibility(self) -> None:
         """Test that numerical accuracy section doesn't hardcode sample size."""
         with tempfile.TemporaryDirectory() as temp_dir:
             project_root = Path(temp_dir)
@@ -3038,7 +3073,7 @@ def test_numerical_accuracy_phrasing_flexibility(self):
 class TestTagSpecificBaselineHandling:
     """Test cases for tag-specific baseline file handling functionality."""
 
-    def test_prepare_baseline_with_tag_specific_file(self, capsys):
+    def test_prepare_baseline_with_tag_specific_file(self, capsys) -> None:
         """Test baseline preparation with tag-specific file (baseline-v*.txt)."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3090,7 +3125,7 @@ def test_prepare_baseline_with_tag_specific_file(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_prepare_baseline_with_generic_baseline_file(self, capsys):
+    def test_prepare_baseline_with_generic_baseline_file(self, capsys) -> None:
         """Test baseline preparation with generic baseline*.txt file."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3137,7 +3172,7 @@ def test_prepare_baseline_with_generic_baseline_file(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_prepare_baseline_prefers_standard_name(self, capsys):
+    def test_prepare_baseline_prefers_standard_name(self, capsys) -> None:
         """Test that prepare_baseline prefers baseline_results.txt over tag-specific files."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3179,7 +3214,7 @@ def test_prepare_baseline_prefers_standard_name(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_prepare_baseline_no_matching_files(self, capsys):
+    def test_prepare_baseline_no_matching_files(self, capsys) -> None:
         """Test baseline preparation when no matching baseline files are found."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3212,7 +3247,7 @@ def test_prepare_baseline_no_matching_files(self, capsys):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_extract_baseline_commit_from_tag_file(self):
+    def test_extract_baseline_commit_from_tag_file(self) -> None:
         """Test extracting commit SHA from tag-specific baseline file."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3244,7 +3279,7 @@ def test_extract_baseline_commit_from_tag_file(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_extract_baseline_commit_fallback_to_metadata(self):
+    def test_extract_baseline_commit_fallback_to_metadata(self) -> None:
         """Test extracting commit SHA from metadata.json when baseline files have no commit info."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3282,7 +3317,7 @@ def test_extract_baseline_commit_fallback_to_metadata(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_extract_baseline_commit_handles_multiple_tag_files(self):
+    def test_extract_baseline_commit_handles_multiple_tag_files(self) -> None:
         """Test that extract_baseline_commit selects the highest semver tag file when multiple exist."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3316,7 +3351,7 @@ def test_extract_baseline_commit_handles_multiple_tag_files(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_semver_prefers_stable_over_prerelease(self):
+    def test_semver_prefers_stable_over_prerelease(self) -> None:
         """Test that stable releases are preferred over pre-releases of the same version."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3335,7 +3370,7 @@ def test_semver_prefers_stable_over_prerelease(self):
             assert selected is not None
             assert selected.name == "baseline-v1.2.3.txt"
 
-    def test_semver_v043_vs_v043_beta1_preference(self):
+    def test_semver_v043_vs_v043_beta1_preference(self) -> None:
         """Test specific case: v0.4.3 is preferred over v0.4.3-beta.1."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3357,7 +3392,7 @@ def test_semver_v043_vs_v043_beta1_preference(self):
             assert "stable043" in content
             assert "Tag: v0.4.3" in content
 
-    def test_semver_prefers_higher_prerelease_when_no_stable(self):
+    def test_semver_prefers_higher_prerelease_when_no_stable(self) -> None:
         """Test that higher pre-release is selected when only pre-releases exist."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3377,7 +3412,7 @@ def test_semver_prefers_higher_prerelease_when_no_stable(self):
             # Current behavior: lexicographic prerelease ordering; expect beta.2 to win
             assert selected.name == "baseline-v1.2.3-beta.2.txt"
 
-    def test_baseline_commit_source_from_baseline_file(self):
+    def test_baseline_commit_source_from_baseline_file(self) -> None:
         """Test that BASELINE_COMMIT_SOURCE is 'baseline' when commit is extracted from baseline file."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3406,7 +3441,7 @@ def test_baseline_commit_source_from_baseline_file(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_baseline_commit_source_from_metadata_file(self):
+    def test_baseline_commit_source_from_metadata_file(self) -> None:
         """Test that BASELINE_COMMIT_SOURCE is 'metadata' when commit is extracted from metadata.json."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3437,7 +3472,7 @@ def test_baseline_commit_source_from_metadata_file(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_baseline_commit_source_unknown_when_no_commit_found(self):
+    def test_baseline_commit_source_unknown_when_no_commit_found(self) -> None:
         """Test that BASELINE_COMMIT_SOURCE is 'unknown' when no commit is found anywhere."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3460,7 +3495,7 @@ def test_baseline_commit_source_unknown_when_no_commit_found(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_env_vars_mirrored_to_current_process(self):
+    def test_env_vars_mirrored_to_current_process(self) -> None:
         """Test that write_github_env_vars mirrors variables into current process."""
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
             env_path = env_file.name
@@ -3494,7 +3529,7 @@ def test_env_vars_mirrored_to_current_process(self):
             for key in ["TEST_BASELINE_EXISTS", "TEST_BASELINE_SOURCE"]:
                 os.environ.pop(key, None)
 
-    def test_env_vars_multiline_handling(self):
+    def test_env_vars_multiline_handling(self) -> None:
         """Test that write_github_env_vars correctly handles multiline values with heredoc format."""
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
             env_path = env_file.name
@@ -3543,7 +3578,7 @@ def test_env_vars_multiline_handling(self):
             for key in ["TEST_MULTILINE", "TEST_SINGLE_LINE", "TEST_WITH_CR"]:
                 os.environ.pop(key, None)
 
-    def test_env_vars_none_value_handling(self):
+    def test_env_vars_none_value_handling(self) -> None:
         """Test that write_github_env_vars correctly handles None values without errors."""
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
             env_path = env_file.name
@@ -3578,7 +3613,7 @@ def test_env_vars_none_value_handling(self):
             for key in ["TEST_NONE", "TEST_NORMAL"]:
                 os.environ.pop(key, None)
 
-    def test_baseline_tag_sanitization(self):
+    def test_baseline_tag_sanitization(self) -> None:
         """Test that BASELINE_TAG is sanitized before being exported to GITHUB_ENV."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3619,7 +3654,7 @@ def test_baseline_tag_sanitization(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_baseline_tag_length_capping(self):
+    def test_baseline_tag_length_capping(self) -> None:
         """Test that BASELINE_TAG is capped at 64 characters."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3662,7 +3697,7 @@ def test_baseline_tag_length_capping(self):
             finally:
                 Path(env_path).unlink(missing_ok=True)
 
-    def test_packaging_version_complex_comparisons(self):
+    def test_packaging_version_complex_comparisons(self) -> None:
         """Test that packaging.version handles complex version comparisons correctly."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3688,7 +3723,7 @@ def test_packaging_version_complex_comparisons(self):
             assert selected is not None
             assert selected.name == "baseline-v2.0.0.txt"
 
-    def test_packaging_version_invalid_versions(self):
+    def test_packaging_version_invalid_versions(self) -> None:
         """Test that invalid version formats are handled gracefully."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3710,7 +3745,7 @@ def test_packaging_version_invalid_versions(self):
             assert selected.name == "baseline-v1.2.txt"
             assert "Valid 1.2.0 content" in selected.read_text()
 
-    def test_packaging_version_truly_invalid_versions(self):
+    def test_packaging_version_truly_invalid_versions(self) -> None:
         """Test that truly invalid version formats fall back to generic baseline selection."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3732,7 +3767,7 @@ def test_packaging_version_truly_invalid_versions(self):
             assert selected.name == "baseline_results.txt"
             assert "Generic baseline content" in selected.read_text()
 
-    def test_generic_baseline_prefers_newest_mtime(self):
+    def test_generic_baseline_prefers_newest_mtime(self) -> None:
         """Test that generic baseline files are selected by most recent mtime."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3757,7 +3792,7 @@ def test_generic_baseline_prefers_newest_mtime(self):
             assert selected.name == "baseline-newer.txt"
             assert "Newer baseline content" in selected.read_text()
 
-    def test_prerelease_detection_fix_validation(self):
+    def test_prerelease_detection_fix_validation(self) -> None:
         """Test that prerelease detection correctly identifies stable vs prerelease versions."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
@@ -3775,7 +3810,7 @@ def test_prerelease_detection_fix_validation(self):
             assert selected.name == "baseline-v1.0.0.txt"
             assert "Stable content" in selected.read_text()
 
-    def test_prepare_baseline_and_extract_commit_integration(self):
+    def test_prepare_baseline_and_extract_commit_integration(self) -> None:
         """Test the integration between prepare_baseline and extract_baseline_commit."""
         with tempfile.TemporaryDirectory() as temp_dir:
             baseline_dir = Path(temp_dir)
diff --git a/scripts/tests/test_compare_storage_backends.py b/scripts/tests/test_compare_storage_backends.py
index 3fa981dc..62468ef3 100644
--- a/scripts/tests/test_compare_storage_backends.py
+++ b/scripts/tests/test_compare_storage_backends.py
@@ -21,7 +21,7 @@
 
 
 @pytest.fixture
-def temp_project_root(tmp_path):
+def temp_project_root(tmp_path) -> Path:
     """Create a temporary project root with necessary directories."""
     project_root = tmp_path / "project"
     project_root.mkdir()
@@ -38,13 +38,13 @@ def temp_project_root(tmp_path):
 
 
 @pytest.fixture
-def comparator(temp_project_root):
+def comparator(temp_project_root) -> StorageBackendComparator:
     """Create a StorageBackendComparator instance with temp project root."""
     return StorageBackendComparator(temp_project_root)
 
 
 @pytest.fixture
-def sample_criterion_json():
+def sample_criterion_json() -> dict[str, object]:
     """Sample Criterion estimates.json data."""
     return {
         "mean": {
@@ -58,7 +58,7 @@ def sample_criterion_json():
 
 
 @pytest.fixture
-def sample_criterion_stdout():
+def sample_criterion_stdout() -> str:
     """Sample Criterion stdout output for regex parsing."""
     return """
 Running benchmarks...
@@ -73,7 +73,7 @@ def sample_criterion_stdout():
 
 
 @pytest.fixture
-def completed_ok():
+def completed_ok() -> CompletedProcess[str]:
     """Reusable fixture for successful cargo bench results."""
     return CompletedProcess(
         args=["cargo", "bench"],
@@ -86,14 +86,14 @@ def completed_ok():
 class TestStorageBackendComparator:
     """Test cases for StorageBackendComparator class."""
 
-    def test_init(self, temp_project_root):
+    def test_init(self, temp_project_root) -> None:
         """Test comparator initialization."""
         comparator = StorageBackendComparator(temp_project_root)
 
         assert comparator.project_root == temp_project_root
         assert comparator.criterion_dir == temp_project_root / "target" / "criterion"
 
-    def test_parse_criterion_output_json_success(self, comparator, sample_criterion_json):
+    def test_parse_criterion_output_json_success(self, comparator, sample_criterion_json) -> None:
         """Test parsing Criterion output from JSON files."""
         # Create fake criterion directory structure
         bench_dir = comparator.criterion_dir / "construction" / "2D" / "1000v"
@@ -118,7 +118,7 @@ def test_parse_criterion_output_json_success(self, comparator, sample_criterion_
         assert bench["lower"] == 145000000.0
         assert bench["upper"] == 155000000.0
 
-    def test_parse_criterion_output_regex_fallback(self, comparator, sample_criterion_stdout):
+    def test_parse_criterion_output_regex_fallback(self, comparator, sample_criterion_stdout) -> None:
         """Test parsing Criterion output using regex fallback when JSON unavailable."""
         results = comparator._parse_criterion_output(sample_criterion_stdout)
 
@@ -145,7 +145,7 @@ def test_parse_criterion_output_regex_fallback(self, comparator, sample_criterio
         assert bench3["estimate"] == 9.012
         assert bench3["unit"] == "ms"
 
-    def test_parse_criterion_output_empty(self, comparator):
+    def test_parse_criterion_output_empty(self, comparator) -> None:
         """Test parsing empty Criterion output."""
         results = comparator._parse_criterion_output("")
 
@@ -153,7 +153,7 @@ def test_parse_criterion_output_empty(self, comparator):
         assert len(results["benchmarks"]) == 0
         assert "raw_output" in results
 
-    def test_build_comparison_table_basic(self, comparator):
+    def test_build_comparison_table_basic(self, comparator) -> None:
         """Test building comparison table with matching benchmarks."""
         slotmap_by_name = {
             "test1": {"estimate": 100.0, "unit": "ms"},
@@ -186,7 +186,7 @@ def test_build_comparison_table_basic(self, comparator):
         assert "+10.0%" in lines[1]
         assert "SlotMap" in lines[1]
 
-    def test_build_comparison_table_similar_performance(self, comparator):
+    def test_build_comparison_table_similar_performance(self, comparator) -> None:
         """Test comparison table with similar performance (< 2% difference)."""
         slotmap_by_name = {
             "test": {"estimate": 100.0, "unit": "ms"},
@@ -203,7 +203,7 @@ def test_build_comparison_table_similar_performance(self, comparator):
         assert len(lines) == 1
         assert "~Same" in lines[0]
 
-    def test_build_comparison_table_missing_data(self, comparator):
+    def test_build_comparison_table_missing_data(self, comparator) -> None:
         """Test comparison table with missing data for one backend."""
         slotmap_by_name = {
             "test1": {"estimate": 100.0, "unit": "ms"},
@@ -226,7 +226,7 @@ def test_build_comparison_table_missing_data(self, comparator):
         assert "N/A" in lines[1]
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_benchmark_success(self, mock_run_cargo, comparator, completed_ok):
+    def test_run_benchmark_success(self, mock_run_cargo, comparator, completed_ok) -> None:
         """Test successful benchmark execution."""
         mock_run_cargo.return_value = completed_ok
 
@@ -244,7 +244,7 @@ def test_run_benchmark_success(self, mock_run_cargo, comparator, completed_ok):
         assert args[5] == "test_bench"
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_benchmark_with_dense_slotmap(self, mock_run_cargo, comparator, completed_ok):
+    def test_run_benchmark_with_dense_slotmap(self, mock_run_cargo, comparator, completed_ok) -> None:
         """Test benchmark execution with DenseSlotMap feature."""
         mock_run_cargo.return_value = completed_ok
 
@@ -263,7 +263,7 @@ def test_run_benchmark_with_dense_slotmap(self, mock_run_cargo, comparator, comp
         assert "dense-slotmap" in args
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_benchmark_dev_mode(self, mock_run_cargo, comparator, completed_ok):
+    def test_run_benchmark_dev_mode(self, mock_run_cargo, comparator, completed_ok) -> None:
         """Test benchmark execution in development mode."""
         mock_run_cargo.return_value = completed_ok
 
@@ -281,7 +281,7 @@ def test_run_benchmark_dev_mode(self, mock_run_cargo, comparator, completed_ok):
         assert "--noplot" in args
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_benchmark_with_extra_args(self, mock_run_cargo, comparator, completed_ok):
+    def test_run_benchmark_with_extra_args(self, mock_run_cargo, comparator, completed_ok) -> None:
         """Test benchmark execution with extra arguments."""
         mock_run_cargo.return_value = completed_ok
 
@@ -301,7 +301,7 @@ def test_run_benchmark_with_extra_args(self, mock_run_cargo, comparator, complet
         assert "construction" in args
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_benchmark_failure(self, mock_run_cargo, comparator, capsys):
+    def test_run_benchmark_failure(self, mock_run_cargo, comparator, capsys) -> None:
         """Test benchmark execution failure handling."""
         # Mock failed cargo bench run
         mock_result = CompletedProcess(
@@ -321,7 +321,7 @@ def test_run_benchmark_failure(self, mock_run_cargo, comparator, capsys):
         assert "Benchmark failed" in captured.err
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_comparison_success(self, mock_run_cargo, comparator, tmp_path):
+    def test_run_comparison_success(self, mock_run_cargo, comparator, tmp_path) -> None:
         """Test full comparison workflow success."""
         # Mock successful benchmark runs for both backends
         mock_result = CompletedProcess(
@@ -350,7 +350,7 @@ def test_run_comparison_success(self, mock_run_cargo, comparator, tmp_path):
         assert "DenseSlotMap" in report
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_comparison_slotmap_failure(self, mock_run_cargo, comparator):
+    def test_run_comparison_slotmap_failure(self, mock_run_cargo, comparator) -> None:
         """Test comparison when SlotMap benchmark fails."""
         # Mock failed SlotMap run
         mock_result = CompletedProcess(
@@ -366,7 +366,7 @@ def test_run_comparison_slotmap_failure(self, mock_run_cargo, comparator):
         assert success is False
 
     @patch("compare_storage_backends.run_cargo_command")
-    def test_run_comparison_denseslotmap_failure(self, mock_run_cargo, comparator):
+    def test_run_comparison_denseslotmap_failure(self, mock_run_cargo, comparator) -> None:
         """Test comparison when DenseSlotMap benchmark fails."""
         # Mock successful SlotMap, failed DenseSlotMap
         mock_run_cargo.side_effect = [
@@ -390,7 +390,7 @@ def test_run_comparison_denseslotmap_failure(self, mock_run_cargo, comparator):
 
         assert success is False
 
-    def test_generate_comparison_report_structure(self, comparator):
+    def test_generate_comparison_report_structure(self, comparator) -> None:
         """Test comparison report generation structure."""
         slotmap_results = {
             "backend": "SlotMap",
@@ -431,7 +431,7 @@ def test_generate_comparison_report_structure(self, comparator):
 class TestIntegration:
     """Integration tests for compare_storage_backends module."""
 
-    def test_find_project_root_integration(self, tmp_path):
+    def test_find_project_root_integration(self, tmp_path) -> None:
         """Test integration with find_project_root utility."""
         # Create a fake project with Cargo.toml
         project_root = tmp_path / "project"
diff --git a/scripts/tests/test_hardware_utils.py b/scripts/tests/test_hardware_utils.py
index f36a1e8a..b9f8596f 100644
--- a/scripts/tests/test_hardware_utils.py
+++ b/scripts/tests/test_hardware_utils.py
@@ -8,7 +8,7 @@
 
 import platform
 import subprocess
-from unittest.mock import Mock, mock_open, patch
+from unittest.mock import mock_open, patch
 
 import pytest
 
@@ -16,7 +16,7 @@
 
 
 @pytest.fixture
-def hardware():
+def hardware() -> HardwareInfo:
     """Fixture for HardwareInfo instance."""
     return HardwareInfo()
 
@@ -24,29 +24,32 @@ def hardware():
 class TestHardwareInfo:
     """Test cases for HardwareInfo class."""
 
-    def test_init(self, hardware):
+    def test_init(self, hardware) -> None:
         """Test HardwareInfo initialization."""
         assert hardware.os_type == platform.system()
         assert hardware.machine == platform.machine()
 
     @patch("hardware_utils.platform.system")
-    def test_init_with_different_os(self, mock_system):
+    def test_init_with_different_os(self, mock_system) -> None:
         """Test initialization with different OS types."""
         mock_system.return_value = "Linux"
         hardware = HardwareInfo()
         assert hardware.os_type == "Linux"
 
-    def test_run_command_empty_cmd(self, hardware):
+    def test_run_command_empty_cmd(self, hardware) -> None:
         """Test _run_command with empty command list."""
         with pytest.raises(ValueError, match="Command list cannot be empty"):
             hardware._run_command([])
 
     @patch("hardware_utils.run_safe_command")
-    def test_run_command_success(self, mock_run_safe, hardware):
+    def test_run_command_success(self, mock_run_safe, hardware) -> None:
         """Test successful command execution."""
-        mock_result = Mock()
-        mock_result.stdout = "test output\n"
-        mock_run_safe.return_value = mock_result
+        mock_run_safe.return_value = subprocess.CompletedProcess(
+            args=["echo", "test"],
+            returncode=0,
+            stdout="test output\n",
+            stderr="",
+        )
 
         result = hardware._run_command(["echo", "test"])
 
@@ -61,7 +64,7 @@ def test_run_command_success(self, mock_run_safe, hardware):
         )
 
     @patch("hardware_utils.run_safe_command")
-    def test_run_command_failure(self, mock_run_safe, hardware):
+    def test_run_command_failure(self, mock_run_safe, hardware) -> None:
         """Test command execution failure."""
         mock_run_safe.side_effect = subprocess.CalledProcessError(1, "cmd")
 
@@ -70,7 +73,7 @@ def test_run_command_failure(self, mock_run_safe, hardware):
 
     @patch("hardware_utils.platform.system")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_cpu_info_darwin(self, mock_run_command, mock_system):
+    def test_get_cpu_info_darwin(self, mock_run_command, mock_system) -> None:
         """Test CPU info detection on macOS."""
         mock_system.return_value = "Darwin"
         mock_run_command.side_effect = ["Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz", "6", "12"]
@@ -85,7 +88,7 @@ def test_get_cpu_info_darwin(self, mock_run_command, mock_system):
     @patch("hardware_utils.platform.system")
     @patch("hardware_utils.shutil.which")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_cpu_info_linux_with_lscpu(self, mock_run_command, mock_which, mock_system):
+    def test_get_cpu_info_linux_with_lscpu(self, mock_run_command, mock_which, mock_system) -> None:
         """Test CPU info detection on Linux with lscpu available."""
         mock_system.return_value = "Linux"
         mock_which.side_effect = lambda cmd: cmd in ["lscpu", "nproc"]
@@ -113,7 +116,7 @@ def test_get_cpu_info_linux_with_lscpu(self, mock_run_command, mock_which, mock_
     @patch("hardware_utils.platform.system")
     @patch("hardware_utils.shutil.which")
     @patch("builtins.open", new_callable=mock_open, read_data="processor\t: 0\nmodel name\t: AMD Ryzen 5 3600\nprocessor\t: 1\n")
-    def test_get_cpu_info_linux_fallback_cpuinfo(self, _mock_file, mock_which, mock_system):  # noqa: PT019
+    def test_get_cpu_info_linux_fallback_cpuinfo(self, _mock_file, mock_which, mock_system) -> None:  # noqa: PT019
         """Test CPU info detection on Linux using /proc/cpuinfo fallback."""
         mock_system.return_value = "Linux"
         mock_which.return_value = None  # No commands available
@@ -128,7 +131,7 @@ def test_get_cpu_info_linux_fallback_cpuinfo(self, _mock_file, mock_which, mock_
     @patch("hardware_utils.platform.system")
     @patch("hardware_utils.shutil.which")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_cpu_info_windows(self, mock_run_command, mock_which, mock_system):
+    def test_get_cpu_info_windows(self, mock_run_command, mock_which, mock_system) -> None:
         """Test CPU info detection on Windows."""
         mock_system.return_value = "Windows"
         mock_which.side_effect = lambda cmd: cmd == "powershell"
@@ -143,7 +146,7 @@ def test_get_cpu_info_windows(self, mock_run_command, mock_which, mock_system):
         assert cpu_threads == "16"
 
     @patch("hardware_utils.platform.system")
-    def test_get_cpu_info_unknown_os(self, mock_system):
+    def test_get_cpu_info_unknown_os(self, mock_system) -> None:
         """Test CPU info detection on unknown OS."""
         mock_system.return_value = "UnknownOS"
 
@@ -156,7 +159,7 @@ def test_get_cpu_info_unknown_os(self, mock_system):
 
     @patch("hardware_utils.platform.system")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_cpu_info_command_failure(self, mock_run_command, mock_system):
+    def test_get_cpu_info_command_failure(self, mock_run_command, mock_system) -> None:
         """Test CPU info detection when commands fail."""
         mock_system.return_value = "Darwin"
         mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
@@ -170,7 +173,7 @@ def test_get_cpu_info_command_failure(self, mock_run_command, mock_system):
 
     @patch("hardware_utils.platform.system")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_memory_info_darwin(self, mock_run_command, mock_system):
+    def test_get_memory_info_darwin(self, mock_run_command, mock_system) -> None:
         """Test memory info detection on macOS."""
         mock_system.return_value = "Darwin"
         mock_run_command.return_value = "17179869184"  # 16 GB in bytes
@@ -182,7 +185,7 @@ def test_get_memory_info_darwin(self, mock_run_command, mock_system):
 
     @patch("hardware_utils.platform.system")
     @patch("builtins.open", new_callable=mock_open, read_data="MemTotal:       16384000 kB\n")
-    def test_get_memory_info_linux(self, _mock_file, mock_system):  # noqa: PT019
+    def test_get_memory_info_linux(self, _mock_file, mock_system) -> None:  # noqa: PT019
         """Test memory info detection on Linux."""
         mock_system.return_value = "Linux"
 
@@ -194,7 +197,7 @@ def test_get_memory_info_linux(self, _mock_file, mock_system):  # noqa: PT019
     @patch("hardware_utils.platform.system")
     @patch("hardware_utils.shutil.which")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_memory_info_windows(self, mock_run_command, mock_which, mock_system):
+    def test_get_memory_info_windows(self, mock_run_command, mock_which, mock_system) -> None:
         """Test memory info detection on Windows."""
         mock_system.return_value = "Windows"
         mock_which.side_effect = lambda cmd: cmd == "powershell"
@@ -206,7 +209,7 @@ def test_get_memory_info_windows(self, mock_run_command, mock_which, mock_system
         assert memory == "32.0 GB"
 
     @patch("hardware_utils.platform.system")
-    def test_get_memory_info_unknown_os(self, mock_system):
+    def test_get_memory_info_unknown_os(self, mock_system) -> None:
         """Test memory info detection on unknown OS."""
         mock_system.return_value = "UnknownOS"
 
@@ -217,7 +220,7 @@ def test_get_memory_info_unknown_os(self, mock_system):
 
     @patch("hardware_utils.shutil.which")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_rust_info_success(self, mock_run_command, mock_which, hardware):
+    def test_get_rust_info_success(self, mock_run_command, mock_which, hardware) -> None:
         """Test Rust info detection when rustc is available."""
         mock_which.return_value = "/usr/bin/rustc"
         mock_run_command.side_effect = ["rustc 1.70.0 (90c541806 2023-05-31)", "rustc 1.70.0 (90c541806 2023-05-31)\nhost: x86_64-apple-darwin\n"]
@@ -228,7 +231,7 @@ def test_get_rust_info_success(self, mock_run_command, mock_which, hardware):
         assert rust_target == "x86_64-apple-darwin"
 
     @patch("hardware_utils.shutil.which")
-    def test_get_rust_info_no_rustc(self, mock_which, hardware):
+    def test_get_rust_info_no_rustc(self, mock_which, hardware) -> None:
         """Test Rust info detection when rustc is not available."""
         mock_which.return_value = None
 
@@ -239,7 +242,7 @@ def test_get_rust_info_no_rustc(self, mock_which, hardware):
 
     @patch("hardware_utils.shutil.which")
     @patch.object(HardwareInfo, "_run_command")
-    def test_get_rust_info_command_failure(self, mock_run_command, mock_which, hardware):
+    def test_get_rust_info_command_failure(self, mock_run_command, mock_which, hardware) -> None:
         """Test Rust info detection when rustc commands fail."""
         mock_which.return_value = "/usr/bin/rustc"
         mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd")
@@ -249,7 +252,7 @@ def test_get_rust_info_command_failure(self, mock_run_command, mock_which, hardw
         assert rust_version == "Unknown"
         assert rust_target == "Unknown"
 
-    def test_get_hardware_info(self, hardware):
+    def test_get_hardware_info(self, hardware) -> None:
         """Test comprehensive hardware info collection."""
         with (
             patch.object(hardware, "get_cpu_info") as mock_cpu,
@@ -275,7 +278,7 @@ def test_get_hardware_info(self, hardware):
         ("system_name", "expected_os"), [("Darwin", "macOS"), ("Linux", "Linux"), ("Windows", "Windows"), ("FreeBSD", "Unknown (FreeBSD)")]
     )
     @patch("hardware_utils.platform.system")
-    def test_get_hardware_info_os_mapping(self, mock_system, system_name, expected_os):
+    def test_get_hardware_info_os_mapping(self, mock_system, system_name, expected_os) -> None:
         """Test OS name mapping in hardware info."""
         mock_system.return_value = system_name
         hardware = HardwareInfo()
@@ -292,7 +295,7 @@ def test_get_hardware_info_os_mapping(self, mock_system, system_name, expected_o
             info = hardware.get_hardware_info()
             assert info["OS"] == expected_os
 
-    def test_format_hardware_info(self, hardware):
+    def test_format_hardware_info(self, hardware) -> None:
         """Test hardware info formatting."""
         test_info = {
             "OS": "macOS",
@@ -315,7 +318,7 @@ def test_format_hardware_info(self, hardware):
         assert "Rust: rustc 1.70.0" in formatted
         assert "Target: x86_64-apple-darwin" in formatted
 
-    def test_format_hardware_info_none(self, hardware):
+    def test_format_hardware_info_none(self, hardware) -> None:
         """Test hardware info formatting with None input."""
         with patch.object(hardware, "get_hardware_info") as mock_get_info:
             mock_get_info.return_value = {
@@ -335,7 +338,7 @@ def test_format_hardware_info_none(self, hardware):
 class TestHardwareComparator:
     """Test cases for HardwareComparator class."""
 
-    def test_parse_baseline_hardware_complete(self):
+    def test_parse_baseline_hardware_complete(self) -> None:
         """Test parsing complete baseline hardware info."""
         baseline_content = """Benchmark Results
 Generated on: 2023-06-15 10:30:00
@@ -367,7 +370,7 @@ def test_parse_baseline_hardware_complete(self):
 
         assert info == expected
 
-    def test_parse_baseline_hardware_partial(self):
+    def test_parse_baseline_hardware_partial(self) -> None:
         """Test parsing partial baseline hardware info."""
         baseline_content = """Hardware Information:
   OS: Linux
@@ -385,7 +388,7 @@ def test_parse_baseline_hardware_partial(self):
         assert info["CPU_CORES"] == "Unknown"  # Not specified
         assert info["RUST"] == "Unknown"  # Not specified
 
-    def test_parse_baseline_hardware_empty(self):
+    def test_parse_baseline_hardware_empty(self) -> None:
         """Test parsing baseline with no hardware info."""
         baseline_content = "No hardware information found"
 
@@ -395,7 +398,7 @@ def test_parse_baseline_hardware_empty(self):
         for value in info.values():
             assert value == "Unknown"
 
-    def test_compare_hardware_identical(self):
+    def test_compare_hardware_identical(self) -> None:
         """Test hardware comparison with identical configurations."""
         current_info = {
             "OS": "macOS",
@@ -414,7 +417,7 @@ def test_compare_hardware_identical(self):
         assert not has_warnings
         assert "Hardware configurations are compatible" in report
 
-    def test_compare_hardware_different_os(self):
+    def test_compare_hardware_different_os(self) -> None:
         """Test hardware comparison with different OS."""
         current_info = {
             "OS": "Linux",
@@ -434,7 +437,7 @@ def test_compare_hardware_different_os(self):
         assert has_warnings
         assert "OS differs: Linux vs macOS" in report
 
-    def test_compare_hardware_different_cpu(self):
+    def test_compare_hardware_different_cpu(self) -> None:
         """Test hardware comparison with different CPU."""
         current_info = {
             "OS": "Linux",
@@ -455,7 +458,7 @@ def test_compare_hardware_different_cpu(self):
         assert "CPU differs:" in report
         assert "results may not be directly comparable" in report
 
-    def test_compare_hardware_different_cores(self):
+    def test_compare_hardware_different_cores(self) -> None:
         """Test hardware comparison with different core counts."""
         current_info = {
             "OS": "Linux",
@@ -475,7 +478,7 @@ def test_compare_hardware_different_cores(self):
         assert has_warnings
         assert "CPU core count differs: 8 vs 6 cores" in report
 
-    def test_compare_hardware_memory_tolerance(self):
+    def test_compare_hardware_memory_tolerance(self) -> None:
         """Test memory comparison with numeric (percentage-based) tolerance."""
         current_info = {
             "OS": "Linux",
@@ -501,7 +504,7 @@ def test_compare_hardware_memory_tolerance(self):
         assert has_warnings
         assert "Memory differs:" in report
 
-    def test_compare_hardware_unknown_baseline(self):
+    def test_compare_hardware_unknown_baseline(self) -> None:
         """Test hardware comparison with unknown baseline values."""
         current_info = {
             "OS": "Linux",
@@ -543,7 +546,7 @@ def test_compare_hardware_unknown_baseline(self):
             ("", None),
         ],
     )
-    def test_extract_memory_value(self, memory_str, expected):
+    def test_extract_memory_value(self, memory_str, expected) -> None:
         """Test memory value extraction from strings."""
         result = HardwareComparator._extract_memory_value(memory_str)
         if expected is None:
@@ -555,7 +558,7 @@ def test_extract_memory_value(self, memory_str, expected):
 class TestHardwareUtilsIntegration:
     """Integration tests for hardware_utils functionality."""
 
-    def test_real_hardware_info_structure(self):
+    def test_real_hardware_info_structure(self) -> None:
         """Test that real hardware info returns expected structure."""
         hardware = HardwareInfo()
         info = hardware.get_hardware_info()
@@ -567,7 +570,7 @@ def test_real_hardware_info_structure(self):
         for key, value in info.items():
             assert isinstance(value, str), f"Key {key} should have string value"
 
-    def test_cpu_info_returns_tuples(self):
+    def test_cpu_info_returns_tuples(self) -> None:
         """Test that CPU info methods return proper tuple structure."""
         hardware = HardwareInfo()
 
@@ -580,14 +583,14 @@ def test_cpu_info_returns_tuples(self):
         assert isinstance(rust_version, str)
         assert isinstance(rust_target, str)
 
-    def test_memory_info_returns_string(self):
+    def test_memory_info_returns_string(self) -> None:
         """Test that memory info returns a string."""
         hardware = HardwareInfo()
         memory = hardware.get_memory_info()
 
         assert isinstance(memory, str)
 
-    def test_formatted_output_structure(self):
+    def test_formatted_output_structure(self) -> None:
         """Test that formatted output has expected structure."""
         hardware = HardwareInfo()
         formatted = hardware.format_hardware_info()
diff --git a/scripts/tests/test_subprocess_utils.py b/scripts/tests/test_subprocess_utils.py
index 829fec7b..ae24f8b9 100644
--- a/scripts/tests/test_subprocess_utils.py
+++ b/scripts/tests/test_subprocess_utils.py
@@ -16,6 +16,8 @@
 # Add scripts directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
+from typing import Never
+
 from subprocess_utils import (
     ExecutableNotFoundError,
     check_git_history,
@@ -34,7 +36,7 @@ class TestGetSafeExecutable:
     """Test get_safe_executable function."""
 
     @pytest.mark.parametrize("command", ["echo", "git", "ls"])
-    def test_finds_existing_executables(self, command):
+    def test_finds_existing_executables(self, command) -> None:
         """Test that it finds common executables."""
         result = get_safe_executable(command)
         assert isinstance(result, str)
@@ -47,7 +49,7 @@ def test_finds_existing_executables(self, command):
             pytest.skip(f"{command} may not be an external executable on Windows")
 
     @pytest.mark.parametrize("fake_command", ["definitely-nonexistent-command-xyz", "fake-command-for-testing", "nonexistent123"])
-    def test_raises_on_nonexistent_executables(self, fake_command):
+    def test_raises_on_nonexistent_executables(self, fake_command) -> None:
         """Test that it raises ExecutableNotFoundError for nonexistent commands."""
         with pytest.raises(ExecutableNotFoundError, match="not found in PATH") as exc_info:
             get_safe_executable(fake_command)
@@ -58,26 +60,26 @@ def test_raises_on_nonexistent_executables(self, fake_command):
 class TestRunGitCommand:
     """Test run_git_command function."""
 
-    def test_git_version(self):
+    def test_git_version(self) -> None:
         """Test basic git command execution."""
         result = run_git_command(["--version"])
         assert result.returncode == 0
         assert "git version" in result.stdout.lower()
         assert isinstance(result.stdout, str)
 
-    def test_git_command_with_custom_params(self):
+    def test_git_command_with_custom_params(self) -> None:
         """Test git command with custom parameters."""
         result = run_git_command(["status", "--porcelain"], check=False)
         # Should not raise even if there are changes (check=False)
         assert isinstance(result.returncode, int)
         assert isinstance(result.stdout, str)
 
-    def test_git_command_failure_handling(self):
+    def test_git_command_failure_handling(self) -> None:
         """Test that failed git commands raise CalledProcessError when check=True."""
         with pytest.raises(subprocess.CalledProcessError):
             run_git_command(["invalid-git-subcommand-xyz"], check=True)
 
-    def test_git_command_no_failure_with_check_false(self):
+    def test_git_command_no_failure_with_check_false(self) -> None:
         """Test that failed git commands don't raise when check=False."""
         result = run_git_command(["invalid-git-subcommand-xyz"], check=False)
         assert result.returncode != 0
@@ -88,7 +90,7 @@ class TestRunCargoCommand:
     """Test run_cargo_command function."""
 
     @pytest.mark.skipif(shutil.which("cargo") is None, reason="cargo not installed in PATH")
-    def test_cargo_version(self):
+    def test_cargo_version(self) -> None:
         """Test basic cargo command execution."""
         result = run_cargo_command(["--version"])
         assert result.returncode == 0
@@ -96,7 +98,7 @@ def test_cargo_version(self):
         assert isinstance(result.stdout, str)
 
     @pytest.mark.skipif(shutil.which("cargo") is None, reason="cargo not installed in PATH")
-    def test_cargo_command_with_custom_params(self):
+    def test_cargo_command_with_custom_params(self) -> None:
         """Test cargo command with custom parameters."""
         result = run_cargo_command(["check", "--dry-run"], check=False)
         assert isinstance(result.returncode, int)
@@ -106,14 +108,14 @@ def test_cargo_command_with_custom_params(self):
 class TestRunSafeCommand:
     """Test run_safe_command function with various scenarios."""
 
-    def test_basic_command_execution(self):
+    def test_basic_command_execution(self) -> None:
         """Test basic command execution with default parameters."""
         result = run_safe_command("echo", ["hello world"])
         assert result.returncode == 0
         assert result.stdout.strip() == "hello world"
         assert isinstance(result.stdout, str)
 
-    def test_secure_defaults_are_applied(self):
+    def test_secure_defaults_are_applied(self) -> None:
         """Test that secure defaults are applied."""
         result = run_safe_command("echo", ["test"])
         # Should use secure defaults:
@@ -123,21 +125,21 @@ def test_secure_defaults_are_applied(self):
         assert isinstance(result.stdout, str)
         assert result.stdout.strip() == "test"
 
-    def test_text_parameter_enforced(self):
+    def test_text_parameter_enforced(self) -> None:
         """Test that text parameter is enforced for security/stability."""
         # run_safe_command enforces text=True for stable CompletedProcess[str] typing
         result = run_safe_command("echo", ["test output"], text=False)  # text=False is ignored
         assert isinstance(result.stdout, str)  # Should still be string
         assert "test output" in result.stdout
 
-    def test_custom_check_parameter(self):
+    def test_custom_check_parameter(self) -> None:
         """Test overriding check parameter."""
         # Command that will fail
         result = run_safe_command("git", ["invalid-git-subcommand-xyz"], check=False)
         assert result.returncode != 0
         # Should not raise because check=False
 
-    def test_custom_capture_output_parameter(self):
+    def test_custom_capture_output_parameter(self) -> None:
         """Test overriding capture_output parameter."""
         if sys.platform.startswith("win"):
             pytest.skip("echo may not be an external executable on Windows")
@@ -145,19 +147,19 @@ def test_custom_capture_output_parameter(self):
         # When capture_output=False, stdout should be None
         assert result.stdout is None
 
-    def test_multiple_custom_parameters(self):
+    def test_multiple_custom_parameters(self) -> None:
         """Test multiple custom parameters at once (text is enforced)."""
         result = run_safe_command("echo", ["multi param test"], text=False, check=False, capture_output=True)
         assert isinstance(result.stdout, str)  # text=False is ignored, still returns string
         assert result.returncode == 0
         assert "multi param test" in result.stdout
 
-    def test_nonexistent_command_raises_error(self):
+    def test_nonexistent_command_raises_error(self) -> None:
         """Test that nonexistent commands raise ExecutableNotFoundError."""
         with pytest.raises(ExecutableNotFoundError):
             run_safe_command("definitely-nonexistent-command", ["arg"])
 
-    def test_additional_kwargs_passed_through(self):
+    def test_additional_kwargs_passed_through(self) -> None:
         """Test that additional kwargs are passed through to subprocess.run."""
         # Test with timeout (a subprocess.run parameter not explicitly handled)
         result = run_safe_command("echo", ["timeout test"], timeout=10)
@@ -168,19 +170,19 @@ def test_additional_kwargs_passed_through(self):
 class TestGitRepositoryFunctions:
     """Test git repository detection functions."""
 
-    def test_check_git_repo_in_git_repo(self):
+    def test_check_git_repo_in_git_repo(self) -> None:
         """Test check_git_repo returns True when in a git repository."""
         if not check_git_repo():
             pytest.skip("Not running inside a git repository")
         assert check_git_repo() is True
 
-    def test_check_git_history_with_history(self):
+    def test_check_git_history_with_history(self) -> None:
         """Test check_git_history returns True when git history exists."""
         if not check_git_history():
             pytest.skip("Repository has no commit history")
         assert check_git_history() is True
 
-    def test_get_git_commit_hash_returns_hash(self):
+    def test_get_git_commit_hash_returns_hash(self) -> None:
         """Test that get_git_commit_hash returns a valid commit hash."""
         commit_hash = get_git_commit_hash()
         assert isinstance(commit_hash, str)
@@ -188,7 +190,7 @@ def test_get_git_commit_hash_returns_hash(self):
         # Should be hexadecimal
         assert all(c in "0123456789abcdef" for c in commit_hash.lower())
 
-    def test_get_git_remote_url_returns_url(self):
+    def test_get_git_remote_url_returns_url(self) -> None:
         """Test that get_git_remote_url returns a valid URL."""
         remotes = run_git_command(["remote"]).stdout.split()
         if "origin" not in remotes:
@@ -203,17 +205,17 @@ def test_get_git_remote_url_returns_url(self):
 class TestErrorHandling:
     """Test error handling and edge cases."""
 
-    def test_executable_not_found_error_attributes(self):
+    def test_executable_not_found_error_attributes(self) -> None:
         """Test ExecutableNotFoundError has proper attributes."""
         error = ExecutableNotFoundError("test message")
         assert str(error) == "test message"
         assert isinstance(error, Exception)
 
-    def test_git_functions_handle_missing_git(self, monkeypatch):
+    def test_git_functions_handle_missing_git(self, monkeypatch) -> None:
         """Test git functions handle missing git executable gracefully."""
 
         # Mock get_safe_executable to raise ExecutableNotFoundError for git
-        def mock_get_safe_executable(command):
+        def mock_get_safe_executable(command) -> str:
             if command == "git":
                 raise ExecutableNotFoundError(f"Required executable '{command}' not found in PATH")
             return "/bin/echo"  # Return echo for other commands
@@ -235,7 +237,7 @@ def mock_get_safe_executable(command):
 class TestSecurityFeatures:
     """Test security-related features of the utilities."""
 
-    def test_uses_full_executable_paths(self):
+    def test_uses_full_executable_paths(self) -> None:
         """Test that commands use full executable paths."""
         # This is implicitly tested by get_safe_executable tests,
         # but let's verify the behavior
@@ -243,7 +245,7 @@ def test_uses_full_executable_paths(self):
         assert Path(git_path).is_absolute()  # Should be absolute path
         assert "git" in git_path
 
-    def test_no_shell_execution(self):
+    def test_no_shell_execution(self) -> None:
         """Test that commands don't use shell=True."""
         # The functions should not use shell=True, which would be a security risk
         # We can't directly test this, but the implementation uses subprocess.run
@@ -252,7 +254,7 @@ def test_no_shell_execution(self):
         # If shell=True was used, this would expand the environment variable
         assert result.stdout.strip() == "$HOME"
 
-    def test_check_parameter_security_default(self):
+    def test_check_parameter_security_default(self) -> None:
         """Test that check=True is the default for security."""
         # Command that will fail should raise by default
         with pytest.raises(subprocess.CalledProcessError):
@@ -266,11 +268,11 @@ def test_check_parameter_security_default(self):
             (run_safe_command, ("echo", ["test"]), {"executable": "/malicious/fake/command"}),
         ],
     )
-    def test_rejects_executable_override(self, function, args, kwargs, monkeypatch):
+    def test_rejects_executable_override(self, function, args, kwargs, monkeypatch) -> None:
         """Test that functions reject executable override for security."""
         called = {"run": False}
 
-        def fake_run(*_a, **_k):
+        def fake_run(*_a, **_k) -> Never:
             called["run"] = True  # should never be set
             msg = "subprocess.run should not be called on override"
             raise AssertionError(msg)
@@ -280,7 +282,7 @@ def fake_run(*_a, **_k):
             function(*args, **kwargs)
         assert called["run"] is False
 
-    def test_run_git_command_with_input_rejects_executable_override(self):
+    def test_run_git_command_with_input_rejects_executable_override(self) -> None:
         """Test that run_git_command_with_input raises ValueError when executable is overridden."""
         with pytest.raises(ValueError, match="Overriding 'executable' is not allowed"):
             run_git_command_with_input(["hash-object", "--stdin"], "test content", executable="/malicious/fake/git")
diff --git a/scripts/tests/test_tag_release.py b/scripts/tests/test_tag_release.py
index d579204b..26561200 100644
--- a/scripts/tests/test_tag_release.py
+++ b/scripts/tests/test_tag_release.py
@@ -14,7 +14,7 @@
 # ---------------------------------------------------------------------------
 
 
-def _fake_remote(url: str):
+def _fake_remote(url: str) -> subprocess.CompletedProcess[str]:
     """Return a mock CompletedProcess whose stdout is *url*."""
     return subprocess.CompletedProcess(args=[], returncode=0, stdout=url + "\n")