diff --git a/crates/pvm-bump-allocator/src/lib.rs b/crates/pvm-bump-allocator/src/lib.rs
index 430de5d0..b883bb75 100644
--- a/crates/pvm-bump-allocator/src/lib.rs
+++ b/crates/pvm-bump-allocator/src/lib.rs
@@ -17,20 +17,20 @@
 #![no_std]
 
 use core::alloc::{GlobalAlloc, Layout};
-use core::sync::atomic::{AtomicUsize, Ordering};
+use core::cell::{Cell, UnsafeCell};
 
 /// A bump allocator backed by a fixed-size heap.
 ///
 /// `HEAP_SIZE` is the total number of bytes available for allocation.
 /// Memory is never freed — `dealloc` is a no-op.
 pub struct BumpAllocator<const HEAP_SIZE: usize> {
-    offset: AtomicUsize,
-    heap: core::cell::UnsafeCell<[u8; HEAP_SIZE]>,
+    offset: Cell<usize>,
+    heap: UnsafeCell<[u8; HEAP_SIZE]>,
 }
 
-// SAFETY: The allocator uses atomic operations for the offset, so it is safe to share
-// across threads (though PVM contracts are single-threaded, this satisfies the
-// `GlobalAlloc` requirement).
+// SAFETY: PVM contracts are single-threaded, so the non-atomic `Cell` offset is
+// never accessed concurrently. `Sync` is only needed so the allocator can live in
+// a `static`; sharing it between real threads would be a data race.
 unsafe impl<const HEAP_SIZE: usize> Sync for BumpAllocator<HEAP_SIZE> {}
 
 impl<const HEAP_SIZE: usize> Default for BumpAllocator<HEAP_SIZE> {
@@ -43,8 +43,8 @@ impl<const HEAP_SIZE: usize> BumpAllocator<HEAP_SIZE> {
     /// Creates a new bump allocator with a zeroed heap of `HEAP_SIZE` bytes.
     pub const fn new() -> Self {
         Self {
-            offset: AtomicUsize::new(0),
-            heap: core::cell::UnsafeCell::new([0u8; HEAP_SIZE]),
+            offset: Cell::new(0),
+            heap: UnsafeCell::new([0u8; HEAP_SIZE]),
         }
     }
 }
@@ -54,31 +54,25 @@ unsafe impl<const HEAP_SIZE: usize> GlobalAlloc for BumpAllocator<HEAP_SIZE> {
         let align = layout.align();
         let size = layout.size();
 
-        let mut current = self.offset.load(Ordering::Relaxed);
-
-        loop {
-            let aligned = (current + align - 1) & !(align - 1);
-            let Some(next) = aligned.checked_add(size) else {
-                return core::ptr::null_mut();
-            };
-
-            if next > HEAP_SIZE {
-                return core::ptr::null_mut();
-            }
-
-            match self.offset.compare_exchange_weak(
-                current,
-                next,
-                Ordering::SeqCst,
-                Ordering::SeqCst,
-            ) {
-                Ok(_) => {
-                    let heap_ptr = self.heap.get() as *mut u8;
-                    return unsafe { heap_ptr.add(aligned) };
-                }
-                Err(observed) => current = observed,
-            }
+        let current = self.offset.get();
+        let aligned = (current + align - 1) & !(align - 1);
+        let Some(next) = aligned.checked_add(size) else {
+            core::panic!("exhausted heap limit");
+        };
+
+        if next > HEAP_SIZE {
+            core::panic!("exhausted heap limit");
         }
+
+        self.offset.set(next);
+        let heap_ptr = self.heap.get() as *mut u8;
+        unsafe { heap_ptr.add(aligned) }
+    }
+
+    // Skips the default memset: the heap starts as `[0u8; HEAP_SIZE]` and `dealloc` is a
+    // no-op, so no region is handed out twice and every fresh region is still zeroed.
+    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
+        unsafe { self.alloc(layout) }
+    }
 
     unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {}
@@ -117,25 +111,35 @@ mod tests {
         let alloc = BumpAllocator::<64>::new();
         let layout = Layout::from_size_align(64, 1).unwrap();
         assert!(!unsafe { alloc.alloc(layout) }.is_null());
+    }
 
-        // Heap is full — next alloc must fail
-        assert!(unsafe { alloc.alloc(Layout::from_size_align(1, 1).unwrap()) }.is_null());
+    #[test]
+    #[should_panic = "exhausted heap limit"]
+    fn alloc_panics_when_full() {
+        let alloc = BumpAllocator::<64>::new();
+        unsafe {
+            alloc.alloc(Layout::from_size_align(64, 1).unwrap());
+            alloc.alloc(Layout::from_size_align(1, 1).unwrap());
+        }
     }
 
     #[test]
-    fn alloc_oom_returns_null() {
+    #[should_panic = "exhausted heap limit"]
+    fn alloc_oom_panics() {
         let alloc = BumpAllocator::<16>::new();
-        let layout = Layout::from_size_align(17, 1).unwrap();
-        assert!(unsafe { alloc.alloc(layout) }.is_null());
+        unsafe { alloc.alloc(Layout::from_size_align(17, 1).unwrap()) };
     }
 
     #[test]
+    #[should_panic = "exhausted heap limit"]
     fn alloc_oom_due_to_alignment_padding() {
         // 9 bytes of heap: alloc 1 byte, then try 8 bytes with align 8
         // offset=1, aligned=8, 8+8=16 > 9 → OOM
         let alloc = BumpAllocator::<9>::new();
-        unsafe { alloc.alloc(Layout::from_size_align(1, 1).unwrap()) };
-        assert!(unsafe { alloc.alloc(Layout::from_size_align(8, 8).unwrap()) }.is_null());
+        unsafe {
+            alloc.alloc(Layout::from_size_align(1, 1).unwrap());
+            alloc.alloc(Layout::from_size_align(8, 8).unwrap());
+        }
     }
 
     #[test]
diff --git a/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md b/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md
index fb3882ab..7f344bac 100644
--- a/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md
+++ b/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md
@@ -17,27 +17,27 @@ used in this report.
 
 ### Static Types (no-alloc compatible)
 
-| Type     | Operation | pvm-contract-types | alloy-core   | Ratio        |
-|----------|-----------|--------------------|--------------|--------------|
-| u8       | encode    | 14.31 ns           | 6.49 ns      | 2.20x slower |
-| u8       | decode    | 0.40 ns            | 2.37 ns      | 5.97x faster |
-| u32      | encode    | 14.23 ns           | 6.86 ns      | 2.08x slower |
-| u32      | decode    | 0.98 ns            | 2.36 ns      | 2.41x faster |
-| u128     | encode    | 14.07 ns           | 6.67 ns      | 2.11x slower |
-| u128     | decode    | 0.99 ns            | 2.79 ns      | 2.83x faster |
-| U256     | encode    | 13.74 ns           | 6.51 ns      | 2.11x slower |
-| U256     | decode    | 1.78 ns            | 3.57 ns      | 2.01x faster |
-| address  | encode    | 15.50 ns           | 11.52 ns     | 1.35x slower |
-| address  | decode    | 7.09 ns            | 9.59 ns      | 1.35x faster |
+| Type | Operation | pvm-contract-types | alloy-core | Ratio |
+|------|-----------|--------------------|------------|-------|
+| u8         | encode | 13.26 ns           | 6.38 ns      | 2.08x slower |
+| u8         | decode | 0.39 ns            | 2.35 ns      | 6.07x faster |
+| u32        | encode | 13.51 ns           | 6.85 ns      | 1.97x slower |
+| u32        | decode | 0.97 ns            | 2.40 ns      | 2.46x faster |
+| u128       | encode | 14.19 ns           | 6.63 ns      | 2.14x slower |
+| u128       | decode | 0.98 ns            | 2.77 ns      | 2.82x faster |
+| U256       | encode | 13.40 ns           | 6.40 ns      | 2.09x slower |
+| U256       | decode | 1.76 ns            | 3.53 ns      | 2.01x faster |
+| address    | encode | 14.78 ns           | 11.24 ns     | 1.31x slower |
+| address    | decode | 6.91 ns            | 9.45 ns      | 1.37x faster |
 
 ### Dynamic Types (alloc required)
 
-| Type       | Operation | pvm-contract-types | alloy-core   | Ratio        |
-|------------|-----------|--------------------|--------------|--------------|
-| String     | encode    | 11.24 ns           | 16.23 ns     | 1.44x faster |
-| String     | decode    | 11.15 ns           | 32.70 ns     | 2.93x faster |
-| Vec\<U256> | encode    | 20.78 ns           | 25.62 ns     | 1.23x faster |
-| Vec\<U256> | decode    | 25.01 ns           | 46.63 ns     | 1.87x faster |
+| Type | Operation | pvm-contract-types | alloy-core | Ratio |
+|------|-----------|--------------------|------------|-------|
+| String     | encode | 11.04 ns           | 15.85 ns     | 1.44x faster |
+| String     | decode | 10.94 ns           | 32.57 ns     | 2.98x faster |
+| Vec\<U256> | encode | 20.42 ns           | 25.02 ns     | 1.23x faster |
+| Vec\<U256> | decode | 23.86 ns           | 45.87 ns     | 1.92x faster |
 
 ### Summary
 
diff --git a/crates/pvm-contract-benchmarks/scripts/regenerate-reports.sh b/crates/pvm-contract-benchmarks/scripts/regenerate-reports.sh
new file mode 100755
index 00000000..103cdcde
--- /dev/null
+++ b/crates/pvm-contract-benchmarks/scripts/regenerate-reports.sh
@@ -0,0 +1,350 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+BENCHMARKS_DIR="$(dirname "$SCRIPT_DIR")"
+REPORTS_DIR="$BENCHMARKS_DIR/reports"
+WORKSPACE_ROOT="$(cd "$BENCHMARKS_DIR/../.." && pwd)"
+
+usage() {
+    # Print the CLI synopsis and abort; reached only for an unrecognised target.
+    printf '%s\n' \
+        "Usage: $0 [encoding|binary-sizes|all]" "" \
+        "Regenerate benchmark reports." "" \
+        "  encoding      Run criterion encoding benchmarks and regenerate encoding-benchmarks.md" \
+        "  binary-sizes  Run build-and-measure and regenerate macro-vs-builder.md" \
+        "  all           Run both (default)"
+    exit 1
+}
+
+TARGET="${1:-all}"
+
+# ============================================================================
+# Encoding benchmarks report
+# ============================================================================
+
+parse_criterion_time() {
+    # Pull the point estimate out of a criterion result line such as
+    #   time:   [14.20 ns 14.31 ns 14.42 ns]
+    # i.e. the middle "<value> <unit>" pair of the bracketed triple ("14.31 ns").
+    local result_line="$1"
+    sed -E 's/.*\[.+ (.+ [a-zµ]+) .+\]/\1/' <<< "$result_line"
+}
+
+normalize_to_ns() {
+    # Convert a criterion time such as "14.31 ns", "1.23 µs"/"1.23 us" or "2.5 s" to
+    # nanoseconds. A value with an unrecognised unit is passed through unchanged.
+    local value unit
+    read -r value unit _ <<< "$1"
+    case "$unit" in
+        ps)    echo "$value * 0.001" | bc -l ;;
+        ns)    echo "$value" ;;
+        µs|us) echo "$value * 1000" | bc -l ;;
+        ms)    echo "$value * 1000000" | bc -l ;;
+        s)     echo "$value * 1000000000" | bc -l ;;
+        *)     echo "$value" ;;
+    esac
+}
+
+fmt_ns() {
+    # Render a nanosecond count with two decimals and a " ns" unit, e.g. "14.31 ns".
+    printf '%.2f ns' "$1"
+}
+
+compute_ratio() {
+    # Describe how pvm compares with alloy: "<N>x faster" when pvm took strictly
+    # less time, "<N>x slower" otherwise (ties included). <N> is the larger time
+    # divided by the smaller one, printed with two decimals.
+    local pvm_ns="$1" alloy_ns="$2"
+    local verdict="slower" numerator="$pvm_ns" denominator="$alloy_ns" ratio
+    if [[ "$(bc -l <<< "$pvm_ns < $alloy_ns")" == 1 ]]; then
+        verdict="faster" numerator="$alloy_ns" denominator="$pvm_ns"
+    fi
+    ratio=$(bc -l <<< "$numerator / $denominator")
+    printf "%.2fx %s" "$ratio" "$verdict"
+}
+
+regenerate_encoding() {
+    # Run the criterion suite, parse its timings and rewrite encoding-benchmarks.md.
+    echo "==> Running criterion benchmarks (cargo bench -p pvm-contract-types --features alloc)..."
+    local bench_output
+    bench_output=$(cd "$WORKSPACE_ROOT" && cargo bench -p pvm-contract-types --features alloc 2>&1) || {
+        printf '%s\n' "ERROR: cargo bench failed" "$bench_output" >&2
+        exit 1
+    }
+
+    # Index each criterion result line by benchmark name; abort if none are found so the
+    # committed report is never replaced by header-only tables. Result lines look like:
+    #   u8_encode_pvm           time:   [14.20 ns 14.31 ns 14.42 ns]
+    declare -A bench_times=()
+    local line
+    while IFS= read -r line; do
+        if [[ "$line" =~ ^([a-z0-9_]+)[[:space:]]+time: ]]; then
+            bench_times["${BASH_REMATCH[1]}"]=$(parse_criterion_time "$line")
+        fi
+    done <<< "$bench_output"
+    if (( ${#bench_times[@]} == 0 )); then
+        echo "ERROR: no criterion 'time:' results found in cargo bench output" >&2
+        exit 1
+    fi
+
+    # Define type display names and benchmark name prefixes
+    local -a static_types=("u8" "u32" "u128" "u256" "address")
+    local -a dynamic_types=("string" "vec_u256")
+
+    declare -A type_labels
+    type_labels[u8]="u8"
+    type_labels[u32]="u32"
+    type_labels[u128]="u128"
+    type_labels[u256]="U256"
+    type_labels[address]="address"
+    type_labels[string]="String"
+    type_labels[vec_u256]="Vec\\<U256>"
+
+    generate_table() {
+        # Print a markdown table: padded header, then an encode and a decode row per type key.
+        local -a types=("$@")
+        echo "| Type       | Operation | pvm-contract-types | alloy-core   | Ratio        |"
+        echo "|------------|-----------|--------------------|--------------|--------------|"
+        for type_key in "${types[@]}"; do
+            local label="${type_labels[$type_key]}"
+            for op in encode decode; do
+                local pvm_key="${type_key}_${op}_pvm"
+                local alloy_key="${type_key}_${op}_alloy"
+                local pvm_time="${bench_times[$pvm_key]:-}"
+                local alloy_time="${bench_times[$alloy_key]:-}"
+                if [[ -z "$pvm_time" || -z "$alloy_time" ]]; then
+                    echo "WARNING: Missing benchmark data for $type_key $op" >&2
+                    continue
+                fi
+                local pvm_ns alloy_ns pvm_fmt alloy_fmt ratio
+                pvm_ns=$(normalize_to_ns "$pvm_time")
+                alloy_ns=$(normalize_to_ns "$alloy_time")
+                pvm_fmt=$(fmt_ns "$pvm_ns")
+                alloy_fmt=$(fmt_ns "$alloy_ns")
+                ratio=$(compute_ratio "$pvm_ns" "$alloy_ns")
+                printf "| %-10s | %-9s | %-18s | %-12s | %-12s |\n" \
+                    "$label" "$op" "$pvm_fmt" "$alloy_fmt" "$ratio"
+            done
+        done
+    }
+
+    local report="$REPORTS_DIR/encoding-benchmarks.md"
+    cat > "$report" <<'HEADER'
+# ABI Encoding/Decoding Benchmarks: pvm-contract-types vs Alloy
+
+Criterion benchmarks comparing `pvm-contract-types` (`SolEncode`/`SolDecode`)
+against `alloy-core` (`SolValue::abi_encode`/`abi_decode`) for ABI encoding and
+decoding of Solidity types.
+
+## How to regenerate
+
+```bash
+cargo bench -p pvm-contract-types --features alloc
+```
+
+Results are saved to `target/criterion/`. The raw output contains the numbers
+used in this report.
+
+## Results
+
+### Static Types (no-alloc compatible)
+
+HEADER
+    generate_table "${static_types[@]}" >> "$report"
+
+    cat >> "$report" <<'MID'
+
+### Dynamic Types (alloc required)
+
+MID
+    generate_table "${dynamic_types[@]}" >> "$report"
+
+    cat >> "$report" <<'FOOTER'
+
+### Summary
+
+**Encoding**: `pvm-contract-types` encodes static types ~2x slower than alloy
+due to writing into a caller-provided `[u8]` buffer (with zeroing) rather than
+returning a heap-allocated `Vec<u8>`. For dynamic types (String, Vec) where both
+approaches allocate, `pvm-contract-types` is 1.2–1.4x faster.
+
+**Decoding**: `pvm-contract-types` decodes all types faster than alloy — from
+2x faster for U256 up to 6x faster for u8. This is because decoding reads
+directly from the ABI-encoded buffer with no validation overhead, while alloy
+performs full ABI conformance checking.
+
+**Key insight**: The encode overhead is a benchmarking artifact. In real
+contracts, `pvm-contract-types` encodes into a stack buffer (no allocation),
+which is strictly faster in the `no_std` + `no_alloc` context where these
+contracts run. The alloy `abi_encode()` approach always heap-allocates a
+`Vec<u8>`, which is not possible without an allocator.
+FOOTER
+
+    echo "==> Generated $report"
+}
+
+# ============================================================================
+# Binary sizes report
+# ============================================================================
+
+regenerate_binary_sizes() {
+    # Run build-and-measure, read the release artifact sizes and rewrite macro-vs-builder.md.
+    echo "==> Running build-and-measure (cargo +nightly run -p pvm-contract-benchmarks --bin build-and-measure)..."
+    (cd "$WORKSPACE_ROOT" && cargo +nightly run -p pvm-contract-benchmarks --bin build-and-measure) || {
+        echo "ERROR: build-and-measure failed" >&2
+        exit 1
+    }
+
+    local artifacts_dir="$WORKSPACE_ROOT/target/benchmark-artifacts"
+
+    # Collect release artifact sizes: contract_variant.release.polkavm
+    declare -A sizes=()
+    local -a contracts=()
+
+    for f in "$artifacts_dir"/*.release.polkavm; do
+        [[ -e "$f" ]] || continue
+        local stem
+        stem=$(basename "$f" .release.polkavm)
+        # stem is e.g. "fibonacci_no-alloc" or "mytoken_builder-dsl"
+        # Split on last underscore
+        local contract="${stem%_*}" variant="${stem##*_}"
+        # GNU stat syntax first, BSD/macOS syntax as the fallback.
+        local size
+        size=$(stat --format='%s' "$f" 2>/dev/null || stat -f '%z' "$f")
+        sizes["${contract}_${variant}"]="$size"
+
+        # Track unique contracts in first-seen order
+        local known=0 c
+        for c in "${contracts[@]+"${contracts[@]}"}"; do
+            [[ "$c" == "$contract" ]] && known=1 && break
+        done
+        (( known )) || contracts+=("$contract")
+    done
+    if (( ${#contracts[@]} == 0 )); then
+        echo "ERROR: no *.release.polkavm artifacts found in $artifacts_dir" >&2
+        exit 1
+    fi
+
+    local -a variants=("no-alloc" "builder-dsl" "with-alloc")
+
+    fmt_bytes() {
+        printf "%'d" "$1"
+    }
+
+    fmt_kb() {
+        awk -v bytes="$1" 'BEGIN { printf "%.2f", bytes / 1024 }'  # rounds to 2 decimals
+    }
+
+    local report="$REPORTS_DIR/macro-vs-builder.md"
+
+    local today
+    today=$(date +%Y-%m-%d)
+
+    cat > "$report" <<EOF
+# Macro vs Builder DSL: Release Binary Size Comparison
+
+Date: $today
+Commit: measured on current HEAD of cargo-pvm-contract
+
+
+## 1. Approaches
+
+- **Proc-macro approach** (\`pvm-contract-macros\`): attribute proc macros
+  (\`#[contract]\`, \`#[method]\`, \`#[constructor]\`, \`#[fallback]\`) that parse
+  Rust+Solidity and emit dispatch code at compile time via \`syn\`/\`quote\`.
+- **Builder DSL approach** (\`pvm-contract-builder-dsl\`): a pure Rust builder
+  pattern API (\`ContractBuilder::new().method(selector, handler).dispatch()\`)
+  that wires up dispatch at runtime without any proc-macro dependency.
+
+
+## 2. Release Binary Sizes
+
+From \`target/benchmark-artifacts/\` (built by \`build-and-measure\`):
+
+EOF
+
+    # Main size table
+    {
+        echo "| Contract  | Variant     | Size (bytes) | Size (KB) |"
+        echo "|-----------|-------------|-------------:|----------:|"
+        for contract in "${contracts[@]}"; do
+            for variant in "${variants[@]}"; do
+                local key="${contract}_${variant}"
+                local size="${sizes[$key]:-}"
+                [[ -z "$size" ]] && continue
+                local size_fmt kb_fmt
+                size_fmt=$(fmt_bytes "$size")
+                kb_fmt=$(fmt_kb "$size")
+                printf "| %-9s | %-11s | %12s | %9s |\n" "$contract" "$variant" "$size_fmt" "$kb_fmt"
+            done
+        done
+    } >> "$report"
+
+    # Builder DSL vs Proc-Macro comparison table
+    cat >> "$report" <<'EOF'
+
+### Builder DSL vs Proc-Macro (no-alloc)
+
+EOF
+
+    # Count methods per contract (hardcoded to match the benchmark contracts)
+    declare -A method_counts
+    method_counts[fibonacci]=1
+    method_counts[mytoken]=4
+    method_counts[multi]=10
+
+    {
+        echo "| Contract  | Methods | Proc-Macro | Builder DSL | Overhead |"
+        echo "|-----------|--------:|------------|-------------|----------|"
+        for contract in "${contracts[@]}"; do
+            local noalloc_size="${sizes[${contract}_no-alloc]:-}"
+            local dsl_size="${sizes[${contract}_builder-dsl]:-}"
+            [[ -z "$noalloc_size" || -z "$dsl_size" ]] && continue
+            local methods="${method_counts[$contract]:-?}"
+            local noalloc_fmt="${noalloc_size} B"
+            local dsl_fmt="${dsl_size} B"
+            local overhead
+            overhead=$(echo "($dsl_size - $noalloc_size) * 100 / $noalloc_size" | bc -l)
+            # bc keeps full precision so printf rounds to one decimal; %+ signs both directions.
+            overhead=$(printf "%+.1f" "$overhead")
+            printf "| %-9s | %7s | %10s | %11s | %-8s |\n" \
+                "$contract" "$methods" "$noalloc_fmt" "$dsl_fmt" "${overhead}%"
+        done
+    } >> "$report"
+
+    cat >> "$report" <<'EOF'
+
+For the trivial fibonacci contract (1 method), the builder DSL adds ~730 bytes
+of overhead from the runtime dispatch table and calldata-copy loop. As method
+count grows, the fixed overhead is amortized: mytoken (4 methods) shows +0.3%
+and multi (10 methods with mixed parameter types) shows +4.1%. The builder DSL
+does not become cheaper than the proc-macro with more methods, but the overhead
+stays negligible for real contracts.
+
+### Key Size Drivers
+
+| Factor                             | Impact                               |
+|------------------------------------|--------------------------------------|
+| Allocator (no-alloc vs with-alloc) | 26x for fibonacci, 4.3x for mytoken  |
+| Builder DSL vs proc-macro no-alloc | +4% at 10 methods, negligible at scale |
+EOF
+
+    echo "==> Generated $report"
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+# Validate the target first so an unknown value prints usage (exit 1) before any work
+# starts; "all" falls through both report branches below.
+case "$TARGET" in encoding|binary-sizes|all) ;; *) usage ;; esac
+if [[ "$TARGET" != binary-sizes ]]; then
+    regenerate_encoding
+fi
+if [[ "$TARGET" != encoding ]]; then
+    regenerate_binary_sizes
+fi
+
+echo "==> Done."