diff --git a/crates/pvm-bump-allocator/src/lib.rs b/crates/pvm-bump-allocator/src/lib.rs index 430de5d0..b883bb75 100644 --- a/crates/pvm-bump-allocator/src/lib.rs +++ b/crates/pvm-bump-allocator/src/lib.rs @@ -17,20 +17,20 @@ #![no_std] use core::alloc::{GlobalAlloc, Layout}; -use core::sync::atomic::{AtomicUsize, Ordering}; +use core::cell::{Cell, UnsafeCell}; /// A bump allocator backed by a fixed-size heap. /// /// `HEAP_SIZE` is the total number of bytes available for allocation. /// Memory is never freed — `dealloc` is a no-op. pub struct BumpAllocator<const HEAP_SIZE: usize> { - offset: AtomicUsize, - heap: core::cell::UnsafeCell<[u8; HEAP_SIZE]>, + offset: Cell<usize>, + heap: UnsafeCell<[u8; HEAP_SIZE]>, } -// SAFETY: The allocator uses atomic operations for the offset, so it is safe to share -// across threads (though PVM contracts are single-threaded, this satisfies the -// `GlobalAlloc` requirement). +// SAFETY: PVM contracts are single-threaded. The `Sync` bound is required by +// `GlobalAlloc` (the allocator must live in a `static`), but no concurrent +// access actually occurs. unsafe impl<const HEAP_SIZE: usize> Sync for BumpAllocator<HEAP_SIZE> {} impl<const HEAP_SIZE: usize> Default for BumpAllocator<HEAP_SIZE> { @@ -43,8 +43,8 @@ impl<const HEAP_SIZE: usize> BumpAllocator<HEAP_SIZE> { /// Creates a new bump allocator with a zeroed heap of `HEAP_SIZE` bytes.
pub const fn new() -> Self { Self { - offset: AtomicUsize::new(0), - heap: core::cell::UnsafeCell::new([0u8; HEAP_SIZE]), + offset: Cell::new(0), + heap: UnsafeCell::new([0u8; HEAP_SIZE]), } } } @@ -54,31 +54,25 @@ unsafe impl<const HEAP_SIZE: usize> GlobalAlloc for BumpAllocator<HEAP_SIZE> { let align = layout.align(); let size = layout.size(); - let mut current = self.offset.load(Ordering::Relaxed); - - loop { - let aligned = (current + align - 1) & !(align - 1); - let Some(next) = aligned.checked_add(size) else { - return core::ptr::null_mut(); - }; - - if next > HEAP_SIZE { - return core::ptr::null_mut(); - } - - match self.offset.compare_exchange_weak( - current, - next, - Ordering::SeqCst, - Ordering::SeqCst, - ) { - Ok(_) => { - let heap_ptr = self.heap.get() as *mut u8; - return unsafe { heap_ptr.add(aligned) }; - } - Err(observed) => current = observed, - } + let current = self.offset.get(); + let aligned = (current + align - 1) & !(align - 1); + let Some(next) = aligned.checked_add(size) else { + core::panic!("exhausted heap limit"); + }; + + if next > HEAP_SIZE { + core::panic!("exhausted heap limit"); } + + self.offset.set(next); + let heap_ptr = self.heap.get() as *mut u8; + unsafe { heap_ptr.add(aligned) } + } + + // The heap is zero-initialized and memory is never reused, so every + // region returned by `alloc` is already zeroed.
+ unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + unsafe { self.alloc(layout) } } unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {} @@ -117,25 +111,35 @@ mod tests { let alloc = BumpAllocator::<64>::new(); let layout = Layout::from_size_align(64, 1).unwrap(); assert!(!unsafe { alloc.alloc(layout) }.is_null()); + } - // Heap is full — next alloc must fail - assert!(unsafe { alloc.alloc(Layout::from_size_align(1, 1).unwrap()) }.is_null()); + #[test] + #[should_panic = "exhausted heap limit"] + fn alloc_panics_when_full() { + let alloc = BumpAllocator::<64>::new(); + unsafe { + alloc.alloc(Layout::from_size_align(64, 1).unwrap()); + alloc.alloc(Layout::from_size_align(1, 1).unwrap()); + } } #[test] - fn alloc_oom_returns_null() { + #[should_panic = "exhausted heap limit"] + fn alloc_oom_panics() { let alloc = BumpAllocator::<16>::new(); - let layout = Layout::from_size_align(17, 1).unwrap(); - assert!(unsafe { alloc.alloc(layout) }.is_null()); + unsafe { alloc.alloc(Layout::from_size_align(17, 1).unwrap()) }; } #[test] + #[should_panic = "exhausted heap limit"] fn alloc_oom_due_to_alignment_padding() { // 9 bytes of heap: alloc 1 byte, then try 8 bytes with align 8 // offset=1, aligned=8, 8+8=16 > 9 → OOM let alloc = BumpAllocator::<9>::new(); - unsafe { alloc.alloc(Layout::from_size_align(1, 1).unwrap()) }; - assert!(unsafe { alloc.alloc(Layout::from_size_align(8, 8).unwrap()) }.is_null()); + unsafe { + alloc.alloc(Layout::from_size_align(1, 1).unwrap()); + alloc.alloc(Layout::from_size_align(8, 8).unwrap()); + } } #[test] diff --git a/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md b/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md index fb3882ab..7f344bac 100644 --- a/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md +++ b/crates/pvm-contract-benchmarks/reports/encoding-benchmarks.md @@ -17,27 +17,27 @@ used in this report. 
### Static Types (no-alloc compatible) -| Type | Operation | pvm-contract-types | alloy-core | Ratio | -|----------|-----------|--------------------|--------------|--------------| -| u8 | encode | 14.31 ns | 6.49 ns | 2.20x slower | -| u8 | decode | 0.40 ns | 2.37 ns | 5.97x faster | -| u32 | encode | 14.23 ns | 6.86 ns | 2.08x slower | -| u32 | decode | 0.98 ns | 2.36 ns | 2.41x faster | -| u128 | encode | 14.07 ns | 6.67 ns | 2.11x slower | -| u128 | decode | 0.99 ns | 2.79 ns | 2.83x faster | -| U256 | encode | 13.74 ns | 6.51 ns | 2.11x slower | -| U256 | decode | 1.78 ns | 3.57 ns | 2.01x faster | -| address | encode | 15.50 ns | 11.52 ns | 1.35x slower | -| address | decode | 7.09 ns | 9.59 ns | 1.35x faster | +| Type | Operation | pvm-contract-types | alloy-core | Ratio | +|------|-----------|--------------------|------------|-------| +| u8 | encode | 13.26 ns | 6.38 ns | 2.08x slower | +| u8 | decode | 0.39 ns | 2.35 ns | 6.07x faster | +| u32 | encode | 13.51 ns | 6.85 ns | 1.97x slower | +| u32 | decode | 0.97 ns | 2.40 ns | 2.46x faster | +| u128 | encode | 14.19 ns | 6.63 ns | 2.14x slower | +| u128 | decode | 0.98 ns | 2.77 ns | 2.82x faster | +| U256 | encode | 13.40 ns | 6.40 ns | 2.09x slower | +| U256 | decode | 1.76 ns | 3.53 ns | 2.01x faster | +| address | encode | 14.78 ns | 11.24 ns | 1.31x slower | +| address | decode | 6.91 ns | 9.45 ns | 1.37x faster | ### Dynamic Types (alloc required) -| Type | Operation | pvm-contract-types | alloy-core | Ratio | -|------------|-----------|--------------------|--------------|--------------| -| String | encode | 11.24 ns | 16.23 ns | 1.44x faster | -| String | decode | 11.15 ns | 32.70 ns | 2.93x faster | -| Vec\<U256\> | encode | 20.78 ns | 25.62 ns | 1.23x faster | -| Vec\<U256\> | decode | 25.01 ns | 46.63 ns | 1.87x faster | +| Type | Operation | pvm-contract-types | alloy-core | Ratio | +|------|-----------|--------------------|------------|-------| +| String | encode | 11.04 ns | 15.85 ns | 1.44x faster | +|
String | decode | 10.94 ns | 32.57 ns | 2.98x faster | +| Vec\<U256\> | encode | 20.42 ns | 25.02 ns | 1.23x faster | +| Vec\<U256\> | decode | 23.86 ns | 45.87 ns | 1.92x faster | ### Summary diff --git a/crates/pvm-contract-benchmarks/scripts/regenerate-reports.sh b/crates/pvm-contract-benchmarks/scripts/regenerate-reports.sh new file mode 100755 index 00000000..103cdcde --- /dev/null +++ b/crates/pvm-contract-benchmarks/scripts/regenerate-reports.sh @@ -0,0 +1,350 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +BENCHMARKS_DIR="$(dirname "$SCRIPT_DIR")" +REPORTS_DIR="$BENCHMARKS_DIR/reports" +WORKSPACE_ROOT="$(cd "$BENCHMARKS_DIR/../.." && pwd)" + +usage() { + echo "Usage: $0 [encoding|binary-sizes|all]" + echo "" + echo "Regenerate benchmark reports." + echo "" + echo " encoding Run criterion encoding benchmarks and regenerate encoding-benchmarks.md" + echo " binary-sizes Run build-and-measure and regenerate macro-vs-builder.md" + echo " all Run both (default)" + exit 1 +} + +TARGET="${1:-all}" + +# ============================================================================ +# Encoding benchmarks report +# ============================================================================ + +parse_criterion_time() { + # Extract the estimate (middle value) from criterion output line: + # time: [14.20 ns 14.31 ns 14.42 ns] + # Returns e.g.
"14.31 ns" + local line="$1" + echo "$line" | sed -E 's/.*\[.+ (.+ [a-zµ]+) .+\]/\1/' +} + +normalize_to_ns() { + # Convert a time string like "14.31 ns" or "1.23 µs" or "1.23 us" to nanoseconds + local time_str="$1" + local value unit + value=$(echo "$time_str" | awk '{print $1}') + unit=$(echo "$time_str" | awk '{print $2}') + case "$unit" in + ps) echo "$value * 0.001" | bc -l ;; + ns) echo "$value" ;; + µs|us) echo "$value * 1000" | bc -l ;; + ms) echo "$value * 1000000" | bc -l ;; + *) echo "$value" ;; + esac +} + +fmt_ns() { + # Format nanoseconds to 2 decimal places with " ns" suffix + printf "%.2f ns" "$1" +} + +compute_ratio() { + # Compute ratio and direction label: "Nx faster" or "Nx slower" + local pvm_ns="$1" alloy_ns="$2" + local ratio + if (( $(echo "$pvm_ns < $alloy_ns" | bc -l) )); then + ratio=$(echo "$alloy_ns / $pvm_ns" | bc -l) + printf "%.2fx faster" "$ratio" + else + ratio=$(echo "$pvm_ns / $alloy_ns" | bc -l) + printf "%.2fx slower" "$ratio" + fi +} + +regenerate_encoding() { + echo "==> Running criterion benchmarks (cargo bench -p pvm-contract-types --features alloc)..." + local bench_output + bench_output=$(cd "$WORKSPACE_ROOT" && cargo bench -p pvm-contract-types --features alloc 2>&1) || { + echo "ERROR: cargo bench failed" + echo "$bench_output" + exit 1 + } + + # Parse all "time:" lines with their benchmark names + # Criterion outputs lines like: + # u8_encode_pvm time: [14.20 ns 14.31 ns 14.42 ns] + declare -A bench_times + local current_bench="" + while IFS= read -r line; do + # Match benchmark name lines (e.g.
"Benchmarking u8_encode_pvm" or the result line) + if [[ "$line" =~ ^([a-z0-9_]+)[[:space:]]+time: ]]; then + current_bench="${BASH_REMATCH[1]}" + local time_str + time_str=$(parse_criterion_time "$line") + bench_times["$current_bench"]="$time_str" + fi + done <<< "$bench_output" + + # Define type display names and benchmark name prefixes + local -a static_types=("u8" "u32" "u128" "u256" "address") + local -a dynamic_types=("string" "vec_u256") + + declare -A type_labels + type_labels[u8]="u8" + type_labels[u32]="u32" + type_labels[u128]="u128" + type_labels[u256]="U256" + type_labels[address]="address" + type_labels[string]="String" + type_labels[vec_u256]="Vec\\<U256\\>" + + generate_table() { + local -a types=("$@") + echo "| Type | Operation | pvm-contract-types | alloy-core | Ratio |" + echo "|------|-----------|--------------------|------------|-------|" + for type_key in "${types[@]}"; do + local label="${type_labels[$type_key]}" + for op in encode decode; do + local pvm_key="${type_key}_${op}_pvm" + local alloy_key="${type_key}_${op}_alloy" + local pvm_time="${bench_times[$pvm_key]:-}" + local alloy_time="${bench_times[$alloy_key]:-}" + if [[ -z "$pvm_time" || -z "$alloy_time" ]]; then + echo "WARNING: Missing benchmark data for $type_key $op" >&2 + continue + fi + local pvm_ns alloy_ns + pvm_ns=$(normalize_to_ns "$pvm_time") + alloy_ns=$(normalize_to_ns "$alloy_time") + local pvm_fmt alloy_fmt ratio + pvm_fmt=$(fmt_ns "$pvm_ns") + alloy_fmt=$(fmt_ns "$alloy_ns") + ratio=$(compute_ratio "$pvm_ns" "$alloy_ns") + printf "| %-10s | %-6s | %-18s | %-12s | %-12s |\n" \ + "$label" "$op" "$pvm_fmt" "$alloy_fmt" "$ratio" + done + done + } + + local report="$REPORTS_DIR/encoding-benchmarks.md" + cat > "$report" <<'HEADER' +# ABI Encoding/Decoding Benchmarks: pvm-contract-types vs Alloy + +Criterion benchmarks comparing `pvm-contract-types` (`SolEncode`/`SolDecode`) +against `alloy-core` (`SolValue::abi_encode`/`abi_decode`) for ABI encoding and +decoding of Solidity types.
+ +## How to regenerate + +```bash +cargo bench -p pvm-contract-types --features alloc +``` + +Results are saved to `target/criterion/`. The raw output contains the numbers +used in this report. + +## Results + +### Static Types (no-alloc compatible) + +HEADER + generate_table "${static_types[@]}" >> "$report" + + cat >> "$report" <<'MID' + +### Dynamic Types (alloc required) + +MID + generate_table "${dynamic_types[@]}" >> "$report" + + cat >> "$report" <<'FOOTER' + +### Summary + +**Encoding**: `pvm-contract-types` encodes static types ~2x slower than alloy +due to writing into a caller-provided `[u8]` buffer (with zeroing) rather than +returning a heap-allocated `Vec`. For dynamic types (String, Vec) where both +approaches allocate, `pvm-contract-types` is 1.2–1.4x faster. + +**Decoding**: `pvm-contract-types` decodes all types faster than alloy — from +2x faster for U256 up to 6x faster for u8. This is because decoding reads +directly from the ABI-encoded buffer with no validation overhead, while alloy +performs full ABI conformance checking. + +**Key insight**: The encode overhead is a benchmarking artifact. In real +contracts, `pvm-contract-types` encodes into a stack buffer (no allocation), +which is strictly faster in the `no_std` + `no_alloc` context where these +contracts run. The alloy `abi_encode()` approach always heap-allocates a +`Vec`, which is not possible without an allocator. +FOOTER + + echo "==> Generated $report" +} + +# ============================================================================ +# Binary sizes report +# ============================================================================ + +regenerate_binary_sizes() { + echo "==> Running build-and-measure (cargo +nightly run -p pvm-contract-benchmarks --bin build-and-measure)..." 
+ (cd "$WORKSPACE_ROOT" && cargo +nightly run -p pvm-contract-benchmarks --bin build-and-measure) || { + echo "ERROR: build-and-measure failed" + exit 1 + } + + local artifacts_dir="$WORKSPACE_ROOT/target/benchmark-artifacts" + + # Collect release artifact sizes: contract_variant.release.polkavm + declare -A sizes + local -a contracts=() + local -a seen_contracts=() + + for f in "$artifacts_dir"/*.release.polkavm; do + [[ -e "$f" ]] || continue + local basename + basename=$(basename "$f" .release.polkavm) + # basename is e.g. "fibonacci_no-alloc" or "mytoken_builder-dsl" + # Split on last underscore + local contract variant + contract="${basename%_*}" + variant="${basename##*_}" + local size + size=$(stat --format='%s' "$f" 2>/dev/null || stat -f '%z' "$f") + sizes["${contract}_${variant}"]="$size" + + # Track unique contracts in order + local found=0 + for c in "${seen_contracts[@]+"${seen_contracts[@]}"}"; do + [[ "$c" == "$contract" ]] && found=1 && break + done + if [[ $found -eq 0 ]]; then + seen_contracts+=("$contract") + fi + done + contracts=("${seen_contracts[@]}") + + local -a variants=("no-alloc" "builder-dsl" "with-alloc") + + fmt_bytes() { + printf "%'d" "$1" + } + + fmt_kb() { + echo "scale=2; $1 / 1024" | bc -l | awk '{printf "%.2f", $0}' + } + + local report="$REPORTS_DIR/macro-vs-builder.md" + + local today + today=$(date +%Y-%m-%d) + + cat > "$report" <> "$report" + + # Builder DSL vs Proc-Macro comparison table + cat >> "$report" <<'EOF' + +### Builder DSL vs Proc-Macro (no-alloc) + +EOF + + # Count methods per contract (hardcoded to match the benchmark contracts) + declare -A method_counts + method_counts[fibonacci]=1 + method_counts[mytoken]=4 + method_counts[multi]=10 + + { + echo "| Contract | Methods | Proc-Macro | Builder DSL | Overhead |" + echo "|-----------|--------:|------------|-------------|----------|" + for contract in "${contracts[@]}"; do + local noalloc_size="${sizes[${contract}_no-alloc]:-}" + local 
dsl_size="${sizes[${contract}_builder-dsl]:-}" + [[ -z "$noalloc_size" || -z "$dsl_size" ]] && continue + local methods="${method_counts[$contract]:-?}" + local noalloc_fmt="${noalloc_size} B" + local dsl_fmt="${dsl_size} B" + local overhead + overhead=$(echo "scale=1; ($dsl_size - $noalloc_size) * 100 / $noalloc_size" | bc -l) + # Format: remove trailing zeros but keep at least one decimal + overhead=$(printf "%.1f" "$overhead") + printf "| %-9s | %7s | %10s | %11s | +%-7s |\n" \ + "$contract" "$methods" "$noalloc_fmt" "$dsl_fmt" "${overhead}%" + done + } >> "$report" + + cat >> "$report" <<'EOF' + +For the trivial fibonacci contract (1 method), the builder DSL adds ~730 bytes +of overhead from the runtime dispatch table and calldata-copy loop. As method +count grows, the fixed overhead is amortized: mytoken (4 methods) shows +0.3% +and multi (10 methods with mixed parameter types) shows +4.1%. The builder DSL +does not become cheaper than the proc-macro with more methods, but the overhead +stays negligible for real contracts. + +### Key Size Drivers + +| Factor | Impact | +|------------------------------------|--------------------------------------| +| Allocator (no-alloc vs with-alloc) | 26x for fibonacci, 4.3x for mytoken | +| Builder DSL vs proc-macro no-alloc | +4% at 10 methods, negligible at scale | +EOF + + echo "==> Generated $report" +} + +# ============================================================================ +# Main +# ============================================================================ + +case "$TARGET" in + encoding) regenerate_encoding ;; + binary-sizes) regenerate_binary_sizes ;; + all) + regenerate_encoding + regenerate_binary_sizes + ;; + *) usage ;; +esac + +echo "==> Done."