#!/usr/bin/env python3
"""Self-contained PR benchmark — lives entirely in .github/scripts, nothing in the package.

Subcommands:
  run      --out FILE
      generate seeded synthetic inputs, time every cp_measure get_* -> JSON
  compare  --base F --head F [--commit SHA] [--md F]
      diff two run JSONs into a head-vs-main timing table

Run once per environment (PR head, main) on the SAME seeded inputs (pure-numpy generation is
deterministic), then compare. The driver installs each env; this script only needs cp_measure
importable plus numpy.
"""

import os

# Default every native thread pool to a single thread unless the caller already set a value.
# This must run before numpy is imported, which is why the remaining imports come afterwards.
for _v in (
    "OMP_NUM_THREADS",
    "OPENBLAS_NUM_THREADS",
    "MKL_NUM_THREADS",
    "NUMEXPR_NUM_THREADS",
    "VECLIB_MAXIMUM_THREADS",
    "NUMBA_NUM_THREADS",
):
    os.environ.setdefault(_v, "1")

import argparse  # noqa: E402
import itertools  # noqa: E402
import json  # noqa: E402
import signal  # noqa: E402
import statistics  # noqa: E402
from contextlib import contextmanager  # noqa: E402
from pathlib import Path  # noqa: E402
from time import perf_counter  # noqa: E402

import numpy  # noqa: E402

MATRIX = {"sizes": (256, 512, 1024), "counts": (16, 64, 256), "seeds": (0, 1, 2)}
BLOBS_PER_CHANNEL = 5
WARMUP, REPS, TIMEOUT = 1, 3, 120.0
AFFECTED = 1.05  # report a function if any cell moves by this factor either way (faster or slower)


# --- synthetic generator: n ellipses on a grid + random Gaussian blobs per channel --------------
def generate(size: int, n: int, seed: int = 0):
    """Build one synthetic benchmark input.

    Args:
        size: Edge length of the square image in pixels.
        n: Number of labelled ellipses, laid out on a near-square grid of cells.
        seed: Seed for the blob positions and widths; the label image does not depend on it.

    Returns:
        ``(labels, channels)`` where ``labels`` is an int32 ``(size, size)`` array holding
        ``0`` for background and ``1..n`` for the ellipses, and ``channels`` is a float32
        ``(2, size, size)`` array, each channel being a sum of ``BLOBS_PER_CHANNEL``
        Gaussian blobs.

    Raises:
        ValueError: If ``size`` or ``n`` is negative.
    """
    if size < 0 or n < 0:
        raise ValueError(f"size and n must be non-negative, got size={size!r}, n={n!r}")

    rng = numpy.random.default_rng(seed)
    # Open (broadcastable) axes: a (size, 1) column and a (1, size) row give the same
    # per-pixel values as dense index grids without materialising two size×size arrays.
    yy, xx = numpy.ogrid[0:size, 0:size]
    labels = numpy.zeros((size, size), numpy.int32)
    if n:
        cols = int(numpy.ceil(numpy.sqrt(n)))
        rows = int(numpy.ceil(n / cols))
        a, b = 0.35 * size / rows, 0.35 * size / cols  # semi-axes along y and x
        for k in range(n):
            r, c = divmod(k, cols)
            cy, cx = (r + 0.5) * size / rows, (c + 0.5) * size / cols
            # Only the bounding box of the ellipse can satisfy the inequality, so evaluate
            # it there instead of over the whole image (one pixel of padding on each side).
            y0, y1 = max(0, int(numpy.floor(cy - a))), min(size, int(numpy.ceil(cy + a)) + 1)
            x0, x1 = max(0, int(numpy.floor(cx - b))), min(size, int(numpy.ceil(cx + b)) + 1)
            inside = ((yy[y0:y1] - cy) / a) ** 2 + ((xx[:, x0:x1] - cx) / b) ** 2 <= 1
            labels[y0:y1, x0:x1][inside] = k + 1  # the slice is a view, so this writes through

    channels = []
    for _ in range(2):  # ch0 → core features, ch0+ch1 → colocalisation
        img = numpy.zeros((size, size))
        for _ in range(BLOBS_PER_CHANNEL):
            cy, cx = rng.uniform(0, size, 2)
            s = rng.uniform(size / 10, size / 5)
            img += numpy.exp(-((yy - cy) ** 2 + (xx - cx) ** 2) / (2 * s * s))
        channels.append(img.astype(numpy.float32))
    return labels, numpy.stack(channels)


# --- timing -------------------------------------------------------------------------------------
class _Timeout(BaseException):
    """Raised by the SIGALRM handler when a timed call exceeds its budget.

    Derives from BaseException so that an ``except Exception`` inside the code being timed
    cannot swallow the timeout and let the call run on.
    """


def _raise_timeout(*_):
    """Signal handler: abort whatever is running by raising ``_Timeout``."""
    raise _Timeout()


@contextmanager
def _time_limit(seconds: float):
    """Raise ``_Timeout`` in the body if it runs longer than ``seconds`` of wall-clock time.

    Uses SIGALRM / ``ITIMER_REAL``, so it only works on platforms that provide them and only
    from the main thread. On exit the timer is cancelled and the previously installed
    SIGALRM handler is put back.
    """
    previous = signal.signal(signal.SIGALRM, _raise_timeout)
    signal.setitimer(signal.ITIMER_REAL, seconds)
    try:
        yield
    finally:
        signal.setitimer(signal.ITIMER_REAL, 0)
        # signal.signal() returns None when the old handler was not installed from Python;
        # None cannot be passed back in, so leave our handler in place in that case.
        if previous is not None:
            signal.signal(signal.SIGALRM, previous)


def _norm01(img):
    """Return ``img`` as float64 rescaled to [0, 1]; a constant image becomes all zeros."""
    img = img.astype("float64")
    lo, hi = float(img.min()), float(img.max())
    return (img - lo) / (hi - lo) if hi > lo else img - lo


def _functions():
    """List ``(name, callable, arity)`` for every function exposed by ``cp_measure.bulk``.

    Arity 1 marks the core measurements and arity 2 the correlation measurements; ``run``
    uses it to pick the argument layout. The import is local so that ``compare`` works
    without cp_measure installed.
    """
    from cp_measure import bulk

    out = []
    for arity, reg in (
        (1, bulk.get_core_measurements()),
        (2, bulk.get_correlation_measurements()),
    ):
        for name, fn in reg.items():
            out.append((name, fn, arity))
    return out


def _time(fn, args) -> dict:
    """Time ``fn(*args)`` and return a JSON-serialisable status record.

    ``WARMUP`` untimed calls are followed by ``REPS`` timed calls, all sharing one
    ``TIMEOUT`` budget.

    Returns:
        ``{"status": "ok", "reps": [seconds, ...]}`` on success,
        ``{"status": "timeout"}`` if the budget is exceeded, or
        ``{"status": "error", "error": "<Type>: <message>"}`` (message cut to 200 characters)
        if the call raises.
    """
    try:
        with _time_limit(TIMEOUT):
            for _ in range(WARMUP):
                fn(*args)
            reps = []
            for _ in range(REPS):
                t = perf_counter()
                fn(*args)
                reps.append(perf_counter() - t)
    except _Timeout:
        return {"status": "timeout"}
    except Exception as exc:  # a failing measurement is a result, not a reason to abort the run
        return {"status": "error", "error": f"{type(exc).__name__}: {exc}"[:200]}
    return {"status": "ok", "reps": reps}


def run(out_path: str):
    """Time every function on every ``MATRIX`` cell and write the results to ``out_path``.

    The JSON document has two keys: ``cells`` (a list of ``{"key", "size", "n_objects"}``)
    and ``results`` (``{function name: {cell key: record from _time}}``).
    """
    funcs = _functions()
    cells, results = [], {name: {} for name, _, _ in funcs}
    for size, n, seed in itertools.product(
        MATRIX["sizes"], MATRIX["counts"], MATRIX["seeds"]
    ):
        labels, channels = generate(size, n, seed)
        imgs = (_norm01(channels[0]), _norm01(channels[1]))
        key = f"s{size}_n{n}_seed{seed}"
        cells.append({"key": key, "size": size, "n_objects": n})
        for name, fn, arity in funcs:
            # arity 1 is called as (labels, image); arity 2 as (image0, image1, labels)
            args = (labels, imgs[0]) if arity == 1 else (imgs[0], imgs[1], labels)
            results[name][key] = _time(fn, args)
    Path(out_path).write_text(
        json.dumps({"cells": cells, "results": results}, indent=2), encoding="utf-8"
    )


# --- compare ------------------------------------------------------------------------------------
def _median_ms(results_for_fn: dict, keys: list[str]):
    """Median (ms) over all ok rep times in a cell's seeds, or None."""
    times = [
        t
        for k in keys
        if results_for_fn.get(k, {}).get("status") == "ok"
        for t in results_for_fn[k]["reps"]
    ]
    return statistics.median(times) * 1e3 if times else None


def _head_failure(results_for_fn: dict, keys: list[str]):
    """Return "timeout" or "error" if any of the cell's records has that status, else None."""
    statuses = {results_for_fn.get(k, {}).get("status") for k in keys}
    for status in ("timeout", "error"):
        if status in statuses:
            return status
    return None


def _format_cell(value) -> str:
    """Render one grid cell: a speedup as ``1.23×``, a failure label as is, nothing as ``—``."""
    if not value:
        return "—"
    if isinstance(value, str):
        return value
    return f"{value:.2f}×"


def compare(base: dict, head: dict, commit: str = "", *, threshold: float = AFFECTED) -> str:
    """Render a Markdown report of head-vs-main speedups.

    Args:
        base: Parsed ``run`` output for ``main``.
        head: Parsed ``run`` output for the PR head; its ``cells`` define the grid.
        commit: Head commit SHA; the first seven characters are shown in the title.
        threshold: A function is listed if any cell is at least this many times faster or
            slower. Must be >= 1.

    Returns:
        The Markdown text. Each listed function gets a size × object-count table of
        ``main/head`` speedups. A function is also listed when a cell has timings on
        ``main`` but only timeouts/errors on head; that cell shows ``timeout`` or ``error``
        so a hard regression is not hidden behind an empty cell.

    Raises:
        ValueError: If ``threshold`` is below 1.
    """
    if threshold < 1:
        raise ValueError(f"threshold must be >= 1, got {threshold!r}")

    groups: dict[tuple, list[str]] = {}
    for cell in head["cells"]:
        groups.setdefault((cell["size"], cell["n_objects"]), []).append(cell["key"])
    sizes = sorted({s for s, _ in groups})
    counts = sorted({n for _, n in groups})
    base_results, head_results = base["results"], head["results"]

    ref = f"`{commit[:7]}`" if commit else "PR head"
    out = [
        f"### Benchmark — {ref} vs `main`",
        "",
        f"`speedup = main/head` (>1 faster, <1 slower) · median per cell · "
        f"showing functions that moved ≥{threshold:.2f}× either way",
    ]

    slow_cutoff = 1.0 / threshold  # a cell at or below this is a regression worth reporting
    affected = []  # (function, {(size, count): speedup | failure label | None})
    any_failure = False
    for fn in sorted(head_results):
        grid, speedups, failed = {}, [], False
        for size, n in itertools.product(sizes, counts):
            keys = groups.get((size, n), [])
            m = _median_ms(base_results.get(fn, {}), keys)
            h = _median_ms(head_results[fn], keys)
            if m and h:
                grid[(size, n)] = m / h
                speedups.append(m / h)
            elif m and h is None:
                # main produced timings but head produced none: surface why, if known
                grid[(size, n)] = _head_failure(head_results[fn], keys)
                failed = failed or grid[(size, n)] is not None
            else:
                grid[(size, n)] = None
        moved = bool(speedups) and (
            max(speedups) >= threshold or min(speedups) <= slow_cutoff
        )
        if moved or failed:
            affected.append((fn, grid))
            any_failure = any_failure or failed

    if not affected:
        out += ["", f"_No function moved by ≥{threshold:.2f}× (faster or slower)._"]
        return "\n".join(out)

    if any_failure:
        out += ["", "`timeout` / `error` marks a cell that ran on `main` but failed on head."]

    for fn, grid in affected:
        out += [
            "",
            f"#### `{fn}`",
            "",
            "| size \\ objects | " + " | ".join(str(n) for n in counts) + " |",
            "|---" + "|--:" * len(counts) + "|",
        ]
        for size in sizes:
            row = [_format_cell(grid.get((size, n))) for n in counts]
            out.append(f"| **{size}** | " + " | ".join(row) + " |")
    return "\n".join(out)


def main(argv=None) -> int:
    """Command-line entry point; see the module docstring for the subcommands.

    Returns 0 on success. ``compare`` prints the report to stdout unless ``--md`` names a
    file to write it to.
    """
    p = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    sub = p.add_subparsers(dest="cmd", required=True)
    r = sub.add_parser("run")
    r.add_argument("--out", required=True)
    c = sub.add_parser("compare")
    c.add_argument("--base", required=True)
    c.add_argument("--head", required=True)
    c.add_argument("--commit", default="")
    c.add_argument("--md")
    a = p.parse_args(argv)

    if a.cmd == "run":
        run(a.out)
        return 0

    md = compare(
        json.loads(Path(a.base).read_text(encoding="utf-8")),
        json.loads(Path(a.head).read_text(encoding="utf-8")),
        a.commit,
    )
    if a.md:
        # The report contains non-ASCII characters (—, ×, ≥, ·), so do not depend on the locale.
        Path(a.md).write_text(md, encoding="utf-8")
    else:
        print(md)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
name: benchmark

# Runs on every commit to a PR: times every public get_* on the PR head vs main and posts a
# sticky comment with the timing table. Self-contained — workflow, tooling and generator all live
# on the PR branch (pull_request runs the workflow from the head), so nothing is needed on main.

on:
  pull_request:

permissions:
  pull-requests: write

concurrency:
  group: bench-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  benchmark:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: astral-sh/setup-uv@v5
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Benchmark head vs main
        env:
          # Passed through the environment rather than interpolated into the shell line.
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: bash .github/scripts/run_benchmark.sh bench-out "$HEAD_SHA"
      - name: Post sticky comment
        # Fork PRs get a read-only token, so posting would fail; the table is still in the
        # log of the previous step.
        if: github.event.pull_request.head.repo.full_name == github.repository
        env:
          GH_TOKEN: ${{ github.token }}
          REPO: ${{ github.repository }}
          PR: ${{ github.event.pull_request.number }}
        run: |
          set -euo pipefail
          # The first line of the comment is a hidden marker. It must be non-empty and
          # identical in the echo and in the jq filter: with an empty marker startswith()
          # matches every comment and the PATCH below would overwrite an unrelated one.
          { echo '<!-- cp-measure-benchmark -->'; echo; cat bench-out/table.md; } > body.md
          CID="$(gh api "repos/$REPO/issues/$PR/comments?per_page=100" \
            --jq 'map(select(.body | startswith("<!-- cp-measure-benchmark -->")))[0].id // empty')"
          if [ -n "$CID" ]; then
            gh api -X PATCH "repos/$REPO/issues/comments/$CID" -F body=@body.md >/dev/null
          else
            gh api -X POST "repos/$REPO/issues/$PR/comments" -F body=@body.md >/dev/null
          fi