-
Notifications
You must be signed in to change notification settings - Fork 11
perf(sizeshape): vectorize get_zernike on foreground pixels (~8x) #74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d75d3f0
844acdf
2c1a1f1
35ca892
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,11 @@ | |
| Utilities reused in multiple measurements. | ||
| """ | ||
|
|
||
| import centrosome.zernike | ||
| import numpy | ||
| from numpy.typing import NDArray | ||
|
|
||
| from cp_measure.primitives.segment import label_to_idx_lut | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So maybe I should have caught this with the previous PR that introduced label_to_idx_lut. Doesn't that function not make the assumption of dense (i.e., contiguous) labels? This is a contract I decided an cp_measure and if it can simplify the code and/or speed up things then I think it should be assumed that we have contiguous labels.
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, after re-reading the function and discussing the options with claude, I think we should not need the lookup table given that we assume a clean (contiguous) input.
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should be aware of the costs of this (especially for 3D images, where the find_objects function may be more costly) |
||
|
|
||
|
|
||
| def _ensure_np_array(value): | ||
|
|
@@ -49,3 +53,76 @@ def labels_to_binmasks(masks: numpy.ndarray) -> numpy.ndarray: | |
| labels = numpy.unique(masks) | ||
| labels = labels[labels > 0] | ||
| return masks == labels.reshape((-1,) + (1,) * masks.ndim) | ||
|
|
||
|
|
||
| def _zernike_scores( | ||
| masks: NDArray[numpy.integer], | ||
| zernike_indexes: NDArray[numpy.integer], | ||
| weight: NDArray[numpy.floating] | None = None, | ||
| ) -> tuple[ | ||
| NDArray[numpy.floating], | ||
| NDArray[numpy.floating], | ||
| NDArray[numpy.floating], | ||
| NDArray[numpy.floating], | ||
| ]: | ||
| """Per-object real/imaginary Zernike moment sums, vectorised on foreground pixels. | ||
|
|
||
| Mirrors ``centrosome.zernike.zernike`` without its two area-scaling costs: the basis is | ||
| never scattered into a full ``(H, W, K)`` complex array, and each moment is segment-summed | ||
| by label with one ``numpy.bincount`` instead of a whole-image ``scipy.ndimage.sum``. The | ||
| Horner basis evaluation is copied from ``centrosome.construct_zernike_polynomials`` (same | ||
| lookup table, ``r**2 > 1`` cutoff, ``z = y + i*x`` convention), so results match centrosome | ||
| to round-off. | ||
|
|
||
| ``weight`` (co-shaped with ``masks``, e.g. an intensity image) scales each pixel's | ||
| contribution; ``None`` gives the unweighted shape moments. Returns | ||
| ``(real_sums, imag_sums, radii, counts)`` of shapes ``(n, K)``, ``(n, K)``, ``(n,)`` and | ||
| ``(n,)``, ordered by ascending label, where ``radii`` is the enclosing-circle radius and | ||
| ``counts`` the object pixel count. ``get_zernike`` normalises by ``pi * radii**2``; the | ||
| intensity-weighted radial Zernikes normalise by ``counts``. | ||
| """ | ||
| lut, n = label_to_idx_lut(masks) | ||
| k = len(zernike_indexes) | ||
| labels = numpy.flatnonzero(lut >= 0) | ||
| centers, radii = centrosome.zernike.minimum_enclosing_circle(masks, labels) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. centrosome is not explicitly imported (it's centrosome.zernike). This should be just |
||
| radii = numpy.asarray(radii, dtype=float) | ||
| real_sums = numpy.zeros((n, k)) | ||
| imag_sums = numpy.zeros((n, k)) | ||
| if n == 0: | ||
| return real_sums, imag_sums, radii, numpy.zeros(0) | ||
|
|
||
| # Foreground pixels, their object row, and unit-disk coordinates relative to each | ||
| # object's enclosing circle — no full (H, W) coordinate grid is materialised. | ||
| seg_full = lut[masks] | ||
| keep = seg_full >= 0 | ||
| rows, cols = numpy.nonzero(keep) | ||
| seg = seg_full[keep] | ||
| counts = numpy.bincount(seg, minlength=n).astype(float) | ||
| # Single-pixel objects have an enclosing-circle radius of 0; the resulting 0/0 yields NaN | ||
| # coordinates (which the r**2 > 1 cutoff later discards), matching centrosome — suppress the | ||
| # warning since it is expected, not a fault. | ||
| with numpy.errstate(invalid="ignore", divide="ignore"): | ||
| ym = (rows - centers[seg, 0]) / radii[seg] | ||
| xm = (cols - centers[seg, 1]) / radii[seg] | ||
|
|
||
| coeffs = centrosome.zernike.construct_zernike_lookuptable(zernike_indexes) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto above |
||
| r_square = xm * xm + ym * ym | ||
| z = ym + 1j * xm | ||
| w = None if weight is None else weight[keep].astype(float) | ||
| z_pows = {m: z**m for m in numpy.unique(zernike_indexes[:, 1]) if m} | ||
| for idx, (zn, zm) in enumerate(zernike_indexes): | ||
| s = numpy.zeros_like(xm) | ||
| for c in coeffs[idx, : (zn - zm) // 2 + 1]: # Horner scheme on r**2 | ||
| s *= r_square | ||
| s += c | ||
| s[r_square > 1] = 0 | ||
| if w is not None: | ||
| s *= w | ||
| if zm == 0: # purely real moment; the imaginary segment-sum is identically zero | ||
| real_sums[:, idx] = numpy.bincount(seg, weights=s, minlength=n) | ||
| else: | ||
| zf = s * z_pows[zm] | ||
| real_sums[:, idx] = numpy.bincount(seg, weights=zf.real, minlength=n) | ||
| imag_sums[:, idx] = numpy.bincount(seg, weights=zf.imag, minlength=n) | ||
|
|
||
| return real_sums, imag_sums, radii, counts | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,171 @@ | ||
| """Golden + edge tests for the vectorised numpy ``get_zernike``. | ||
|
|
||
| The vectorised implementation avoids centrosome's full (H, W, K) scatter and the per-channel | ||
| ``scipy.ndimage.sum``; it must match ``centrosome.zernike.zernike`` (called with the actual | ||
| label values) to floating-point round-off. Existing ``test_core_measurements`` already covers | ||
| shape / 3D-empty; here we lock the numerical result and the edge cases. | ||
| """ | ||
|
|
||
| import centrosome.zernike | ||
| import numpy | ||
| import scipy.ndimage | ||
|
|
||
| from cp_measure.core.measureobjectsizeshape import get_zernike | ||
| from cp_measure.utils import _zernike_scores | ||
|
timtreis marked this conversation as resolved.
|
||
|
|
||
| ATOL = 1e-10 # >> the ~2e-16 round-off observed, << any real signal | ||
|
|
||
|
|
||
| def _reference(masks, zernike_numbers=9): | ||
| """Old centrosome path, called with the real label values as indices.""" | ||
| uniq = numpy.unique(masks) | ||
| uniq = uniq[uniq > 0] | ||
| zidx = centrosome.zernike.get_zernike_indexes(zernike_numbers + 1) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto centrosome.zernike -> zernike to be consistent with the imports (avoiding hidden state). I'm still curious as to whether or not centrosome.zernike automagically imports centrosome itself.
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the following line too btw |
||
| zf = centrosome.zernike.zernike(zidx, masks, uniq) | ||
| return {f"Zernike_{n}_{m}": zf[:, i] for i, (n, m) in enumerate(zidx)} | ||
|
|
||
|
|
||
| def _assert_matches(masks, zernike_numbers=9): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a fixture to test more than one zernike number (e.g., 5,9,14) uncommon, but worth checking) |
||
| ref = _reference(masks, zernike_numbers) | ||
| got = get_zernike(masks, None, zernike_numbers) | ||
| assert list(got) == list(ref), "key set / order changed" | ||
| for k in ref: | ||
| assert got[k].shape == ref[k].shape, k | ||
| assert numpy.allclose(got[k], ref[k], atol=ATOL, rtol=1e-10, equal_nan=True), ( | ||
| f"{k}: max|diff|={numpy.nanmax(numpy.abs(got[k] - ref[k]))}" | ||
| ) | ||
|
|
||
|
|
||
| def _square_objects(size, n, gap_frac=0.75): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My personal preference is to prefix the test-case generators with |
||
| masks = numpy.zeros((size, size), numpy.int32) | ||
| step = size // n | ||
| obj = int(step * gap_frac) | ||
| lab = 0 | ||
| for a in range(n): | ||
| for b in range(n): | ||
| lab += 1 | ||
| masks[a * step : a * step + obj, b * step : b * step + obj] = lab | ||
| return masks | ||
|
|
||
|
|
||
| def test_zernike_matches_centrosome_single_object(): | ||
| masks = numpy.zeros((64, 64), numpy.int32) | ||
| masks[20:45, 18:50] = 1 | ||
| _assert_matches(masks) | ||
|
|
||
|
|
||
| def test_zernike_matches_centrosome_multi_object(): | ||
| _assert_matches(_square_objects(256, 4)) | ||
|
|
||
|
|
||
| def test_zernike_matches_centrosome_irregular(): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may seem silly, but I think this covers most of the previous two tests (single_object and multi_object). Please remove those two and keep this one. |
||
| rng = numpy.random.default_rng(0) | ||
| masks = numpy.zeros((128, 128), numpy.int32) | ||
| for lab, (cy, cx) in enumerate(rng.integers(20, 108, size=(6, 2)), 1): | ||
| yy, xx = numpy.mgrid[0:128, 0:128] | ||
| masks[(yy - cy) ** 2 + (xx - cx) ** 2 < rng.integers(40, 120)] = lab | ||
| _assert_matches(masks) | ||
|
|
||
|
|
||
| def test_zernike_object_touching_edge(): | ||
| masks = numpy.zeros((64, 64), numpy.int32) | ||
| masks[0:20, 0:20] = 1 # clipped at the top-left corner | ||
| masks[40:64, 40:64] = 2 # clipped at the bottom-right corner | ||
| _assert_matches(masks) | ||
|
|
||
|
|
||
| def test_zernike_noncontiguous_labels(): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we are not supporting non_contiguous labels this is not needed |
||
| # labels {1, 3, 7}: the old range(1, n+1) assumption would be wrong; the new code | ||
| # uses the real label values and must match centrosome called the same way. | ||
| masks = numpy.zeros((96, 96), numpy.int32) | ||
| masks[10:30, 10:30] = 1 | ||
| masks[40:60, 40:60] = 3 | ||
| masks[70:90, 70:90] = 7 | ||
| _assert_matches(masks) | ||
|
|
||
|
|
||
| def test_zernike_single_pixel_object(): | ||
| # minimum_enclosing_circle radius -> 0, so both paths divide by zero identically. | ||
| masks = numpy.zeros((32, 32), numpy.int32) | ||
| masks[16, 16] = 1 | ||
| masks[5:15, 5:15] = 2 # a normal object alongside the degenerate one | ||
| _assert_matches(masks) | ||
|
|
||
|
|
||
| def test_zernike_non_default_degree(): | ||
| _assert_matches(_square_objects(128, 3), zernike_numbers=6) | ||
|
|
||
|
|
||
| def test_zernike_empty_mask(): | ||
| masks = numpy.zeros((40, 40), numpy.int32) | ||
| got = get_zernike(masks, None) | ||
| zidx = centrosome.zernike.get_zernike_indexes(9 + 1) | ||
| assert list(got) == [f"Zernike_{n}_{m}" for n, m in zidx] | ||
| assert all(v.shape == (0,) for v in got.values()) | ||
|
|
||
|
|
||
| def test_zernike_3d_returns_empty(): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should not encode this here. Eventually we should have a layer that validates which measurements are for 2d, 3d and both, and this will get in the way (As it normally it should raise an error, #35 is a band-aid). Whether or not we get 2D or 3D measurements is to be determined before they are actually used (at one of the three entry points, not within the "math") |
||
| assert get_zernike(numpy.zeros((4, 16, 16), numpy.int32), None) == {} | ||
|
|
||
|
|
||
| def _weighted_reference(masks, pixels, zernike_numbers=9): | ||
| """Independent weighted real/imag/count via centrosome's own basis + scipy segment-sum. | ||
|
|
||
| Mirrors centrosome.zernike()'s coordinate construction but feeds the intensity image as | ||
| ``weight`` to ``construct_zernike_polynomials`` — the path the radial Zernikes (PR #75) use. | ||
| """ | ||
| zidx = centrosome.zernike.get_zernike_indexes(zernike_numbers + 1) | ||
| labels = numpy.unique(masks) | ||
| labels = labels[labels > 0] | ||
| centers, radii = centrosome.zernike.minimum_enclosing_circle(masks, labels) | ||
| rev = numpy.full(int(masks.max()) + 1, -1, int) | ||
| rev[labels] = numpy.arange(len(labels)) | ||
| mask = rev[masks] != -1 | ||
| ny, nx = masks.shape | ||
| y, x = numpy.mgrid[0:ny, 0:nx].astype(float) | ||
| rev_ind = rev[masks[mask]] | ||
| xc = numpy.zeros((ny, nx)) | ||
| yc = numpy.zeros((ny, nx)) | ||
| yc[mask] = (y[mask] - centers[rev_ind, 0]) / radii[rev_ind] | ||
| xc[mask] = (x[mask] - centers[rev_ind, 1]) / radii[rev_ind] | ||
| zf = centrosome.zernike.construct_zernike_polynomials( | ||
| xc, yc, zidx, mask, weight=pixels | ||
| ) | ||
| real = numpy.column_stack( | ||
| [ | ||
| scipy.ndimage.sum_labels(zf[:, :, i].real, masks, labels) | ||
| for i in range(len(zidx)) | ||
| ] | ||
| ) | ||
| imag = numpy.column_stack( | ||
| [ | ||
| scipy.ndimage.sum_labels(zf[:, :, i].imag, masks, labels) | ||
| for i in range(len(zidx)) | ||
| ] | ||
| ) | ||
| counts = numpy.asarray( | ||
| scipy.ndimage.sum_labels(numpy.ones((ny, nx)), masks, labels), float | ||
| ) | ||
| return real, imag, counts | ||
|
|
||
|
|
||
| def test_zernike_scores_weighted_matches_centrosome(): | ||
| # The intensity-weighted path (weight != None) + per-object counts that PR #75 reuses. | ||
| rng = numpy.random.default_rng(1) | ||
| masks = _square_objects(160, 3) | ||
| pixels = rng.random(masks.shape) | ||
| zidx = centrosome.zernike.get_zernike_indexes(9 + 1) | ||
| real, imag, radii, counts = _zernike_scores(masks, zidx, weight=pixels) | ||
| ref_real, ref_imag, ref_counts = _weighted_reference(masks, pixels) | ||
| assert numpy.allclose(real, ref_real, atol=ATOL, rtol=1e-10) | ||
| assert numpy.allclose(imag, ref_imag, atol=ATOL, rtol=1e-10) | ||
| assert numpy.allclose(counts, ref_counts) | ||
|
|
||
|
|
||
| def test_zernike_scores_unit_weight_equals_unweighted(): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't love this test, it is essentially checking that the following two branches work:
So there is way more test code than the actual code that it's checking. Please remove this test |
||
| masks = _square_objects(128, 3) | ||
| zidx = centrosome.zernike.get_zernike_indexes(9 + 1) | ||
| r0, i0, _rad0, c0 = _zernike_scores(masks, zidx) | ||
| r1, i1, _rad1, c1 = _zernike_scores(masks, zidx, weight=numpy.ones(masks.shape)) | ||
| assert numpy.allclose(r0, r1) and numpy.allclose(i0, i1) | ||
| assert numpy.array_equal(c0, c1) | ||
Uh oh!
There was an error while loading. Please reload this page.