Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def tests(session: nox.Session) -> None:
"pytest-cov",
"pytest-markdown-docs",
"six", # centrosome runtime dep, not declared in its metadata
".",
".[numba]", # exercise the numba backend + correctness harness in CI
)
except Exception:
session.skip(
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ dependencies = [
]

[project.optional-dependencies]
numba = [
"numba>=0.59",
]
test = [
"pytest>=8.4.2",
"pytest-cov",
Expand Down
14 changes: 14 additions & 0 deletions src/cp_measure/_detect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Backend capability detection.

Detected ONCE at import via ``importlib.util.find_spec`` — availability is
checked without importing the package or catching ImportErrors. Dispatch reads
the flag; the resolved backend path is then called directly and unguarded
(a backend that is flagged present but raises is a real bug and must surface,
not be papered over by a try/except fallback).

Other backends (jax, ...) add their own flags here as they are wired.
"""

import importlib.util

HAS_NUMBA: bool = importlib.util.find_spec("numba") is not None
31 changes: 28 additions & 3 deletions src/cp_measure/bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,26 @@
_3D_FEATURES = ("intensity", "sizeshape", "texture", "granularity")


def _numba_registries() -> dict[str, dict[str, Callable]]:
"""Registries for the 'numba' accelerator.

Composes the numba implementations (``intensity``, ``texture``) with the
numpy implementations of every other feature — a single global "numba"
selection still yields a full, working feature set, accelerated where a
numba backend exists. This is explicit per-function composition, NOT an
error-driven fallback.
"""
from cp_measure.core.numba import (
get_intensity as _numba_intensity,
get_texture as _numba_texture,
)

return {
"core": {**_CORE, "intensity": _numba_intensity, "texture": _numba_texture},
"correlation": _CORRELATION,
}


def _dispatch(name: str) -> dict[str, Callable]:
from cp_measure import _ACCELERATOR

Expand All @@ -55,9 +75,14 @@ def _dispatch(name: str) -> dict[str, Callable]:
f"'jax' accelerator not yet wired for {name} measurements"
)
if _ACCELERATOR == "numba":
raise NotImplementedError(
f"'numba' accelerator not yet wired for {name} measurements"
)
from cp_measure._detect import HAS_NUMBA

if not HAS_NUMBA:
raise RuntimeError(
"accelerator 'numba' selected but numba is not installed; "
"you can install it via `pip install cp_measure[numba]`"
)
return _numba_registries()[name]
if _ACCELERATOR == "fastest":
raise NotImplementedError("'fastest' logic not yet implemented")
raise ValueError(
Expand Down
15 changes: 15 additions & 0 deletions src/cp_measure/core/numba/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Numba-accelerated backend.

Selected explicitly by import (``from cp_measure.core.numba import get_intensity``)
or globally via ``cp_measure.set_accelerator("numba")``. Requires the optional
``numba`` extra; availability is gated by ``cp_measure._detect.HAS_NUMBA``.

This backend accelerates ``intensity`` and ``texture``; the global "numba"
accelerator composes them with the numpy implementations of every other feature
(see ``cp_measure.bulk``).
"""

from cp_measure.core.numba.measureobjectintensity import get_intensity
from cp_measure.core.numba.measuretexture import get_texture

__all__ = ["get_intensity", "get_texture"]
189 changes: 189 additions & 0 deletions src/cp_measure/core/numba/_texture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""Numba Haralick texture kernel (single-threaded, cached).

Reimplements ``mahotas.features.haralick(crop, distance, ignore_zeros=True)`` — the
~99% cost of ``measuretexture.get_texture`` — as one fused per-object kernel:

- build the symmetric grey-level co-occurrence matrix (GLCM) for each direction
(BIT-EXACT to ``mahotas.features.texture.cooccurence(symmetric=True)`` — an
integer histogram of pixel pairs at the direction offset, counted both ways),
- apply ``ignore_zeros`` (drop pairs touching background 0),
- compute the 13 Haralick features per direction from the GLCM.

One kernel covers 2D (4 directions) and 3D (13 directions): the crop is always
``(Z, Y, X)`` and ``offsets`` are ``distance * (dz, dy, dx)`` deltas. The exact
formulas + edge cases mirror ``mahotas/features/texture.py::haralick_features``
(``preserve_haralick_bug=False``, ``use_x_minus_y_variance=False``). The GLCM is
sized to ``crop.max() + 1`` (NOT a fixed 256) because feature 9
(``px_minus_y.var()``) is taken over a length-``fm1`` array.

``img_as_ubyte`` / ``regionprops`` stay host-side (scipy/skimage). Serial; no
``prange``/``nogil``.
"""

import numpy as np
from numba import njit

# Direction deltas as (dz, dy, dx). 2D uses dz=0 (crop is (1, Y, X)); these mirror
# mahotas ``_2d_deltas`` [(0,1),(1,1),(1,0),(1,-1)] (as (dy,dx)) and ``_3d_deltas``.
DELTAS_2D = np.array([[0, 0, 1], [0, 1, 1], [0, 1, 0], [0, 1, -1]], np.int64)
DELTAS_3D = np.array(
[
[1, 0, 0],
[1, 1, 0],
[0, 1, 0],
[1, -1, 0],
[0, 0, 1],
[1, 0, 1],
[0, 1, 1],
[1, 1, 1],
[1, -1, 1],
[1, 0, -1],
[0, 1, -1],
[1, 1, -1],
[1, -1, -1],
],
np.int64,
)


@njit(cache=True, error_model="numpy")
def _entropy(a):
"""``-sum(a * log2(a))`` over positive entries (0*log2(1)=0), as mahotas."""
s = 0.0
for i in range(a.shape[0]):
v = a[i]
if v > 0.0:
s -= v * np.log2(v)
return s


@njit(cache=True, error_model="numpy")
def haralick_object(crop, offsets):
"""The 13 Haralick features per direction for one ``(Z, Y, X)`` object crop.

Returns ``(n_dir, 13)`` float64. If ANY direction's GLCM is empty after
``ignore_zeros`` (no non-background pairs), the whole object's block is NaN —
matching mahotas raising ``ValueError`` and cp_measure catching it.
"""
n_dir = offsets.shape[0]
out = np.empty((n_dir, 13), np.float64)
Z, Y, X = crop.shape
fm1 = 0
for z in range(Z):
for y in range(Y):
for x in range(X):
if crop[z, y, x] + 1 > fm1:
fm1 = crop[z, y, x] + 1

cmat = np.empty((fm1, fm1), np.int64)
for d in range(n_dir):
dz, dy, dx = offsets[d, 0], offsets[d, 1], offsets[d, 2]
cmat[:] = 0
total = 0
for z in range(Z):
for y in range(Y):
for x in range(X):
z2 = z + dz
y2 = y + dy
x2 = x + dx
if 0 <= z2 < Z and 0 <= y2 < Y and 0 <= x2 < X:
a = crop[z, y, x]
b = crop[z2, y2, x2]
cmat[a, b] += 1
cmat[b, a] += 1 # symmetric=True
total += 2
# ignore_zeros: T excludes pairs touching background (grey level 0), i.e.
# row 0 + col 0 (symmetric, so 2*row0 - cmat[0,0]) -> O(fm1), no fm1^2 sum.
row0 = 0
for j in range(fm1):
row0 += cmat[0, j]
T = total - 2 * row0 + cmat[0, 0]
if T == 0: # mahotas raises -> cp_measure -> whole object NaN
for dd in range(n_dir):
for f in range(13):
out[dd, f] = np.nan
return out

_haralick_13(cmat, fm1, T, out, d)
return out


@njit(cache=True, error_model="numpy")
def _haralick_13(cmat, fm1, T, out, d):
"""Fill ``out[d, :]`` with the 13 Haralick features of GLCM ``cmat`` (size fm1)."""
invT = 1.0 / T
px = np.zeros(fm1) # marginal (== row & col marginal; GLCM is symmetric)
px_plus_y = np.zeros(2 * fm1) # P(i+j)
px_minus_y = np.zeros(fm1) # P(|i-j|)
asm = 0.0
entropy = 0.0
ij_sum = 0.0 # sum_{i,j} i*j*p
idm = 0.0
# rows/cols 0 are the ignore_zeros background -> skip (start at 1)
for i in range(1, fm1):
for j in range(1, fm1):
c = cmat[i, j]
if c == 0:
continue
p = c * invT
asm += p * p
entropy -= p * np.log2(p)
ij_sum += i * j * p
idm += p / ((i - j) * (i - j) + 1)
px[j] += p
px_plus_y[i + j] += p
px_minus_y[i - j if i >= j else j - i] += p

ux = 0.0
vx = 0.0
for k in range(fm1):
ux += k * px[k]
for k in range(fm1):
vx += (k - ux) * (k - ux) * px[k]
sx = np.sqrt(vx)

sum_avg = 0.0
sum_var = 0.0
for s in range(2 * fm1):
sum_avg += s * px_plus_y[s]
for s in range(2 * fm1):
sum_var += s * s * px_plus_y[s]
sum_var -= sum_avg * sum_avg

contrast = 0.0
for dd in range(fm1):
contrast += dd * dd * px_minus_y[dd]

# difference variance = var of the length-fm1 px_minus_y array (population)
dm = 0.0
for dd in range(fm1):
dm += px_minus_y[dd]
dm /= fm1
diff_var = 0.0
for dd in range(fm1):
diff_var += (px_minus_y[dd] - dm) * (px_minus_y[dd] - dm)
diff_var /= fm1

# Info measures of correlation. For a SYMMETRIC GLCM both cross-entropies
# collapse to the marginals: HXY1 = HXY2 = 2*HX (verified vs mahotas to ~1e-15),
# so the two O(fm1^2) double-loops become O(fm1) here. (HXY1 = -sum p*log2(px*py)
# = -sum_i py[i]log2 px[i] - sum_j px[j]log2 py[j] = 2*HX when px==py; likewise HXY2.)
hx = _entropy(px)
hxy1 = 2.0 * hx
hxy2 = 2.0 * hx

out[d, 0] = asm
out[d, 1] = contrast
out[d, 2] = 1.0 if sx == 0.0 else (ij_sum - ux * ux) / (sx * sx)
out[d, 3] = vx
out[d, 4] = idm
out[d, 5] = sum_avg
out[d, 6] = sum_var
out[d, 7] = _entropy(px_plus_y)
out[d, 8] = entropy
out[d, 9] = diff_var
out[d, 10] = _entropy(px_minus_y)
out[d, 11] = (entropy - hxy1) if hx == 0.0 else (entropy - hxy1) / hx
diff = hxy2 - entropy
arg = 1.0 - np.exp(-2.0 * diff)
out[d, 12] = np.sqrt(arg if arg > 0.0 else 0.0)
Loading