Skip to content
Merged
Show file tree
Hide file tree
Changes from 57 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
ba2cbe9
resolve conflicts with main
selmanozleyen Jan 18, 2026
d951c56
load_obs thing was removed by auto formatting
selmanozleyen Jan 18, 2026
aa348fe
update tests to resolve conflict
selmanozleyen Jan 18, 2026
6220a8a
readthedocs merge
selmanozleyen Jan 18, 2026
309d33e
chore: clarify compatibility of `h5ad` + forward compat of old shuffl…
ilan-gold Jan 19, 2026
437f184
breaking: clarify obs handling + change output keys (#115)
ilan-gold Jan 19, 2026
679ee50
fix: header level (#116)
ilan-gold Jan 19, 2026
ad1ec55
merge changes
selmanozleyen Jan 19, 2026
c8b4395
apply suggestions
selmanozleyen Jan 19, 2026
62d7d48
checkout readme from main
selmanozleyen Jan 19, 2026
d7539bf
breaking: clarify obs handling + change output keys (#115)
ilan-gold Jan 19, 2026
0d7764d
parent 627eb08d699b9cb07ab24fa67775e1e794c07245
selmanozleyen Jan 18, 2026
f7742a1
restore from main
selmanozleyen Jan 19, 2026
3d73e3a
fix: checking out: confused origin and upstream again...
selmanozleyen Jan 19, 2026
dff2a01
continuation of the upstream origin confusion fix
selmanozleyen Jan 19, 2026
0c13efa
breaking: clarify obs handling + change output keys (#115)
ilan-gold Jan 19, 2026
ffe23be
fix: header level (#116)
ilan-gold Jan 19, 2026
e10d300
Merge branch 'main' into feat/sampler
selmanozleyen Jan 19, 2026
5d522fe
refactor _prepare_dataset_and_obs
selmanozleyen Jan 19, 2026
d8168c1
update docstring for loadrequest
selmanozleyen Jan 19, 2026
c501646
separate files for samplers
selmanozleyen Jan 19, 2026
f9862b0
prepare_output is no longer needed
selmanozleyen Jan 19, 2026
6a1153e
clarify docs
selmanozleyen Jan 19, 2026
418f79a
fix overlook: already sorted batch_indices no need to resort them
selmanozleyen Jan 19, 2026
9b786f3
fix prepare_output refactor
selmanozleyen Jan 19, 2026
fc1661e
add todo
selmanozleyen Jan 19, 2026
d764adc
rename from leftover to remainder for clarity. since there is no left…
selmanozleyen Jan 19, 2026
5bc2751
simplify validate_sampler
selmanozleyen Jan 19, 2026
66d5d3c
remove old generic params
selmanozleyen Jan 19, 2026
0e8a472
add broad typing
selmanozleyen Jan 19, 2026
ae3e1bc
clarify todos and add username
selmanozleyen Jan 20, 2026
742605a
type and modify decorator
selmanozleyen Jan 20, 2026
cf30686
no need for lambdas in decorators
selmanozleyen Jan 20, 2026
261c5e8
make decorator compatible in multiple cases
selmanozleyen Jan 20, 2026
4402d4e
put ABC in abc folder
selmanozleyen Jan 20, 2026
0929849
update test with the fix
selmanozleyen Jan 20, 2026
899cc18
qualname for fix. no sampler in public API
selmanozleyen Jan 20, 2026
89e7ccd
check coverage when shuffled otherwise also check order
selmanozleyen Jan 20, 2026
0356374
fix to prev commit
selmanozleyen Jan 20, 2026
87f1ccb
clarify doc
selmanozleyen Jan 20, 2026
8a7f8c2
update worker tests
selmanozleyen Jan 20, 2026
a85634a
new * location for ChunkSampler
selmanozleyen Jan 20, 2026
61efe81
add typing but can revert if too verbose
selmanozleyen Jan 20, 2026
faaf525
remove unused fields. (maybe linter check can be added)
selmanozleyen Jan 20, 2026
eecb0b1
remove old SO link
selmanozleyen Jan 20, 2026
2cd09ca
don't put generators into np.all !!
selmanozleyen Jan 20, 2026
4fcb553
apply typing and docstring suggestion
selmanozleyen Jan 21, 2026
b53d685
change in folder structure
selmanozleyen Jan 21, 2026
d464caa
make batch sampler getter
selmanozleyen Jan 21, 2026
0b4883b
remove empty line
selmanozleyen Jan 21, 2026
c450586
apply docstring suggestions for Loader args
selmanozleyen Jan 21, 2026
15a11b9
remove empty line
selmanozleyen Jan 21, 2026
84c9124
conf.py is same as main
selmanozleyen Jan 21, 2026
c38160d
change shuffle
selmanozleyen Jan 21, 2026
b8472e3
remove todo
selmanozleyen Jan 21, 2026
e620194
update to match old behaviour
selmanozleyen Jan 21, 2026
1b38afd
Merge branch 'main' into feat/sampler
selmanozleyen Jan 21, 2026
7237023
put vstack inside accumulate chunks
selmanozleyen Jan 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

Loader
Loader.__iter__
ChunkSampler
```

(io-helpers)=
Expand All @@ -28,6 +29,15 @@
DatasetCollection
```

(abc)=
## abc
```{eval-rst}
.. autosummary::
:toctree: generated/

abc.Sampler
```

(types)=
## types

Expand All @@ -36,4 +46,5 @@
:toctree: generated/

types.LoaderOutput
types.LoadRequest
```
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ omit = [
"**/test_*.py",
]

[[tool.mypy.overrides]]
module = [ "anndata.*", "cupyx.*", "cupy.*" ]
ignore_missing_imports = true

[tool.cruft]
skip = [
"tests",
Expand Down
12 changes: 10 additions & 2 deletions src/annbatch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,18 @@

from importlib.metadata import version

from . import types
from . import abc, types
from .io import DatasetCollection, write_sharded
from .loader import Loader
from .samplers._chunk_sampler import ChunkSampler

__version__ = version("annbatch")

__all__ = ["Loader", "write_sharded", "DatasetCollection", "types"]
__all__ = [
"Loader",
"DatasetCollection",
"types",
"write_sharded",
"ChunkSampler",
"abc",
]
5 changes: 5 additions & 0 deletions src/annbatch/abc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .sampler import Sampler

__all__ = [
"Sampler",
]
70 changes: 70 additions & 0 deletions src/annbatch/abc/sampler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Sampler classes for efficient chunk-based data access."""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from collections.abc import Iterator

from annbatch.types import LoadRequest


class Sampler(ABC):
    """Abstract base class for samplers.

    Samplers control how data is batched and loaded from the underlying datasets.
    Subclasses implement `validate` and `_sample`; callers use `sample`, which
    runs validation before handing back the load requests.
    """

    def sample(self, n_obs: int) -> Iterator[LoadRequest]:
        """Sample load requests given the total number of observations.

        Validation runs eagerly, i.e. when this method is called, so an invalid
        configuration is reported at the call site instead of on first iteration.

        Parameters
        ----------
        n_obs
            The total number of observations available.

        Returns
        -------
        Iterator[LoadRequest]
            Iterator over the load requests produced by `_sample`.

        Raises
        ------
        ValueError
            If `validate` rejects the sampler configuration for the given n_obs.
        """
        # Deliberately NOT a generator function: a body containing `yield`
        # would not execute until the first `next()`, deferring validation.
        self.validate(n_obs)
        # `iter` is a no-op for generators/iterators and keeps the return type
        # an iterator even if a subclass returns a plain iterable.
        return iter(self._sample(n_obs))

    @abstractmethod
    def validate(self, n_obs: int) -> None:
        """Validate the sampler configuration against the given n_obs.

        This method is called at the start of each `sample()` call, before any
        load request is produced. Implement it to check the sampler parameters.

        Parameters
        ----------
        n_obs
            The total number of observations in the loader.

        Raises
        ------
        ValueError
            If the sampler configuration is invalid for the given n_obs.
        """

    @abstractmethod
    def _sample(self, n_obs: int) -> Iterator[LoadRequest]:
        """Implementation of the sample method.

        This method is called by `sample` to perform the actual sampling after
        validation has passed.

        Parameters
        ----------
        n_obs
            The total number of observations available.

        Yields
        ------
        LoadRequest
            Load requests for batching data.
        """
Loading
Loading