scverse · ilan-gold · Mar 29, 2026 · Mar 29, 2026 · Apr 1, 2026
diff --git a/src/annbatch/io.py b/src/annbatch/io.py
@@ -540,7 +540,7 @@ def add_adatas(
         shuffle: bool = True,
         rng: np.random.Generator | None = None,
     ) -> Self:
-        """Take AnnData paths and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `dataset_size` rows per dataset if running for the first time).
+        """Take AnnData paths (or unique ids) and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `dataset_size` rows per dataset if running for the first time).
 
         The set of AnnData datasets is collectively referred to as a "collection" where each dataset is called `dataset_i{.h5ad}`.
         The main purpose of this function is to create shuffled sharded zarr datasets, which is the default behavior of this function.
@@ -555,11 +555,13 @@ def add_adatas(
         Parameters
         ----------
             adata_paths
-                Paths to the AnnData files used to create the zarr store.
+                Paths to (or unique ids for) the AnnData files/objects used to create the zarr store.
             load_adata
                 Function to customize (lazy-)loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used with categoricals/nullables read into memory and `(-1)` chunks for `obs`.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and certain `obs` columns), you can provide a custom function here to speed up loading and harmonize your data.
                 Beware that concatenating nullables/categoricals (i.e., what happens if `len(adata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
+                Note that this function does not have to return "lazy" `AnnData` objects nor does it have to do I/O.
+                This function can return in-memory objects (e.g., after pre-processing) and treat each entry of `adata_paths` simply as a unique identifier for the returned object.
             var_subset
                 Subset of gene names to include in the store. If None, all genes are included.
                 Genes are subset based on the `var_names` attribute of the concatenated AnnData object.