From fd81c7b9e1d55063eadead10dde1ae32aa4a12cc Mon Sep 17 00:00:00 2001
From: Ilan Gold <ilanbassgold@gmail.com>
Date: Sun, 29 Mar 2026 11:21:01 +0200
Subject: [PATCH 1/2] further clarify

---
 src/annbatch/io.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index a0fd6265..5e1dda78 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -540,7 +540,7 @@ def add_adatas(
         shuffle: bool = True,
         rng: np.random.Generator | None = None,
     ) -> Self:
-        """Take AnnData paths and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `dataset_size` rows per dataset if running for the first time).
+        """Take AnnData paths (or unique ids) and create or add to an on-disk set of AnnData datasets with uniform var spaces at the desired path (with `dataset_size` rows per dataset if running for the first time).
 
         The set of AnnData datasets is collectively referred to as a "collection" where each dataset is called `dataset_i{.h5ad}`.
         The main purpose of this function is to create shuffled sharded zarr datasets, which is the default behavior of this function.
@@ -555,11 +555,13 @@ def add_adatas(
         Parameters
         ----------
             adata_paths
-                Paths to the AnnData files used to create the zarr store.
+                Paths to, or unique ids for, the AnnData objects (files) used to create the zarr store.
             load_adata
                 Function to customize (lazy-)loading the invidiual input anndata files. By default, :func:`anndata.experimental.read_lazy` is used with categoricals/nullables read into memory and `(-1)` chunks for `obs`.
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and certain `obs` columns), you can provide a custom function here to speed up loading and harmonize your data.
                 Beware that concatenating nullables/categoricals (i.e., what happens if `len(adata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
+                Note that this function does not have to return "lazy" `AnnData` objects nor does it have to do I/O.
+                This function can return in-memory objects after pre-processing and treat `adata_paths` as simply a unqiue identifier for identifying the returned object.
             var_subset
                 Subset of gene names to include in the store. If None, all genes are included.
                 Genes are subset based on the `var_names` attribute of the concatenated AnnData object.

From 55f23899a622f0b38a888fde69c77de66a725525 Mon Sep 17 00:00:00 2001
From: Ilan Gold <ilanbassgold@gmail.com>
Date: Sun, 29 Mar 2026 11:22:03 +0200
Subject: [PATCH 2/2] i.e.,

---
 src/annbatch/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/annbatch/io.py b/src/annbatch/io.py
index 5e1dda78..32548aa4 100644
--- a/src/annbatch/io.py
+++ b/src/annbatch/io.py
@@ -561,7 +561,7 @@ def add_adatas(
                 If you only need a subset of the input anndata files' elems (e.g., only `X` and certain `obs` columns), you can provide a custom function here to speed up loading and harmonize your data.
                 Beware that concatenating nullables/categoricals (i.e., what happens if `len(adata_paths) > 1` internally in this function) from {class}`anndata.experimental.backed.Dataset2D` `obs` is very time consuming - consider loading these into memory if you use this argument.
                 Note that this function does not have to return "lazy" `AnnData` objects nor does it have to do I/O.
-                This function can return in-memory objects after pre-processing and treat `adata_paths` as simply a unqiue identifier for identifying the returned object.
+                This function can return in-memory objects (i.e., after pre-processing) and treat `adata_paths` as simply a unique identifier for the returned object.
             var_subset
                 Subset of gene names to include in the store. If None, all genes are included.
                 Genes are subset based on the `var_names` attribute of the concatenated AnnData object.