Skip to content
74 changes: 71 additions & 3 deletions src/basic_memory/cli/commands/db.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Database management commands."""

from dataclasses import dataclass
from pathlib import Path

import typer
Expand All @@ -12,13 +13,47 @@
from basic_memory.cli.app import app
from basic_memory.cli.commands.command_utils import run_with_cleanup
from basic_memory.config import ConfigManager, ProjectMode
from basic_memory.indexing import IndexProgress
from basic_memory.repository import ProjectRepository
from basic_memory.services.initialization import reconcile_projects_with_config
from basic_memory.sync.sync_service import get_sync_service

console = Console()


@dataclass(slots=True)
class EmbeddingProgress:
"""Typed CLI progress payload for embedding backfills."""

entity_id: int
index: int
total: int


def _format_eta(seconds: float | None) -> str:
"""Render a compact ETA string for CLI progress descriptions."""
if seconds is None:
return "--:--"

whole_seconds = max(int(seconds), 0)
minutes, remaining_seconds = divmod(whole_seconds, 60)
hours, remaining_minutes = divmod(minutes, 60)
if hours:
return f"{hours:d}:{remaining_minutes:02d}:{remaining_seconds:02d}"
return f"{remaining_minutes:02d}:{remaining_seconds:02d}"


def _format_index_progress(progress: IndexProgress) -> str:
    """Render typed index progress as a compact Rich task description.

    Args:
        progress: Typed progress payload from the indexing pipeline.

    Returns:
        A single-line description showing file counts, batch counts,
        throughput, and the estimated time remaining.
    """
    # A falsy rate (None or 0) is rendered as 0/min rather than a float.
    rate = int(progress.files_per_minute) if progress.files_per_minute else 0
    segments = [
        f"{progress.files_processed}/{progress.files_total} files",
        f"{progress.batches_completed}/{progress.batches_total} batches",
        f"{rate}/min",
        f"ETA {_format_eta(progress.eta_seconds)}",
    ]
    return " Indexing files... " + " | ".join(segments)


async def _reindex_projects(app_config):
"""Reindex all projects in a single async context.

Expand Down Expand Up @@ -185,10 +220,34 @@ async def _reindex(app_config, search: bool, embeddings: bool, project: str | No
console.print(f"\n[bold]Project: [cyan]{proj.name}[/cyan][/bold]")

if search:
console.print(" Rebuilding full-text search index...")
sync_service = await get_sync_service(proj)
sync_dir = Path(proj.path)
await sync_service.sync(sync_dir, project_name=proj.name)
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
console=console,
) as progress:
task = progress.add_task(" Indexing files... scanning changes", total=1)

async def on_index_progress(update: IndexProgress) -> None:
total = update.files_total or 1
completed = update.files_processed if update.files_total else 1
progress.update(
task,
description=_format_index_progress(update),
total=total,
completed=min(completed, total),
)

await sync_service.sync(
sync_dir,
project_name=proj.name,
progress_callback=on_index_progress,
)
progress.update(task, completed=progress.tasks[task].total or 1)

console.print(" [green]✓[/green] Full-text search index rebuilt")

if embeddings:
Expand All @@ -213,7 +272,16 @@ async def _reindex(app_config, search: bool, embeddings: bool, project: str | No
task = progress.add_task(" Embedding entities...", total=None)

def on_progress(entity_id, index, total):
progress.update(task, total=total, completed=index)
embedding_progress = EmbeddingProgress(
entity_id=entity_id,
index=index,
total=total,
)
progress.update(
task,
total=embedding_progress.total,
completed=embedding_progress.index,
)

stats = await search_service.reindex_vectors(progress_callback=on_progress)
progress.update(task, completed=stats["total_entities"])
Expand Down
30 changes: 30 additions & 0 deletions src/basic_memory/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ class BasicMemoryConfig(BaseSettings):
description="Batch size for embedding generation.",
gt=0,
)
semantic_embedding_request_concurrency: int = Field(
default=4,
description="Maximum number of concurrent provider requests for batched embedding generation when the active provider supports request-level concurrency.",
gt=0,
)
semantic_embedding_sync_batch_size: int = Field(
default=64,
description="Batch size for vector sync orchestration flushes.",
Expand Down Expand Up @@ -286,6 +291,31 @@ class BasicMemoryConfig(BaseSettings):
description="Maximum number of files to process concurrently during sync. Limits memory usage on large projects (2000+ files). Lower values reduce memory consumption.",
gt=0,
)
index_batch_size: int = Field(
default=32,
description="Maximum number of changed files to load into one indexing batch.",
gt=0,
)
index_batch_max_bytes: int = Field(
default=8 * 1024 * 1024,
description="Maximum total bytes to load into one indexing batch. Large files still run as single-file batches.",
gt=0,
)
index_parse_max_concurrent: int = Field(
default=8,
description="Maximum number of markdown parse tasks to run concurrently inside one indexing batch.",
gt=0,
)
index_entity_max_concurrent: int = Field(
default=4,
description="Maximum number of entity create/update tasks to run concurrently inside one indexing batch.",
gt=0,
)
index_metadata_update_max_concurrent: int = Field(
default=4,
description="Maximum number of metadata/search refresh tasks to run concurrently inside one indexing batch.",
gt=0,
)

kebab_filenames: bool = Field(
default=False,
Expand Down
29 changes: 29 additions & 0 deletions src/basic_memory/indexing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Reusable indexing primitives shared by local sync and future remote callers."""

from basic_memory.indexing.batch_indexer import BatchIndexer
from basic_memory.indexing.batching import build_index_batches
from basic_memory.indexing.models import (
IndexedEntity,
IndexBatch,
IndexFileMetadata,
IndexFileWriter,
IndexFrontmatterUpdate,
IndexFrontmatterWriteResult,
IndexingBatchResult,
IndexInputFile,
IndexProgress,
)

__all__ = [
"BatchIndexer",
"IndexedEntity",
"IndexBatch",
"IndexFileMetadata",
"IndexFileWriter",
"IndexFrontmatterUpdate",
"IndexFrontmatterWriteResult",
"IndexingBatchResult",
"IndexInputFile",
"IndexProgress",
"build_index_batches",
]
Loading
Loading