Skip to content

Commit 009d735

Browse files
phernandez authored and claude committed
perf(search): record vector-sync metrics at batch level, not per entity
The previous fix introduced a _METRIC_INSTRUMENTS cache + helper functions to work around instrument re-creation in the per-entity hot path. That was the same kind of wrapper we just deleted from telemetry.py — cache to work around a wrapper around logfire is not the right shape. Proper fix: record these metrics where they belong — once per batch, using the totals VectorSyncBatchResult already accumulates (prepare_seconds_total, queue_wait_seconds_total, etc.). The per-entity histogram calls in _log_vector_sync_complete are gone; that function now only emits the slow-entity warning log. Batch-level block now: - shares one `batch_attrs` dict across all recordings (was repeated 7x) - records 5 histograms + 6 counters per batch with direct logfire.metric_* calls, no cache, no helpers - _METRIC_INSTRUMENTS + _metric_histogram + _metric_counter all removed Tests updated: per-entity histogram counts (was 2 per 2-entity batch) are now 1 per batch. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 08e06e6 commit 009d735

2 files changed

Lines changed: 42 additions & 116 deletions

File tree

src/basic_memory/repository/search_repository_base.py

Lines changed: 36 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -41,29 +41,6 @@
4141
_SQLITE_MAX_PREPARE_WINDOW = 8
4242

4343

44-
# Cache instruments so the per-entity hot path in _log_vector_sync_complete
45-
# doesn't re-enter the OTel MeterProvider lookup on every sample.
46-
_METRIC_INSTRUMENTS: dict[tuple[str, str, str], Any] = {}
47-
48-
49-
def _metric_histogram(name: str, unit: str = "") -> Any:
50-
key = ("histogram", name, unit)
51-
instrument = _METRIC_INSTRUMENTS.get(key)
52-
if instrument is None:
53-
instrument = logfire.metric_histogram(name, unit=unit)
54-
_METRIC_INSTRUMENTS[key] = instrument
55-
return instrument
56-
57-
58-
def _metric_counter(name: str) -> Any:
59-
key = ("counter", name, "")
60-
instrument = _METRIC_INSTRUMENTS.get(key)
61-
if instrument is None:
62-
instrument = logfire.metric_counter(name)
63-
_METRIC_INSTRUMENTS[key] = instrument
64-
return instrument
65-
66-
6744
@dataclass
6845
class VectorSyncBatchResult:
6946
"""Aggregate result for batched semantic vector sync runs."""
@@ -1092,57 +1069,42 @@ def emit_progress(entity_id: int) -> None:
10921069
write_seconds_total=result.write_seconds_total,
10931070
)
10941071
batch_total_seconds = time.perf_counter() - batch_start
1095-
_metric_histogram(
1096-
"vector_sync_batch_total_seconds",
1097-
unit="s",
1098-
).record(
1099-
batch_total_seconds,
1100-
attributes={
1101-
"backend": backend_name,
1102-
"skip_only_batch": result.embedding_jobs_total == 0,
1103-
},
1072+
batch_attrs = {
1073+
"backend": backend_name,
1074+
"skip_only_batch": result.embedding_jobs_total == 0,
1075+
}
1076+
logfire.metric_histogram("vector_sync_batch_total_seconds", unit="s").record(
1077+
batch_total_seconds, attributes=batch_attrs
11041078
)
1105-
_metric_counter("vector_sync_entities_total").add(
1106-
result.entities_total,
1107-
attributes={
1108-
"backend": backend_name,
1109-
"skip_only_batch": result.embedding_jobs_total == 0,
1110-
},
1079+
logfire.metric_histogram("vector_sync_prepare_seconds", unit="s").record(
1080+
result.prepare_seconds_total, attributes=batch_attrs
11111081
)
1112-
_metric_counter("vector_sync_entities_skipped").add(
1113-
result.entities_skipped,
1114-
attributes={
1115-
"backend": backend_name,
1116-
"skip_only_batch": result.embedding_jobs_total == 0,
1117-
},
1082+
logfire.metric_histogram("vector_sync_queue_wait_seconds", unit="s").record(
1083+
result.queue_wait_seconds_total, attributes=batch_attrs
11181084
)
1119-
_metric_counter("vector_sync_entities_deferred").add(
1120-
result.entities_deferred,
1121-
attributes={
1122-
"backend": backend_name,
1123-
"skip_only_batch": result.embedding_jobs_total == 0,
1124-
},
1085+
logfire.metric_histogram("vector_sync_embed_seconds", unit="s").record(
1086+
result.embed_seconds_total, attributes=batch_attrs
11251087
)
1126-
_metric_counter("vector_sync_embedding_jobs_total").add(
1127-
result.embedding_jobs_total,
1128-
attributes={
1129-
"backend": backend_name,
1130-
"skip_only_batch": result.embedding_jobs_total == 0,
1131-
},
1088+
logfire.metric_histogram("vector_sync_write_seconds", unit="s").record(
1089+
result.write_seconds_total, attributes=batch_attrs
11321090
)
1133-
_metric_counter("vector_sync_chunks_total").add(
1134-
result.chunks_total,
1135-
attributes={
1136-
"backend": backend_name,
1137-
"skip_only_batch": result.embedding_jobs_total == 0,
1138-
},
1091+
logfire.metric_counter("vector_sync_entities_total").add(
1092+
result.entities_total, attributes=batch_attrs
11391093
)
1140-
_metric_counter("vector_sync_chunks_skipped").add(
1141-
result.chunks_skipped,
1142-
attributes={
1143-
"backend": backend_name,
1144-
"skip_only_batch": result.embedding_jobs_total == 0,
1145-
},
1094+
logfire.metric_counter("vector_sync_entities_skipped").add(
1095+
result.entities_skipped, attributes=batch_attrs
1096+
)
1097+
logfire.metric_counter("vector_sync_entities_deferred").add(
1098+
result.entities_deferred, attributes=batch_attrs
1099+
)
1100+
logfire.metric_counter("vector_sync_embedding_jobs_total").add(
1101+
result.embedding_jobs_total, attributes=batch_attrs
1102+
)
1103+
logfire.metric_counter("vector_sync_chunks_total").add(
1104+
result.chunks_total, attributes=batch_attrs
1105+
)
1106+
logfire.metric_counter("vector_sync_chunks_skipped").add(
1107+
result.chunks_skipped, attributes=batch_attrs
11461108
)
11471109
if batch_span is not None:
11481110
batch_span.set_attributes(
@@ -1715,48 +1677,12 @@ def _log_vector_sync_complete(
17151677
shard_count: int,
17161678
remaining_jobs_after_shard: int,
17171679
) -> None:
1718-
"""Log completion and slow-entity warnings with a consistent format."""
1719-
backend_name = type(self).__name__.removesuffix("SearchRepository").lower()
1720-
_metric_histogram(
1721-
"vector_sync_prepare_seconds",
1722-
unit="s",
1723-
).record(
1724-
prepare_seconds,
1725-
attributes={
1726-
"backend": backend_name,
1727-
"skip_only_entity": entity_skipped and embedding_jobs_count == 0,
1728-
},
1729-
)
1730-
_metric_histogram(
1731-
"vector_sync_queue_wait_seconds",
1732-
unit="s",
1733-
).record(
1734-
queue_wait_seconds,
1735-
attributes={
1736-
"backend": backend_name,
1737-
"skip_only_entity": entity_skipped and embedding_jobs_count == 0,
1738-
},
1739-
)
1740-
_metric_histogram(
1741-
"vector_sync_embed_seconds",
1742-
unit="s",
1743-
).record(
1744-
embed_seconds,
1745-
attributes={
1746-
"backend": backend_name,
1747-
"skip_only_entity": entity_skipped and embedding_jobs_count == 0,
1748-
},
1749-
)
1750-
_metric_histogram(
1751-
"vector_sync_write_seconds",
1752-
unit="s",
1753-
).record(
1754-
write_seconds,
1755-
attributes={
1756-
"backend": backend_name,
1757-
"skip_only_entity": entity_skipped and embedding_jobs_count == 0,
1758-
},
1759-
)
1680+
"""Log completion and slow-entity warnings with a consistent format.
1681+
1682+
Per-entity timings are aggregated into `VectorSyncBatchResult` and
1683+
recorded as batch-level histograms once the batch completes — this
1684+
function stays on the per-entity hot path so it only emits logs.
1685+
"""
17601686
if total_seconds > 10:
17611687
logger.warning(
17621688
"Vector sync slow entity: project_id={project_id} entity_id={entity_id} "

tests/repository/test_semantic_search_base.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -712,8 +712,6 @@ def add(self, amount, attributes=None) -> None:
712712

713713
monkeypatch.setattr(repo, "_prepare_entity_vector_jobs_window", _stub_prepare_window)
714714
monkeypatch.setattr(repo, "_flush_embedding_jobs", _stub_flush)
715-
# Reset the module-level metric cache so the fake factories below win.
716-
monkeypatch.setattr(search_repository_base_module, "_METRIC_INSTRUMENTS", {})
717715
monkeypatch.setattr(
718716
search_repository_base_module.logfire,
719717
"metric_histogram",
@@ -732,12 +730,14 @@ def add(self, amount, attributes=None) -> None:
732730

733731
result = await repo.sync_entity_vectors_batch([1, 2])
734732

733+
# Batch-level histograms record once per batch using aggregated totals
734+
# from VectorSyncBatchResult — not per entity. See _sync_entity_vectors_internal.
735735
assert result.entities_synced == 2
736736
histogram_names = [name for name, _, _ in histogram_calls]
737-
assert histogram_names.count("vector_sync_prepare_seconds") == 2
738-
assert histogram_names.count("vector_sync_queue_wait_seconds") == 2
739-
assert histogram_names.count("vector_sync_embed_seconds") == 2
740-
assert histogram_names.count("vector_sync_write_seconds") == 2
737+
assert histogram_names.count("vector_sync_prepare_seconds") == 1
738+
assert histogram_names.count("vector_sync_queue_wait_seconds") == 1
739+
assert histogram_names.count("vector_sync_embed_seconds") == 1
740+
assert histogram_names.count("vector_sync_write_seconds") == 1
741741
assert histogram_names.count("vector_sync_batch_total_seconds") == 1
742742
assert [name for name, _, _ in counter_calls].count("vector_sync_entities_total") == 1
743743

0 commit comments

Comments (0)