Skip to content

Commit 1207e8c

Browse files
committed
fix(core): respect embedding opt-outs during reindex
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent cdffd87 commit 1207e8c

3 files changed

Lines changed: 43 additions & 5 deletions

File tree

src/basic_memory/services/search_service.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Service for search operations."""
22

3+
import asyncio
34
import ast
45
import re
56
from datetime import datetime
@@ -462,8 +463,10 @@ async def sync_entity_vectors_batch(
462463
and not self._entity_embeddings_enabled(entity)
463464
)
464465
]
465-
for entity_id in opted_out_ids:
466-
await self._clear_entity_vectors(entity_id)
466+
if opted_out_ids:
467+
await asyncio.gather(
468+
*(self._clear_entity_vectors(entity_id) for entity_id in opted_out_ids)
469+
)
467470

468471
eligible_entity_ids = [
469472
entity_id
@@ -502,7 +505,7 @@ async def reindex_vectors(self, progress_callback=None) -> dict:
502505
# that reference entity_ids no longer in the entity table
503506
await self._purge_stale_search_rows()
504507

505-
batch_result = await self.repository.sync_entity_vectors_batch(
508+
batch_result = await self.sync_entity_vectors_batch(
506509
entity_ids,
507510
progress_callback=progress_callback,
508511
)

src/basic_memory/sync/sync_service.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,8 +626,6 @@ async def load(path: str) -> None:
626626
created_at=metadata.created_at,
627627
content=content,
628628
)
629-
except FileNotFoundError:
630-
await self.handle_delete(path)
631629
except FileOperationError as exc:
632630
# Trigger: FileService wraps binary read failures in FileOperationError.
633631
# Why: the service contract should stay consistent for direct callers.

tests/services/test_semantic_search.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,40 @@ async def test_embed_opt_out_note_still_participates_in_fts(
217217
)
218218

219219
assert any(result.entity_id == entity.id for result in results)
220+
221+
222+
@pytest.mark.asyncio
async def test_reindex_vectors_respects_embed_opt_out(search_service, monkeypatch):
    """A full reindex must delegate to the service's own batch sync.

    The service-level ``sync_entity_vectors_batch`` is where embedding
    opt-outs are filtered, so ``reindex_vectors`` has to hand it every
    entity id and report the counts it returns.
    """
    # Two entities: one opts out of embeddings through its metadata.
    opted_out = SimpleNamespace(id=41, entity_metadata={"embed": False})
    opted_in = SimpleNamespace(id=42, entity_metadata={})
    find_all_mock = AsyncMock(return_value=[opted_out, opted_in])
    monkeypatch.setattr(search_service.entity_repository, "find_all", find_all_mock)

    # Stub the collaborators so only the routing inside reindex_vectors runs.
    batch_outcome = VectorSyncBatchResult(
        entities_total=2,
        entities_synced=1,
        entities_failed=0,
        entities_skipped=1,
    )
    purge_mock = AsyncMock()
    batch_mock = AsyncMock(return_value=batch_outcome)
    monkeypatch.setattr(search_service, "_purge_stale_search_rows", purge_mock)
    monkeypatch.setattr(search_service, "sync_entity_vectors_batch", batch_mock)

    stats = await search_service.reindex_vectors()

    # Stale rows are purged once, then every id goes to the service-level batch.
    purge_mock.assert_awaited_once()
    batch_mock.assert_awaited_once_with([41, 42], progress_callback=None)

    expected_stats = {
        "total_entities": 2,
        "embedded": 1,
        "skipped": 1,
        "errors": 0,
    }
    assert stats == expected_stats

0 commit comments

Comments
 (0)