Skip to content

Commit 0023e73

Browse files
groksrc and claude authored
feat: add context-aware wiki link resolution with source_path support (#527)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0b20801 commit 0023e73

7 files changed

Lines changed: 749 additions & 8 deletions

File tree

src/basic_memory/api/v2/routers/knowledge_router.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,12 @@ async def resolve_identifier(
103103
resolution_method = "external_id" if entity else "search"
104104

105105
# If not found by external_id, try other resolution methods
106+
# Pass source_path for context-aware resolution (prefers notes closer to source)
107+
# Pass strict to control fuzzy search fallback (default False allows fuzzy matching)
106108
if not entity:
107-
entity = await link_resolver.resolve_link(data.identifier)
109+
entity = await link_resolver.resolve_link(
110+
data.identifier, source_path=data.source_path, strict=data.strict
111+
)
108112
if entity:
109113
# Determine resolution method
110114
if entity.permalink == data.identifier:

src/basic_memory/repository/entity_repository.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
from loguru import logger
8-
from sqlalchemy import select
8+
from sqlalchemy import select, func
99
from sqlalchemy.exc import IntegrityError
1010
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
1111
from sqlalchemy.orm import selectinload
@@ -69,12 +69,21 @@ async def get_by_permalink(self, permalink: str) -> Optional[Entity]:
6969
return await self.find_one(query)
7070

7171
async def get_by_title(self, title: str) -> Sequence[Entity]:
72-
"""Get entity by title.
72+
"""Get entities by title, ordered by shortest path first.
73+
74+
When multiple entities share the same title (in different folders),
75+
returns them ordered by file_path length then alphabetically.
76+
This provides "shortest path" resolution for duplicate titles.
7377
7478
Args:
7579
title: Title of the entity to find
7680
"""
77-
query = self.select().where(Entity.title == title).options(*self.get_load_options())
81+
query = (
82+
self.select()
83+
.where(Entity.title == title)
84+
.order_by(func.length(Entity.file_path), Entity.file_path)
85+
.options(*self.get_load_options())
86+
)
7887
result = await self.execute_query(query)
7988
return list(result.scalars().all())
8089

src/basic_memory/schemas/v2/entity.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ class EntityResolveRequest(BaseModel):
1515
- Permalinks (e.g., "specs/search")
1616
- Titles (e.g., "Search Specification")
1717
- File paths (e.g., "specs/search.md")
18+
19+
When source_path is provided, resolution prefers notes closer to the source
20+
(context-aware resolution for duplicate titles).
1821
"""
1922

2023
identifier: str = Field(
@@ -23,6 +26,15 @@ class EntityResolveRequest(BaseModel):
2326
min_length=1,
2427
max_length=500,
2528
)
29+
source_path: Optional[str] = Field(
30+
None,
31+
description="Path of the source file containing the link (for context-aware resolution)",
32+
max_length=500,
33+
)
34+
strict: bool = Field(
35+
False,
36+
description="If True, only exact matches are allowed (no fuzzy search fallback)",
37+
)
2638

2739

2840
class EntityResolveResponse(BaseModel):

src/basic_memory/services/link_resolver.py

Lines changed: 134 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,81 @@ def __init__(self, entity_repository: EntityRepository, search_service: SearchSe
2828
self.search_service = search_service
2929

3030
async def resolve_link(
31-
self, link_text: str, use_search: bool = True, strict: bool = False
31+
self,
32+
link_text: str,
33+
use_search: bool = True,
34+
strict: bool = False,
35+
source_path: Optional[str] = None,
3236
) -> Optional[Entity]:
3337
"""Resolve a markdown link to a permalink.
3438
3539
Args:
3640
link_text: The link text to resolve
3741
use_search: Whether to use search-based fuzzy matching as fallback
3842
strict: If True, only exact matches are allowed (no fuzzy search fallback)
43+
source_path: Optional path of the source file containing the link.
44+
Used to prefer notes closer to the source (context-aware resolution).
3945
"""
40-
logger.trace(f"Resolving link: {link_text}")
46+
logger.trace(f"Resolving link: {link_text} (source: {source_path})")
4147

4248
# Clean link text and extract any alias
4349
clean_text, alias = self._normalize_link_text(link_text)
4450

51+
# --- Path Resolution ---
52+
# Note: All paths in Basic Memory are stored as POSIX strings (forward slashes)
53+
# for cross-platform compatibility. See entity_repository.py which normalizes
54+
# paths using Path().as_posix(). This allows consistent path operations here.
55+
56+
# --- Relative Path Resolution ---
57+
# Trigger: source_path is provided AND link contains "/"
58+
# Why: Resolve paths like [[nested/deep-note]] relative to source folder first
59+
# Outcome: [[nested/deep-note]] from testing/link-test.md → testing/nested/deep-note.md
60+
if source_path and "/" in clean_text:
61+
source_folder = source_path.rsplit("/", 1)[0] if "/" in source_path else ""
62+
if source_folder:
63+
# Construct relative path from source folder
64+
relative_path = f"{source_folder}/{clean_text}"
65+
66+
# Try with .md extension
67+
if not relative_path.endswith(".md"):
68+
relative_path_md = f"{relative_path}.md"
69+
entity = await self.entity_repository.get_by_file_path(relative_path_md)
70+
if entity:
71+
return entity
72+
73+
# Try as-is (already has extension or is a permalink)
74+
entity = await self.entity_repository.get_by_file_path(relative_path)
75+
if entity:
76+
return entity
77+
78+
# When source_path is provided, use context-aware resolution:
79+
# Check both permalink and title matches, prefer closest to source.
80+
# Example: [[testing]] from folder/note.md prefers folder/testing.md
81+
# over a root testing.md with permalink "testing".
82+
if source_path:
83+
# Gather all potential matches
84+
candidates: list[Entity] = []
85+
86+
# Check permalink match
87+
permalink_entity = await self.entity_repository.get_by_permalink(clean_text)
88+
if permalink_entity:
89+
candidates.append(permalink_entity)
90+
91+
# Check title matches
92+
title_entities = await self.entity_repository.get_by_title(clean_text)
93+
for entity in title_entities:
94+
# Avoid duplicates (permalink match might also be in title matches)
95+
if entity.id not in [c.id for c in candidates]:
96+
candidates.append(entity)
97+
98+
if candidates:
99+
if len(candidates) == 1:
100+
return candidates[0]
101+
else:
102+
# Multiple candidates - pick closest to source
103+
return self._find_closest_entity(candidates, source_path)
104+
105+
# Standard resolution (no source context): permalink first, then title
45106
# 1. Try exact permalink match first (most efficient)
46107
entity = await self.entity_repository.get_by_permalink(clean_text)
47108
if entity:
@@ -51,7 +112,7 @@ async def resolve_link(
51112
# 2. Try exact title match
52113
found = await self.entity_repository.get_by_title(clean_text)
53114
if found:
54-
# Return first match if there are duplicates (consistent behavior)
115+
# Return first match (shortest path) if no source context
55116
entity = found[0]
56117
logger.debug(f"Found title match: {entity.title}")
57118
return entity
@@ -108,7 +169,7 @@ def _normalize_link_text(self, link_text: str) -> Tuple[str, Optional[str]]:
108169
if text.startswith("[[") and text.endswith("]]"):
109170
text = text[2:-2]
110171

111-
# Handle Obsidian-style aliases (format: [[actual|alias]])
172+
# Handle wiki link aliases (format: [[actual|alias]])
112173
alias = None
113174
if "|" in text:
114175
text, alias = text.split("|", 1)
@@ -119,3 +180,72 @@ def _normalize_link_text(self, link_text: str) -> Tuple[str, Optional[str]]:
119180
text = text.strip()
120181

121182
return text, alias
183+
184+
def _find_closest_entity(self, entities: list[Entity], source_path: str) -> Entity:
185+
"""Find the entity closest to the source file path.
186+
187+
Context-aware resolution: prefer notes in the same folder or closer in hierarchy.
188+
189+
Proximity Scoring Algorithm:
190+
- Priority 0: Same folder as source (best match)
191+
- Priority 1-N: Ancestor folders (N = levels up from source)
192+
- Priority 100+N: Descendant folders (N = levels down, deprioritized)
193+
- Priority 1000: Completely unrelated paths (least preferred)
194+
- Ties are broken by shortest absolute path (consistent behavior)
195+
196+
Args:
197+
entities: List of entities with the same title
198+
source_path: Path of the file containing the link
199+
200+
Returns:
201+
The entity closest to the source path
202+
"""
203+
# Extract source folder (everything before the last /)
204+
source_folder = source_path.rsplit("/", 1)[0] if "/" in source_path else ""
205+
206+
def path_proximity(entity: Entity) -> Tuple[int, int]:
207+
"""Return (proximity_score, path_length) for sorting.
208+
209+
Lower is better for both values.
210+
"""
211+
entity_path = entity.file_path
212+
entity_folder = entity_path.rsplit("/", 1)[0] if "/" in entity_path else ""
213+
214+
# Trigger: entity is in the same folder as source
215+
# Why: same-folder notes are most contextually relevant
216+
# Outcome: priority = 0 (best), ties broken by shortest path
217+
if entity_folder == source_folder:
218+
return (0, len(entity_path))
219+
220+
# Trigger: entity is in an ancestor folder of source
221+
# e.g., source is "a/b/c/file.md", entity is "a/b/note.md" -> ancestor
222+
# Why: ancestors are contextually relevant (shared parent context)
223+
# Outcome: priority = levels_up (1, 2, 3...), closer ancestors preferred
224+
if source_folder.startswith(entity_folder + "/") if entity_folder else source_folder:
225+
# Count how many levels up
226+
if entity_folder:
227+
levels_up = source_folder.count("/") - entity_folder.count("/")
228+
else:
229+
# Root level
230+
levels_up = source_folder.count("/") + 1
231+
return (levels_up, len(entity_path))
232+
233+
# Trigger: entity is in a descendant folder of source
234+
# e.g., source is "a/file.md", entity is "a/b/c/note.md" -> descendant
235+
# Why: descendants are less contextually relevant than ancestors
236+
# Outcome: priority = 100 + levels_down, significantly deprioritized
237+
if entity_folder.startswith(source_folder + "/") if source_folder else entity_folder:
238+
if source_folder:
239+
levels_down = entity_folder.count("/") - source_folder.count("/")
240+
else:
241+
# Source is at root
242+
levels_down = entity_folder.count("/") + 1
243+
return (100 + levels_down, len(entity_path))
244+
245+
# Trigger: entity is in a completely unrelated path
246+
# Why: no folder relationship means minimal contextual relevance
247+
# Outcome: priority = 1000, only selected if no related paths exist
248+
return (1000, len(entity_path))
249+
250+
# Sort by proximity (lower is better), then by path length (shorter is better)
251+
return min(entities, key=path_proximity)

tests/api/v2/test_knowledge_router.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,60 @@ async def test_resolve_identifier_not_found(client: AsyncClient, v2_project_url)
5252
assert "Entity not found" in response.json()["detail"]
5353

5454

55+
@pytest.mark.asyncio
async def test_resolve_identifier_no_fuzzy_match(client: AsyncClient, v2_project_url):
    """Test that strict resolution never falls back to fuzzy search.

    With ``strict`` enabled, an identifier only resolves on an exact match
    (permalink, title, or path); a similar-sounding entity found via fuzzy
    search must not be returned.
    """
    # Create an entity that a fuzzy search could plausibly surface
    entity_data = {
        "title": "link-test",
        "folder": "testing",
        "content": "A test note",
    }
    response = await client.post(f"{v2_project_url}/knowledge/entities", json=entity_data)
    assert response.status_code == 200

    # Request strict mode explicitly: the request schema defaults strict to False,
    # which would leave the fuzzy search fallback enabled.
    resolve_data = {"identifier": "nonexistent", "strict": True}
    response = await client.post(f"{v2_project_url}/knowledge/resolve", json=resolve_data)

    # Must return 404, not a fuzzy match to "link-test"
    assert response.status_code == 404
    assert "Entity not found" in response.json()["detail"]
78+
79+
80+
@pytest.mark.asyncio
async def test_resolve_identifier_with_source_path_no_fuzzy_match(
    client: AsyncClient, v2_project_url
):
    """Test that strict mode also applies to context-aware resolution.

    Even when source_path is supplied for context-aware resolution, a
    nonexistent link resolved in strict mode should return 404 rather than
    fuzzy match to a nearby entity.
    """
    # Create an entity in the folder the link will be resolved from
    entity_data = {
        "title": "link-test",
        "folder": "testing/nested",
        "content": "A nested test note",
    }
    response = await client.post(f"{v2_project_url}/knowledge/entities", json=entity_data)
    assert response.status_code == 200

    # Resolve "nonexistent" with source_path context.
    # strict is sent explicitly because the request schema defaults it to False,
    # which would leave the fuzzy search fallback enabled.
    resolve_data = {
        "identifier": "nonexistent",
        "source_path": "testing/nested/other-note.md",
        "strict": True,
    }
    response = await client.post(f"{v2_project_url}/knowledge/resolve", json=resolve_data)

    # Must return 404, not a fuzzy match
    assert response.status_code == 404
    assert "Entity not found" in response.json()["detail"]
107+
108+
55109
@pytest.mark.asyncio
56110
async def test_get_entity_by_id(client: AsyncClient, test_graph, v2_project_url, entity_repository):
57111
"""Test getting an entity by its external_id (UUID)."""

tests/repository/test_entity_repository.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,66 @@ async def test_get_by_title(entity_repository: EntityRepository, session_maker):
456456
assert len(found) == 2
457457

458458

459+
@pytest.mark.asyncio
async def test_get_by_title_returns_shortest_path_first(
    entity_repository: EntityRepository, session_maker
):
    """Verify that entities sharing a title come back shortest file path first.

    Three notes titled "My Note" live at different folder depths. They are
    inserted longest-path-first, so the expected result order cannot be a
    side effect of insertion order.
    """
    # (permalink, file_path) pairs, deliberately listed from longest to shortest path
    placements = [
        ("archive/old/2024/my-note", "archive/old/2024/My Note.md"),
        ("docs/my-note", "docs/My Note.md"),
        ("my-note", "My Note.md"),
    ]

    async with db.scoped_session(session_maker) as session:
        session.add_all(
            [
                Entity(
                    project_id=entity_repository.project_id,
                    title="My Note",
                    entity_type="note",
                    permalink=permalink,
                    file_path=file_path,
                    content_type="text/markdown",
                    created_at=datetime.now(timezone.utc),
                    updated_at=datetime.now(timezone.utc),
                )
                for permalink, file_path in placements
            ]
        )
        await session.flush()

    matches = await entity_repository.get_by_title("My Note")

    # All three duplicates are returned...
    assert len(matches) == 3

    # ...ordered from the shortest file path to the longest
    assert [match.file_path for match in matches] == [
        "My Note.md",
        "docs/My Note.md",
        "archive/old/2024/My Note.md",
    ]
517+
518+
459519
@pytest.mark.asyncio
460520
async def test_get_by_file_path(entity_repository: EntityRepository, session_maker):
461521
"""Test getting an entity by file path."""

0 commit comments

Comments
 (0)