diff --git a/pyproject.toml b/pyproject.toml
index 674d8310..5c7cf1ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
     "pyjwt>=2.10.1",
     "python-dotenv>=1.1.0",
     "pytest-aio>=1.9.0",
-    "aiofiles>=24.1.0",  # Optional observability (disabled by default via config)
+    "aiofiles>=24.1.0",
     "asyncpg>=0.30.0",
     "nest-asyncio>=1.6.0",  # For Alembic migrations with Postgres
     "pytest-asyncio>=1.2.0",
@@ -47,6 +47,7 @@ dependencies = [
     "fastembed>=0.7.4",
     "sqlite-vec>=0.1.6",
     "openai>=1.100.2",
+    "logfire>=4.19.0",
 ]
 
 [project.urls]
@@ -58,9 +59,6 @@ Documentation = "https://github.com/basicmachines-co/basic-memory#readme"
 basic-memory = "basic_memory.cli.main:app"
 bm = "basic_memory.cli.main:app"
 
-[project.optional-dependencies]
-telemetry = ["logfire>=4.19.0"]
-
 [build-system]
 requires = ["hatchling", "uv-dynamic-versioning>=0.7.0"]
 build-backend = "hatchling.build"
diff --git a/src/basic_memory/api/app.py b/src/basic_memory/api/app.py
index 4b10cc8d..2a553c26 100644
--- a/src/basic_memory/api/app.py
+++ b/src/basic_memory/api/app.py
@@ -25,7 +25,7 @@
     list_projects,
     synchronize_projects,
 )
-from basic_memory import telemetry
+import logfire
 from basic_memory.config import init_api_logging
 from basic_memory.services.exceptions import EntityAlreadyExistsError
 from basic_memory.services.initialization import initialize_app
@@ -44,7 +44,7 @@ async def lifespan(app: FastAPI):  # pragma: no cover
     set_container(container)
     app.state.container = container
 
-    with telemetry.operation(
+    with logfire.span(
         "api.lifecycle.startup",
         entrypoint="api",
         mode=container.mode.name.lower(),
@@ -69,7 +69,7 @@ async def lifespan(app: FastAPI):  # pragma: no cover
     yield
 
     # Shutdown - coordinator handles clean task cancellation
-    with telemetry.operation(
+    with logfire.span(
         "api.lifecycle.shutdown",
         entrypoint="api",
         mode=container.mode.name.lower(),
diff --git a/src/basic_memory/api/v2/routers/knowledge_router.py b/src/basic_memory/api/v2/routers/knowledge_router.py
index e674cbb3..eef04bc7 100644
--- a/src/basic_memory/api/v2/routers/knowledge_router.py
+++ b/src/basic_memory/api/v2/routers/knowledge_router.py
@@ -13,7 +13,7 @@
 from fastapi import APIRouter, HTTPException, Response, Path
 from loguru import logger
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.deps import (
     EntityServiceV2ExternalDep,
     SearchServiceV2ExternalDep,
@@ -74,7 +74,7 @@ async def get_graph(
     Returns a flat node/edge structure optimized for rendering with graph libraries.
     Only includes resolved relations (where to_id is not null).
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.get_graph",
         entrypoint="api",
         domain="knowledge",
@@ -148,7 +148,7 @@ async def resolve_identifier(
         "resolution_method": "permalink"
     }
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.resolve_entity",
         entrypoint="api",
         domain="knowledge",
@@ -215,7 +215,7 @@ async def get_entity_by_id(
     Raises:
         HTTPException: 404 if entity not found
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.get_entity",
         entrypoint="api",
         domain="knowledge",
@@ -255,7 +255,7 @@ async def create_entity(
     Returns:
         Created entity with generated external_id (UUID) and file content
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.create_entity",
         entrypoint="api",
         domain="knowledge",
@@ -313,7 +313,7 @@ async def update_entity_by_id(
     Returns:
         Updated entity with file content
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.update_entity",
         entrypoint="api",
         domain="knowledge",
@@ -385,7 +385,7 @@ async def edit_entity_by_id(
     Raises:
         HTTPException: 404 if entity not found, 400 if edit fails
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.edit_entity",
         entrypoint="api",
         domain="knowledge",
@@ -454,7 +454,7 @@ async def delete_entity_by_id(
     Note:
         Returns deleted=False if entity doesn't exist (idempotent)
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.delete_entity",
         entrypoint="api",
         domain="knowledge",
@@ -503,7 +503,7 @@ async def move_entity(
     Returns:
         Updated entity with new file path
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.move_entity",
         entrypoint="api",
         domain="knowledge",
@@ -583,7 +583,7 @@ async def move_directory(
     Returns:
         DirectoryMoveResult with counts and details of moved files
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.move_directory",
         entrypoint="api",
         domain="knowledge",
@@ -647,7 +647,7 @@ async def delete_directory(
     Returns:
         DirectoryDeleteResult with counts and details of deleted files
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.knowledge.delete_directory",
         entrypoint="api",
         domain="knowledge",
diff --git a/src/basic_memory/api/v2/routers/memory_router.py b/src/basic_memory/api/v2/routers/memory_router.py
index 17ef1eed..a5e489c8 100644
--- a/src/basic_memory/api/v2/routers/memory_router.py
+++ b/src/basic_memory/api/v2/routers/memory_router.py
@@ -9,7 +9,7 @@
 from fastapi import APIRouter, Query, Path
 from loguru import logger
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.deps import ContextServiceV2ExternalDep, EntityRepositoryV2ExternalDep
 from basic_memory.schemas.base import TimeFrame, parse_timeframe
 from basic_memory.schemas.memory import (
@@ -51,7 +51,7 @@ async def recent(
     Returns:
         GraphContext with recent activity and related entities
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.memory.recent_activity",
         entrypoint="api",
         domain="memory",
@@ -72,7 +72,7 @@ async def recent(
         limit = page_size
         offset = (page - 1) * page_size
 
-        with telemetry.scope(
+        with logfire.span(
             "api.memory.recent_activity.build_context",
             domain="memory",
             action="recent_activity",
@@ -88,7 +88,7 @@ async def recent(
             offset=offset,
             max_related=max_related,
         )
-        with telemetry.scope(
+        with logfire.span(
             "api.memory.recent_activity.shape_response",
             domain="memory",
             action="recent_activity",
@@ -137,7 +137,7 @@ async def get_memory_context(
     Returns:
         GraphContext with the entity and its related context
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.memory.build_context",
         entrypoint="api",
         domain="memory",
@@ -154,7 +154,7 @@ async def get_memory_context(
         limit = page_size
         offset = (page - 1) * page_size
 
-        with telemetry.scope(
+        with logfire.span(
             "api.memory.build_context.build_context",
             domain="memory",
             action="build_context",
@@ -170,7 +170,7 @@ async def get_memory_context(
             offset=offset,
             max_related=max_related,
         )
-        with telemetry.scope(
+        with logfire.span(
             "api.memory.build_context.shape_response",
             domain="memory",
             action="build_context",
diff --git a/src/basic_memory/api/v2/routers/resource_router.py b/src/basic_memory/api/v2/routers/resource_router.py
index 4ebf9450..7c4afca8 100644
--- a/src/basic_memory/api/v2/routers/resource_router.py
+++ b/src/basic_memory/api/v2/routers/resource_router.py
@@ -15,7 +15,7 @@
 from fastapi import APIRouter, HTTPException, Response, Path
 from loguru import logger
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.deps import (
     ProjectConfigV2ExternalDep,
     FileServiceV2ExternalDep,
@@ -56,7 +56,7 @@ async def get_resource_content(
     Raises:
         HTTPException: 404 if entity or file not found
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.resource.get_content",
         entrypoint="api",
         domain="resource",
@@ -64,7 +64,7 @@ async def get_resource_content(
     ):
         logger.debug(f"V2 Getting content for project {project_id}, entity_id: {entity_id}")
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.get_content.load_entity",
             domain="resource",
             action="get_content",
@@ -74,7 +74,7 @@ async def get_resource_content(
         if not entity:
             raise HTTPException(status_code=404, detail=f"Entity {entity_id} not found")
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.get_content.validate_path",
             domain="resource",
             action="get_content",
@@ -90,7 +90,7 @@ async def get_resource_content(
                 detail="Entity contains invalid file path",
             )
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.get_content.ensure_exists",
             domain="resource",
             action="get_content",
@@ -102,7 +102,7 @@ async def get_resource_content(
                 detail=f"File not found: {entity.file_path}",
             )
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.get_content.read_content",
             domain="resource",
             action="get_content",
@@ -139,7 +139,7 @@ async def create_resource(
     Raises:
         HTTPException: 400 for invalid file paths, 409 if file already exists
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.resource.create",
         entrypoint="api",
         domain="resource",
@@ -166,7 +166,7 @@ async def create_resource(
                 f"Use PUT /resource/{existing_entity.external_id} to update it.",
             )
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.create.write_file",
             domain="resource",
             action="create",
@@ -175,7 +175,7 @@ async def create_resource(
             await file_service.ensure_directory(PathLib(data.file_path).parent)
             checksum = await file_service.write_file(data.file_path, data.content)
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.create.read_metadata",
             domain="resource",
             action="create",
@@ -197,7 +197,7 @@ async def create_resource(
             created_at=file_metadata.created_at,
             updated_at=file_metadata.modified_at,
         )
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.create.upsert_entity",
             domain="resource",
             action="create",
@@ -205,7 +205,7 @@ async def create_resource(
         ):
             entity = await entity_repository.add(entity)
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.create.search_index",
             domain="resource",
             action="create",
@@ -258,7 +258,7 @@ async def update_resource(
     Raises:
         HTTPException: 404 if entity not found, 400 for invalid paths
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.resource.update",
         entrypoint="api",
         domain="resource",
@@ -282,7 +282,7 @@ async def update_resource(
                 "Path must be relative and stay within project boundaries.",
             )
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.update.write_file",
             domain="resource",
             action="update",
@@ -297,7 +297,7 @@ async def update_resource(
 
             checksum = await file_service.write_file(target_file_path, data.content)
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.update.read_metadata",
             domain="resource",
             action="update",
@@ -309,7 +309,7 @@ async def update_resource(
             content_type = file_service.content_type(target_file_path)
             note_type = "canvas" if target_file_path.endswith(".canvas") else "file"
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.update.update_entity",
             domain="resource",
             action="update",
@@ -329,7 +329,7 @@ async def update_resource(
         if updated_entity is None:
             raise HTTPException(status_code=404, detail=f"Entity {entity_id} not found")
 
-        with telemetry.scope(
+        with logfire.span(
             "api.resource.update.search_index",
             domain="resource",
             action="update",
diff --git a/src/basic_memory/api/v2/routers/search_router.py b/src/basic_memory/api/v2/routers/search_router.py
index 37ebfd58..4c8f2f9b 100644
--- a/src/basic_memory/api/v2/routers/search_router.py
+++ b/src/basic_memory/api/v2/routers/search_router.py
@@ -6,7 +6,7 @@
 
 from fastapi import APIRouter, HTTPException, Path
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.api.v2.utils import to_search_results
 from basic_memory.repository.semantic_errors import (
     SemanticDependenciesMissingError,
@@ -48,7 +48,7 @@ async def search(
     Returns:
         SearchResponse with paginated search results
     """
-    with telemetry.operation(
+    with logfire.span(
         "api.request.search",
         entrypoint="api",
         domain="search",
@@ -67,7 +67,7 @@ async def search(
         offset = (page - 1) * page_size
         fetch_limit = page_size + 1
         try:
-            with telemetry.scope(
+            with logfire.span(
                 "api.search.search.execute_query",
                 domain="search",
                 action="search",
@@ -83,7 +83,7 @@ async def search(
         except ValueError as exc:
             raise HTTPException(status_code=400, detail=str(exc)) from exc
 
-        with telemetry.scope(
+        with logfire.span(
             "api.search.search.paginate_results",
             domain="search",
             action="search",
@@ -94,7 +94,7 @@ async def search(
         if has_more:
             results = results[:page_size]
 
-        with telemetry.scope(
+        with logfire.span(
             "api.search.search.hydrate_results",
             domain="search",
             action="search",
@@ -102,7 +102,7 @@ async def search(
             result_count=len(results),
         ):
             search_results = await to_search_results(entity_service, results)
-        with telemetry.scope(
+        with logfire.span(
             "api.search.search.build_response",
             domain="search",
             action="search",
diff --git a/src/basic_memory/api/v2/utils.py b/src/basic_memory/api/v2/utils.py
index 2977e3ef..5c2b101f 100644
--- a/src/basic_memory/api/v2/utils.py
+++ b/src/basic_memory/api/v2/utils.py
@@ -1,6 +1,6 @@
 from typing import Any, Protocol, Optional, List, Sequence
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.repository.search_repository import SearchIndexRow
 from basic_memory.schemas.memory import (
     EntitySummary,
@@ -43,7 +43,7 @@ async def to_graph_context(
     page: Optional[int] = None,
     page_size: Optional[int] = None,
 ) -> GraphContext:
-    with telemetry.scope(
+    with logfire.span(
         "memory.hydrate_context",
         domain="memory",
         action="build_context",
@@ -80,7 +80,7 @@ async def to_graph_context(
     entity_title_lookup: dict[int, str] = {}
     entity_external_id_lookup: dict[int, str] = {}
     if entity_ids_needed:
-        with telemetry.scope(
+        with logfire.span(
             "memory.hydrate_context.lookup_entities",
             domain="memory",
             action="build_context",
@@ -148,7 +148,7 @@ def to_summary(
             created_at=item.created_at,
         )
 
-    with telemetry.scope(
+    with logfire.span(
         "memory.hydrate_context.shape_results",
         domain="memory",
         action="build_context",
@@ -198,7 +198,7 @@ def to_summary(
 async def to_search_results(
     entity_service: EntityServiceBatchLookup, results: List[SearchIndexRow]
 ) -> list[SearchResult]:
-    with telemetry.scope(
+    with logfire.span(
         "search.hydrate_results",
         domain="search",
         action="search",
@@ -215,7 +215,7 @@ async def to_search_results(
 
     # Single batch fetch for all entities
     entities_by_id: dict[int, Any] = {}
-    with telemetry.scope(
+    with logfire.span(
         "search.hydrate_results.fetch_entities",
         domain="search",
         action="search",
@@ -227,7 +227,7 @@ async def to_search_results(
         entities_by_id = {e.id: e for e in entities}
 
     search_results = []
-    with telemetry.scope(
+    with logfire.span(
         "search.hydrate_results.shape_results",
         domain="search",
         action="search",
diff --git a/src/basic_memory/cli/app.py b/src/basic_memory/cli/app.py
index 6c1b0ea6..2e88c810 100644
--- a/src/basic_memory/cli/app.py
+++ b/src/basic_memory/cli/app.py
@@ -12,7 +12,7 @@
 from basic_memory.cli.container import CliContainer, set_container  # noqa: E402
 from basic_memory.cli.promo import maybe_show_cloud_promo, maybe_show_init_line  # noqa: E402
 from basic_memory.config import init_cli_logging  # noqa: E402
-from basic_memory import telemetry  # noqa: E402
+import logfire  # noqa: E402
 
 
 def version_callback(value: bool) -> None:
@@ -45,7 +45,7 @@ def app_callback(
     init_cli_logging()
     command_name = ctx.invoked_subcommand or "root"
     ctx.with_resource(
-        telemetry.operation(
+        logfire.span(
             f"cli.command.{command_name}",
             entrypoint="cli",
             command_name=command_name,
diff --git a/src/basic_memory/indexing/batch_indexer.py b/src/basic_memory/indexing/batch_indexer.py
index 5de7ece5..a670d708 100644
--- a/src/basic_memory/indexing/batch_indexer.py
+++ b/src/basic_memory/indexing/batch_indexer.py
@@ -11,7 +11,7 @@
 from loguru import logger
 from sqlalchemy.exc import IntegrityError
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.config import BasicMemoryConfig
 from basic_memory.file_utils import compute_checksum, has_frontmatter, remove_frontmatter
 from basic_memory.markdown.schemas import EntityMarkdown
@@ -192,10 +192,10 @@ async def index_markdown_file(
         if not self._is_markdown(file):
             raise ValueError(f"index_markdown_file requires markdown input: {file.path}")
 
-        with telemetry.span("index.markdown_file.prepare", path=file.path):
+        with logfire.span("index.markdown_file.prepare", path=file.path):
             prepared = await self._prepare_markdown_file(file)
         if existing_permalink_by_path is None:
-            with telemetry.span("index.markdown_file.load_permalink_map", path=file.path):
+            with logfire.span("index.markdown_file.load_permalink_map", path=file.path):
                 existing_permalink_by_path = {
                     path: permalink
                     for path, permalink in (
@@ -208,11 +208,11 @@ async def index_markdown_file(
             for path, permalink in existing_permalink_by_path.items()
             if path != file.path and permalink
         }
-        with telemetry.span("index.markdown_file.normalize", path=file.path):
+        with logfire.span("index.markdown_file.normalize", path=file.path):
            prepared = await self._normalize_markdown_file(prepared, reserved_permalinks)
         existing_permalink_by_path[file.path] = prepared.markdown.frontmatter.permalink
 
-        with telemetry.span("index.markdown_file.persist", path=file.path, is_new=new):
+        with logfire.span("index.markdown_file.persist", path=file.path, is_new=new):
             persisted = await self._persist_markdown_file(
                 prepared,
                 is_new=new,
@@ -221,7 +221,7 @@ async def index_markdown_file(
             )
             existing_permalink_by_path[file.path] = persisted.entity.permalink
 
-        with telemetry.span(
+        with logfire.span(
             "index.markdown_file.reload_entity",
             path=file.path,
             entity_id=persisted.entity.id,
@@ -233,7 +233,7 @@ async def index_markdown_file(
             prepared_entity = self._build_prepared_entity(persisted.prepared, entity)
 
         if index_search:
-            with telemetry.span(
+            with logfire.span(
                 "index.markdown_file.refresh_search_index",
                 path=file.path,
                 entity_id=entity.id,
diff --git a/src/basic_memory/mcp/async_client.py b/src/basic_memory/mcp/async_client.py
index 421e658b..4ba4dd18 100644
--- a/src/basic_memory/mcp/async_client.py
+++ b/src/basic_memory/mcp/async_client.py
@@ -5,7 +5,7 @@
 from httpx import ASGITransport, AsyncClient, Timeout
 from loguru import logger
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.api.app import app as fastapi_app
 from basic_memory.config import ConfigManager, ProjectMode
 
@@ -44,7 +44,7 @@ def _asgi_client(timeout: Timeout) -> AsyncClient:
 
 async def _resolve_cloud_token(config) -> str:
     """Resolve cloud token with API key preferred, OAuth fallback."""
-    with telemetry.span(
+    with logfire.span(
         "routing.resolve_cloud_credentials",
         has_api_key=bool(config.cloud_api_key),
     ):
diff --git a/src/basic_memory/mcp/clients/knowledge.py b/src/basic_memory/mcp/clients/knowledge.py
index c9fd5e57..b0f5475b 100644
--- a/src/basic_memory/mcp/clients/knowledge.py
+++ b/src/basic_memory/mcp/clients/knowledge.py
@@ -7,7 +7,7 @@
 
 from httpx import AsyncClient
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.mcp.tools.utils import call_get, call_post, call_put, call_patch, call_delete
 from basic_memory.schemas.response import (
     EntityResponse,
@@ -56,7 +56,7 @@ async def create_entity(self, entity_data: dict[str, Any]) -> EntityResponse:
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.create_entity",
             client_name="knowledge",
             operation="create_entity",
@@ -88,7 +88,7 @@ async def update_entity(
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.update_entity",
             client_name="knowledge",
             operation="update_entity",
@@ -115,7 +115,7 @@ async def get_entity(self, entity_id: str) -> EntityResponse:
         Raises:
             ToolError: If the entity is not found or request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.get_entity",
             client_name="knowledge",
             operation="get_entity",
@@ -146,7 +146,7 @@ async def patch_entity(
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.patch_entity",
             client_name="knowledge",
             operation="patch_entity",
@@ -173,7 +173,7 @@ async def delete_entity(self, entity_id: str) -> DeleteEntitiesResponse:
         Raises:
             ToolError: If the entity is not found or request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.delete_entity",
             client_name="knowledge",
             operation="delete_entity",
@@ -200,7 +200,7 @@ async def move_entity(self, entity_id: str, destination_path: str) -> EntityResponse:
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.move_entity",
             client_name="knowledge",
             operation="move_entity",
@@ -230,7 +230,7 @@ async def move_directory(
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.move_directory",
             client_name="knowledge",
             operation="move_directory",
@@ -260,7 +260,7 @@ async def delete_directory(self, directory: str) -> DirectoryDeleteResult:
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.delete_directory",
             client_name="knowledge",
             operation="delete_directory",
@@ -290,7 +290,7 @@ async def resolve_entity(self, identifier: str, *, strict: bool = False) -> str:
         Raises:
             ToolError: If the identifier cannot be resolved
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.knowledge.resolve_entity",
             client_name="knowledge",
             operation="resolve_entity",
diff --git a/src/basic_memory/mcp/clients/memory.py b/src/basic_memory/mcp/clients/memory.py
index bc1e8b56..17a3b60f 100644
--- a/src/basic_memory/mcp/clients/memory.py
+++ b/src/basic_memory/mcp/clients/memory.py
@@ -7,7 +7,7 @@
 
 from httpx import AsyncClient
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.mcp.tools.utils import call_get
 from basic_memory.schemas.memory import GraphContext
 
@@ -72,7 +72,7 @@ async def build_context(
         if timeframe:
             params["timeframe"] = timeframe
 
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.memory.build_context",
             client_name="memory",
             operation="build_context",
@@ -123,7 +123,7 @@ async def recent(
             # Join types as comma-separated string if provided
             params["type"] = ",".join(types) if isinstance(types, list) else types
 
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.memory.recent_activity",
             client_name="memory",
             operation="recent_activity",
diff --git a/src/basic_memory/mcp/clients/resource.py b/src/basic_memory/mcp/clients/resource.py
index 1b3a50cd..dbcb3ee7 100644
--- a/src/basic_memory/mcp/clients/resource.py
+++ b/src/basic_memory/mcp/clients/resource.py
@@ -7,7 +7,7 @@
 
 from httpx import AsyncClient, Response
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.mcp.tools.utils import call_get
 
 
@@ -65,7 +65,7 @@ async def read(
         if page_size is not None:
             params["page_size"] = page_size
 
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.resource.read",
             client_name="resource",
             operation="read",
diff --git a/src/basic_memory/mcp/clients/search.py b/src/basic_memory/mcp/clients/search.py
index 0727851b..4419ff50 100644
--- a/src/basic_memory/mcp/clients/search.py
+++ b/src/basic_memory/mcp/clients/search.py
@@ -7,7 +7,7 @@
 
 from httpx import AsyncClient
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.mcp.tools.utils import call_post
 from basic_memory.schemas.search import SearchResponse
 
@@ -57,7 +57,7 @@ async def search(
         Raises:
             ToolError: If the request fails
         """
-        with telemetry.scope(
+        with logfire.span(
             "mcp.client.search.search",
             client_name="search",
             operation="search",
diff --git a/src/basic_memory/mcp/project_context.py b/src/basic_memory/mcp/project_context.py
index 2c2f58df..f1fffbd5 100644
--- a/src/basic_memory/mcp/project_context.py
+++ b/src/basic_memory/mcp/project_context.py
@@ -19,7 +19,7 @@
 from fastmcp import Context
 from mcp.server.fastmcp.exceptions import ToolError
 
-from basic_memory import telemetry
+import logfire
 from basic_memory.config import BasicMemoryConfig, ConfigManager, ProjectMode
 from basic_memory.project_resolver import ProjectResolver
 from basic_memory.schemas.cloud import WorkspaceInfo, WorkspaceListResponse
@@ -159,7 +159,7 @@ async def resolve_project_parameter(
     Returns:
         Resolved project name or None if no resolution possible
     """
-    with telemetry.span(
+    with logfire.span(
         "routing.resolve_project",
         requested_project=project,
         allow_discovery=allow_discovery,
@@ -271,7 +271,7 @@ async def resolve_workspace_parameter(
     context: Optional[Context] = None,
 ) -> WorkspaceInfo:
     """Resolve workspace using explicit input, session cache, and cloud discovery."""
-    with telemetry.scope(
+    with logfire.span(
         "routing.resolve_workspace",
         workspace_requested=workspace is not None,
         has_context=context is not None,
@@ -347,7 +347,7 @@ async def get_active_project(
         ValueError: If no project can be resolved
         HTTPError: If project doesn't exist or is inaccessible
     """
-    with telemetry.scope(
+    with logfire.span(
         "routing.validate_project",
         requested_project=project,
         has_context=context is not None,
@@ -431,7 +431,7 @@ async def resolve_project_and_path(
     is_memory_url = identifier.strip().startswith("memory://")
     config = ConfigManager().config
     include_project = config.permalinks_include_project if is_memory_url else None
-    with telemetry.scope(
+    with logfire.span(
         "routing.resolve_memory_url",
         is_memory_url=is_memory_url,
         requested_project=project,
@@ -628,7 +628,7 @@ async def get_project_client(
     # Outcome: factory client with optional workspace override via inner request headers
     if is_factory_mode():
         route_mode = "factory"
-        with telemetry.scope(
+        with logfire.span(
             "routing.client_session",
             project_name=resolved_project,
             route_mode=route_mode,
@@ -646,7 +646,7 @@ async def get_project_client(
     # Outcome: route strictly based on explicit flag, no workspace network calls
     if _explicit_routing() and _force_local_mode():
         route_mode = "explicit_local"
-        with telemetry.scope(
+        with logfire.span(
             "routing.client_session",
             project_name=resolved_project,
             route_mode=route_mode,
@@ -689,7 +689,7 @@ async def get_project_client(
     # which checks context cache, auto-selects single workspace, or errors
     if effective_workspace is not None:
         # Config-resolved workspace — pass directly to get_client, skip network lookup
-        with telemetry.scope(
+        with logfire.span(
             "routing.client_session",
             project_name=resolved_project,
             route_mode=route_mode,
@@ -705,7 +705,7 @@ async def get_project_client(
     else:
         # No config-based workspace — use resolve_workspace_parameter for discovery
         active_ws = await resolve_workspace_parameter(workspace=None, context=context)
-        with telemetry.scope(
+        with logfire.span(
             "routing.client_session",
             project_name=resolved_project,
             route_mode=route_mode,
@@ -722,7 +722,7 @@ async def get_project_client(
 
     # Step 4: Local routing (default)
     route_mode = "local_asgi"
-    with telemetry.scope(
+    with logfire.span(
         "routing.client_session",
         project_name=resolved_project,
         route_mode=route_mode,
diff --git a/src/basic_memory/mcp/server.py b/src/basic_memory/mcp/server.py
index e2a9eaa8..aaee21af 100644
--- a/src/basic_memory/mcp/server.py
+++ b/src/basic_memory/mcp/server.py
@@ -15,7 +15,7 @@
 from basic_memory.db import scoped_session
 from basic_memory.mcp.container import McpContainer, set_container
 from basic_memory.services.initialization import initialize_app
-from basic_memory import telemetry
+import logfire
 
 
 async def _log_embedding_status(session_maker: async_sessionmaker[AsyncSession]) -> None:
@@ -63,7 +63,7 @@ async def lifespan(app: FastMCP):
     set_container(container)
     config = container.config
 
-    with telemetry.operation(
+    with logfire.span(
         "mcp.lifecycle.startup",
         entrypoint="mcp",
         mode=container.mode.name.lower(),
@@ -124,7 +124,7 @@ async def lifespan(app: FastMCP):
         yield
     finally:
         # Shutdown - coordinator handles clean task cancellation
-        with telemetry.operation(
+        with logfire.span(
             "mcp.lifecycle.shutdown",
             entrypoint="mcp",
             mode=container.mode.name.lower(),
diff --git a/src/basic_memory/mcp/tools/build_context.py b/src/basic_memory/mcp/tools/build_context.py
index 4131b365..5a0c2643 100644
--- a/src/basic_memory/mcp/tools/build_context.py
+++ b/src/basic_memory/mcp/tools/build_context.py
@@ -2,11 +2,11 @@
 
 from typing import Optional, Literal
 
+import logfire
 from loguru import logger
 from fastmcp import Context
 
 from basic_memory.config import ConfigManager
-from basic_memory import telemetry
 from basic_memory.mcp.project_context import (
     detect_project_from_url_prefix,
     get_project_client,
@@ -202,7 +202,7 @@ async def build_context(
 
     # URL is already validated and normalized by MemoryUrl type annotation
 
-    with telemetry.operation(
+    with logfire.span(
         "mcp.tool.build_context",
         entrypoint="mcp",
         tool_name="build_context",
@@ -217,47 +217,42 @@ async def build_context(
         is_memory_url=str(url).startswith("memory://"),
     ):
         async with get_project_client(project, workspace, context) as (client, active_project):
-            with telemetry.contextualize(
-                project_name=active_project.name,
-                workspace_id=workspace,
-                tool_name="build_context",
-            ):
-                logger.info(
-                    f"MCP tool call tool=build_context project={active_project.name} "
-                    f"url={url} depth={depth} timeframe={timeframe} output_format={output_format}"
-                )
-
-                # Resolve memory:// identifier with project-prefix awareness
-                _, resolved_path, _ = await resolve_project_and_path(
-                    client,
-                    url,
-                    active_project.name,
-                    context,
-                )
-
-                # Import here to avoid circular import
-                from basic_memory.mcp.clients import MemoryClient
-
-                # Use typed MemoryClient for API calls
-                memory_client = MemoryClient(client, active_project.external_id)
-                graph = await memory_client.build_context(
-                    resolved_path,
-                    depth=depth or 1,
-                    timeframe=timeframe,
-                    page=page,
-                    page_size=page_size,
-                    max_related=max_related,
-                )
-
-                logger.info(
-                    f"MCP tool response: tool=build_context project={active_project.name} "
-                    f"uri={graph.metadata.uri or resolved_path} "
-                    f"primary_count={graph.metadata.primary_count or 0} "
-                    f"related_count={graph.metadata.related_count or 0} "
-                    f"output_format={output_format}"
-                )
-
-                if output_format == "text":
-                    return _format_context_markdown(graph, active_project.name)
-
-                return graph.model_dump()
+            logger.info(
+                f"MCP tool call tool=build_context project={active_project.name} "
+                f"url={url} depth={depth} timeframe={timeframe} output_format={output_format}"
+            )
+
+            # Resolve memory:// identifier with project-prefix awareness
+            _, resolved_path, _ = await resolve_project_and_path(
+                client,
+                url,
+                active_project.name,
+                context,
+            )
+
+            # Import here to avoid circular import
+            from basic_memory.mcp.clients import MemoryClient
+
+            # Use typed MemoryClient for API calls
+            memory_client = MemoryClient(client, active_project.external_id)
+            graph = await memory_client.build_context(
+                resolved_path,
+                depth=depth or 1,
+                timeframe=timeframe,
+                page=page,
+                page_size=page_size,
+                max_related=max_related,
+            )
+
+            logger.info(
+                f"MCP tool response: tool=build_context project={active_project.name} "
+                f"uri={graph.metadata.uri or resolved_path} "
+                f"primary_count={graph.metadata.primary_count or 0} "
+                f"related_count={graph.metadata.related_count or 0} "
+                f"output_format={output_format}"
+            )
+
+            if output_format == "text":
+                return _format_context_markdown(graph, active_project.name)
+
+            return graph.model_dump()
diff --git a/src/basic_memory/mcp/tools/edit_note.py b/src/basic_memory/mcp/tools/edit_note.py
index d9a623bd..8fbf1dab 100644
--- a/src/basic_memory/mcp/tools/edit_note.py
+++ b/src/basic_memory/mcp/tools/edit_note.py
@@ -2,11 +2,11 @@
 
 from typing import Optional, Literal
 
+import logfire
 from loguru import logger
 from fastmcp import Context
 
 from basic_memory.config import ConfigManager
-from basic_memory import telemetry
 from basic_memory.mcp.project_context import (
     detect_project_from_url_prefix,
     get_project_client,
@@ -271,7 +271,7 @@ async def edit_note(
     if detected:
         project = detected
 
-    with telemetry.operation(
+    with logfire.span(
         "mcp.tool.edit_note",
         entrypoint="mcp",
         tool_name="edit_note",
@@ -284,226 +284,217 @@ async def edit_note(
         expected_replacements=effective_replacements,
     ):
         async with get_project_client(project, workspace, context) as (client, active_project):
-            with telemetry.contextualize(
-                project_name=active_project.name,
-                workspace_id=workspace,
-                tool_name="edit_note",
-            ):
-                logger.info(
-                    f"MCP tool call tool=edit_note project={active_project.name} "
-                    f"identifier={identifier} operation={operation} output_format={output_format}"
-                )
-
-                # Validate operation
-                valid_operations = [
-                    "append",
-                    "prepend",
-                    "find_replace",
-                    "replace_section",
-                    "insert_before_section",
-                    "insert_after_section",
-                ]
-                if operation not in valid_operations:
-                    raise ValueError(
-                        f"Invalid operation '{operation}'. Must be one of: {', '.join(valid_operations)}"
-                    )
-
-                # Validate required parameters for specific operations
-                if operation == "find_replace" and not find_text:
-                    raise ValueError("find_text parameter is required for find_replace operation")
-                section_ops = ("replace_section", "insert_before_section", "insert_after_section")
-                if operation in section_ops and not section:
-                    raise ValueError("section parameter is required for section-based operations")
-
-                # Use the PATCH endpoint to edit the entity
-                try:
-                    # Import here to avoid circular import
-                    from basic_memory.mcp.clients import KnowledgeClient
-
-                    # Use typed KnowledgeClient for API calls
-                    knowledge_client = KnowledgeClient(client, active_project.external_id)
-
-                    file_created = False
-                    entity_id = ""
-                    result: EntityResponse | None = None
-
-                    # Try to resolve the entity; for append/prepend, create it if not found
-                    try:
-                        entity_id = await knowledge_client.resolve_entity(identifier, strict=True)
-                    except Exception as resolve_error:
-                        # Trigger: entity does not exist yet
-                        # Why: append/prepend can meaningfully create a new note from the content,
-                        # while find_replace/replace_section require existing content to modify
-                        # Outcome: note is created via the same path as write_note
-                        error_msg = str(resolve_error).lower()
-                        is_not_found = "entity not found" in error_msg or "not found" in error_msg
-
-                        if is_not_found and operation in ("append", "prepend"):
-                            title, directory = _parse_identifier_to_title_and_directory(identifier)
-
-                            # Validate directory path (same security check as write_note)
-                            project_path = active_project.home
-                            if directory and not validate_project_path(directory, project_path):
-                                logger.warning(
-                                    "Attempted path traversal attack blocked",
-                                    directory=directory,
-                                    project=active_project.name,
-                                )
-                                if output_format == "json":
-                                    return {
-                                        "title": title,
-                                        "permalink": None,
-                                        "file_path": None,
-                                        "checksum": None,
-                                        "operation": operation,
-                                        "fileCreated": False,
-                                        "error": "SECURITY_VALIDATION_ERROR",
-                                    }
-                                return f"# Error\n\nDirectory path '{directory}' is not allowed - paths must stay within project boundaries"
-
-                            entity = Entity(
-                                title=title,
-                                directory=directory,
-                                content_type="text/markdown",
-                                content=content,
-                            )
-
-                            logger.info(
-                                "Creating note via edit_note auto-create",
-                                title=title,
-                                directory=directory,
-                                operation=operation,
-                            )
-                            result = await knowledge_client.create_entity(entity.model_dump())
-                            file_created = True
-                        else:
-                            # find_replace/replace_section require existing content — re-raise
-                            raise resolve_error
-
-                    # --- Standard edit path (entity already existed) ---
-                    if not file_created:
-                        # Prepare the edit request data
-                        edit_data = {
-                            "operation": operation,
-                            "content": content,
-                        }
-
-                        # Add optional parameters
-                        if section:
-                            edit_data["section"] = section
-                        if find_text:
-                            edit_data["find_text"] = find_text
-                        if effective_replacements != 1:  # Only send if different from default
-                            edit_data["expected_replacements"] = str(effective_replacements)
-
-                        # Call the PATCH endpoint
-                        result = await knowledge_client.patch_entity(entity_id, edit_data)
-
-                    # --- Format response ---
-                    # result is always set: either by create_entity (auto-create) or patch_entity (edit)
-                    assert result is not None
-                    if file_created:
-                        summary = [
-                            f"# Created note ({operation})",
-                            f"project: {active_project.name}",
-                            f"file_path: {result.file_path}",
-                            f"permalink: {result.permalink}",
-                            f"checksum: {result.checksum[:8] if result.checksum else 'unknown'}",
-                            "fileCreated: true",
-                        ]
-                        lines_added = len(content.split("\n"))
-                        summary.append(f"operation: Created note with {lines_added} lines")
-                    else:
-                        summary = [
-                            f"# Edited note ({operation})",
-                            f"project: {active_project.name}",
-                            f"file_path: {result.file_path}",
-                            f"permalink: {result.permalink}",
-                            f"checksum: {result.checksum[:8] if result.checksum else 'unknown'}",
-                        ]
-
-                        # Add operation-specific details
-                        if operation == "append":
-                            lines_added = len(content.split("\n"))
-                            summary.append(f"operation: Added {lines_added} lines to end of note")
-                        elif operation == "prepend":
-                            lines_added = len(content.split("\n"))
-                            summary.append(
-                                f"operation: Added {lines_added} lines to beginning of note"
-                            )
-                        elif operation == "find_replace":
-                            # For find_replace, we can't easily count replacements from here
-                            # since we don't have the original content, but the server handled it
-                            summary.append("operation: Find and replace operation completed")
-                        elif operation == "replace_section":
-                            summary.append(f"operation: Replaced content under section '{section}'")
-                        elif operation == "insert_before_section":
-                            summary.append(
-                                f"operation: Inserted content before section '{section}'"
-                            )
-                        elif operation == "insert_after_section":
-                            summary.append(f"operation: Inserted content after section '{section}'")
-
-                    # Count observations by category (reuse logic from write_note)
-                    categories = {}
-                    if result.observations:
-                        for obs in result.observations:
-                            categories[obs.category] = categories.get(obs.category, 0) + 1
-
-                        summary.append("\n## Observations")
-                        for category, count in sorted(categories.items()):
-                            summary.append(f"- {category}: {count}")
-
-                    # Count resolved/unresolved relations
-                    unresolved = 0
-                    resolved = 0
-                    if result.relations:
-                        unresolved = sum(1 for r in result.relations if not r.to_id)
-                        resolved = len(result.relations) - unresolved
-
-                        summary.append("\n## Relations")
-                        summary.append(f"- Resolved: {resolved}")
-                        if unresolved:
-                            summary.append(f"- Unresolved: {unresolved}")
-
-                    logger.info(
-                        f"MCP tool response: tool=edit_note project={active_project.name} "
-                        f"operation={operation} permalink={result.permalink} "
-                        f"observations_count={len(result.observations)} "
-                        f"relations_count={len(result.relations)} "
-                        f"file_created={str(file_created).lower()}"
-                    )
-
-                    if output_format == "json":
-                        return {
-                            "title": result.title,
-                            "permalink": result.permalink,
-                            "file_path": result.file_path,
-                            "checksum": result.checksum,
-                            "operation": operation,
-                            "fileCreated": file_created,
-                        }
-
-                    summary_result = "\n".join(summary)
-                    return add_project_metadata(summary_result, active_project.name)
-
-                except Exception as e:
-                    logger.error(f"Error editing note: {e}")
-                    if output_format == "json":
-                        return {
-                            "title": None,
-                            "permalink": None,
-                            "file_path": None,
-                            "checksum": None,
-                            "operation": operation,
-                            "fileCreated": False,
-                            "error": str(e),
-                        }
-                    return _format_error_response(
-                        str(e),
-                        operation,
-                        identifier,
-                        find_text,
-                        effective_replacements,
-                        active_project.name,
-                    )
+            logger.info(
+                f"MCP tool call tool=edit_note project={active_project.name} "
+                f"identifier={identifier} operation={operation} output_format={output_format}"
+            )
+
+            # Validate operation
+            valid_operations = [
+                "append",
+                "prepend",
+                "find_replace",
+                "replace_section",
+                "insert_before_section",
+                "insert_after_section",
+            ]
+            if operation not in valid_operations:
+                raise ValueError(
+                    f"Invalid operation '{operation}'. Must be one of: {', '.join(valid_operations)}"
+                )
+
+            # Validate required parameters for specific operations
+            if operation == "find_replace" and not find_text:
+                raise ValueError("find_text parameter is required for find_replace operation")
+            section_ops = ("replace_section", "insert_before_section", "insert_after_section")
+            if operation in section_ops and not section:
+                raise ValueError("section parameter is required for section-based operations")
+
+            # Use the PATCH endpoint to edit the entity
+            try:
+                # Import here to avoid circular import
+                from basic_memory.mcp.clients import KnowledgeClient
+
+                # Use typed KnowledgeClient for API calls
+                knowledge_client = KnowledgeClient(client, active_project.external_id)
+
+                file_created = False
+                entity_id = ""
+                result: EntityResponse | None = None
+
+                # Try to resolve the entity; for append/prepend, create it if not found
+                try:
+                    entity_id = await knowledge_client.resolve_entity(identifier, strict=True)
+                except Exception as resolve_error:
+                    # Trigger: entity does not exist yet
+                    # Why: append/prepend can meaningfully create a new note from the content,
+                    # while find_replace/replace_section require existing content to modify
+                    # Outcome: note is created via the same path as write_note
+                    error_msg = str(resolve_error).lower()
+                    is_not_found = "entity not found" in error_msg or "not found" in error_msg
+
+                    if is_not_found and operation in ("append", "prepend"):
+                        title, directory = _parse_identifier_to_title_and_directory(identifier)
+
+                        # Validate directory path (same security check as write_note)
+                        project_path = active_project.home
+                        if directory and not validate_project_path(directory, project_path):
+                            logger.warning(
+                                "Attempted path traversal attack blocked",
+                                directory=directory,
+                                project=active_project.name,
+                            )
+                            if output_format == "json":
+                                return {
+                                    "title": title,
+                                    "permalink": None,
+                                    "file_path": None,
+                                    "checksum": None,
+                                    "operation": operation,
+                                    "fileCreated": False,
+                                    "error": "SECURITY_VALIDATION_ERROR",
+                                }
+                            return f"# Error\n\nDirectory path '{directory}' is not allowed - paths must stay within project boundaries"
+
+                        entity = Entity(
+                            title=title,
+                            directory=directory,
+                            content_type="text/markdown",
+                            content=content,
+                        )
+
+                        logger.info(
+                            "Creating note via edit_note auto-create",
+                            title=title,
+                            directory=directory,
+                            operation=operation,
+                        )
+                        result = await knowledge_client.create_entity(entity.model_dump())
+                        file_created = True
+                    else:
+                        # find_replace/replace_section require existing content — re-raise
+                        raise resolve_error
+
+                # --- Standard edit path (entity already existed) ---
+                if not file_created:
+                    # Prepare the edit request data
+                    edit_data = {
+                        "operation": operation,
+                        "content": content,
+                    }
+
+                    # Add optional parameters
+                    if section:
+                        edit_data["section"] = section
+                    if find_text:
+                        edit_data["find_text"] = find_text
+                    if effective_replacements != 1:  # Only send if different from default
+                        edit_data["expected_replacements"] = str(effective_replacements)
+
+                    # Call the PATCH endpoint
+                    result = await knowledge_client.patch_entity(entity_id, edit_data)
+
+                # --- Format response ---
+                # result is always set: either by create_entity (auto-create) or patch_entity (edit)
+                assert result is not None
+                if file_created:
+                    summary = [
+                        f"# Created note ({operation})",
+                        f"project: {active_project.name}",
+                        f"file_path: {result.file_path}",
+                        f"permalink: {result.permalink}",
+                        f"checksum: {result.checksum[:8] if result.checksum else 'unknown'}",
+                        "fileCreated: true",
+                    ]
+                    lines_added = len(content.split("\n"))
+                    summary.append(f"operation: Created note with {lines_added} lines")
+                else:
+                    summary = [
+                        f"# Edited note ({operation})",
+                        f"project: {active_project.name}",
+                        f"file_path: {result.file_path}",
+                        f"permalink: {result.permalink}",
+                        f"checksum: {result.checksum[:8] if result.checksum else 'unknown'}",
+                    ]
+
+                    # Add operation-specific details
+                    if operation == "append":
+                        lines_added = len(content.split("\n"))
+                        summary.append(f"operation: Added {lines_added} lines to end of note")
+                    elif operation == "prepend":
+                        lines_added = len(content.split("\n"))
+                        summary.append(f"operation: Added {lines_added} lines to beginning of note")
+                    elif operation == "find_replace":
+                        # For find_replace, we can't easily count replacements from here
+                        # since we don't have the original content, but the server handled it
+                        summary.append("operation: Find and replace operation completed")
+                    elif operation == "replace_section":
+                        summary.append(f"operation: Replaced content under section '{section}'")
+                    elif operation == "insert_before_section":
+                        summary.append(f"operation: Inserted content before section '{section}'")
+                    elif operation == "insert_after_section":
+                        summary.append(f"operation: Inserted content after section '{section}'")
+
+                # Count observations by category (reuse logic from write_note)
+                categories = {}
+                if result.observations:
+                    for obs in result.observations:
+                        categories[obs.category] = categories.get(obs.category, 0) + 1
+
+                    summary.append("\n## Observations")
+                    for category, count in sorted(categories.items()):
+                        summary.append(f"- {category}: {count}")
+
+                # Count resolved/unresolved relations
+                unresolved = 0
+                resolved = 0
+                if result.relations:
+                    unresolved = sum(1 for r in result.relations if not r.to_id)
+                    resolved = len(result.relations) - unresolved
+
+                    summary.append("\n## Relations")
+                    summary.append(f"- Resolved: {resolved}")
+                    if unresolved:
+                        summary.append(f"- Unresolved: {unresolved}")
+
+                logger.info(
+                    f"MCP tool response: tool=edit_note project={active_project.name} "
+                    f"operation={operation} permalink={result.permalink} "
+                    f"observations_count={len(result.observations)} "
+                    f"relations_count={len(result.relations)} "
+                    f"file_created={str(file_created).lower()}"
+                )
+
+                if output_format == "json":
+                    return {
+                        "title": result.title,
+                        "permalink": result.permalink,
+                        "file_path": result.file_path,
+                        "checksum": result.checksum,
+                        "operation": operation,
+                        "fileCreated": file_created,
+                    }
+
+                summary_result = "\n".join(summary)
+                return add_project_metadata(summary_result, active_project.name)
+
+            except Exception as e:
+                logger.error(f"Error editing note: {e}")
+                if output_format == "json":
+                    return {
+                        "title": None,
+                        "permalink": None,
+                        "file_path": None,
+                        "checksum": None,
+                        "operation": operation,
+                        "fileCreated": False,
+                        "error": str(e),
+                    }
+                return _format_error_response(
+                    str(e),
+                    operation,
+                    identifier,
+                    find_text,
+                    effective_replacements,
+                    active_project.name,
+                )
diff --git a/src/basic_memory/mcp/tools/read_note.py b/src/basic_memory/mcp/tools/read_note.py
index 229bc28c..cda64714 100644
--- a/src/basic_memory/mcp/tools/read_note.py
+++ b/src/basic_memory/mcp/tools/read_note.py
@@ -3,12 +3,12 @@
 from textwrap import dedent
 from typing import Optional, Literal, cast
 
+import logfire
 import yaml
 from loguru import logger
 
 from fastmcp import Context
 
-from basic_memory import telemetry
 from basic_memory.config import ConfigManager
 from basic_memory.mcp.project_context import (
     detect_project_from_url_prefix,
@@ -140,7 +140,7 @@ async def read_note(
     if detected:
         project = detected
 
-    with telemetry.operation(
+    with logfire.span(
         "mcp.tool.read_note",
         entrypoint="mcp",
         tool_name="read_note",
@@ -152,221 +152,210 @@ async def read_note(
         include_frontmatter=include_frontmatter,
     ):
         async with get_project_client(project, workspace, context) as (client, active_project):
-            with telemetry.contextualize(
-                project_name=active_project.name,
-                workspace_id=workspace,
-                tool_name="read_note",
-            ):
-                # Resolve identifier with project-prefix awareness for memory:// URLs
-                _, entity_path, _ = await resolve_project_and_path(
-                    client, identifier, project, context
-                )
-
-                # Validate identifier to prevent path traversal attacks
-                # For memory:// URLs, validate the extracted path (not the raw URL which
-                # has a scheme prefix that confuses path validation)
-                raw_path = (
-                    memory_url_path(identifier)
-                    if identifier.startswith("memory://")
-                    else identifier
-                )
-                processed_path = entity_path
-                project_path = active_project.home
-
-                if not validate_project_path(raw_path, project_path) or not validate_project_path(
-                    processed_path, project_path
-                ):
-                    logger.warning(
-                        "Attempted path traversal attack blocked",
-                        identifier=identifier,
-                        processed_path=processed_path,
-                        project=active_project.name,
-                    )
-                    if output_format == "json":
-                        return {
-                            "title": None,
-                            "permalink": None,
-                            "file_path": None,
-                            "content": None,
-                            "frontmatter": None,
-                            "error": "SECURITY_VALIDATION_ERROR",
-                        }
-                    return f"# Error\n\nIdentifier '{identifier}' is not allowed - paths must stay within project boundaries"
-
-                # Get the file via REST API - first try direct identifier resolution
-                logger.info(
-                    f"Attempting to read note from Project: {active_project.name} identifier: {entity_path}"
-                )
-
-                # Import here to avoid circular import
-                from basic_memory.mcp.clients import KnowledgeClient, ResourceClient
-
-                # Use typed clients for API calls
-                knowledge_client = KnowledgeClient(client, active_project.external_id)
-                resource_client = ResourceClient(client, active_project.external_id)
-
-                async def _read_json_payload(entity_id: str) -> dict:
-                    with telemetry.scope(
-                        "mcp.read_note.shape_response",
-                        domain="mcp",
-                        action="read_note",
-                        phase="shape_response",
-                    ):
-                        entity = await knowledge_client.get_entity(entity_id)
-                        response = await resource_client.read(
-                            entity_id, page=page, page_size=page_size
-                        )
-                        content_text = response.text
-                        body_content, parsed_frontmatter = _parse_opening_frontmatter(content_text)
-                        return {
-                            "title": entity.title,
-                            "permalink": entity.permalink,
-                            "file_path": entity.file_path,
-                            "content": content_text if include_frontmatter else body_content,
-                            "frontmatter": parsed_frontmatter,
-                        }
-
-                def _empty_json_payload() -> dict:
-                    return {
-                        "title": None,
-                        "permalink": None,
-                        "file_path": None,
-                        "content": None,
-                        "frontmatter": None,
-                    }
-
-                def _search_results(payload: object) -> list[dict[str, object]]:
-                    if not isinstance(payload, dict):
-                        return []
-                    payload_dict = cast(dict[str, object], payload)
-                    results = payload_dict.get("results")
-                    if not isinstance(results, list):
-                        return []
-                    return [
-                        cast(dict[str, object], result)
-                        for result in results
-                        if isinstance(result, dict)
-                    ]
-
-                async def _search_candidates(
-                    identifier_text: str, *, title_only: bool
-                ) -> dict[str, object]:
-                    # Trigger: direct entity resolution failed for the caller's identifier.
-                    # Why: search_notes applies the same memory:// normalization and tool-level
-                    # query handling as the rest of MCP routing, which raw client calls skip.
-                    # Outcome: unresolved memory URLs still fall back through normalized search.
-                    search_type = "title" if title_only else "text"
-                    response = await search_notes(
-                        project=active_project.name,
-                        workspace=workspace,
-                        query=identifier_text,
-                        search_type=search_type,
-                        page=page,
-                        page_size=page_size,
-                        output_format="json",
-                        context=context,
-                    )
-                    return cast(dict[str, object], response) if isinstance(response, dict) else {}
-
-                def _result_title(item: dict[str, object]) -> str:
-                    return str(item.get("title") or "")
-
-                def _result_permalink(item: dict[str, object]) -> Optional[str]:
-                    value = item.get("permalink")
-                    return str(value) if value else None
-
-                def _result_file_path(item: dict[str, object]) -> Optional[str]:
-                    value = item.get("file_path")
-                    return str(value) if value else None
-
-                try:
-                    # Try to resolve identifier to entity ID
-                    entity_id = await knowledge_client.resolve_entity(entity_path, strict=True)
-
-                    # Fetch content using entity ID
-                    response = await resource_client.read(entity_id, page=page, page_size=page_size)
-
-                    # If successful, return the content
-                    if response.status_code == 200:
-                        logger.info(
-                            "Returning read_note result from resource: {path}", path=entity_path
-                        )
-                        if output_format == "json":
-                            return await _read_json_payload(entity_id)
-                        return response.text
-                except Exception as e:  # pragma: no cover
-                    logger.info(f"Direct lookup failed for '{entity_path}': {e}")
-                    # Continue to fallback methods
-
-                # Fallback 1: Try title search via API
-                logger.info(f"Search title for: {identifier}")
-                title_results = await _search_candidates(identifier, title_only=True)
-
-                title_candidates = _search_results(title_results)
-                if title_candidates:
-                    # Trigger: direct resolution failed and title search returned candidates.
-                    # Why: avoid returning unrelated notes when search yields only fuzzy matches.
-                    # Outcome: fetch content only when a true exact title match exists.
-                    result = next(
-                        (
-                            candidate
-                            for candidate in title_candidates
-                            if _is_exact_title_match(identifier, _result_title(candidate))
-                        ),
-                        None,
-                    )
-                    if not result:
-                        logger.info(f"No exact title match found for: {identifier}")
-                    elif _result_permalink(result):
-                        try:
-                            # Resolve the permalink to entity ID
-                            entity_id = await knowledge_client.resolve_entity(
-                                _result_permalink(result) or "", strict=True
-                            )
-
-                            # Fetch content using the entity ID
-                            response = await resource_client.read(
-                                entity_id, page=page, page_size=page_size
-                            )
-
-                            if response.status_code == 200:
-                                logger.info(
-                                    f"Found note by exact title search: {_result_permalink(result)}"
-                                )
-                                if output_format == "json":
-                                    return await _read_json_payload(entity_id)
-                                return response.text
-                        except Exception as e:  # pragma: no cover
-                            logger.info(
-                                f"Failed to fetch content for found title match {_result_permalink(result)}: {e}"
-                            )
-                else:
-                    logger.info(
-                        f"No results in title search for: {identifier} in project {active_project.name}"
-                    )
-
-                # Fallback 2: Text search as a last resort
-                logger.info(f"Title search failed, trying text search for: {identifier}")
-                text_results = await _search_candidates(identifier, title_only=False)
-
-                # We didn't find a direct match, construct a helpful error message
-                text_candidates = _search_results(text_results)
-                if not text_candidates:
-                    if output_format == "json":
-                        return _empty_json_payload()
-                    return format_not_found_message(active_project.name, identifier)
-                if output_format == "json":
-                    payload = _empty_json_payload()
-                    payload["related_results"] = [
-                        {
-                            "title": _result_title(result),
-                            "permalink": _result_permalink(result),
-                            "file_path": _result_file_path(result),
-                        }
-                        for result in text_candidates[:5]
-                    ]
-                    return payload
-                return format_related_results(active_project.name, identifier, text_candidates[:5])
+            # Resolve identifier with project-prefix awareness for memory:// URLs
+            _, entity_path, _ = await resolve_project_and_path(client, identifier, project, context)
+
+            # Validate identifier to prevent path traversal attacks
+            # For memory:// URLs, validate the extracted path (not the raw URL which
+            # has a scheme prefix that confuses path validation)
+            raw_path = (
+                memory_url_path(identifier) if identifier.startswith("memory://") else identifier
+            )
+            processed_path = entity_path
+            project_path = active_project.home
+
+            if not validate_project_path(raw_path, project_path) or not validate_project_path(
+                processed_path, project_path
+            ):
+                logger.warning(
+                    "Attempted path traversal attack blocked",
+                    identifier=identifier,
+                    processed_path=processed_path,
+                    project=active_project.name,
+                )
+                if output_format == "json":
+                    return {
+                        "title": None,
+                        "permalink": None,
+                        "file_path": None,
+                        "content": None,
+                        "frontmatter": None,
+                        "error": "SECURITY_VALIDATION_ERROR",
+                    }
+                return f"# Error\n\nIdentifier '{identifier}' is not allowed - paths must stay within project boundaries"
+
+            # Get the file via REST API - first try direct identifier resolution
+            logger.info(
+                f"Attempting to read note from Project: {active_project.name} identifier: {entity_path}"
+            )
+
+            # Import here to avoid circular import
+            from basic_memory.mcp.clients import KnowledgeClient, ResourceClient
+
+            # Use typed clients for API calls
+            knowledge_client = KnowledgeClient(client, active_project.external_id)
+            resource_client = ResourceClient(client, active_project.external_id)
+
+            async def _read_json_payload(entity_id: str) -> dict:
+                with logfire.span(
+                    "mcp.read_note.shape_response",
+                    domain="mcp",
+                    action="read_note",
+                    phase="shape_response",
+                ):
+                    entity = await knowledge_client.get_entity(entity_id)
+                    response = await resource_client.read(entity_id, page=page, page_size=page_size)
+                    content_text = response.text
+                    body_content, parsed_frontmatter = _parse_opening_frontmatter(content_text)
+                    return {
+                        "title": entity.title,
+                        "permalink": entity.permalink,
+                        "file_path": entity.file_path,
+                        "content": content_text if include_frontmatter else body_content,
+                        "frontmatter": parsed_frontmatter,
+                    }
+
+            def _empty_json_payload() -> dict:
+                return {
+                    "title": None,
+                    "permalink": None,
+                    "file_path": None,
+                    "content": None,
+                    "frontmatter": None,
+                }
+
+            def _search_results(payload: object) -> list[dict[str, object]]:
+                if not isinstance(payload, dict):
+                    return []
+                payload_dict = cast(dict[str, object], payload)
+                results = payload_dict.get("results")
+                if not isinstance(results, list):
+                    return []
+                return [
+                    cast(dict[str, object], result)
+                    for result in results
+                    if isinstance(result, dict)
+                ]
+
+            async def _search_candidates(
+                identifier_text: str, *, title_only: bool
+            ) -> dict[str, object]:
+                # Trigger: direct entity resolution failed for the caller's identifier.
+                # Why: search_notes applies the same memory:// normalization and tool-level
+                # query handling as the rest of MCP routing, which raw client calls skip.
+                # Outcome: unresolved memory URLs still fall back through normalized search.
+                search_type = "title" if title_only else "text"
+                response = await search_notes(
+                    project=active_project.name,
+                    workspace=workspace,
+                    query=identifier_text,
+                    search_type=search_type,
+                    page=page,
+                    page_size=page_size,
+                    output_format="json",
+                    context=context,
+                )
+                return cast(dict[str, object], response) if isinstance(response, dict) else {}
+
+            def _result_title(item: dict[str, object]) -> str:
+                return str(item.get("title") or "")
+
+            def _result_permalink(item: dict[str, object]) -> Optional[str]:
+                value = item.get("permalink")
+                return str(value) if value else None
+
+            def _result_file_path(item: dict[str, object]) -> Optional[str]:
+                value = item.get("file_path")
+                return str(value) if value else None
+
+            try:
+                # Try to resolve identifier to entity ID
+                entity_id = await knowledge_client.resolve_entity(entity_path, strict=True)
+
+                # Fetch content using entity ID
+                response = await resource_client.read(entity_id, page=page, page_size=page_size)
+
+                # If successful, return the content
+                if response.status_code == 200:
+                    logger.info(
+                        "Returning read_note result from resource: {path}", path=entity_path
+                    )
+                    if output_format == "json":
+                        return await _read_json_payload(entity_id)
+                    return response.text
+            except Exception as e:  # pragma: no cover
+                logger.info(f"Direct lookup failed for '{entity_path}': {e}")
+                # Continue to fallback methods
+
+            # Fallback 1: Try title search via API
+            logger.info(f"Search title for: {identifier}")
+            title_results = await _search_candidates(identifier, title_only=True)
+
+            title_candidates = _search_results(title_results)
+            if title_candidates:
+                # Trigger: direct resolution failed and title search returned candidates.
+                # Why: avoid returning unrelated notes when search yields only fuzzy matches.
+                # Outcome: fetch content only when a true exact title match exists.
+                result = next(
+                    (
+                        candidate
+                        for candidate in title_candidates
+                        if _is_exact_title_match(identifier, _result_title(candidate))
+                    ),
+                    None,
+                )
+                if not result:
+                    logger.info(f"No exact title match found for: {identifier}")
+                elif _result_permalink(result):
+                    try:
+                        # Resolve the permalink to entity ID
+                        entity_id = await knowledge_client.resolve_entity(
+                            _result_permalink(result) or "", strict=True
+                        )
+
+                        # Fetch content using the entity ID
+                        response = await resource_client.read(
+                            entity_id, page=page, page_size=page_size
+                        )
+
+                        if response.status_code == 200:
+                            logger.info(
+                                f"Found note by exact title search: {_result_permalink(result)}"
+                            )
+                            if output_format == "json":
+                                return await _read_json_payload(entity_id)
+                            return response.text
+                    except Exception as e:  # pragma: no cover
+                        logger.info(
+                            f"Failed to fetch content for found title match {_result_permalink(result)}: {e}"
+                        )
+            else:
+                logger.info(
+                    f"No results in title search for: {identifier} in project {active_project.name}"
+                )
+
+            # Fallback 2: Text search as a last resort
+            logger.info(f"Title search failed, trying text search for: {identifier}")
+            text_results = await _search_candidates(identifier, title_only=False)
+
+            # We didn't find a direct match, construct a helpful error message
+            text_candidates = _search_results(text_results)
+            if not text_candidates:
+                if output_format == "json":
+                    return _empty_json_payload()
+                return format_not_found_message(active_project.name, identifier)
+            if output_format == "json":
+                payload = _empty_json_payload()
+                payload["related_results"] = [
+                    {
+                        "title": _result_title(result),
+                        "permalink": _result_permalink(result),
+                        "file_path": _result_file_path(result),
+                    }
+                    for result in text_candidates[:5]
+                ]
+                return payload
+            return format_related_results(active_project.name, identifier, text_candidates[:5])
 
 
 def format_not_found_message(project: str | None, identifier: str) -> str:
diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py
index 94c99aa9..cffb8fff 100644
--- a/src/basic_memory/mcp/tools/search.py
+++ b/src/basic_memory/mcp/tools/search.py
@@ -4,11 +4,11 @@
 from textwrap import dedent
 from typing import Annotated, List, Optional, Dict, Any, Literal
 
+import logfire
 from loguru import logger
 from fastmcp import Context
 from pydantic import BeforeValidator
 
-from basic_memory import telemetry
 from basic_memory.config import ConfigManager
 from basic_memory.utils import coerce_dict, coerce_list
 from basic_memory.mcp.container import get_container
@@ -524,7 +524,7 @@ async def search_notes(
     if detected:
         project = detected
 
-    with telemetry.operation(
+    with logfire.span(
         "mcp.tool.search_notes",
         entrypoint="mcp",
         tool_name="search_notes",
@@ -544,139 +544,134 @@ async def search_notes(
         has_status_filter=bool(status),
     ):
         async with get_project_client(project, workspace, context) as (client, active_project):
-            with telemetry.contextualize(
-                project_name=active_project.name,
-                workspace_id=workspace,
-                tool_name="search_notes",
-            ):
-                # Handle memory:// URLs by resolving to permalink search
-                is_memory_url = False
-                if query is not None:
-                    _, resolved_query, is_memory_url = await resolve_project_and_path(
-                        client, query, project, context
-                    )
-                    if is_memory_url:
-                        query = resolved_query
-                effective_search_type = search_type or _default_search_type()
-                if is_memory_url:
-                    effective_search_type = "permalink"
-
-                try:
-                    # Create a SearchQuery object based on the parameters
-                    search_query = SearchQuery()
-
-                    # Only map search_type to query fields when there is an actual query string.
-                    # When query is None/empty, skip the search mode block — filters-only path.
-                    effective_query = (query or "").strip()
-                    if effective_query:
-                        valid_search_types = {
-                            "text",
-                            "title",
-                            "permalink",
-                            "vector",
-                            "semantic",
-                            "hybrid",
-                        }
-                        if effective_search_type == "text":
-                            search_query.text = effective_query
-                            search_query.retrieval_mode = SearchRetrievalMode.FTS
-                        elif effective_search_type in ("vector", "semantic"):
-                            search_query.text = effective_query
-                            search_query.retrieval_mode = SearchRetrievalMode.VECTOR
-                        elif effective_search_type == "hybrid":
-                            search_query.text = effective_query
-                            search_query.retrieval_mode = SearchRetrievalMode.HYBRID
-                        elif effective_search_type == "title":
-                            search_query.title = effective_query
-                        elif effective_search_type == "permalink" and "*" in effective_query:
-                            search_query.permalink_match = effective_query
-                        elif effective_search_type == "permalink":
-                            search_query.permalink = effective_query
-                        else:
-                            raise ValueError(
-                                f"Invalid search_type '{effective_search_type}'. "
-                                f"Valid options: {', '.join(sorted(valid_search_types))}"
-                            )
-
-                    # Add optional filters if provided (empty lists are treated as no filter)
-                    if entity_types:
-                        search_query.entity_types = [SearchItemType(t) for t in entity_types]
-                    if note_types:
-                        search_query.note_types = note_types
-                    if after_date:
-                        search_query.after_date = after_date
-                    if metadata_filters:
-                        # Alias common column/model names to their frontmatter key equivalents.
-                        # Users often pass "note_type" (the entity model column) when the
-                        # frontmatter field is actually "type".
-                        _METADATA_KEY_ALIASES = {"note_type": "type"}
-                        metadata_filters = {
-                            _METADATA_KEY_ALIASES.get(k, k): v for k, v in metadata_filters.items()
-                        }
-                        search_query.metadata_filters = metadata_filters
-                    if tags:
-                        search_query.tags = tags
-                    if status:
-                        search_query.status = status
-                    if min_similarity is not None:
-                        search_query.min_similarity = min_similarity
-
-                    # Reject searches with no criteria at all
-                    if search_query.no_criteria():
-                        return (
-                            "# No Search Criteria\n\n"
-                            "Please provide at least one of: `query`, `metadata_filters`, "
-                            "`tags`, `status`, `note_types`, `entity_types`, or `after_date`."
+            # Handle memory:// URLs by resolving to permalink search
+            is_memory_url = False
+            if query is not None:
+                _, resolved_query, is_memory_url = await resolve_project_and_path(
+                    client, query, project, context
+                )
+                if is_memory_url:
+                    query = resolved_query
+            effective_search_type = search_type or _default_search_type()
+            if is_memory_url:
+                effective_search_type = "permalink"
+
+            try:
+                # Create a SearchQuery object based on the parameters
+                search_query = SearchQuery()
+
+                # Only map search_type to query fields when there is an actual query string.
+                # When query is None/empty, skip the search mode block — filters-only path.
+                effective_query = (query or "").strip()
+                if effective_query:
+                    valid_search_types = {
+                        "text",
+                        "title",
+                        "permalink",
+                        "vector",
+                        "semantic",
+                        "hybrid",
+                    }
+                    if effective_search_type == "text":
+                        search_query.text = effective_query
+                        search_query.retrieval_mode = SearchRetrievalMode.FTS
+                    elif effective_search_type in ("vector", "semantic"):
+                        search_query.text = effective_query
+                        search_query.retrieval_mode = SearchRetrievalMode.VECTOR
+                    elif effective_search_type == "hybrid":
+                        search_query.text = effective_query
+                        search_query.retrieval_mode = SearchRetrievalMode.HYBRID
+                    elif effective_search_type == "title":
+                        search_query.title = effective_query
+                    elif effective_search_type == "permalink" and "*" in effective_query:
+                        search_query.permalink_match = effective_query
+                    elif effective_search_type == "permalink":
+                        search_query.permalink = effective_query
+                    else:
+                        raise ValueError(
+                            f"Invalid search_type '{effective_search_type}'. 
" + f"Valid options: {', '.join(sorted(valid_search_types))}" ) - # Default to entity-level results to avoid returning individual - # observations/relations as separate search results (see issue #31). - # Applied after no_criteria() so that the implicit default doesn't - # mask a truly empty search request. - if not search_query.entity_types: - search_query.entity_types = [SearchItemType("entity")] - - logger.debug( - f"Search request: project={active_project.name} " - f"search_type={effective_search_type} " - f"query={effective_query or ''} " - f"note_types={len(note_types)} entity_types={len(search_query.entity_types or [])} " - f"page={page} page_size={page_size}" - ) - # Import here to avoid circular import (tools → clients → utils → tools) - from basic_memory.mcp.clients import SearchClient - - # Use typed SearchClient for API calls - search_client = SearchClient(client, active_project.external_id) - result = await search_client.search( - search_query.model_dump(), - page=page, - page_size=page_size, - ) - logger.debug( - f"Search response: project={active_project.name} " - f"results={len(result.results)} has_more={str(result.has_more).lower()} " - f"page={result.current_page} page_size={result.page_size}" + # Add optional filters if provided (empty lists are treated as no filter) + if entity_types: + search_query.entity_types = [SearchItemType(t) for t in entity_types] + if note_types: + search_query.note_types = note_types + if after_date: + search_query.after_date = after_date + if metadata_filters: + # Alias common column/model names to their frontmatter key equivalents. + # Users often pass "note_type" (the entity model column) when the + # frontmatter field is actually "type". + _METADATA_KEY_ALIASES = {"note_type": "type"} + metadata_filters = { + _METADATA_KEY_ALIASES.get(k, k): v for k, v in metadata_filters.items() + } + search_query.metadata_filters = metadata_filters + if tags: + search_query.tags = tags + if status: + search_query.status = status + if min_similarity is not None: + search_query.min_similarity = min_similarity + + # Reject searches with no criteria at all + if search_query.no_criteria(): + return ( + "# No Search Criteria\n\n" + "Please provide at least one of: `query`, `metadata_filters`, " + "`tags`, `status`, `note_types`, `entity_types`, or `after_date`." ) - # Check if we got no results and provide helpful guidance - if not result.results: - logger.debug( - f"Search returned no results for query: {query} in project {active_project.name}" - ) - # Don't treat this as an error, but the user might want guidance - # We return the empty result as normal - the user can decide if they need help - - if output_format == "json": - return result.model_dump(mode="json", exclude_none=True) - - return _format_search_markdown(result, active_project.name, query) - - except Exception as e: - logger.error( - f"Search failed for query '{query or ''}': {e}, project: {active_project.name}" - ) - # Return formatted error message as string for better user experience - return _format_search_error_response( - active_project.name, str(e), query or "", effective_search_type + # Default to entity-level results to avoid returning individual + # observations/relations as separate search results (see issue #31). + # Applied after no_criteria() so that the implicit default doesn't + # mask a truly empty search request. 
+ if not search_query.entity_types: + search_query.entity_types = [SearchItemType("entity")] + + logger.debug( + f"Search request: project={active_project.name} " + f"search_type={effective_search_type} " + f"query={effective_query or ''} " + f"note_types={len(note_types)} entity_types={len(search_query.entity_types or [])} " + f"page={page} page_size={page_size}" + ) + # Import here to avoid circular import (tools → clients → utils → tools) + from basic_memory.mcp.clients import SearchClient + + # Use typed SearchClient for API calls + search_client = SearchClient(client, active_project.external_id) + result = await search_client.search( + search_query.model_dump(), + page=page, + page_size=page_size, + ) + logger.debug( + f"Search response: project={active_project.name} " + f"results={len(result.results)} has_more={str(result.has_more).lower()} " + f"page={result.current_page} page_size={result.page_size}" + ) + + # Check if we got no results and provide helpful guidance + if not result.results: + logger.debug( + f"Search returned no results for query: {query} in project {active_project.name}" ) + # Don't treat this as an error, but the user might want guidance + # We return the empty result as normal - the user can decide if they need help + + if output_format == "json": + return result.model_dump(mode="json", exclude_none=True) + + return _format_search_markdown(result, active_project.name, query) + + except Exception as e: + logger.error( + f"Search failed for query '{query or ''}': {e}, project: {active_project.name}" + ) + # Return formatted error message as string for better user experience + return _format_search_error_response( + active_project.name, str(e), query or "", effective_search_type + ) diff --git a/src/basic_memory/mcp/tools/utils.py b/src/basic_memory/mcp/tools/utils.py index ded5a7bd..60c61361 100644 --- a/src/basic_memory/mcp/tools/utils.py +++ b/src/basic_memory/mcp/tools/utils.py @@ -6,8 +6,9 @@ import typing from contextlib import contextmanager -from typing import Optional +from typing import Any, Optional +import logfire from httpx import Response, URL, AsyncClient, HTTPStatusError from httpx._client import UseClientDefault, USE_CLIENT_DEFAULT from httpx._types import ( @@ -24,7 +25,6 @@ from loguru import logger from mcp.server.fastmcp.exceptions import ToolError -from basic_memory import telemetry from basic_memory.config import ConfigManager @@ -41,43 +41,22 @@ def _classify_http_outcome(status_code: int) -> str: return "unknown" # pragma: no cover -class _RequestSpan: - """Small adapter for attaching outcome metadata to a live request span.""" - - def __init__(self, active_span: typing.Any | None): - self._active_span = active_span - - def record_response(self, response: Response) -> None: - self._set_attributes( - { - "status_code": response.status_code, - "is_success": response.is_success, - "outcome": _classify_http_outcome(response.status_code), - } - ) - - def record_transport_error(self, exc: Exception) -> None: - self._set_attributes( - { - "is_success": False, - "outcome": "transport_error", - "error_type": type(exc).__name__, - } - ) - - def _set_attributes(self, attrs: dict[str, typing.Any]) -> None: - if self._active_span is None: - return +def _response_span_attrs(response: Response) -> dict[str, Any]: + """Attributes to attach to a request span after a response lands.""" + return { + "status_code": response.status_code, + "is_success": response.is_success, + "outcome": _classify_http_outcome(response.status_code), + } - set_attributes = 
getattr(self._active_span, "set_attributes", None) - if callable(set_attributes): - set_attributes(attrs) - return - set_attribute = getattr(self._active_span, "set_attribute", None) - if callable(set_attribute): - for key, value in attrs.items(): - set_attribute(key, value) +def _transport_error_span_attrs(exc: Exception) -> dict[str, Any]: + """Attributes to attach when the transport layer fails before any response.""" + return { + "is_success": False, + "outcome": "transport_error", + "error_type": type(exc).__name__, + } def get_error_message( @@ -130,15 +109,20 @@ def get_error_message( return f"HTTP error {status_code}: {method} request to '{path}' failed" -def _extract_response_data(response: Response) -> typing.Any: - """Safely decode response payload for error reporting.""" - try: - return response.json() - except Exception: +def _extract_response_data(response: Response) -> Any: + """Decode the JSON payload of an API response for error reporting. + + Upstream gateways (Fly, Cloudflare, load balancers) can return HTML + error pages before the request reaches our FastAPI app; those have no + structured `detail` to surface, so we skip them. A malformed body with + a JSON content-type is a server bug and we let it raise. + """ + if "application/json" not in response.headers.get("content-type", ""): return None + return response.json() -def _response_detail_text(response_data: typing.Any) -> str | None: +def _response_detail_text(response_data: Any) -> str | None: """Extract textual error detail from API payloads.""" if isinstance(response_data, dict): detail = response_data.get("detail") @@ -189,31 +173,6 @@ def _resolve_error_message( return get_error_message(status_code, url, method) -@contextmanager -def _request_scope( - method: str, - *, - client_name: str | None, - operation: str | None, - path_template: str | None, - params: QueryParamTypes | None = None, - has_body: bool = False, -): - """Create the shared MCP transport span used by all HTTP helpers.""" - attrs = { - "method": method, - "client_name": client_name, - "operation": operation, - "path_template": path_template, - "phase": "request", - "has_query": bool(params), - "has_body": has_body, - } - with telemetry.contextualize(**attrs): - with telemetry.started_span("mcp.http.request", **attrs) as active_span: - yield _RequestSpan(active_span) - - async def call_get( client: AsyncClient, url: URL | str, @@ -250,15 +209,18 @@ async def call_get( """ logger.debug(f"Calling GET '{url}' params: '{params}'") error_message = None - request_span: _RequestSpan | None = None + request_span: logfire.LogfireSpan | None = None try: - with _request_scope( - "GET", + with logfire.span( + "mcp.http.request", + method="GET", client_name=client_name, operation=operation, path_template=path_template, - params=params, + phase="request", + has_query=bool(params), + has_body=False, ) as request_span: response = await client.get( url, @@ -270,7 +232,7 @@ async def call_get( timeout=timeout, extensions=extensions, ) - request_span.record_response(response) + request_span.set_attributes(_response_span_attrs(response)) if response.is_success: return response @@ -299,7 +261,7 @@ async def call_get( raise ToolError(error_message) from e except Exception as e: if request_span is not None: - request_span.record_transport_error(e) + request_span.set_attributes(_transport_error_span_attrs(e)) raise @@ -347,15 +309,17 @@ async def call_put( """ logger.debug(f"Calling PUT '{url}'") error_message = None - request_span: _RequestSpan | None = None + 
request_span: logfire.LogfireSpan | None = None try: - with _request_scope( - "PUT", + with logfire.span( + "mcp.http.request", + method="PUT", client_name=client_name, operation=operation, path_template=path_template, - params=params, + phase="request", + has_query=bool(params), has_body=any(value is not None for value in (content, data, files, json)), ) as request_span: response = await client.put( @@ -372,7 +336,7 @@ async def call_put( timeout=timeout, extensions=extensions, ) - request_span.record_response(response) + request_span.set_attributes(_response_span_attrs(response)) if response.is_success: return response @@ -402,7 +366,7 @@ async def call_put( raise ToolError(error_message) from e except Exception as e: if request_span is not None: - request_span.record_transport_error(e) + request_span.set_attributes(_transport_error_span_attrs(e)) raise @@ -449,15 +413,17 @@ async def call_patch( ToolError: If the request fails with an appropriate error message """ logger.debug(f"Calling PATCH '{url}'") - request_span: _RequestSpan | None = None + request_span: logfire.LogfireSpan | None = None try: - with _request_scope( - "PATCH", + with logfire.span( + "mcp.http.request", + method="PATCH", client_name=client_name, operation=operation, path_template=path_template, - params=params, + phase="request", + has_query=bool(params), has_body=any(value is not None for value in (content, data, files, json)), ) as request_span: response = await client.patch( @@ -474,7 +440,7 @@ async def call_patch( timeout=timeout, extensions=extensions, ) - request_span.record_response(response) + request_span.set_attributes(_response_span_attrs(response)) if response.is_success: return response @@ -509,7 +475,7 @@ async def call_patch( raise ToolError(error_message) from e except Exception as e: if request_span is not None: - request_span.record_transport_error(e) + request_span.set_attributes(_transport_error_span_attrs(e)) raise @@ -557,15 +523,17 @@ async def call_post( """ logger.debug(f"Calling POST '{url}'") error_message = None - request_span: _RequestSpan | None = None + request_span: logfire.LogfireSpan | None = None try: - with _request_scope( - "POST", + with logfire.span( + "mcp.http.request", + method="POST", client_name=client_name, operation=operation, path_template=path_template, - params=params, + phase="request", + has_query=bool(params), has_body=any(value is not None for value in (content, data, files, json)), ) as request_span: response = await client.post( @@ -582,7 +550,7 @@ async def call_post( timeout=timeout, extensions=extensions, ) - request_span.record_response(response) + request_span.set_attributes(_response_span_attrs(response)) logger.debug(f"response: {_extract_response_data(response)}") if response.is_success: @@ -612,7 +580,7 @@ async def call_post( raise ToolError(error_message) from e except Exception as e: if request_span is not None: - request_span.record_transport_error(e) + request_span.set_attributes(_transport_error_span_attrs(e)) raise @@ -684,15 +652,18 @@ async def call_delete( """ logger.debug(f"Calling DELETE '{url}'") error_message = None - request_span: _RequestSpan | None = None + request_span: logfire.LogfireSpan | None = None try: - with _request_scope( - "DELETE", + with logfire.span( + "mcp.http.request", + method="DELETE", client_name=client_name, operation=operation, path_template=path_template, - params=params, + phase="request", + has_query=bool(params), + has_body=False, ) as request_span: response = await client.delete( url=url, @@ -704,7 +675,7 @@ 
async def call_delete( timeout=timeout, extensions=extensions, ) - request_span.record_response(response) + request_span.set_attributes(_response_span_attrs(response)) if response.is_success: return response @@ -733,5 +704,5 @@ async def call_delete( raise ToolError(error_message) from e except Exception as e: if request_span is not None: - request_span.record_transport_error(e) + request_span.set_attributes(_transport_error_span_attrs(e)) raise diff --git a/src/basic_memory/mcp/tools/write_note.py b/src/basic_memory/mcp/tools/write_note.py index 885175eb..74feb2a2 100644 --- a/src/basic_memory/mcp/tools/write_note.py +++ b/src/basic_memory/mcp/tools/write_note.py @@ -3,10 +3,10 @@ import textwrap from typing import Annotated, List, Union, Optional, Literal +import logfire from loguru import logger from pydantic import BeforeValidator -from basic_memory import telemetry from basic_memory.config import ConfigManager from basic_memory.mcp.project_context import get_project_client, add_project_metadata from basic_memory.mcp.server import mcp @@ -149,7 +149,7 @@ async def write_note( overwrite if overwrite is not None else ConfigManager().config.write_note_overwrite_default ) - with telemetry.operation( + with logfire.span( "mcp.tool.write_note", entrypoint="mcp", tool_name="write_note", @@ -160,169 +160,160 @@ async def write_note( output_format=output_format, ): async with get_project_client(project, workspace, context) as (client, active_project): - with telemetry.contextualize( - project_name=active_project.name, - workspace_id=workspace, - tool_name="write_note", - ): - logger.info( - f"MCP tool call tool=write_note project={active_project.name} directory={directory}, title={title}, tags={tags}" - ) - - # Normalize "/" to empty string for root directory (must happen before validation) - if directory == "/": - directory = "" - - # Validate directory path to prevent path traversal attacks - project_path = active_project.home - if directory and not validate_project_path(directory, project_path): - logger.warning( - "Attempted path traversal attack blocked", - directory=directory, - project=active_project.name, - ) - if output_format == "json": - return { - "title": title, - "permalink": None, - "file_path": None, - "checksum": None, - "action": "created", - "error": "SECURITY_VALIDATION_ERROR", - } - return f"# Error\n\nDirectory path '{directory}' is not allowed - paths must stay within project boundaries" - - # Process tags using the helper function - tag_list = parse_tags(tags) - - # Build entity_metadata from optional metadata, then explicit tags on top - # Order matters: explicit tags parameter takes precedence over metadata["tags"] - entity_metadata = {} - if metadata: - entity_metadata.update(metadata) - if tag_list: - entity_metadata["tags"] = tag_list - - entity = Entity( - title=title, + logger.info( + f"MCP tool call tool=write_note project={active_project.name} directory={directory}, title={title}, tags={tags}" + ) + + # Normalize "/" to empty string for root directory (must happen before validation) + if directory == "/": + directory = "" + + # Validate directory path to prevent path traversal attacks + project_path = active_project.home + if directory and not validate_project_path(directory, project_path): + logger.warning( + "Attempted path traversal attack blocked", directory=directory, - note_type=note_type, - content_type="text/markdown", - content=content, - entity_metadata=entity_metadata or None, - ) - - # Import here to avoid circular import - from 
basic_memory.mcp.clients import KnowledgeClient - - # Use typed KnowledgeClient for API calls - knowledge_client = KnowledgeClient(client, active_project.external_id) - - # Try to create the entity first (optimistic create) - logger.debug(f"Attempting to create entity permalink={entity.permalink}") - action = "Created" # Default to created - try: - result = await knowledge_client.create_entity(entity.model_dump()) - action = "Created" - except Exception as e: - # If creation failed due to conflict (already exists), try to update - if ( - "409" in str(e) - or "conflict" in str(e).lower() - or "already exists" in str(e).lower() - ): - # Guard: block overwrite unless explicitly enabled - if not effective_overwrite: - logger.warning( - f"write_note blocked: note already exists (overwrite not enabled) " - f"permalink={entity.permalink}" - ) - if output_format == "json": - return { - "title": title, - "permalink": entity.permalink, - "file_path": None, - "checksum": None, - "action": "conflict", - "error": "NOTE_ALREADY_EXISTS", - } - return _format_overwrite_error( - title, entity.permalink, active_project.name - ) - - logger.debug( - f"Entity exists, updating instead permalink={entity.permalink}" - ) - try: - if not entity.permalink: - raise ValueError( - "Entity permalink is required for updates" - ) # pragma: no cover - entity_id = await knowledge_client.resolve_entity(entity.permalink) - result = await knowledge_client.update_entity( - entity_id, entity.model_dump() - ) - action = "Updated" - except Exception as update_error: # pragma: no cover - # Re-raise the original error if update also fails - raise e from update_error # pragma: no cover - else: - # Re-raise if it's not a conflict error - raise # pragma: no cover - summary = [ - f"# {action} note", - f"project: {active_project.name}", - f"file_path: {result.file_path}", - f"permalink: {result.permalink}", - f"checksum: {result.checksum[:8] if result.checksum else 'unknown'}", - ] - - # Count observations by category - categories = {} - if result.observations: - for obs in result.observations: - categories[obs.category] = categories.get(obs.category, 0) + 1 - - summary.append("\n## Observations") - for category, count in sorted(categories.items()): - summary.append(f"- {category}: {count}") - - # Count resolved/unresolved relations - unresolved = 0 - resolved = 0 - if result.relations: - unresolved = sum(1 for r in result.relations if not r.to_id) - resolved = len(result.relations) - unresolved - - summary.append("\n## Relations") - summary.append(f"- Resolved: {resolved}") - if unresolved: - summary.append(f"- Unresolved: {unresolved}") - summary.append( - "\nNote: Unresolved relations point to entities that don't exist yet." - ) - summary.append( - "They will be automatically resolved when target entities are created or during sync operations." 
- ) - - if tag_list: - summary.append(f"\n## Tags\n- {', '.join(tag_list)}") - - # Log the response with structured data - logger.info( - f"MCP tool response: tool=write_note project={active_project.name} action={action} permalink={result.permalink} observations_count={len(result.observations)} relations_count={len(result.relations)} resolved_relations={resolved} unresolved_relations={unresolved}" + project=active_project.name, ) if output_format == "json": return { - "title": result.title, - "permalink": result.permalink, - "file_path": result.file_path, - "checksum": result.checksum, - "action": action.lower(), + "title": title, + "permalink": None, + "file_path": None, + "checksum": None, + "action": "created", + "error": "SECURITY_VALIDATION_ERROR", } + return f"# Error\n\nDirectory path '{directory}' is not allowed - paths must stay within project boundaries" + + # Process tags using the helper function + tag_list = parse_tags(tags) + + # Build entity_metadata from optional metadata, then explicit tags on top + # Order matters: explicit tags parameter takes precedence over metadata["tags"] + entity_metadata = {} + if metadata: + entity_metadata.update(metadata) + if tag_list: + entity_metadata["tags"] = tag_list + + entity = Entity( + title=title, + directory=directory, + note_type=note_type, + content_type="text/markdown", + content=content, + entity_metadata=entity_metadata or None, + ) + + # Import here to avoid circular import + from basic_memory.mcp.clients import KnowledgeClient + + # Use typed KnowledgeClient for API calls + knowledge_client = KnowledgeClient(client, active_project.external_id) + + # Try to create the entity first (optimistic create) + logger.debug(f"Attempting to create entity permalink={entity.permalink}") + action = "Created" # Default to created + try: + result = await knowledge_client.create_entity(entity.model_dump()) + action = "Created" + except Exception as e: + # If creation failed due to conflict (already exists), try to update + if ( + "409" in str(e) + or "conflict" in str(e).lower() + or "already exists" in str(e).lower() + ): + # Guard: block overwrite unless explicitly enabled + if not effective_overwrite: + logger.warning( + f"write_note blocked: note already exists (overwrite not enabled) " + f"permalink={entity.permalink}" + ) + if output_format == "json": + return { + "title": title, + "permalink": entity.permalink, + "file_path": None, + "checksum": None, + "action": "conflict", + "error": "NOTE_ALREADY_EXISTS", + } + return _format_overwrite_error(title, entity.permalink, active_project.name) + + logger.debug(f"Entity exists, updating instead permalink={entity.permalink}") + try: + if not entity.permalink: + raise ValueError( + "Entity permalink is required for updates" + ) # pragma: no cover + entity_id = await knowledge_client.resolve_entity(entity.permalink) + result = await knowledge_client.update_entity( + entity_id, entity.model_dump() + ) + action = "Updated" + except Exception as update_error: # pragma: no cover + # Re-raise the original error if update also fails + raise e from update_error # pragma: no cover + else: + # Re-raise if it's not a conflict error + raise # pragma: no cover + summary = [ + f"# {action} note", + f"project: {active_project.name}", + f"file_path: {result.file_path}", + f"permalink: {result.permalink}", + f"checksum: {result.checksum[:8] if result.checksum else 'unknown'}", + ] + + # Count observations by category + categories = {} + if result.observations: + for obs in result.observations: + 
categories[obs.category] = categories.get(obs.category, 0) + 1 + + summary.append("\n## Observations") + for category, count in sorted(categories.items()): + summary.append(f"- {category}: {count}") + + # Count resolved/unresolved relations + unresolved = 0 + resolved = 0 + if result.relations: + unresolved = sum(1 for r in result.relations if not r.to_id) + resolved = len(result.relations) - unresolved + + summary.append("\n## Relations") + summary.append(f"- Resolved: {resolved}") + if unresolved: + summary.append(f"- Unresolved: {unresolved}") + summary.append( + "\nNote: Unresolved relations point to entities that don't exist yet." + ) + summary.append( + "They will be automatically resolved when target entities are created or during sync operations." + ) - summary_result = "\n".join(summary) - return add_project_metadata(summary_result, active_project.name) + if tag_list: + summary.append(f"\n## Tags\n- {', '.join(tag_list)}") + + # Log the response with structured data + logger.info( + f"MCP tool response: tool=write_note project={active_project.name} action={action} permalink={result.permalink} observations_count={len(result.observations)} relations_count={len(result.relations)} resolved_relations={resolved} unresolved_relations={unresolved}" + ) + if output_format == "json": + return { + "title": result.title, + "permalink": result.permalink, + "file_path": result.file_path, + "checksum": result.checksum, + "action": action.lower(), + } + + summary_result = "\n".join(summary) + return add_project_metadata(summary_result, active_project.name) def _format_overwrite_error(title: str, permalink: str | None, project_name: str) -> str: diff --git a/src/basic_memory/repository/search_repository_base.py b/src/basic_memory/repository/search_repository_base.py index f44db34b..4459d990 100644 --- a/src/basic_memory/repository/search_repository_base.py +++ b/src/basic_memory/repository/search_repository_base.py @@ -12,11 +12,12 @@ from datetime import datetime from typing import Any, Callable, Dict, List, Optional +import logfire from loguru import logger from sqlalchemy import Executable, Result, text from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker -from basic_memory import db, telemetry +from basic_memory import db from basic_memory.repository.embedding_provider import EmbeddingProvider from basic_memory.repository.search_index_row import SearchIndexRow from basic_memory.repository.semantic_errors import ( @@ -845,7 +846,7 @@ def emit_progress(entity_id: int) -> None: progress_callback(entity_id, completed_entities, total_entities) prepare_window_size = self._vector_prepare_window_size() - with telemetry.started_span( + with logfire.span( "basic_memory.vector_sync.batch", project_id=self.project_id, backend=backend_name, @@ -1068,48 +1069,42 @@ def emit_progress(entity_id: int) -> None: write_seconds_total=result.write_seconds_total, ) batch_total_seconds = time.perf_counter() - batch_start - telemetry.record_histogram( - "vector_sync_batch_total_seconds", - batch_total_seconds, - unit="s", - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + batch_attrs = { + "backend": backend_name, + "skip_only_batch": result.embedding_jobs_total == 0, + } + logfire.metric_histogram("vector_sync_batch_total_seconds", unit="s").record( + batch_total_seconds, attributes=batch_attrs ) - telemetry.add_counter( - "vector_sync_entities_total", - result.entities_total, - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + 
logfire.metric_histogram("vector_sync_prepare_seconds", unit="s").record( + result.prepare_seconds_total, attributes=batch_attrs ) - telemetry.add_counter( - "vector_sync_entities_skipped", - result.entities_skipped, - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + logfire.metric_histogram("vector_sync_queue_wait_seconds", unit="s").record( + result.queue_wait_seconds_total, attributes=batch_attrs ) - telemetry.add_counter( - "vector_sync_entities_deferred", - result.entities_deferred, - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + logfire.metric_histogram("vector_sync_embed_seconds", unit="s").record( + result.embed_seconds_total, attributes=batch_attrs ) - telemetry.add_counter( - "vector_sync_embedding_jobs_total", - result.embedding_jobs_total, - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + logfire.metric_histogram("vector_sync_write_seconds", unit="s").record( + result.write_seconds_total, attributes=batch_attrs ) - telemetry.add_counter( - "vector_sync_chunks_total", - result.chunks_total, - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + logfire.metric_counter("vector_sync_entities_total").add( + result.entities_total, attributes=batch_attrs ) - telemetry.add_counter( - "vector_sync_chunks_skipped", - result.chunks_skipped, - backend=backend_name, - skip_only_batch=result.embedding_jobs_total == 0, + logfire.metric_counter("vector_sync_entities_skipped").add( + result.entities_skipped, attributes=batch_attrs + ) + logfire.metric_counter("vector_sync_entities_deferred").add( + result.entities_deferred, attributes=batch_attrs + ) + logfire.metric_counter("vector_sync_embedding_jobs_total").add( + result.embedding_jobs_total, attributes=batch_attrs + ) + logfire.metric_counter("vector_sync_chunks_total").add( + result.chunks_total, attributes=batch_attrs + ) + logfire.metric_counter("vector_sync_chunks_skipped").add( + result.chunks_skipped, attributes=batch_attrs ) if batch_span is not None: batch_span.set_attributes( @@ -1682,36 +1677,12 @@ def _log_vector_sync_complete( shard_count: int, remaining_jobs_after_shard: int, ) -> None: - """Log completion and slow-entity warnings with a consistent format.""" - backend_name = type(self).__name__.removesuffix("SearchRepository").lower() - telemetry.record_histogram( - "vector_sync_prepare_seconds", - prepare_seconds, - unit="s", - backend=backend_name, - skip_only_entity=entity_skipped and embedding_jobs_count == 0, - ) - telemetry.record_histogram( - "vector_sync_queue_wait_seconds", - queue_wait_seconds, - unit="s", - backend=backend_name, - skip_only_entity=entity_skipped and embedding_jobs_count == 0, - ) - telemetry.record_histogram( - "vector_sync_embed_seconds", - embed_seconds, - unit="s", - backend=backend_name, - skip_only_entity=entity_skipped and embedding_jobs_count == 0, - ) - telemetry.record_histogram( - "vector_sync_write_seconds", - write_seconds, - unit="s", - backend=backend_name, - skip_only_entity=entity_skipped and embedding_jobs_count == 0, - ) + """Log completion and slow-entity warnings with a consistent format. + + Per-entity timings are aggregated into `VectorSyncBatchResult` and + recorded as batch-level histograms once the batch completes — this + function stays on the per-entity hot path so it only emits logs. 
+ """ if total_seconds > 10: logger.warning( "Vector sync slow entity: project_id={project_id} entity_id={entity_id} " diff --git a/src/basic_memory/services/context_service.py b/src/basic_memory/services/context_service.py index b9fe88d5..3cb7a3aa 100644 --- a/src/basic_memory/services/context_service.py +++ b/src/basic_memory/services/context_service.py @@ -10,7 +10,7 @@ from loguru import logger from sqlalchemy import text -from basic_memory import telemetry +import logfire from basic_memory.repository.entity_repository import EntityRepository from basic_memory.repository.observation_repository import ObservationRepository from basic_memory.repository.postgres_search_repository import PostgresSearchRepository @@ -111,7 +111,7 @@ async def build_context( f"Building context for URI: '{memory_url}' depth: '{depth}' since: '{since}' limit: '{limit}' offset: '{offset}' max_related: '{max_related}'" ) - with telemetry.scope( + with logfire.span( "memory.build_context", domain="memory", action="build_context", @@ -122,7 +122,7 @@ async def build_context( fetch_limit = limit + 1 normalized_path: Optional[str] = None - with telemetry.scope( + with logfire.span( "memory.build_context.resolve_primary", domain="memory", action="build_context", @@ -180,7 +180,7 @@ async def build_context( type_id_pairs = [(r.type, r.id) for r in primary] if primary else [] logger.debug(f"found primary type_id_pairs: {len(type_id_pairs)}") - with telemetry.scope( + with logfire.span( "memory.build_context.find_related", domain="memory", action="build_context", @@ -202,7 +202,7 @@ async def build_context( observations_by_entity = {} if include_observations and entity_ids: - with telemetry.scope( + with logfire.span( "memory.build_context.load_observations", domain="memory", action="build_context", @@ -226,7 +226,7 @@ async def build_context( has_more=has_more, ) - with telemetry.scope( + with logfire.span( "memory.build_context.shape_results", domain="memory", action="build_context", diff --git a/src/basic_memory/services/file_service.py b/src/basic_memory/services/file_service.py index 9b12c240..b2f8fdc4 100644 --- a/src/basic_memory/services/file_service.py +++ b/src/basic_memory/services/file_service.py @@ -12,7 +12,7 @@ import yaml -from basic_memory import telemetry +import logfire from basic_memory import file_utils if TYPE_CHECKING: # pragma: no cover @@ -89,7 +89,7 @@ async def read_entity_content(self, entity: EntityModel) -> str: """ logger.debug(f"Reading entity content, entity_id={entity.id}, permalink={entity.permalink}") - with telemetry.scope( + with logfire.span( "file_service.read_content", domain="file_service", action="read_content", @@ -191,7 +191,7 @@ async def write_file(self, path: FilePath, content: str) -> str: full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj try: - with telemetry.scope( + with logfire.span( "file_service.write", domain="file_service", action="write", @@ -249,7 +249,7 @@ async def read_file_content(self, path: FilePath) -> str: full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj try: - with telemetry.scope( + with logfire.span( "file_service.read_content", domain="file_service", action="read_content", @@ -299,7 +299,7 @@ async def read_file_bytes(self, path: FilePath) -> bytes: full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj try: - with telemetry.scope( + with logfire.span( "file_service.read_content", domain="file_service", action="read_content", @@ -342,7 +342,7 @@ async def read_file(self, 
path: FilePath) -> Tuple[str, str]: full_path = path_obj if path_obj.is_absolute() else self.base_path / path_obj try: - with telemetry.scope( + with logfire.span( "file_service.read", domain="file_service", action="read", diff --git a/src/basic_memory/services/search_service.py b/src/basic_memory/services/search_service.py index d43971b8..452c7346 100644 --- a/src/basic_memory/services/search_service.py +++ b/src/basic_memory/services/search_service.py @@ -11,7 +11,8 @@ from loguru import logger from sqlalchemy import text -from basic_memory import telemetry +import logfire + from basic_memory.models import Entity from basic_memory.repository import EntityRepository from basic_memory.repository.search_repository import ( @@ -186,7 +187,7 @@ async def search(self, query: SearchQuery, limit=10, offset=0) -> List[SearchInd or query.status ) - with telemetry.scope( + with logfire.span( "search.execute", retrieval_mode=retrieval_mode.value, has_query=has_query, @@ -195,28 +196,21 @@ async def search(self, query: SearchQuery, limit=10, offset=0) -> List[SearchInd offset=offset, ): logger.trace(f"Searching with query: {query}") - with telemetry.scope( - "search.repository_query", - retrieval_mode=retrieval_mode.value, - phase="repository_query", - has_query=has_query, - has_filters=has_filters, - ): - # First pass: preserve existing strict search behavior. - results = await self.repository.search( - search_text=strict_search_text, - permalink=query.permalink, - permalink_match=query.permalink_match, - title=query.title, - note_types=query.note_types, - search_item_types=query.entity_types, - after_date=after_date, - metadata_filters=metadata_filters, - retrieval_mode=retrieval_mode, - min_similarity=query.min_similarity, - limit=limit, - offset=offset, - ) + # First pass: preserve existing strict search behavior. + results = await self.repository.search( + search_text=strict_search_text, + permalink=query.permalink, + permalink_match=query.permalink_match, + title=query.title, + note_types=query.note_types, + search_item_types=query.entity_types, + after_date=after_date, + metadata_filters=metadata_filters, + retrieval_mode=retrieval_mode, + min_similarity=query.min_similarity, + limit=limit, + offset=offset, + ) # Trigger: strict FTS with plain multi-term text returned no results. # Why: natural-language queries often include stopwords that over-constrain implicit AND. 
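The Trigger/Why comments above describe the fallback that the next hunk implements: strict FTS treats multi-term text as an implicit AND, so stopword-heavy natural-language queries can match nothing, and the service retries with a relaxed query. A minimal sketch of that relaxation step, assuming bare word tokens and a simple OR-join; the helper names below are illustrative, and the repository's actual `_tokenize_fts_text` may differ in detail:

```python
import re


def tokenize_fts_text(search_text: str) -> list[str]:
    # Bare word tokens only; quoting and FTS operators are ignored in this sketch.
    return re.findall(r"\w+", search_text.lower())


def relax_fts_query(strict_text: str) -> str:
    # Implicit AND ("a b c") becomes an explicit OR query, so stopwords
    # no longer force every term to match.
    return " OR ".join(tokenize_fts_text(strict_text))


# "how does sync detect moves" -> "how OR does OR sync OR detect OR moves"
print(relax_fts_query("how does sync detect moves"))
```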
@@ -235,34 +229,27 @@ async def search(self, query: SearchQuery, limit=10, offset=0) -> List[SearchInd "Strict FTS returned 0 results; retrying relaxed FTS query " f"strict='{strict_search_text}' relaxed='{relaxed_search_text}'" ) - with telemetry.scope( + with logfire.span( "search.relaxed_fts_retry", retrieval_mode=retrieval_mode.value, token_count=len(self._tokenize_fts_text(strict_search_text)), limit=limit, offset=offset, ): - with telemetry.scope( - "search.repository_query", - retrieval_mode=retrieval_mode.value, - phase="repository_query", - has_query=has_query, - has_filters=has_filters, - ): - return await self.repository.search( - search_text=relaxed_search_text, - permalink=query.permalink, - permalink_match=query.permalink_match, - title=query.title, - note_types=query.note_types, - search_item_types=query.entity_types, - after_date=after_date, - metadata_filters=metadata_filters, - retrieval_mode=retrieval_mode, - min_similarity=query.min_similarity, - limit=limit, - offset=offset, - ) + return await self.repository.search( + search_text=relaxed_search_text, + permalink=query.permalink, + permalink_match=query.permalink_match, + title=query.title, + note_types=query.note_types, + search_item_types=query.entity_types, + after_date=after_date, + metadata_filters=metadata_filters, + retrieval_mode=retrieval_mode, + min_similarity=query.min_similarity, + limit=limit, + offset=offset, + ) @staticmethod def _tokenize_fts_text(search_text: str) -> list[str]: @@ -396,17 +383,8 @@ async def index_entity_data( f"permalink={entity.permalink} project_id={entity.project_id}" ) try: - with telemetry.scope( - "search.index_entity_data", - phase="index_entity_data", - result_count=1, - ): - with telemetry.scope( - "search.index.delete_existing", - phase="delete_existing", - result_count=1, - ): - await self.repository.delete_by_entity_id(entity_id=entity.id) + with logfire.span("search.index_entity_data", entity_id=entity.id): + await self.repository.delete_by_entity_id(entity_id=entity.id) if entity.is_markdown: await self.index_entity_markdown(entity, content) @@ -676,28 +654,23 @@ async def index_entity_file( self, entity: Entity, ) -> None: - with telemetry.scope( - "search.index_file", - phase="index_file", - result_count=1, - ): - # Index entity file with no content - await self.repository.index_item( - SearchIndexRow( - id=entity.id, - entity_id=entity.id, - type=SearchItemType.ENTITY.value, - title=_strip_nul(entity.title), - permalink=entity.permalink, # Required for Postgres NOT NULL constraint - file_path=entity.file_path, - metadata={ - "note_type": entity.note_type, - }, - created_at=entity.created_at, - updated_at=_mtime_to_datetime(entity), - project_id=entity.project_id, - ) + # Index entity file with no content + await self.repository.index_item( + SearchIndexRow( + id=entity.id, + entity_id=entity.id, + type=SearchItemType.ENTITY.value, + title=_strip_nul(entity.title), + permalink=entity.permalink, # Required for Postgres NOT NULL constraint + file_path=entity.file_path, + metadata={ + "note_type": entity.note_type, + }, + created_at=entity.created_at, + updated_at=_mtime_to_datetime(entity), + project_id=entity.project_id, ) + ) async def index_entity_markdown( self, @@ -730,11 +703,7 @@ async def index_entity_markdown( The project_id is automatically added by the repository when indexing. 
""" - with telemetry.scope( - "search.index_markdown", - phase="index_markdown", - result_count=1, - ): + with logfire.span("search.index_markdown", entity_id=entity.id): rows_to_index = [] content_stems = [] @@ -743,51 +712,76 @@ async def index_entity_markdown( content_stems.extend(title_variants) if content is None: - with telemetry.scope( - "search.index.read_content", - phase="read_content", - result_count=1, - ): - content = await self.file_service.read_entity_content(entity) + content = await self.file_service.read_entity_content(entity) if content: content_stems.append(content) content_snippet = _strip_nul(content) - with telemetry.scope( - "search.index.build_rows", - phase="build_rows", - result_count=1, - ): - if entity.permalink: - content_stems.extend(self._generate_variants(entity.permalink)) + if entity.permalink: + content_stems.extend(self._generate_variants(entity.permalink)) - content_stems.extend(self._generate_variants(entity.file_path)) + content_stems.extend(self._generate_variants(entity.file_path)) - entity_tags = self._extract_entity_tags(entity) - if entity_tags: - content_stems.extend(entity_tags) + entity_tags = self._extract_entity_tags(entity) + if entity_tags: + content_stems.extend(entity_tags) - entity_content_stems = _strip_nul( - "\n".join(p for p in content_stems if p and p.strip()) + entity_content_stems = _strip_nul( + "\n".join(p for p in content_stems if p and p.strip()) + ) + + if len(entity_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover + entity_content_stems = entity_content_stems[ + :MAX_CONTENT_STEMS_SIZE + ] # pragma: no cover + + rows_to_index.append( + SearchIndexRow( + id=entity.id, + type=SearchItemType.ENTITY.value, + title=_strip_nul(entity.title), + content_stems=entity_content_stems, + content_snippet=content_snippet, + permalink=entity.permalink, + file_path=entity.file_path, + entity_id=entity.id, + metadata={ + "note_type": entity.note_type, + }, + created_at=entity.created_at, + updated_at=_mtime_to_datetime(entity), + project_id=entity.project_id, ) + ) - if len(entity_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover - entity_content_stems = entity_content_stems[ + seen_permalinks: set[str] = {entity.permalink} if entity.permalink else set() + for obs in entity.observations: + obs_permalink = obs.permalink + if obs_permalink in seen_permalinks: + logger.debug(f"Skipping duplicate observation permalink: {obs_permalink}") + continue + seen_permalinks.add(obs_permalink) + + obs_content_stems = _strip_nul( + "\n".join(p for p in self._generate_variants(obs.content) if p and p.strip()) + ) + if len(obs_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover + obs_content_stems = obs_content_stems[ :MAX_CONTENT_STEMS_SIZE ] # pragma: no cover - rows_to_index.append( SearchIndexRow( - id=entity.id, - type=SearchItemType.ENTITY.value, - title=_strip_nul(entity.title), - content_stems=entity_content_stems, - content_snippet=content_snippet, - permalink=entity.permalink, + id=obs.id, + type=SearchItemType.OBSERVATION.value, + title=_strip_nul(f"{obs.category}: {obs.content[:100]}..."), + content_stems=obs_content_stems, + content_snippet=_strip_nul(obs.content), + permalink=obs_permalink, file_path=entity.file_path, + category=obs.category, entity_id=entity.id, metadata={ - "note_type": entity.note_type, + "tags": obs.tags, }, created_at=entity.created_at, updated_at=_mtime_to_datetime(entity), @@ -795,79 +789,35 @@ async def index_entity_markdown( ) ) - seen_permalinks: set[str] = {entity.permalink} if 
entity.permalink else set() - for obs in entity.observations: - obs_permalink = obs.permalink - if obs_permalink in seen_permalinks: - logger.debug(f"Skipping duplicate observation permalink: {obs_permalink}") - continue - seen_permalinks.add(obs_permalink) - - obs_content_stems = _strip_nul( - "\n".join( - p for p in self._generate_variants(obs.content) if p and p.strip() - ) - ) - if len(obs_content_stems) > MAX_CONTENT_STEMS_SIZE: # pragma: no cover - obs_content_stems = obs_content_stems[ - :MAX_CONTENT_STEMS_SIZE - ] # pragma: no cover - rows_to_index.append( - SearchIndexRow( - id=obs.id, - type=SearchItemType.OBSERVATION.value, - title=_strip_nul(f"{obs.category}: {obs.content[:100]}..."), - content_stems=obs_content_stems, - content_snippet=_strip_nul(obs.content), - permalink=obs_permalink, - file_path=entity.file_path, - category=obs.category, - entity_id=entity.id, - metadata={ - "tags": obs.tags, - }, - created_at=entity.created_at, - updated_at=_mtime_to_datetime(entity), - project_id=entity.project_id, - ) - ) - - for rel in entity.outgoing_relations: - relation_title = _strip_nul( - f"{rel.from_entity.title} -> {rel.to_entity.title}" - if rel.to_entity - else f"{rel.from_entity.title}" - ) + for rel in entity.outgoing_relations: + relation_title = _strip_nul( + f"{rel.from_entity.title} -> {rel.to_entity.title}" + if rel.to_entity + else f"{rel.from_entity.title}" + ) - rel_content_stems = _strip_nul( - "\n".join( - p for p in self._generate_variants(relation_title) if p and p.strip() - ) - ) - rows_to_index.append( - SearchIndexRow( - id=rel.id, - title=relation_title, - permalink=rel.permalink, - content_stems=rel_content_stems, - file_path=entity.file_path, - type=SearchItemType.RELATION.value, - entity_id=entity.id, - from_id=rel.from_id, - to_id=rel.to_id, - relation_type=rel.relation_type, - created_at=entity.created_at, - updated_at=_mtime_to_datetime(entity), - project_id=entity.project_id, - ) + rel_content_stems = _strip_nul( + "\n".join(p for p in self._generate_variants(relation_title) if p and p.strip()) + ) + rows_to_index.append( + SearchIndexRow( + id=rel.id, + title=relation_title, + permalink=rel.permalink, + content_stems=rel_content_stems, + file_path=entity.file_path, + type=SearchItemType.RELATION.value, + entity_id=entity.id, + from_id=rel.from_id, + to_id=rel.to_id, + relation_type=rel.relation_type, + created_at=entity.created_at, + updated_at=_mtime_to_datetime(entity), + project_id=entity.project_id, ) + ) - with telemetry.scope( - "search.index.bulk_upsert", - phase="bulk_upsert", - result_count=len(rows_to_index), - ): - await self.repository.bulk_index_items(rows_to_index) + await self.repository.bulk_index_items(rows_to_index) async def delete_by_permalink(self, permalink: str): """Delete an item from the search index.""" diff --git a/src/basic_memory/sync/sync_service.py b/src/basic_memory/sync/sync_service.py index c384fb7b..cea2284e 100644 --- a/src/basic_memory/sync/sync_service.py +++ b/src/basic_memory/sync/sync_service.py @@ -15,7 +15,7 @@ from loguru import logger from sqlalchemy.exc import IntegrityError -from basic_memory import telemetry +import logfire from basic_memory import db from basic_memory.config import BasicMemoryConfig, ConfigManager from basic_memory.file_utils import ParseError, compute_checksum, remove_frontmatter @@ -314,7 +314,7 @@ async def sync( start_time = time.time() sync_start_timestamp = time.time() # Capture at start for watermark - with telemetry.operation( + with logfire.span( "sync.project.run", 
project_name=project_name, force_full=force_full, @@ -325,7 +325,7 @@ async def sync( # initial paths from db to sync # path -> checksum - with telemetry.scope("sync.project.scan", force_full=force_full): + with logfire.span("sync.project.scan", force_full=force_full): report = await self.scan(directory, force_full=force_full) # order of sync matters to resolve relations effectively @@ -334,7 +334,7 @@ async def sync( + f"deleted_files={len(report.deleted)}, moved_files={len(report.moves)}" ) - with telemetry.scope( + with logfire.span( "sync.project.apply_changes", new_count=len(report.new), modified_count=len(report.modified), @@ -381,9 +381,7 @@ async def sync( # not only the lightweight diff summary. # Outcome: full reindex can heal relation state even when the diff report is empty. if report.total > 0 or (force_full and indexed_entities): - with telemetry.scope( - "sync.project.resolve_relations", relation_scope="all_pending" - ): + with logfire.span("sync.project.resolve_relations", relation_scope="all_pending"): synced_entity_ids.extend(await self.resolve_relations()) else: logger.info("Skipping relation resolution - no file changes detected") @@ -392,7 +390,7 @@ async def sync( synced_entity_ids = list(dict.fromkeys(synced_entity_ids)) if synced_entity_ids and sync_embeddings and self.app_config.semantic_search_enabled: try: - with telemetry.scope( + with logfire.span( "sync.project.sync_embeddings", entity_count=len(synced_entity_ids), ): @@ -416,7 +414,7 @@ async def sync( # Update scan watermark after successful sync # Use the timestamp from sync start (not end) to ensure we catch files # created during the sync on the next iteration - with telemetry.scope("sync.project.update_watermark"): + with logfire.span("sync.project.update_watermark"): current_file_count = await self._quick_count_files(directory) if self.entity_repository.project_id is not None: project = await self.project_repository.find_by_id( @@ -763,7 +761,7 @@ async def scan(self, directory, force_full: bool = False): if project is None: raise ValueError(f"Project not found: {self.entity_repository.project_id}") - with telemetry.scope("sync.project.select_scan_strategy", force_full=force_full): + with logfire.span("sync.project.select_scan_strategy", force_full=force_full): # Step 1: Quick file count logger.debug("Counting files in directory") current_count = await self._quick_count_files(directory) @@ -807,7 +805,7 @@ async def scan(self, directory, force_full: bool = False): logger.warning("No scan watermark available, falling back to full scan") scan_coro = self._scan_directory_full(directory) - with telemetry.scope("sync.project.filesystem_scan", scan_type=scan_type): + with logfire.span("sync.project.filesystem_scan", scan_type=scan_type): file_paths_to_scan = await scan_coro if scan_type == "incremental": logger.debug( @@ -873,7 +871,7 @@ async def scan(self, directory, force_full: bool = False): # Step 4: Detect moves (for both full and incremental scans) # Check if any "new" files are actually moves by matching checksums - with telemetry.scope("sync.project.detect_moves", new_count=len(report.new)): + with logfire.span("sync.project.detect_moves", new_count=len(report.new)): for new_path in list( report.new ): # Use list() to allow modification during iteration @@ -908,7 +906,7 @@ async def scan(self, directory, force_full: bool = False): # Step 5: Detect deletions (only for full scans) # Incremental scans can't reliably detect deletions since they only see modified files if scan_type in ("full_initial", 
"full_deletions", "full_fallback", "full_forced"): - with telemetry.scope("sync.project.detect_deletions", scan_type=scan_type): + with logfire.span("sync.project.detect_deletions", scan_type=scan_type): # Use optimized query for just file paths (not full entities) db_file_paths = await self.entity_repository.get_all_file_paths() logger.debug(f"Found {len(db_file_paths)} db paths for deletion detection") @@ -998,7 +996,7 @@ async def sync_file( except FileNotFoundError: # File exists in database but not on filesystem # This indicates a database/filesystem inconsistency - treat as deletion - with telemetry.scope( + with logfire.span( "sync.file.failure", failure_type="file_not_found", path=path, @@ -1020,7 +1018,7 @@ async def sync_file( if isinstance(e, SyncFatalError) or isinstance( e.__cause__, SyncFatalError ): # pragma: no cover - with telemetry.scope( + with logfire.span( "sync.file.failure", failure_type=failure_type, path=path, @@ -1033,7 +1031,7 @@ async def sync_file( # Otherwise treat as recoverable file-level error error_msg = str(e) - with telemetry.scope( + with logfire.span( "sync.file.failure", failure_type=failure_type, path=path, @@ -1471,7 +1469,7 @@ async def resolve_relations(self, entity_id: int | None = None) -> set[int]: ) affected_entity_ids.add(relation.from_id) except IntegrityError: - with telemetry.scope( + with logfire.span( "sync.relation.resolve_conflict", relation_id=relation.id, relation_type=relation.relation_type, @@ -1492,7 +1490,7 @@ async def resolve_relations(self, entity_id: int | None = None) -> set[int]: try: await self.relation_repository.delete(relation.id) except Exception as e: - with telemetry.scope( + with logfire.span( "sync.relation.cleanup_failure", relation_id=relation.id, relation_type=relation.relation_type, diff --git a/src/basic_memory/telemetry.py b/src/basic_memory/telemetry.py index aee27cef..b2d9a8d4 100644 --- a/src/basic_memory/telemetry.py +++ b/src/basic_memory/telemetry.py @@ -1,67 +1,20 @@ -"""Optional Logfire telemetry helpers for Basic Memory. +"""Logfire telemetry bootstrap. -Telemetry is disabled by default. When enabled, this module configures Logfire, -exposes a `loguru` handler for trace-aware logging, and provides lightweight -helpers for manual spans and logger context binding. +`configure_telemetry()` wires up the Logfire SDK and returns the loguru +handler. Call sites use `logfire.span(...)` and `logfire.metric_counter(...)` +directly — there are no wrappers here. """ from __future__ import annotations -from contextlib import contextmanager -from dataclasses import dataclass, field -from typing import Any, Iterator +from typing import Any -from loguru import logger +import logfire REPOSITORY_URL = "https://github.com/basicmachines-co/basic-memory" ROOT_PATH = "src/basic_memory" - -def _load_logfire() -> Any | None: - """Load the optional logfire dependency lazily.""" - try: - import logfire - except ImportError: - return None - return logfire - - -@dataclass -class TelemetryState: - """Process-local Logfire configuration state.""" - - enabled: bool = False - configured: bool = False - service_name: str | None = None - environment: str | None = None - send_to_logfire: bool = False - warnings: list[str] = field(default_factory=list) - - -_STATE = TelemetryState() _LOGFIRE_HANDLER: dict[str, Any] | None = None -_METRICS: dict[tuple[str, str, str, str], Any] = {} - - -def reset_telemetry_state() -> None: - """Reset process-local telemetry state. - - Primarily used by tests. 
- """ - global _LOGFIRE_HANDLER - _STATE.enabled = False - _STATE.configured = False - _STATE.service_name = None - _STATE.environment = None - _STATE.send_to_logfire = False - _STATE.warnings.clear() - _LOGFIRE_HANDLER = None - _METRICS.clear() - - -def _filter_attributes(attrs: dict[str, Any]) -> dict[str, Any]: - """Drop null attributes so span and log payloads stay compact.""" - return {key: value for key, value in attrs.items() if value is not None} def configure_telemetry( @@ -73,28 +26,14 @@ def configure_telemetry( send_to_logfire: bool = False, log_level: str = "INFO", ) -> bool: - """Configure optional Logfire instrumentation for the current process.""" + """Configure Logfire for the current process. Returns True when enabled.""" global _LOGFIRE_HANDLER - - reset_telemetry_state() - _STATE.service_name = service_name - _STATE.environment = environment - _STATE.send_to_logfire = send_to_logfire - _STATE.enabled = enable_logfire + _LOGFIRE_HANDLER = None if not enable_logfire: return False - logfire = _load_logfire() - if logfire is None: - _STATE.enabled = False - _STATE.warnings.append( - "Logfire telemetry was enabled but the 'logfire' package is not installed. " - "Telemetry remains disabled." - ) - return False - - configure_kwargs = { + kwargs: dict[str, Any] = { "service_name": service_name, "environment": environment, "code_source": logfire.CodeSource( @@ -107,139 +46,19 @@ def configure_telemetry( } try: - logfire.configure(**configure_kwargs) + logfire.configure(**kwargs) except TypeError: - configure_kwargs.pop("send_to_logfire", None) - logfire.configure(**configure_kwargs) - except Exception as exc: # pragma: no cover - _STATE.enabled = False # pragma: no cover - _STATE.warnings.append(f"Failed to configure Logfire telemetry: {exc}") # pragma: no cover - return False # pragma: no cover + # Older logfire releases don't accept send_to_logfire as a keyword. 
+ kwargs.pop("send_to_logfire", None) + logfire.configure(**kwargs) _LOGFIRE_HANDLER = logfire.loguru_handler() - _STATE.configured = True return True -def telemetry_enabled() -> bool: - """Return True when telemetry is both enabled and configured.""" - return _STATE.enabled and _STATE.configured - - def get_logfire_handler() -> dict[str, Any] | None: - """Return the active Logfire `loguru` handler, if any.""" + """Return the active Logfire loguru handler, if any.""" return _LOGFIRE_HANDLER -def pop_telemetry_warnings() -> list[str]: - """Return and clear pending telemetry warnings.""" - warnings = list(_STATE.warnings) - _STATE.warnings.clear() - return warnings - - -def _get_metric(metric_type: str, name: str, *, unit: str, description: str) -> Any | None: - """Create or reuse a Logfire metric instrument when telemetry is enabled.""" - logfire = _load_logfire() - if logfire is None or not _STATE.configured: # pragma: no cover - return None # pragma: no cover - - metric_key = (metric_type, name, unit, description) - cached_metric = _METRICS.get(metric_key) - if cached_metric is not None: - return cached_metric - - if metric_type == "counter": - metric = logfire.metric_counter(name, unit=unit, description=description) - elif metric_type == "histogram": - metric = logfire.metric_histogram(name, unit=unit, description=description) - else: # pragma: no cover - raise ValueError(f"Unsupported metric type: {metric_type}") # pragma: no cover - - _METRICS[metric_key] = metric - return metric - - -def add_counter( - name: str, - amount: int | float, - *, - unit: str = "1", - description: str = "", - **attrs: Any, -) -> None: - """Record a counter increment when telemetry is enabled.""" - metric = _get_metric("counter", name, unit=unit, description=description) - if metric is None: - return - metric.add(amount, attributes=_filter_attributes(attrs)) - - -def record_histogram( - name: str, - amount: int | float, - *, - unit: str = "", - description: str = "", - **attrs: Any, -) -> None: - """Record one histogram sample when telemetry is enabled.""" - metric = _get_metric("histogram", name, unit=unit, description=description) - if metric is None: - return - metric.record(amount, attributes=_filter_attributes(attrs)) - - -@contextmanager -def contextualize(**attrs: Any) -> Iterator[None]: - """Apply filtered telemetry attributes to Loguru calls in this scope.""" - with logger.contextualize(**_filter_attributes(attrs)): - yield - - -@contextmanager -def scope(name: str, **attrs: Any) -> Iterator[None]: - """Create a span and bind the same stable attributes into Loguru context.""" - with contextualize(**attrs): - with span(name, **attrs): - yield - - -# Alias: `operation` signals a root-level boundary (entrypoint, tool invocation), -# while `scope` signals a nested phase. The distinction is convention only. 
-operation = scope - - -@contextmanager -def span(name: str, **attrs: Any) -> Iterator[None]: - """Create a manual Logfire span when telemetry is enabled.""" - with started_span(name, **attrs): - yield - - -@contextmanager -def started_span(name: str, **attrs: Any) -> Iterator[Any | None]: - """Create a manual Logfire span and expose the active span handle when available.""" - logfire = _load_logfire() - if logfire is None or not _STATE.configured: # pragma: no cover - yield # pragma: no cover - return # pragma: no cover - - with logfire.span(name, **_filter_attributes(attrs)) as active_span: - yield active_span - - -__all__ = [ - "add_counter", - "contextualize", - "configure_telemetry", - "get_logfire_handler", - "operation", - "pop_telemetry_warnings", - "record_histogram", - "reset_telemetry_state", - "scope", - "span", - "started_span", - "telemetry_enabled", -] +__all__ = ["configure_telemetry", "get_logfire_handler"] diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 74b02896..1645af4f 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -327,9 +327,6 @@ def setup_logging( logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("watchfiles.main").setLevel(logging.WARNING) - for warning_message in telemetry.pop_telemetry_warnings(): - logger.warning(warning_message) - def _cleanup_windows_log_files(log_dir: Path, current_log_name: str) -> None: """Trim stale per-process Windows log files so the directory stays bounded.""" diff --git a/tests/api/v2/test_knowledge_router_telemetry.py b/tests/api/v2/test_knowledge_router_telemetry.py index a203a2a7..e74838c9 100644 --- a/tests/api/v2/test_knowledge_router_telemetry.py +++ b/tests/api/v2/test_knowledge_router_telemetry.py @@ -8,6 +8,7 @@ from types import SimpleNamespace from typing import Any, cast +import logfire import pytest from fastapi import Response @@ -55,7 +56,7 @@ def _assert_only_root_span(spans: list[tuple[str, dict]], expected_name: str) -> @pytest.mark.asyncio async def test_create_entity_emits_only_root_span(monkeypatch) -> None: spans, fake_span = _capture_spans() - monkeypatch.setattr(knowledge_router_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) entity = _fake_entity() response_content = ( @@ -101,7 +102,7 @@ def schedule(self, *args, **kwargs): @pytest.mark.asyncio async def test_update_entity_emits_only_root_span(monkeypatch) -> None: spans, fake_span = _capture_spans() - monkeypatch.setattr(knowledge_router_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) entity = _fake_entity() response_content = "---\ntitle: Telemetry Entity\ntype: note\npermalink: notes/test\n---\n\nupdated telemetry content" @@ -153,7 +154,7 @@ def schedule(self, *args, **kwargs): @pytest.mark.asyncio async def test_edit_entity_emits_only_root_span(monkeypatch) -> None: spans, fake_span = _capture_spans() - monkeypatch.setattr(knowledge_router_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) entity = _fake_entity() response_content = "---\ntitle: Telemetry Entity\ntype: note\npermalink: notes/test\n---\n\nedited telemetry content" diff --git a/tests/api/v2/test_search_router_telemetry.py b/tests/api/v2/test_search_router_telemetry.py index eb53bd55..92893843 100644 --- a/tests/api/v2/test_search_router_telemetry.py +++ b/tests/api/v2/test_search_router_telemetry.py @@ -6,6 +6,7 @@ from contextlib import contextmanager from typing import Any, cast +import logfire import 
pytest from basic_memory.schemas.search import SearchQuery @@ -14,7 +15,7 @@ @pytest.mark.asyncio -async def test_search_router_wraps_request_in_manual_operation() -> None: +async def test_search_router_wraps_request_in_manual_operation(monkeypatch) -> None: router = cast(Any, search_router_module) operations: list[tuple[str, dict]] = [] @@ -23,43 +24,37 @@ async def search(self, query, *, limit, offset): return [] @contextmanager - def fake_operation(name: str, **attrs): + def fake_span(name: str, **attrs): operations.append((name, attrs)) yield async def fake_to_search_results(entity_service, results): return [] - original_operation = router.telemetry.operation - original_to_search_results = router.to_search_results - router.telemetry.operation = fake_operation - router.to_search_results = fake_to_search_results - try: - response = await router.search( - SearchQuery(text="hello world"), - FakeSearchService(), - object(), - project_id=123, - page=2, - page_size=5, - ) - finally: - router.telemetry.operation = original_operation - router.to_search_results = original_to_search_results + monkeypatch.setattr(logfire, "span", fake_span) + monkeypatch.setattr(router, "to_search_results", fake_to_search_results) + + response = await router.search( + SearchQuery(text="hello world"), + FakeSearchService(), + object(), + project_id=123, + page=2, + page_size=5, + ) assert response.current_page == 2 - assert operations == [ - ( - "api.request.search", - { - "entrypoint": "api", - "domain": "search", - "action": "search", - "page": 2, - "page_size": 5, - "retrieval_mode": "fts", - "has_query": True, - "has_filters": False, - }, - ) - ] + # The root span fires with these attrs; additional nested spans may fire as well. + assert operations[0] == ( + "api.request.search", + { + "entrypoint": "api", + "domain": "search", + "action": "search", + "page": 2, + "page_size": 5, + "retrieval_mode": "fts", + "has_query": True, + "has_filters": False, + }, + ) diff --git a/tests/api/v2/test_utils_telemetry.py b/tests/api/v2/test_utils_telemetry.py index 6b280cff..440d60fe 100644 --- a/tests/api/v2/test_utils_telemetry.py +++ b/tests/api/v2/test_utils_telemetry.py @@ -7,6 +7,7 @@ from datetime import datetime, timezone from types import SimpleNamespace +import logfire import pytest from basic_memory.repository.search_index_row import SearchIndexRow @@ -28,7 +29,7 @@ def fake_span(name: str, **attrs): @pytest.mark.asyncio async def test_to_search_results_emits_hydration_spans(monkeypatch) -> None: spans, fake_span = _capture_spans() - monkeypatch.setattr(utils_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) class FakeEntityService: async def get_entities_by_id(self, ids): diff --git a/tests/cli/test_cli_telemetry.py b/tests/cli/test_cli_telemetry.py index c1330f9e..1e3ab5ae 100644 --- a/tests/cli/test_cli_telemetry.py +++ b/tests/cli/test_cli_telemetry.py @@ -4,6 +4,8 @@ from typing import Any, cast +import logfire + from basic_memory.cli import app as cli_app @@ -33,11 +35,11 @@ def test_app_callback_registers_command_operation(monkeypatch) -> None: monkeypatch.setattr(cli_app, "maybe_show_cloud_promo", lambda command_name: None) monkeypatch.setattr(cli_app, "maybe_run_periodic_auto_update", lambda command_name: None) - def fake_operation(name: str, **attrs): + def fake_span(name: str, **attrs): operations.append((name, attrs)) return resource - monkeypatch.setattr(cli_app.telemetry, "operation", fake_operation) + monkeypatch.setattr(logfire, "span", fake_span) ctx = 
FakeContext(invoked_subcommand="status") cli_app.app_callback(cast(Any, ctx), version=None) diff --git a/tests/mcp/test_async_client_telemetry.py b/tests/mcp/test_async_client_telemetry.py index 45432c74..57dd9003 100644 --- a/tests/mcp/test_async_client_telemetry.py +++ b/tests/mcp/test_async_client_telemetry.py @@ -6,6 +6,7 @@ from contextlib import contextmanager from types import SimpleNamespace +import logfire import pytest async_client_module = importlib.import_module("basic_memory.mcp.async_client") @@ -29,7 +30,7 @@ def fake_span(name: str, **attrs): spans.append((name, attrs)) yield - monkeypatch.setattr(async_client_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) monkeypatch.setattr("basic_memory.cli.auth.CLIAuth", FakeAuth) monkeypatch.setattr(async_client_module.logger, "error", error_messages.append) diff --git a/tests/mcp/test_client_telemetry.py b/tests/mcp/test_client_telemetry.py index 609261ef..96891aa0 100644 --- a/tests/mcp/test_client_telemetry.py +++ b/tests/mcp/test_client_telemetry.py @@ -6,6 +6,7 @@ from contextlib import contextmanager import httpx +import logfire import pytest from mcp.server.fastmcp.exceptions import ToolError @@ -20,29 +21,23 @@ def _capture_spans(): @contextmanager def fake_span(name: str, **attrs): spans.append((name, attrs)) - yield - @contextmanager - def fake_started_span(name: str, **attrs): - spans.append((name, attrs)) - - class FakeStartedSpan: + class FakeSpan: def set_attribute(self, key: str, value) -> None: attrs[key] = value def set_attributes(self, new_attrs: dict) -> None: attrs.update(new_attrs) - yield FakeStartedSpan() + yield FakeSpan() - return spans, fake_span, fake_started_span + return spans, fake_span @pytest.mark.asyncio async def test_knowledge_client_resolve_entity_emits_client_and_http_spans(monkeypatch) -> None: - spans, fake_span, fake_started_span = _capture_spans() - monkeypatch.setattr(knowledge_client_module.telemetry, "span", fake_span) - monkeypatch.setattr(utils_module.telemetry, "started_span", fake_started_span) + spans, fake_span = _capture_spans() + monkeypatch.setattr(logfire, "span", fake_span) async def handler(request: httpx.Request) -> httpx.Response: assert request.method == "POST" @@ -74,9 +69,8 @@ async def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio async def test_search_client_emits_client_and_http_spans(monkeypatch) -> None: - spans, fake_span, fake_started_span = _capture_spans() - monkeypatch.setattr(search_client_module.telemetry, "span", fake_span) - monkeypatch.setattr(utils_module.telemetry, "started_span", fake_started_span) + spans, fake_span = _capture_spans() + monkeypatch.setattr(logfire, "span", fake_span) async def handler(request: httpx.Request) -> httpx.Response: assert request.method == "POST" @@ -116,8 +110,8 @@ async def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio async def test_call_get_emits_http_outcome_for_client_errors(monkeypatch) -> None: - spans, _, fake_started_span = _capture_spans() - monkeypatch.setattr(utils_module.telemetry, "started_span", fake_started_span) + spans, fake_span = _capture_spans() + monkeypatch.setattr(logfire, "span", fake_span) async def handler(request: httpx.Request) -> httpx.Response: assert request.method == "GET" @@ -155,8 +149,8 @@ async def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio async def test_call_get_emits_transport_error_outcome(monkeypatch) -> None: - spans, _, fake_started_span = _capture_spans() - 
monkeypatch.setattr(utils_module.telemetry, "started_span", fake_started_span) + spans, fake_span = _capture_spans() + monkeypatch.setattr(logfire, "span", fake_span) async def handler(request: httpx.Request) -> httpx.Response: raise httpx.ConnectError("boom", request=request) diff --git a/tests/mcp/test_project_context_telemetry.py b/tests/mcp/test_project_context_telemetry.py index 1863bc76..6f2c1132 100644 --- a/tests/mcp/test_project_context_telemetry.py +++ b/tests/mcp/test_project_context_telemetry.py @@ -6,6 +6,7 @@ from contextlib import contextmanager from typing import Any, cast +import logfire import pytest from basic_memory.config import ProjectEntry @@ -27,26 +28,20 @@ async def set_state(self, key: str, value: object, **kwargs) -> None: self._state[key] = value -def _capture_telemetry(): +def _capture_spans(): spans: list[tuple[str, dict]] = [] - contexts: list[dict] = [] @contextmanager def fake_span(name: str, **attrs): spans.append((name, attrs)) yield - @contextmanager - def fake_contextualize(**attrs): - contexts.append(attrs) - yield - - return spans, contexts, fake_span, fake_contextualize + return spans, fake_span @pytest.mark.asyncio async def test_resolve_workspace_parameter_emits_routing_span(monkeypatch) -> None: - spans, _, fake_span, _ = _capture_telemetry() + spans, fake_span = _capture_spans() context = _ContextState() workspace = WorkspaceInfo( tenant_id="11111111-1111-1111-1111-111111111111", @@ -58,7 +53,7 @@ async def test_resolve_workspace_parameter_emits_routing_span(monkeypatch) -> No async def fake_get_available_workspaces(context=None): return [workspace] - monkeypatch.setattr(project_context.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) monkeypatch.setattr(project_context, "get_available_workspaces", fake_get_available_workspaces) resolved = await project_context.resolve_workspace_parameter(context=cast(Any, context)) @@ -74,15 +69,14 @@ async def fake_get_available_workspaces(context=None): @pytest.mark.asyncio async def test_get_project_client_contextualizes_route_mode(config_manager, monkeypatch) -> None: - spans, contexts, fake_span, fake_contextualize = _capture_telemetry() + spans, fake_span = _capture_spans() config = config_manager.load_config() (config_manager.config_dir.parent / "main").mkdir(parents=True, exist_ok=True) config.projects["main"] = ProjectEntry(path=str(config_manager.config_dir.parent / "main")) config_manager.save_config(config) - monkeypatch.setattr(project_context.telemetry, "span", fake_span) - monkeypatch.setattr(project_context.telemetry, "contextualize", fake_contextualize) + monkeypatch.setattr(logfire, "span", fake_span) with pytest.raises(Exception): async with project_context.get_project_client(project="main"): @@ -92,4 +86,9 @@ async def test_get_project_client_contextualizes_route_mode(config_manager, monk assert "routing.resolve_project" in span_names assert "routing.client_session" in span_names assert "routing.validate_project" in span_names - assert {"project_name": "main", "route_mode": "local_asgi"} in contexts + # route_mode + project_name are now carried as span attrs on routing.client_session + client_session_attrs = [attrs for name, attrs in spans if name == "routing.client_session"] + assert any( + attrs.get("project_name") == "main" and attrs.get("route_mode") == "local_asgi" + for attrs in client_session_attrs + ) diff --git a/tests/mcp/test_server_telemetry.py b/tests/mcp/test_server_telemetry.py index 69c58c6c..65968d94 100644 --- 
a/tests/mcp/test_server_telemetry.py +++ b/tests/mcp/test_server_telemetry.py @@ -4,30 +4,26 @@ from contextlib import contextmanager +import logfire import pytest from basic_memory.mcp.server import lifespan, mcp -import basic_memory.mcp.server as server_module @pytest.mark.asyncio -async def test_mcp_lifespan_wraps_startup_and_shutdown(config_manager) -> None: - operations: list[tuple[str, dict]] = [] +async def test_mcp_lifespan_wraps_startup_and_shutdown(config_manager, monkeypatch) -> None: + spans: list[tuple[str, dict]] = [] @contextmanager - def fake_operation(name: str, **attrs): - operations.append((name, attrs)) + def fake_span(name: str, **attrs): + spans.append((name, attrs)) yield - original_operation = server_module.telemetry.operation - server_module.telemetry.operation = fake_operation - try: - async with lifespan(mcp): - pass - finally: - server_module.telemetry.operation = original_operation - - assert [name for name, _ in operations] == [ - "mcp.lifecycle.startup", - "mcp.lifecycle.shutdown", - ] + monkeypatch.setattr(logfire, "span", fake_span) + + async with lifespan(mcp): + pass + + span_names = [name for name, _ in spans] + assert "mcp.lifecycle.startup" in span_names + assert "mcp.lifecycle.shutdown" in span_names diff --git a/tests/mcp/test_tool_telemetry.py b/tests/mcp/test_tool_telemetry.py index 5c0fedf4..08348f05 100644 --- a/tests/mcp/test_tool_telemetry.py +++ b/tests/mcp/test_tool_telemetry.py @@ -5,6 +5,7 @@ import importlib from contextlib import contextmanager +import logfire import pytest build_context_module = importlib.import_module("basic_memory.mcp.tools.build_context") @@ -14,34 +15,39 @@ write_note_module = importlib.import_module("basic_memory.mcp.tools.write_note") -def _recording_contexts(): - operations: list[tuple[str, dict]] = [] - contexts: list[dict] = [] +class _NoopSpan: + """Minimal stand-in for a live logfire span during tests.""" - @contextmanager - def fake_operation(name: str, **attrs): - operations.append((name, attrs)) - yield + def set_attributes(self, attrs: dict) -> None: + pass + + def set_attribute(self, key: str, value) -> None: + pass + + +def _recording_spans(): + spans: list[tuple[str, dict]] = [] @contextmanager - def fake_contextualize(**attrs): - contexts.append(attrs) - yield + def fake_span(name: str, **attrs): + spans.append((name, attrs)) + yield _NoopSpan() - return operations, contexts, fake_operation, fake_contextualize + return spans, fake_span -def _contains_context(contexts: list[dict], expected: dict) -> bool: - return any(expected.items() <= context.items() for context in contexts) +def _contains_span_attrs(spans: list[tuple[str, dict]], name: str, expected: dict) -> bool: + return any( + span_name == name and expected.items() <= attrs.items() for span_name, attrs in spans + ) @pytest.mark.asyncio async def test_write_note_emits_root_operation_and_project_context( app, test_project, monkeypatch ) -> None: - operations, contexts, fake_operation, fake_contextualize = _recording_contexts() - monkeypatch.setattr(write_note_module.telemetry, "operation", fake_operation) - monkeypatch.setattr(write_note_module.telemetry, "contextualize", fake_contextualize) + spans, fake_span = _recording_spans() + monkeypatch.setattr(logfire, "span", fake_span) await write_note_module.write_note( project=test_project.name, @@ -51,7 +57,7 @@ async def test_write_note_emits_root_operation_and_project_context( output_format="json", ) - assert operations[0] == ( + assert spans[0] == ( "mcp.tool.write_note", { "entrypoint": "mcp", @@ 
-63,21 +69,16 @@ async def test_write_note_emits_root_operation_and_project_context( "output_format": "json", }, ) - assert "api.request.knowledge.create_entity" in [name for name, _ in operations] - assert _contains_context( - contexts, + span_names = [name for name, _ in spans] + assert "api.request.knowledge.create_entity" in span_names + assert _contains_span_attrs( + spans, + "routing.client_session", { "project_name": test_project.name, "route_mode": "local_asgi", }, ) - assert _contains_context( - contexts, - { - "project_name": test_project.name, - "tool_name": "write_note", - }, - ) @pytest.mark.asyncio @@ -91,9 +92,8 @@ async def test_read_note_emits_root_operation_and_project_context( content="Readable telemetry content", ) - operations, contexts, fake_operation, fake_contextualize = _recording_contexts() - monkeypatch.setattr(read_note_module.telemetry, "operation", fake_operation) - monkeypatch.setattr(read_note_module.telemetry, "contextualize", fake_contextualize) + spans, fake_span = _recording_spans() + monkeypatch.setattr(logfire, "span", fake_span) await read_note_module.read_note( "notes/readable-telemetry-note", @@ -102,7 +102,7 @@ async def test_read_note_emits_root_operation_and_project_context( include_frontmatter=True, ) - assert operations[0] == ( + assert spans[0] == ( "mcp.tool.read_note", { "entrypoint": "mcp", @@ -115,24 +115,18 @@ async def test_read_note_emits_root_operation_and_project_context( "include_frontmatter": True, }, ) - operation_names = [name for name, _ in operations] - assert "api.request.knowledge.resolve_entity" in operation_names - assert "api.request.resource.get_content" in operation_names - assert "api.request.knowledge.get_entity" in operation_names - assert _contains_context( - contexts, + span_names = [name for name, _ in spans] + assert "api.request.knowledge.resolve_entity" in span_names + assert "api.request.resource.get_content" in span_names + assert "api.request.knowledge.get_entity" in span_names + assert _contains_span_attrs( + spans, + "routing.client_session", { "project_name": test_project.name, "route_mode": "local_asgi", }, ) - assert _contains_context( - contexts, - { - "project_name": test_project.name, - "tool_name": "read_note", - }, - ) @pytest.mark.asyncio @@ -147,9 +141,8 @@ async def test_search_notes_emits_root_operation_and_project_context( tags=["telemetry"], ) - operations, contexts, fake_operation, fake_contextualize = _recording_contexts() - monkeypatch.setattr(search_module.telemetry, "operation", fake_operation) - monkeypatch.setattr(search_module.telemetry, "contextualize", fake_contextualize) + spans, fake_span = _recording_spans() + monkeypatch.setattr(logfire, "span", fake_span) await search_module.search_notes( project=test_project.name, @@ -159,7 +152,7 @@ async def test_search_notes_emits_root_operation_and_project_context( output_format="json", ) - assert operations[0] == ( + assert spans[0] == ( "mcp.tool.search_notes", { "entrypoint": "mcp", @@ -178,21 +171,16 @@ async def test_search_notes_emits_root_operation_and_project_context( "has_status_filter": False, }, ) - assert ("api.request.search",) == (operations[1][0],) - assert _contains_context( - contexts, + span_names = [name for name, _ in spans] + assert "api.request.search" in span_names + assert _contains_span_attrs( + spans, + "routing.client_session", { "project_name": test_project.name, "route_mode": "local_asgi", }, ) - assert _contains_context( - contexts, - { - "project_name": test_project.name, - "tool_name": "search_notes", - }, - ) 
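+    # The tool_name log-context assertions were dropped along with telemetry.contextualize().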
@pytest.mark.asyncio @@ -206,9 +194,8 @@ async def test_edit_note_emits_root_operation_and_project_context( content="Original telemetry content", ) - operations, contexts, fake_operation, fake_contextualize = _recording_contexts() - monkeypatch.setattr(edit_note_module.telemetry, "operation", fake_operation) - monkeypatch.setattr(edit_note_module.telemetry, "contextualize", fake_contextualize) + spans, fake_span = _recording_spans() + monkeypatch.setattr(logfire, "span", fake_span) await edit_note_module.edit_note( "notes/editable-telemetry-note", @@ -218,7 +205,7 @@ async def test_edit_note_emits_root_operation_and_project_context( output_format="json", ) - assert operations[0] == ( + assert spans[0] == ( "mcp.tool.edit_note", { "entrypoint": "mcp", @@ -232,23 +219,17 @@ async def test_edit_note_emits_root_operation_and_project_context( "expected_replacements": 1, }, ) - operation_names = [name for name, _ in operations] - assert "api.request.knowledge.resolve_entity" in operation_names - assert "api.request.knowledge.edit_entity" in operation_names - assert _contains_context( - contexts, + span_names = [name for name, _ in spans] + assert "api.request.knowledge.resolve_entity" in span_names + assert "api.request.knowledge.edit_entity" in span_names + assert _contains_span_attrs( + spans, + "routing.client_session", { "project_name": test_project.name, "route_mode": "local_asgi", }, ) - assert _contains_context( - contexts, - { - "project_name": test_project.name, - "tool_name": "edit_note", - }, - ) @pytest.mark.asyncio @@ -262,9 +243,8 @@ async def test_build_context_emits_root_operation_and_project_context( content="Context telemetry content", ) - operations, contexts, fake_operation, fake_contextualize = _recording_contexts() - monkeypatch.setattr(build_context_module.telemetry, "operation", fake_operation) - monkeypatch.setattr(build_context_module.telemetry, "contextualize", fake_contextualize) + spans, fake_span = _recording_spans() + monkeypatch.setattr(logfire, "span", fake_span) await build_context_module.build_context( url="memory://notes/context-telemetry-note", @@ -277,7 +257,7 @@ async def test_build_context_emits_root_operation_and_project_context( output_format="json", ) - assert operations[0] == ( + assert spans[0] == ( "mcp.tool.build_context", { "entrypoint": "mcp", @@ -293,18 +273,13 @@ async def test_build_context_emits_root_operation_and_project_context( "is_memory_url": True, }, ) - assert "api.request.memory.build_context" in [name for name, _ in operations] - assert _contains_context( - contexts, + span_names = [name for name, _ in spans] + assert "api.request.memory.build_context" in span_names + assert _contains_span_attrs( + spans, + "routing.client_session", { "project_name": test_project.name, "route_mode": "local_asgi", }, ) - assert _contains_context( - contexts, - { - "project_name": test_project.name, - "tool_name": "build_context", - }, - ) diff --git a/tests/mcp/test_tool_utils.py b/tests/mcp/test_tool_utils.py index 69526c11..3f689321 100644 --- a/tests/mcp/test_tool_utils.py +++ b/tests/mcp/test_tool_utils.py @@ -24,6 +24,7 @@ def __init__(self, status_code=200): self.status_code = status_code self.is_success = status_code < 400 self.json = lambda: {} + self.headers = {"content-type": "application/json"} def raise_for_status(self): if self.status_code >= 400: diff --git a/tests/mcp/test_tool_utils_cloud_auth.py b/tests/mcp/test_tool_utils_cloud_auth.py index b08532f6..dc182b19 100644 --- a/tests/mcp/test_tool_utils_cloud_auth.py +++ 
b/tests/mcp/test_tool_utils_cloud_auth.py @@ -14,6 +14,7 @@ def __init__(self, status_code: int, payload: dict): self.status_code = status_code self._payload = payload self.is_success = status_code < 400 + self.headers = {"content-type": "application/json"} def json(self): return self._payload diff --git a/tests/repository/test_semantic_search_base.py b/tests/repository/test_semantic_search_base.py index 1e8a58db..6998d199 100644 --- a/tests/repository/test_semantic_search_base.py +++ b/tests/repository/test_semantic_search_base.py @@ -696,17 +696,31 @@ async def _stub_flush(flush_jobs, entity_runtime, synced_entity_ids): counter_calls: list[tuple[str, float, dict]] = [] perf_counter_values = iter([0.0, 3.0, 4.5, 6.0]) + class _FakeHistogram: + def __init__(self, name: str) -> None: + self.name = name + + def record(self, amount, attributes=None) -> None: + histogram_calls.append((self.name, amount, attributes or {})) + + class _FakeCounter: + def __init__(self, name: str) -> None: + self.name = name + + def add(self, amount, attributes=None) -> None: + counter_calls.append((self.name, amount, attributes or {})) + monkeypatch.setattr(repo, "_prepare_entity_vector_jobs_window", _stub_prepare_window) monkeypatch.setattr(repo, "_flush_embedding_jobs", _stub_flush) monkeypatch.setattr( - search_repository_base_module.telemetry, - "record_histogram", - lambda name, amount, **attrs: histogram_calls.append((name, amount, attrs)), + search_repository_base_module.logfire, + "metric_histogram", + lambda name, **kwargs: _FakeHistogram(name), ) monkeypatch.setattr( - search_repository_base_module.telemetry, - "add_counter", - lambda name, amount, **attrs: counter_calls.append((name, amount, attrs)), + search_repository_base_module.logfire, + "metric_counter", + lambda name, **kwargs: _FakeCounter(name), ) monkeypatch.setattr( search_repository_base_module.time, @@ -716,12 +730,14 @@ async def _stub_flush(flush_jobs, entity_runtime, synced_entity_ids): result = await repo.sync_entity_vectors_batch([1, 2]) + # Batch-level histograms record once per batch using aggregated totals + # from VectorSyncBatchResult — not per entity. See _sync_entity_vectors_internal. 
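+    # A single batch ran here, so each phase histogram should record exactly once.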
assert result.entities_synced == 2 histogram_names = [name for name, _, _ in histogram_calls] - assert histogram_names.count("vector_sync_prepare_seconds") == 2 - assert histogram_names.count("vector_sync_queue_wait_seconds") == 2 - assert histogram_names.count("vector_sync_embed_seconds") == 2 - assert histogram_names.count("vector_sync_write_seconds") == 2 + assert histogram_names.count("vector_sync_prepare_seconds") == 1 + assert histogram_names.count("vector_sync_queue_wait_seconds") == 1 + assert histogram_names.count("vector_sync_embed_seconds") == 1 + assert histogram_names.count("vector_sync_write_seconds") == 1 assert histogram_names.count("vector_sync_batch_total_seconds") == 1 assert [name for name, _, _ in counter_calls].count("vector_sync_entities_total") == 1 diff --git a/tests/services/test_entity_service_telemetry.py b/tests/services/test_entity_service_telemetry.py index 6e907296..4c62149d 100644 --- a/tests/services/test_entity_service_telemetry.py +++ b/tests/services/test_entity_service_telemetry.py @@ -2,15 +2,13 @@ from __future__ import annotations -import importlib from contextlib import contextmanager +import logfire import pytest from basic_memory.schemas import Entity as EntitySchema -telemetry_module = importlib.import_module("basic_memory.telemetry") - def _capture_spans(): spans: list[tuple[str, dict]] = [] @@ -30,7 +28,7 @@ def _span_names(spans: list[tuple[str, dict]]) -> list[str]: @pytest.mark.asyncio async def test_create_entity_emits_file_write_span(entity_service, monkeypatch) -> None: spans, fake_span = _capture_spans() - monkeypatch.setattr(telemetry_module, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) schema = EntitySchema( title="Telemetry Create", @@ -59,7 +57,7 @@ async def test_edit_entity_emits_file_read_and_write_spans(entity_service, monke ) spans, fake_span = _capture_spans() - monkeypatch.setattr(telemetry_module, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) updated = await entity_service.edit_entity( created.file_path, diff --git a/tests/services/test_search_service_telemetry.py b/tests/services/test_search_service_telemetry.py index 7150b5d5..1a32b88a 100644 --- a/tests/services/test_search_service_telemetry.py +++ b/tests/services/test_search_service_telemetry.py @@ -2,15 +2,12 @@ from __future__ import annotations -import importlib from contextlib import contextmanager import pytest from basic_memory.schemas.search import SearchQuery -search_service_module = importlib.import_module("basic_memory.services.search_service") - def _capture_spans(): spans: list[tuple[str, dict]] = [] @@ -25,34 +22,31 @@ def fake_span(name: str, **attrs): @pytest.mark.asyncio async def test_search_service_wraps_repository_search(search_service, monkeypatch) -> None: + import logfire + spans, fake_span = _capture_spans() - monkeypatch.setattr(search_service_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) await search_service.search(SearchQuery(text="Root Entity")) - assert spans[0] == ( - "search.execute", - { - "retrieval_mode": "fts", - "has_query": True, - "has_filters": False, - "limit": 10, - "offset": 0, - }, - ) - assert spans[1] == ( - "search.repository_query", - { - "retrieval_mode": "fts", - "phase": "repository_query", - "has_query": True, - "has_filters": False, - }, - ) + assert spans == [ + ( + "search.execute", + { + "retrieval_mode": "fts", + "has_query": True, + "has_filters": False, + "limit": 10, + "offset": 0, + }, + ), + ] @pytest.mark.asyncio async def 
test_search_service_emits_relaxed_retry_span(search_service, monkeypatch) -> None: + import logfire + spans, fake_span = _capture_spans() calls: list[dict] = [] @@ -60,18 +54,16 @@ async def fake_repository_search(**kwargs): calls.append(kwargs) return [] - monkeypatch.setattr(search_service_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) monkeypatch.setattr(search_service.repository, "search", fake_repository_search) await search_service.search(SearchQuery(text="who are our main competitors and partners")) assert [name for name, _ in spans] == [ "search.execute", - "search.repository_query", "search.relaxed_fts_retry", - "search.repository_query", ] - assert spans[2] == ( + assert spans[1] == ( "search.relaxed_fts_retry", { "retrieval_mode": "fts", diff --git a/tests/sync/test_sync_service_telemetry.py b/tests/sync/test_sync_service_telemetry.py index 1f59964b..dba0a25c 100644 --- a/tests/sync/test_sync_service_telemetry.py +++ b/tests/sync/test_sync_service_telemetry.py @@ -9,6 +9,7 @@ from types import SimpleNamespace from unittest.mock import AsyncMock +import logfire import pytest from basic_memory.sync.sync_service import SyncReport @@ -17,21 +18,15 @@ sync_service_module = importlib.import_module("basic_memory.sync.sync_service") -def _capture_sync_telemetry(): - operations: list[tuple[str, dict]] = [] +def _capture_spans(): spans: list[tuple[str, dict]] = [] - @contextmanager - def fake_operation(name: str, **attrs): - operations.append((name, attrs)) - yield - @contextmanager def fake_span(name: str, **attrs): spans.append((name, attrs)) yield - return operations, spans, fake_operation, fake_span + return spans, fake_span def _write_markdown(project_root: Path, relative_path: str, content: str) -> Path: @@ -44,7 +39,7 @@ def _write_markdown(project_root: Path, relative_path: str, content: str) -> Pat @pytest.mark.asyncio async def test_sync_emits_phase_spans(sync_service, project_config, monkeypatch) -> None: - operations, spans, fake_operation, fake_span = _capture_sync_telemetry() + spans, fake_span = _capture_spans() report = SyncReport( new={"new.md"}, modified={"modified.md"}, @@ -76,8 +71,7 @@ async def fake_find_by_id(project_id): async def fake_update(project_id, values): return None - monkeypatch.setattr(sync_service_module.telemetry, "operation", fake_operation) - monkeypatch.setattr(sync_service_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) monkeypatch.setattr(sync_service, "scan", fake_scan) monkeypatch.setattr(sync_service, "handle_move", fake_handle_move) monkeypatch.setattr(sync_service, "handle_delete", fake_handle_delete) @@ -95,26 +89,24 @@ async def fake_update(project_id, values): ) assert result is report - assert operations == [ - ( - "sync.project.run", - {"project_name": project_config.name, "force_full": True}, - ) - ] - assert [name for name, _ in spans] == [ - "sync.project.scan", - "sync.project.apply_changes", - "sync.project.resolve_relations", - "sync.project.update_watermark", - ] + span_names = [name for name, _ in spans] + assert "sync.project.run" in span_names + assert "sync.project.scan" in span_names + assert "sync.project.apply_changes" in span_names + assert "sync.project.resolve_relations" in span_names + assert "sync.project.update_watermark" in span_names + # The root span carries the project_name/force_full attrs previously on operation() + root_span_attrs = next(attrs for name, attrs in spans if name == "sync.project.run") + assert 
root_span_attrs["project_name"] == project_config.name + assert root_span_attrs["force_full"] is True @pytest.mark.asyncio async def test_sync_one_markdown_file_emits_index_phase_spans( sync_service, test_project, monkeypatch ) -> None: - _, spans, _, fake_span = _capture_sync_telemetry() - monkeypatch.setattr(batch_indexer_module.telemetry, "span", fake_span) + spans, fake_span = _capture_spans() + monkeypatch.setattr(logfire, "span", fake_span) monkeypatch.setattr(sync_service.search_service, "index_entity_data", AsyncMock()) _write_markdown( @@ -152,7 +144,7 @@ async def test_sync_one_markdown_file_emits_index_phase_spans( @pytest.mark.asyncio async def test_sync_file_emits_failure_span(sync_service, monkeypatch) -> None: - _, spans, _, fake_span = _capture_sync_telemetry() + spans, fake_span = _capture_spans() recorded_failures: list[tuple[str, str]] = [] async def fake_record_failure(path, error): @@ -161,7 +153,7 @@ async def fake_record_failure(path, error): async def fail_sync_markdown_file(path, new=True): raise ValueError("boom") - monkeypatch.setattr(sync_service_module.telemetry, "span", fake_span) + monkeypatch.setattr(logfire, "span", fake_span) monkeypatch.setattr(sync_service, "_should_skip_file", lambda path: _false_async()) monkeypatch.setattr(sync_service.file_service, "is_markdown", lambda path: True) monkeypatch.setattr(sync_service, "sync_markdown_file", fail_sync_markdown_file) @@ -170,7 +162,8 @@ async def fail_sync_markdown_file(path, new=True): result = await sync_service.sync_file("notes/broken.md", new=True) assert result == (None, None) - assert spans == [ + failure_spans = [span for span in spans if span[0] == "sync.file.failure"] + assert failure_spans == [ ( "sync.file.failure", { diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py index b940f6fe..719fefa2 100644 --- a/tests/test_telemetry.py +++ b/tests/test_telemetry.py @@ -1,31 +1,13 @@ -"""Tests for optional Logfire bootstrap helpers.""" +"""Tests for Logfire telemetry bootstrap.""" from __future__ import annotations -from contextlib import contextmanager -from contextvars import copy_context - -from loguru import logger from basic_memory import __version__, telemetry from basic_memory.config import init_api_logging, init_cli_logging, init_mcp_logging class FakeLogfire: - """Small fake Logfire surface for bootstrap testing.""" - - class FakeCounter: - def __init__(self, calls: list[tuple[float, dict | None]]) -> None: - self.calls = calls - - def add(self, amount: float, *, attributes=None) -> None: - self.calls.append((amount, attributes)) - - class FakeHistogram: - def __init__(self, calls: list[tuple[float, dict | None]]) -> None: - self.calls = calls - - def record(self, amount: float, *, attributes=None) -> None: - self.calls.append((amount, attributes)) + """Minimal fake of the logfire module surface configure_telemetry touches.""" class CodeSource: def __init__(self, **kwargs): @@ -34,9 +16,6 @@ def __init__(self, **kwargs): def __init__(self, *, fail_on_send_to_logfire: bool = False) -> None: self.fail_on_send_to_logfire = fail_on_send_to_logfire self.configure_calls: list[dict] = [] - self.span_calls: list[tuple[str, dict]] = [] - self.counter_calls: dict[str, list[tuple[float, dict | None]]] = {} - self.histogram_calls: dict[str, list[tuple[float, dict | None]]] = {} def configure(self, **kwargs) -> None: self.configure_calls.append(kwargs) @@ -46,31 +25,8 @@ def configure(self, **kwargs) -> None: def loguru_handler(self) -> dict: return {"sink": "fake-logfire", "level": "INFO"} - 
def metric_counter(self, name: str, *, unit: str = "", description: str = ""): - self.counter_calls.setdefault(name, []) - return self.FakeCounter(self.counter_calls[name]) - - def metric_histogram(self, name: str, *, unit: str = "", description: str = ""): - self.histogram_calls.setdefault(name, []) - return self.FakeHistogram(self.histogram_calls[name]) - - @contextmanager - def span(self, name: str, **attrs): - self.span_calls.append((name, attrs)) - - class FakeStartedSpan: - def set_attribute(self, key: str, value) -> None: - attrs[key] = value - - def set_attributes(self, new_attrs: dict) -> None: - attrs.update(new_attrs) - - yield FakeStartedSpan() - def test_configure_telemetry_disabled_is_noop() -> None: - telemetry.reset_telemetry_state() - enabled = telemetry.configure_telemetry( "basic-memory-cli", environment="dev", @@ -78,235 +34,54 @@ def test_configure_telemetry_disabled_is_noop() -> None: ) assert enabled is False - assert telemetry.telemetry_enabled() is False assert telemetry.get_logfire_handler() is None - assert telemetry.pop_telemetry_warnings() == [] -def test_configure_telemetry_warns_when_dependency_missing(monkeypatch) -> None: - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: None) +def test_configure_telemetry_configures_logfire(monkeypatch) -> None: + fake_logfire = FakeLogfire() + monkeypatch.setattr(telemetry, "logfire", fake_logfire) enabled = telemetry.configure_telemetry( - "basic-memory-cli", - environment="dev", + "basic-memory-api", + environment="prod", + service_version="abc123", enable_logfire=True, + send_to_logfire=True, ) - assert enabled is False - assert telemetry.telemetry_enabled() is False - assert telemetry.get_logfire_handler() is None - assert telemetry.pop_telemetry_warnings() == [ - "Logfire telemetry was enabled but the 'logfire' package is not installed. " - "Telemetry remains disabled." 
- ] + assert enabled is True + assert telemetry.get_logfire_handler() == {"sink": "fake-logfire", "level": "INFO"} + assert len(fake_logfire.configure_calls) == 1 + assert fake_logfire.configure_calls[0]["send_to_logfire"] is True def test_configure_telemetry_retries_without_send_to_logfire(monkeypatch) -> None: fake_logfire = FakeLogfire(fail_on_send_to_logfire=True) - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: fake_logfire) + monkeypatch.setattr(telemetry, "logfire", fake_logfire) enabled = telemetry.configure_telemetry( "basic-memory-api", environment="prod", - service_version="abc123", enable_logfire=True, send_to_logfire=True, ) assert enabled is True - assert telemetry.telemetry_enabled() is True - assert telemetry.get_logfire_handler() == {"sink": "fake-logfire", "level": "INFO"} assert len(fake_logfire.configure_calls) == 2 assert fake_logfire.configure_calls[0]["send_to_logfire"] is True assert "send_to_logfire" not in fake_logfire.configure_calls[1] -def test_contextualize_adds_filtered_loguru_context() -> None: - records: list[dict] = [] - sink_id = logger.add(lambda message: records.append(message.record["extra"].copy())) - - try: - with telemetry.contextualize(project_name="main", workspace_id=None): - logger.info("inside telemetry context") - finally: - logger.remove(sink_id) - - assert records == [{"project_name": "main"}] - - -def test_contextualize_nested_contexts_merge_and_unwind() -> None: - records: list[dict] = [] - sink_id = logger.add(lambda message: records.append(message.record["extra"].copy())) - - try: - with telemetry.contextualize(project_name="main"): - logger.info("outer context") - with telemetry.contextualize(tool_name="write_note"): - logger.info("inner context") - logger.info("outer context restored") - finally: - logger.remove(sink_id) - - assert records == [ - {"project_name": "main"}, - {"project_name": "main", "tool_name": "write_note"}, - {"project_name": "main"}, - ] - - -def test_span_uses_logfire_when_enabled(monkeypatch) -> None: +def test_configure_telemetry_clears_handler_when_disabled(monkeypatch) -> None: + """A prior enabled configure must not leak its handler into a later disabled one.""" fake_logfire = FakeLogfire() - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: fake_logfire) - telemetry.configure_telemetry( - "basic-memory-mcp", - environment="dev", - enable_logfire=True, - ) + monkeypatch.setattr(telemetry, "logfire", fake_logfire) - with telemetry.span("mcp.tool.write_note", project_name="main", workspace_id=None): - pass + telemetry.configure_telemetry("basic-memory-cli", environment="dev", enable_logfire=True) + assert telemetry.get_logfire_handler() is not None - assert fake_logfire.span_calls == [("mcp.tool.write_note", {"project_name": "main"})] - - -def test_started_span_exposes_mutable_logfire_handle(monkeypatch) -> None: - fake_logfire = FakeLogfire() - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: fake_logfire) - telemetry.configure_telemetry( - "basic-memory-mcp", - environment="dev", - enable_logfire=True, - ) - - with telemetry.started_span("mcp.http.request", method="GET") as span: - assert span is not None - span.set_attribute("status_code", 200) - span.set_attributes({"is_success": True, "outcome": "success"}) - - assert fake_logfire.span_calls == [ - ( - "mcp.http.request", - { - "method": "GET", - "status_code": 200, - "is_success": True, - "outcome": "success", - }, - ) - ] - - 
-def test_metrics_record_when_telemetry_enabled(monkeypatch) -> None: - fake_logfire = FakeLogfire() - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: fake_logfire) - telemetry.configure_telemetry( - "basic-memory-cli", - environment="dev", - enable_logfire=True, - ) - - telemetry.record_histogram( - "vector_sync_prepare_seconds", - 1.25, - unit="s", - backend="sqlite", - skip_only_batch=True, - ) - telemetry.add_counter( - "vector_sync_entities_skipped", - 2, - backend="sqlite", - skip_only_batch=True, - ) - - assert fake_logfire.histogram_calls["vector_sync_prepare_seconds"] == [ - (1.25, {"backend": "sqlite", "skip_only_batch": True}) - ] - assert fake_logfire.counter_calls["vector_sync_entities_skipped"] == [ - (2, {"backend": "sqlite", "skip_only_batch": True}) - ] - - -def test_operation_creates_span_and_log_context(monkeypatch) -> None: - fake_logfire = FakeLogfire() - records: list[dict] = [] - sink_id = logger.add(lambda message: records.append(message.record["extra"].copy())) - - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: fake_logfire) - telemetry.configure_telemetry( - "basic-memory-cli", - environment="dev", - enable_logfire=True, - ) - - try: - with telemetry.operation( - "cli.command.status", - entrypoint="cli", - command_name="status", - workspace_id=None, - ): - logger.info("inside operation") - finally: - logger.remove(sink_id) - - assert fake_logfire.span_calls == [ - ("cli.command.status", {"entrypoint": "cli", "command_name": "status"}) - ] - assert records == [{"entrypoint": "cli", "command_name": "status"}] - - -def test_scope_creates_span_and_nested_log_context(monkeypatch) -> None: - fake_logfire = FakeLogfire() - records: list[dict] = [] - sink_id = logger.add(lambda message: records.append(message.record["extra"].copy())) - - telemetry.reset_telemetry_state() - monkeypatch.setattr(telemetry, "_load_logfire", lambda: fake_logfire) - telemetry.configure_telemetry( - "basic-memory-mcp", - environment="dev", - enable_logfire=True, - ) - - try: - with telemetry.contextualize(project_name="main"): - with telemetry.scope( - "routing.client_session", - route_mode="local_asgi", - workspace_id=None, - ): - logger.info("inside scope") - finally: - logger.remove(sink_id) - - assert fake_logfire.span_calls == [("routing.client_session", {"route_mode": "local_asgi"})] - assert records == [{"project_name": "main", "route_mode": "local_asgi"}] - - -def test_contextualize_isolated_per_context() -> None: - def read_extra() -> dict: - holder: list[dict] = [] - sink_id = logger.add(lambda message: holder.append(message.record["extra"].copy())) - try: - logger.info("outside context") - finally: - logger.remove(sink_id) - return holder[0] - - with telemetry.contextualize(project_name="main"): - inside = read_extra() - - outside = copy_context().run(read_extra) - - assert inside == {"project_name": "main"} - assert outside == {} + telemetry.configure_telemetry("basic-memory-cli", environment="dev", enable_logfire=False) + assert telemetry.get_logfire_handler() is None def test_init_logging_functions_configure_telemetry_and_logging(monkeypatch) -> None: diff --git a/tests/utils/test_setup_logging.py b/tests/utils/test_setup_logging.py index 9bf02a8a..448701b3 100644 --- a/tests/utils/test_setup_logging.py +++ b/tests/utils/test_setup_logging.py @@ -22,7 +22,6 @@ def test_setup_logging_uses_shared_log_file_off_windows(monkeypatch, tmp_path) - lambda sink, **kwargs: added_sinks.append(str(sink)), ) 
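+    # Only the logfire handler hook needs stubbing; pop_telemetry_warnings() is gone.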
monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) utils.setup_logging(log_to_file=True) @@ -45,7 +44,6 @@ def test_setup_logging_uses_per_process_log_file_on_windows(monkeypatch, tmp_pat lambda sink, **kwargs: added_sinks.append(str(sink)), ) monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) utils.setup_logging(log_to_file=True) @@ -73,7 +71,6 @@ def test_setup_logging_trims_stale_windows_pid_logs(monkeypatch, tmp_path) -> No monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) monkeypatch.setattr(utils.logger, "add", lambda *args, **kwargs: None) monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) utils.setup_logging(log_to_file=True) @@ -113,7 +110,6 @@ def test_setup_logging_honors_basic_memory_config_dir(monkeypatch, tmp_path) -> lambda sink, **kwargs: added_sinks.append(str(sink)), ) monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) utils.setup_logging(log_to_file=True) @@ -152,7 +148,6 @@ def test_setup_logging_log_to_stdout(monkeypatch) -> None: monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) monkeypatch.setattr(utils.logger, "add", lambda sink, **kwargs: added_sinks.append(sink)) monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) utils.setup_logging(log_to_stdout=True) @@ -171,7 +166,6 @@ def test_setup_logging_structured_context(monkeypatch) -> None: monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) monkeypatch.setattr(utils.logger, "add", lambda *args, **kwargs: None) monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) monkeypatch.setattr( utils.logger, "configure", @@ -196,7 +190,6 @@ def test_setup_logging_suppresses_noisy_loggers(monkeypatch) -> None: monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) monkeypatch.setattr(utils.logger, "add", lambda *args, **kwargs: None) monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) httpx_logger = utils.logging.getLogger("httpx") watchfiles_logger = utils.logging.getLogger("watchfiles.main") @@ -228,28 +221,7 @@ def test_setup_logging_adds_logfire_handler(monkeypatch) -> None: "get_logfire_handler", lambda: {"sink": "logfire-sink", "level": "INFO"}, ) - monkeypatch.setattr(utils.telemetry, "pop_telemetry_warnings", lambda: []) utils.setup_logging() assert added_sinks == ["logfire-sink"] - - -def test_setup_logging_emits_telemetry_warnings(monkeypatch) -> None: - """Telemetry warnings should be logged after sinks are configured.""" - warnings_logged: list[str] = [] - - monkeypatch.setenv("BASIC_MEMORY_ENV", "dev") - monkeypatch.setattr(utils.logger, "remove", lambda *args, **kwargs: None) - monkeypatch.setattr(utils.logger, "add", lambda *args, **kwargs: None) - monkeypatch.setattr(utils.telemetry, "get_logfire_handler", lambda: None) - monkeypatch.setattr( - utils.telemetry, - "pop_telemetry_warnings", - lambda: ["telemetry warning 1", "telemetry warning 2"], 
- ) - monkeypatch.setattr(utils.logger, "warning", lambda message: warnings_logged.append(message)) - - utils.setup_logging() - - assert warnings_logged == ["telemetry warning 1", "telemetry warning 2"] diff --git a/uv.lock b/uv.lock index 63525826..c949a35c 100644 --- a/uv.lock +++ b/uv.lock @@ -167,6 +167,7 @@ dependencies = [ { name = "fastmcp" }, { name = "greenlet" }, { name = "httpx" }, + { name = "logfire" }, { name = "loguru" }, { name = "markdown-it-py" }, { name = "mcp" }, @@ -196,11 +197,6 @@ dependencies = [ { name = "watchfiles" }, ] -[package.optional-dependencies] -telemetry = [ - { name = "logfire" }, -] - [package.dev-dependencies] dev = [ { name = "cst-lsp" }, @@ -235,7 +231,7 @@ requires-dist = [ { name = "fastmcp", specifier = ">=3.0.1,<4" }, { name = "greenlet", specifier = ">=3.1.1" }, { name = "httpx", specifier = ">=0.28.0" }, - { name = "logfire", marker = "extra == 'telemetry'", specifier = ">=4.19.0" }, + { name = "logfire", specifier = ">=4.19.0" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "markdown-it-py", specifier = ">=3.0.0" }, { name = "mcp", specifier = ">=1.23.1" }, @@ -264,7 +260,6 @@ requires-dist = [ { name = "unidecode", specifier = ">=1.3.8" }, { name = "watchfiles", specifier = ">=1.0.4" }, ] -provides-extras = ["telemetry"] [package.metadata.requires-dev] dev = [