|
5 | 5 | Handles summarization and key information extraction. |
6 | 6 | """ |
7 | 7 |
|
8 | | -from typing import TYPE_CHECKING, Any, Dict, List |
| 8 | +from typing import TYPE_CHECKING, Any, Dict, List, Tuple |
9 | 9 |
|
10 | 10 | from openviking.core.directories import get_context_type_for_uri |
11 | 11 | from openviking.storage.queuefs import SemanticMsg, get_queue_manager |
| 12 | +from openviking.storage.viking_fs import get_viking_fs |
12 | 13 | from openviking.telemetry import get_current_telemetry |
13 | 14 | from openviking.telemetry.request_wait_tracker import get_request_wait_tracker |
14 | 15 | from openviking_cli.utils import get_logger |
| 16 | +from openviking_cli.utils.uri import VikingURI |
15 | 17 |
|
16 | 18 | if TYPE_CHECKING: |
17 | 19 | from openviking.parse.vlm import VLMProcessor |
@@ -57,29 +59,60 @@ async def summarize( |
57 | 59 | enqueued_count = 0 |
58 | 60 |
|
59 | 61 | telemetry = get_current_telemetry() |
| 62 | + |
| 63 | + def is_resources_root(uri: str) -> bool: |
| 64 | + return (uri or "").rstrip("/") == "viking://resources" |
| 65 | + |
| 66 | + async def list_top_children(temp_uri: str) -> List[Tuple[str, str]]: |
| 67 | + viking_fs = get_viking_fs() |
| 68 | + entries = await viking_fs.ls(temp_uri, show_all_hidden=True, ctx=ctx) |
| 69 | + children: List[Tuple[str, str]] = [] |
| 70 | + for entry in entries: |
| 71 | + name = entry.get("name", "") |
| 72 | + if not name or name in {".", ".."}: |
| 73 | + continue |
| 74 | + child_temp_uri = VikingURI(temp_uri).join(name).uri |
| 75 | + children.append((name, child_temp_uri)) |
| 76 | + return children |
| 77 | + |
60 | 78 | for uri, temp_uri in zip(resource_uris, temp_uris, strict=True): |
61 | 79 | # Determine context_type based on URI |
62 | 80 | context_type = get_context_type_for_uri(uri) |
63 | 81 |
|
64 | | - msg = SemanticMsg( |
65 | | - uri=temp_uri, |
66 | | - context_type=context_type, |
67 | | - account_id=ctx.account_id, |
68 | | - user_id=ctx.user.user_id, |
69 | | - agent_id=ctx.user.agent_id, |
70 | | - role=ctx.role.value, |
71 | | - skip_vectorization=skip_vectorization, |
72 | | - telemetry_id=telemetry.telemetry_id, |
73 | | - target_uri=uri if uri != temp_uri else None, |
74 | | - lifecycle_lock_handle_id=lifecycle_lock_handle_id, |
75 | | - is_code_repo=kwargs.get("is_code_repo", False), |
76 | | - ) |
77 | | - await semantic_queue.enqueue(msg) |
78 | | - if msg.telemetry_id: |
79 | | - get_request_wait_tracker().register_semantic_root(msg.telemetry_id, msg.id) |
80 | | - enqueued_count += 1 |
81 | | - logger.info( |
82 | | - f"Enqueued semantic generation for: {uri} (skip_vectorization={skip_vectorization})" |
83 | | - ) |
| 82 | + enqueue_units: List[Tuple[str, str]] = [] |
| 83 | + if is_resources_root(uri) and uri != temp_uri: |
| 84 | + children = await list_top_children(temp_uri) |
| 85 | + if not children: |
| 86 | + return { |
| 87 | + "status": "error", |
| 88 | + "message": f"no top-level import items found under temp uri: {temp_uri}", |
| 89 | + } |
| 90 | + for name, child_temp_uri in children: |
| 91 | + child_target_uri = VikingURI("viking://resources").join(name).uri |
| 92 | + enqueue_units.append((child_target_uri, child_temp_uri)) |
| 93 | + else: |
| 94 | + enqueue_units.append((uri, temp_uri)) |
| 95 | + |
| 96 | + for target_uri, source_uri in enqueue_units: |
| 97 | + msg = SemanticMsg( |
| 98 | + uri=source_uri, |
| 99 | + context_type=context_type, |
| 100 | + account_id=ctx.account_id, |
| 101 | + user_id=ctx.user.user_id, |
| 102 | + agent_id=ctx.user.agent_id, |
| 103 | + role=ctx.role.value, |
| 104 | + skip_vectorization=skip_vectorization, |
| 105 | + telemetry_id=telemetry.telemetry_id, |
| 106 | + target_uri=target_uri if target_uri != source_uri else None, |
| 107 | + lifecycle_lock_handle_id=lifecycle_lock_handle_id, |
| 108 | + is_code_repo=kwargs.get("is_code_repo", False), |
| 109 | + ) |
| 110 | + await semantic_queue.enqueue(msg) |
| 111 | + if msg.telemetry_id: |
| 112 | + get_request_wait_tracker().register_semantic_root(msg.telemetry_id, msg.id) |
| 113 | + enqueued_count += 1 |
| 114 | + logger.info( |
| 115 | + f"Enqueued semantic generation for: {target_uri} (skip_vectorization={skip_vectorization})" |
| 116 | + ) |
84 | 117 |
|
85 | 118 | return {"status": "success", "enqueued_count": enqueued_count} |
0 commit comments