Skip to content

Commit a18d4b9

Browse files
authored
Fix/add resource cover (#1338)
* fix(resources): improve handling of resource imports and naming
  - Add source_name to file upload requests for preserving original filenames
  - Handle single-directory zip files by using their root directory directly
  - Support viking://resources as parent directory for imports
  - Split summarization for resources root imports into individual child items
  - Add tests for new resource import behaviors
* style(tests): format test files with consistent line breaks
  Improve readability by applying consistent line breaks in test file patches and removing trailing whitespace.
1 parent f72055d commit a18d4b9

10 files changed

Lines changed: 573 additions & 33 deletions

File tree

crates/ov_cli/src/client.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,10 +596,15 @@ impl HttpClient {
596596

597597
self.post("/api/v1/resources", &body).await
598598
} else if path_obj.is_file() {
599+
let source_name = path_obj
600+
.file_name()
601+
.and_then(|n| n.to_str())
602+
.map(|s| s.to_string());
599603
let temp_file_id = self.upload_temp_file(path_obj).await?;
600604

601605
let body = serde_json::json!({
602606
"temp_file_id": temp_file_id,
607+
"source_name": source_name,
603608
"to": to,
604609
"parent": parent,
605610
"reason": reason,

openviking/parse/parsers/markdown.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,17 @@ async def parse_content(
174174
await viking_fs.mkdir(temp_uri)
175175
logger.debug(f"[MarkdownParser] Created temp directory: {temp_uri}")
176176

177-
# Get document title
177+
explicit_name = kwargs.get("resource_name") or kwargs.get("source_name")
178+
179+
# Preserve the original uploaded filename when available instead of
180+
# the temp upload name (e.g. upload_<uuid>.txt).
178181
doc_title = meta.get("frontmatter", {}).get(
179-
"title", Path(source_path).stem if source_path else "Document"
182+
"title",
183+
Path(explicit_name).stem
184+
if explicit_name
185+
else Path(source_path).stem
186+
if source_path
187+
else "Document",
180188
)
181189

182190
# Create root directory
@@ -187,7 +195,13 @@ async def parse_content(
187195
logger.info(f"[MarkdownParser] Found {len(headings)} headings")
188196

189197
# Parse and create directory structure
190-
await self._parse_and_create_structure(content, headings, root_dir, source_path)
198+
await self._parse_and_create_structure(
199+
content,
200+
headings,
201+
root_dir,
202+
source_path,
203+
doc_name=self._sanitize_for_path(Path(doc_title).stem),
204+
)
191205

192206
parse_time = time.time() - start_time
193207
logger.info(f"[MarkdownParser] Parse completed in {parse_time:.2f}s")
@@ -365,6 +379,7 @@ async def _parse_and_create_structure(
365379
headings: List[Tuple[int, int, str, int]],
366380
root_dir: str,
367381
source_path: Optional[str] = None,
382+
doc_name: Optional[str] = None,
368383
) -> None:
369384
"""
370385
Parse markdown and create directory structure directly in VikingFS.
@@ -395,7 +410,9 @@ async def _parse_and_create_structure(
395410
await viking_fs.mkdir(root_dir)
396411

397412
# Get document name
398-
doc_name = self._sanitize_for_path(Path(source_path).stem if source_path else "content")
413+
doc_name = doc_name or self._sanitize_for_path(
414+
Path(source_path).stem if source_path else "content"
415+
)
399416

400417
# Small document: save as single file (check both token and char limits)
401418
if estimated_tokens <= max_size and len(content) <= max_chars:

openviking/parse/tree_builder.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ async def finalize_from_temp(
125125
viking_fs = get_viking_fs()
126126
temp_uri = temp_dir_path
127127

128+
def is_resources_root(uri: Optional[str]) -> bool:
129+
return (uri or "").rstrip("/") == "viking://resources"
130+
128131
# 1. Find document root directory
129132
entries = await viking_fs.ls(temp_uri, ctx=ctx)
130133
doc_dirs = [e for e in entries if e.get("isDir") and e["name"] not in [".", ".."]]
@@ -153,21 +156,31 @@ async def finalize_from_temp(
153156
# 2. Determine base_uri and final document name with org/repo for GitHub/GitLab
154157
auto_base_uri = self._get_base_uri(scope, source_path, source_format)
155158
base_uri = parent_uri or auto_base_uri
159+
use_to_as_parent = is_resources_root(to_uri)
156160
# 3. Determine candidate_uri
157-
if to_uri:
161+
if to_uri and not use_to_as_parent:
158162
candidate_uri = to_uri
159163
else:
160-
if parent_uri:
164+
effective_parent_uri = parent_uri or to_uri if use_to_as_parent else parent_uri
165+
if effective_parent_uri:
161166
# Parent URI must exist and be a directory
162167
try:
163-
stat_result = await viking_fs.stat(parent_uri, ctx=ctx)
168+
stat_result = await viking_fs.stat(effective_parent_uri, ctx=ctx)
164169
except Exception as e:
165-
raise FileNotFoundError(f"Parent URI does not exist: {parent_uri}") from e
170+
raise FileNotFoundError(
171+
f"Parent URI does not exist: {effective_parent_uri}"
172+
) from e
166173
if not stat_result.get("isDir"):
167-
raise ValueError(f"Parent URI is not a directory: {parent_uri}")
174+
raise ValueError(f"Parent URI is not a directory: {effective_parent_uri}")
175+
base_uri = effective_parent_uri
168176
candidate_uri = VikingURI(base_uri).join(final_doc_name).uri
169177

170-
if to_uri:
178+
if to_uri and not use_to_as_parent:
179+
final_uri = candidate_uri
180+
elif use_to_as_parent:
181+
# Treat an explicit resources root target as "import under this
182+
# directory" while preserving the child URI so downstream logic can
183+
# incrementally update viking://resources/<child> when it exists.
171184
final_uri = candidate_uri
172185
else:
173186
final_uri = await self._resolve_unique_uri(candidate_uri)
@@ -177,7 +190,7 @@ async def finalize_from_temp(
177190
source_format=source_format,
178191
)
179192
tree._root_uri = final_uri
180-
if not to_uri:
193+
if not to_uri or use_to_as_parent:
181194
tree._candidate_uri = candidate_uri
182195

183196
# Create a minimal Context object for the root so that tree.root is not None

openviking/utils/media_processor.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,13 +167,29 @@ async def _process_file(
167167
try:
168168
with zipfile.ZipFile(file_path, "r") as zipf:
169169
safe_extract_zip(zipf, temp_dir)
170+
171+
extracted_entries = [p for p in temp_dir.iterdir() if p.name not in {".", ".."}]
172+
if len(extracted_entries) == 1 and extracted_entries[0].is_dir():
173+
dir_kwargs = dict(kwargs)
174+
dir_kwargs.pop("source_name", None)
175+
return await self._process_directory(
176+
extracted_entries[0], instruction, **dir_kwargs
177+
)
178+
170179
return await self._process_directory(temp_dir, instruction, **kwargs)
171180
finally:
172181
pass # Don't delete temp_dir yet, it will be used by TreeBuilder
182+
source_name = kwargs.get("source_name")
183+
if source_name:
184+
kwargs["resource_name"] = Path(source_name).stem
185+
kwargs.setdefault("source_name", source_name)
186+
else:
187+
kwargs.setdefault("resource_name", file_path.stem)
188+
173189
return await parse(
174190
str(file_path),
175191
instruction=instruction,
176192
vlm_processor=self._get_vlm_processor(),
177193
storage=self.storage,
178-
resource_name=file_path.stem,
194+
**kwargs,
179195
)

openviking/utils/summarizer.py

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
Handles summarization and key information extraction.
66
"""
77

8-
from typing import TYPE_CHECKING, Any, Dict, List
8+
from typing import TYPE_CHECKING, Any, Dict, List, Tuple
99

1010
from openviking.core.directories import get_context_type_for_uri
1111
from openviking.storage.queuefs import SemanticMsg, get_queue_manager
12+
from openviking.storage.viking_fs import get_viking_fs
1213
from openviking.telemetry import get_current_telemetry
1314
from openviking.telemetry.request_wait_tracker import get_request_wait_tracker
1415
from openviking_cli.utils import get_logger
16+
from openviking_cli.utils.uri import VikingURI
1517

1618
if TYPE_CHECKING:
1719
from openviking.parse.vlm import VLMProcessor
@@ -57,29 +59,60 @@ async def summarize(
5759
enqueued_count = 0
5860

5961
telemetry = get_current_telemetry()
62+
63+
def is_resources_root(uri: str) -> bool:
64+
return (uri or "").rstrip("/") == "viking://resources"
65+
66+
async def list_top_children(temp_uri: str) -> List[Tuple[str, str]]:
67+
viking_fs = get_viking_fs()
68+
entries = await viking_fs.ls(temp_uri, show_all_hidden=True, ctx=ctx)
69+
children: List[Tuple[str, str]] = []
70+
for entry in entries:
71+
name = entry.get("name", "")
72+
if not name or name in {".", ".."}:
73+
continue
74+
child_temp_uri = VikingURI(temp_uri).join(name).uri
75+
children.append((name, child_temp_uri))
76+
return children
77+
6078
for uri, temp_uri in zip(resource_uris, temp_uris, strict=True):
6179
# Determine context_type based on URI
6280
context_type = get_context_type_for_uri(uri)
6381

64-
msg = SemanticMsg(
65-
uri=temp_uri,
66-
context_type=context_type,
67-
account_id=ctx.account_id,
68-
user_id=ctx.user.user_id,
69-
agent_id=ctx.user.agent_id,
70-
role=ctx.role.value,
71-
skip_vectorization=skip_vectorization,
72-
telemetry_id=telemetry.telemetry_id,
73-
target_uri=uri if uri != temp_uri else None,
74-
lifecycle_lock_handle_id=lifecycle_lock_handle_id,
75-
is_code_repo=kwargs.get("is_code_repo", False),
76-
)
77-
await semantic_queue.enqueue(msg)
78-
if msg.telemetry_id:
79-
get_request_wait_tracker().register_semantic_root(msg.telemetry_id, msg.id)
80-
enqueued_count += 1
81-
logger.info(
82-
f"Enqueued semantic generation for: {uri} (skip_vectorization={skip_vectorization})"
83-
)
82+
enqueue_units: List[Tuple[str, str]] = []
83+
if is_resources_root(uri) and uri != temp_uri:
84+
children = await list_top_children(temp_uri)
85+
if not children:
86+
return {
87+
"status": "error",
88+
"message": f"no top-level import items found under temp uri: {temp_uri}",
89+
}
90+
for name, child_temp_uri in children:
91+
child_target_uri = VikingURI("viking://resources").join(name).uri
92+
enqueue_units.append((child_target_uri, child_temp_uri))
93+
else:
94+
enqueue_units.append((uri, temp_uri))
95+
96+
for target_uri, source_uri in enqueue_units:
97+
msg = SemanticMsg(
98+
uri=source_uri,
99+
context_type=context_type,
100+
account_id=ctx.account_id,
101+
user_id=ctx.user.user_id,
102+
agent_id=ctx.user.agent_id,
103+
role=ctx.role.value,
104+
skip_vectorization=skip_vectorization,
105+
telemetry_id=telemetry.telemetry_id,
106+
target_uri=target_uri if target_uri != source_uri else None,
107+
lifecycle_lock_handle_id=lifecycle_lock_handle_id,
108+
is_code_repo=kwargs.get("is_code_repo", False),
109+
)
110+
await semantic_queue.enqueue(msg)
111+
if msg.telemetry_id:
112+
get_request_wait_tracker().register_semantic_root(msg.telemetry_id, msg.id)
113+
enqueued_count += 1
114+
logger.info(
115+
f"Enqueued semantic generation for: {target_uri} (skip_vectorization={skip_vectorization})"
116+
)
84117

85118
return {"status": "success", "enqueued_count": enqueued_count}

openviking_cli/client/http.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ async def add_resource(
366366
finally:
367367
Path(zip_path).unlink(missing_ok=True)
368368
elif path_obj.is_file():
369+
request_data["source_name"] = path_obj.name
369370
temp_file_id = await self._upload_temp_file(path)
370371
request_data["temp_file_id"] = temp_file_id
371372
else:
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
3+
# SPDX-License-Identifier: AGPL-3.0
4+
5+
import zipfile
6+
from pathlib import Path
7+
from unittest.mock import AsyncMock
8+
9+
import pytest
10+
11+
from openviking.utils.media_processor import UnifiedResourceProcessor
12+
13+
14+
@pytest.mark.asyncio
15+
async def test_zip_single_top_level_dir_uses_real_root(tmp_path: Path):
16+
zip_path = tmp_path / "tt_b.zip"
17+
with zipfile.ZipFile(zip_path, "w") as zf:
18+
zf.writestr("tt_b/bb/readme.md", "# hello\n")
19+
20+
processor = UnifiedResourceProcessor()
21+
processor._process_directory = AsyncMock(return_value="ok")
22+
23+
result = await processor._process_file(zip_path, instruction="")
24+
25+
assert result == "ok"
26+
called_dir = processor._process_directory.await_args.args[0]
27+
assert isinstance(called_dir, Path)
28+
assert called_dir.name == "tt_b"
29+
30+
31+
@pytest.mark.asyncio
32+
async def test_zip_single_top_level_dir_ignores_zip_source_name(tmp_path: Path):
33+
zip_path = tmp_path / "tt_b.zip"
34+
with zipfile.ZipFile(zip_path, "w") as zf:
35+
zf.writestr("tt_b/bb/readme.md", "# hello\n")
36+
37+
processor = UnifiedResourceProcessor()
38+
processor._process_directory = AsyncMock(return_value="ok")
39+
40+
result = await processor._process_file(
41+
zip_path,
42+
instruction="",
43+
source_name="tt_b.zip",
44+
)
45+
46+
assert result == "ok"
47+
called_dir = processor._process_directory.await_args.args[0]
48+
assert isinstance(called_dir, Path)
49+
assert called_dir.name == "tt_b"
50+
assert "source_name" not in processor._process_directory.await_args.kwargs
51+
52+
53+
@pytest.mark.asyncio
54+
async def test_zip_multiple_top_level_entries_keeps_extract_root(tmp_path: Path):
55+
zip_path = tmp_path / "mixed.zip"
56+
with zipfile.ZipFile(zip_path, "w") as zf:
57+
zf.writestr("a/readme.md", "# a\n")
58+
zf.writestr("b/readme.md", "# b\n")
59+
60+
processor = UnifiedResourceProcessor()
61+
processor._process_directory = AsyncMock(return_value="ok")
62+
63+
result = await processor._process_file(zip_path, instruction="")
64+
65+
assert result == "ok"
66+
called_dir = processor._process_directory.await_args.args[0]
67+
assert isinstance(called_dir, Path)
68+
assert called_dir.name != "a"
69+
assert called_dir.name != "b"
70+
71+
72+
@pytest.mark.asyncio
73+
async def test_single_file_uses_source_name_for_resource_name(tmp_path: Path):
74+
file_path = tmp_path / "upload_123.txt"
75+
file_path.write_text("hello\n")
76+
77+
processor = UnifiedResourceProcessor()
78+
79+
with pytest.MonkeyPatch.context() as mp:
80+
parse_mock = AsyncMock(return_value="ok")
81+
mp.setattr("openviking.utils.media_processor.parse", parse_mock)
82+
83+
result = await processor._process_file(
84+
file_path,
85+
instruction="",
86+
source_name="aa.txt",
87+
)
88+
89+
assert result == "ok"
90+
assert parse_mock.await_args.kwargs["resource_name"] == "aa"
91+
assert parse_mock.await_args.kwargs["source_name"] == "aa.txt"

0 commit comments

Comments
 (0)