Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 68 additions & 19 deletions src/landingai_ade/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,12 @@ def _get_input_filename(
) -> str:
"""Extract base filename (without extension) from file or URL input."""
if file_input is not None and not isinstance(file_input, Omit):
if isinstance(file_input, (Path, str)):
return Path(file_input).stem
if isinstance(file_input, Path):
return file_input.stem
elif isinstance(file_input, str):
# Distinguish file paths from raw content: file paths have extensions
if Path(file_input).suffix:
return Path(file_input).stem
elif isinstance(file_input, tuple) and len(file_input) > 0:
# Tuple format: (filename, content, mime_type)
return Path(str(file_input[0])).stem
Expand All @@ -111,12 +115,21 @@ def _save_response(
method_name: str,
result: Any,
) -> None:
"""Save API response to a JSON file in the specified folder."""
"""Save API response to a JSON file.

If save_to ends with '.json', it is treated as a full file path and the
response is written there directly. Otherwise it is treated as a directory
and the file is auto-named '{filename}_{method_name}_output.json'.
"""
try:
folder = Path(save_to)
folder.mkdir(parents=True, exist_ok=True)
output_path = folder / f"{filename}_{method_name}_output.json"
output_path.write_text(result.to_json())
save_path = Path(save_to)
if str(save_to).endswith(".json"):
save_path.parent.mkdir(parents=True, exist_ok=True)
save_path.write_text(result.to_json())
else:
save_path.mkdir(parents=True, exist_ok=True)
output_path = save_path / f"{filename}_{method_name}_output.json"
output_path.write_text(result.to_json())
except OSError as exc:
raise LandingAiadeError(f"Failed to save {method_name} response to {save_to}: {exc}") from exc

Expand Down Expand Up @@ -328,9 +341,9 @@ def extract(
strict: If True, reject schemas with unsupported fields (HTTP 422). If False, prune
unsupported fields and continue. Only applies to extract versions that support
schema validation.
save_to: Optional output folder path. If provided, the response will be saved as
JSON to this folder with the filename format: {input_file}_extract_output.json.
The folder will be created if it doesn't exist.
save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_extract_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

Expand Down Expand Up @@ -429,9 +442,9 @@ def parse(
parameter. Set the parameter to page to split documents at the page level. The
splits object in the API output will contain a set of data for each page.

save_to: Optional output folder path. If provided, the response will be saved as
JSON to this folder with the filename format: {input_file}_parse_output.json.
The folder will be created if it doesn't exist.
save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_parse_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

Expand Down Expand Up @@ -518,9 +531,9 @@ def split(

model: Model version to use for split classification. Defaults to the latest version.

save_to: Optional output folder path. If provided, the response will be saved as
JSON to this folder with the filename format: {input_file}_split_output.json.
The folder will be created if it doesn't exist.
save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_split_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

Expand Down Expand Up @@ -768,6 +781,7 @@ async def extract(
markdown_url: Optional[str] | Omit = omit,
model: Optional[str] | Omit = omit,
strict: bool | Omit = omit,
save_to: str | Path | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand Down Expand Up @@ -801,6 +815,10 @@ async def extract(
unsupported fields and continue. Only applies to extract versions that support
schema validation.

save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_extract_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -809,6 +827,9 @@ async def extract(

timeout: Override the client-level default timeout for this request, in seconds
"""
# Store original inputs for filename extraction before conversion
original_markdown = markdown
original_markdown_url = markdown_url
# Convert local file paths to file parameters
markdown, markdown_url = convert_url_to_file_if_local(markdown, markdown_url)

Expand All @@ -830,7 +851,7 @@ async def extract(
"runtime_tag": f"ade-python-v{_LIB_VERSION}",
**(extra_headers or {}),
}
return await self.post(
result = await self.post(
"/v1/ade/extract",
body=await async_maybe_transform(body, client_extract_params.ClientExtractParams),
files=files,
Expand All @@ -842,6 +863,10 @@ async def extract(
),
cast_to=ExtractResponse,
)
if save_to:
filename = _get_input_filename(original_markdown, original_markdown_url)
_save_response(save_to, filename, "extract", result)
return result

async def parse(
self,
Expand All @@ -852,6 +877,7 @@ async def parse(
model: Optional[str] | Omit = omit,
password: Optional[str] | Omit = omit,
split: Optional[Literal["page"]] | Omit = omit,
save_to: str | Path | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand Down Expand Up @@ -890,6 +916,10 @@ async def parse(
parameter. Set the parameter to page to split documents at the page level. The
splits object in the API output will contain a set of data for each page.

save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_parse_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -898,6 +928,9 @@ async def parse(

timeout: Override the client-level default timeout for this request, in seconds
"""
# Store original inputs for filename extraction before conversion
original_document = document
original_document_url = document_url
# Convert local file paths to file parameters
document, document_url = convert_url_to_file_if_local(document, document_url)

Expand All @@ -920,7 +953,7 @@ async def parse(
"runtime_tag": f"ade-python-v{_LIB_VERSION}",
**(extra_headers or {}),
}
return await self.post(
result = await self.post(
"/v1/ade/parse",
body=await async_maybe_transform(body, client_parse_params.ClientParseParams),
files=files,
Expand All @@ -932,6 +965,10 @@ async def parse(
),
cast_to=ParseResponse,
)
if save_to:
filename = _get_input_filename(original_document, original_document_url)
_save_response(save_to, filename, "parse", result)
return result

async def split(
self,
Expand All @@ -940,6 +977,7 @@ async def split(
markdown: Union[FileTypes, str, None] | Omit = omit,
markdown_url: Optional[str] | Omit = omit,
model: Optional[str] | Omit = omit,
save_to: str | Path | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand Down Expand Up @@ -967,6 +1005,10 @@ async def split(

model: Model version to use for split classification. Defaults to the latest version.

save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_split_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -975,6 +1017,9 @@ async def split(

timeout: Override the client-level default timeout for this request, in seconds
"""
# Store original inputs for filename extraction
original_markdown = markdown
original_markdown_url = markdown_url
body = deepcopy_minimal(
{
"split_class": split_class,
Expand All @@ -988,7 +1033,7 @@ async def split(
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return await self.post(
result = await self.post(
"/v1/ade/split",
body=await async_maybe_transform(body, client_split_params.ClientSplitParams),
files=files,
Expand All @@ -997,6 +1042,10 @@ async def split(
),
cast_to=SplitResponse,
)
if save_to:
filename = _get_input_filename(original_markdown, original_markdown_url)
_save_response(save_to, filename, "split", result)
return result

@override
def _make_status_error(
Expand Down
122 changes: 122 additions & 0 deletions tests/test_save_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pytest

from landingai_ade import AsyncLandingAIADE
from landingai_ade._client import _save_response, _get_input_filename
from landingai_ade._exceptions import LandingAiadeError

Expand Down Expand Up @@ -73,6 +74,22 @@ def test_file_takes_precedence_over_url(self) -> None:
result = _get_input_filename(Path("local.pdf"), "https://example.com/remote.pdf")
assert result == "local"

def test_raw_markdown_string_returns_default(self) -> None:
    """Raw markdown text (rather than a file path) must yield the name 'output'."""
    raw_markdown = "# Hello World\n\nSome content here"
    assert _get_input_filename(raw_markdown, None) == "output"

def test_multiline_markdown_string_returns_default(self) -> None:
    """Multi-line markdown content must yield the name 'output'."""
    content = "Form completed on September 3, 2025\nReference Number: RT-2025-0847"
    assert _get_input_filename(content, None) == "output"

def test_short_string_without_extension_returns_default(self) -> None:
    """A short string with no file extension must yield the name 'output'."""
    extensionless = "no_extension"
    assert _get_input_filename(extensionless, None) == "output"


class TestSaveResponse:
"""Tests for _save_response helper function."""
Expand Down Expand Up @@ -137,3 +154,108 @@ def test_accepts_string_path(self, tmp_path: Path) -> None:

expected_file = tmp_path / "strpath_split_output.json"
assert expected_file.exists()

def test_full_json_path_saves_to_exact_location(self, tmp_path: Path) -> None:
    """A save_to path ending in .json is used verbatim as the output file."""
    payload = '{"key": "value"}'
    response = MagicMock()
    response.to_json.return_value = payload
    target = tmp_path / "custom_name.json"

    _save_response(target, "ignored_filename", "extract", response)

    assert target.exists()
    assert target.read_text() == payload
    # The auto-generated name must NOT appear when an explicit .json path is given.
    auto_named = tmp_path / "ignored_filename_extract_output.json"
    assert not auto_named.exists()

def test_full_json_path_creates_parent_dirs(self, tmp_path: Path) -> None:
    """Missing parent directories of an explicit .json path are created on save."""
    payload = '{"nested": true}'
    response = MagicMock()
    response.to_json.return_value = payload
    # Neither "nested" nor "deep" exists under tmp_path before the call.
    target = tmp_path / "nested" / "deep" / "result.json"

    _save_response(target, "file", "parse", response)

    assert target.exists()
    assert target.read_text() == payload

def test_full_json_path_as_string(self, tmp_path: Path) -> None:
    """A plain-string save_to ending in .json is also treated as a full file path."""
    payload = '{"string": true}'
    response = MagicMock()
    response.to_json.return_value = payload
    target_str = str(tmp_path / "my_output.json")

    _save_response(target_str, "file", "split", response)

    written = Path(target_str)
    assert written.exists()
    assert written.read_text() == payload


class TestAsyncSaveTo:
    """Checks that the async client's extract/parse/split accept ``save_to`` and write output."""

    @staticmethod
    def _new_client() -> AsyncLandingAIADE:
        """Build an async client with a dummy key and a localhost base URL."""
        return AsyncLandingAIADE(apikey="test-key", base_url="http://localhost")

    @staticmethod
    def _stub_post(client: AsyncLandingAIADE, response: MagicMock):
        """Return a patcher that swaps ``client.post`` for an AsyncMock yielding ``response``."""
        from unittest.mock import AsyncMock, patch

        return patch.object(client, "post", new_callable=AsyncMock, return_value=response)

    @pytest.fixture
    def mock_response(self) -> MagicMock:
        """Stand-in API result whose ``to_json()`` returns a fixed JSON string."""
        response = MagicMock()
        response.to_json.return_value = '{"result": "ok"}'
        return response

    @pytest.mark.asyncio
    async def test_async_extract_save_to_directory(self, tmp_path: Path, mock_response: MagicMock) -> None:
        """extract() with a directory save_to writes 'doc_extract_output.json' into it."""
        client = self._new_client()
        with self._stub_post(client, mock_response):
            returned = await client.extract(
                schema="{}",
                markdown=Path("/path/to/doc.pdf"),
                save_to=tmp_path,
            )

        saved = tmp_path / "doc_extract_output.json"
        assert saved.exists()
        assert returned is mock_response

    @pytest.mark.asyncio
    async def test_async_extract_save_to_json_path(self, tmp_path: Path, mock_response: MagicMock) -> None:
        """extract() with a save_to ending in .json writes to that exact file."""
        target = tmp_path / "custom.json"
        client = self._new_client()
        with self._stub_post(client, mock_response):
            await client.extract(schema="{}", markdown=Path("/doc.pdf"), save_to=target)

        assert target.exists()

    @pytest.mark.asyncio
    async def test_async_parse_save_to(self, tmp_path: Path, mock_response: MagicMock) -> None:
        """parse() with a directory save_to writes 'doc_parse_output.json' into it."""
        client = self._new_client()
        with self._stub_post(client, mock_response):
            await client.parse(document=Path("/path/to/doc.pdf"), save_to=tmp_path)

        saved = tmp_path / "doc_parse_output.json"
        assert saved.exists()

    @pytest.mark.asyncio
    async def test_async_split_save_to(self, tmp_path: Path, mock_response: MagicMock) -> None:
        """split() with a directory save_to writes 'doc_split_output.json' into it."""
        client = self._new_client()
        with self._stub_post(client, mock_response):
            await client.split(
                split_class=[{"name": "type1"}],
                markdown=Path("/path/to/doc.md"),
                save_to=tmp_path,
            )

        saved = tmp_path / "doc_split_output.json"
        assert saved.exists()

    @pytest.mark.asyncio
    async def test_async_no_save_when_save_to_none(self, tmp_path: Path, mock_response: MagicMock) -> None:
        """Without save_to, extract() returns the response and leaves tmp_path empty."""
        client = self._new_client()
        with self._stub_post(client, mock_response):
            returned = await client.extract(schema="{}")

        assert returned is mock_response
        leftovers = list(tmp_path.iterdir())
        assert not leftovers