Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 71 additions & 19 deletions src/landingai_ade/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,12 @@ def _get_input_filename(
) -> str:
"""Extract base filename (without extension) from file or URL input."""
if file_input is not None and not isinstance(file_input, Omit):
if isinstance(file_input, (Path, str)):
return Path(file_input).stem
if isinstance(file_input, Path):
return file_input.stem
elif isinstance(file_input, str):
# Strings are always treated as raw content, not file paths.
# File inputs should use Path objects, tuples, or IO objects.
pass
elif isinstance(file_input, tuple) and len(file_input) > 0:
# Tuple format: (filename, content, mime_type)
return Path(str(file_input[0])).stem
Expand All @@ -111,12 +115,24 @@ def _save_response(
method_name: str,
result: Any,
) -> None:
"""Save API response to a JSON file in the specified folder."""
"""Save API response to a JSON file.

If save_to ends with '.json' (case-sensitive match), it is treated as a full
file path and the response is written there directly. Otherwise it is treated
as a directory and the file is auto-named '{filename}_{method_name}_output.json',
or just '{method_name}_output.json' when filename is 'output'.
"""
try:
folder = Path(save_to)
folder.mkdir(parents=True, exist_ok=True)
output_path = folder / f"{filename}_{method_name}_output.json"
output_path.write_text(result.to_json())
save_path = Path(save_to)
if str(save_to).endswith(".json"):
save_path.parent.mkdir(parents=True, exist_ok=True)
save_path.write_text(result.to_json())
else:
save_path.mkdir(parents=True, exist_ok=True)
if filename == "output":
output_path = save_path / f"{method_name}_output.json"
else:
output_path = save_path / f"{filename}_{method_name}_output.json"
output_path.write_text(result.to_json())
except OSError as exc:
raise LandingAiadeError(f"Failed to save {method_name} response to {save_to}: {exc}") from exc

Expand Down Expand Up @@ -328,9 +344,9 @@ def extract(
strict: If True, reject schemas with unsupported fields (HTTP 422). If False, prune
unsupported fields and continue. Only applies to extract versions that support
schema validation.
save_to: Optional output folder path. If provided, the response will be saved as
JSON to this folder with the filename format: {input_file}_extract_output.json.
The folder will be created if it doesn't exist.
save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_extract_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

Expand Down Expand Up @@ -429,9 +445,9 @@ def parse(
parameter. Set the parameter to page to split documents at the page level. The
splits object in the API output will contain a set of data for each page.

save_to: Optional output folder path. If provided, the response will be saved as
JSON to this folder with the filename format: {input_file}_parse_output.json.
The folder will be created if it doesn't exist.
save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_parse_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

Expand Down Expand Up @@ -518,9 +534,9 @@ def split(

model: Model version to use for split classification. Defaults to the latest version.

save_to: Optional output folder path. If provided, the response will be saved as
JSON to this folder with the filename format: {input_file}_split_output.json.
The folder will be created if it doesn't exist.
save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_split_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

Expand Down Expand Up @@ -768,6 +784,7 @@ async def extract(
markdown_url: Optional[str] | Omit = omit,
model: Optional[str] | Omit = omit,
strict: bool | Omit = omit,
save_to: str | Path | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand Down Expand Up @@ -801,6 +818,10 @@ async def extract(
unsupported fields and continue. Only applies to extract versions that support
schema validation.

save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_extract_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -809,6 +830,9 @@ async def extract(

timeout: Override the client-level default timeout for this request, in seconds
"""
# Store original inputs for filename extraction before conversion
original_markdown = markdown
original_markdown_url = markdown_url
# Convert local file paths to file parameters
markdown, markdown_url = convert_url_to_file_if_local(markdown, markdown_url)

Expand All @@ -830,7 +854,7 @@ async def extract(
"runtime_tag": f"ade-python-v{_LIB_VERSION}",
**(extra_headers or {}),
}
return await self.post(
result = await self.post(
"/v1/ade/extract",
body=await async_maybe_transform(body, client_extract_params.ClientExtractParams),
files=files,
Expand All @@ -842,6 +866,10 @@ async def extract(
),
cast_to=ExtractResponse,
)
if save_to:
filename = _get_input_filename(original_markdown, original_markdown_url)
_save_response(save_to, filename, "extract", result)
return result

async def parse(
self,
Expand All @@ -852,6 +880,7 @@ async def parse(
model: Optional[str] | Omit = omit,
password: Optional[str] | Omit = omit,
split: Optional[Literal["page"]] | Omit = omit,
save_to: str | Path | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand Down Expand Up @@ -890,6 +919,10 @@ async def parse(
parameter. Set the parameter to page to split documents at the page level. The
splits object in the API output will contain a set of data for each page.

save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_parse_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -898,6 +931,9 @@ async def parse(

timeout: Override the client-level default timeout for this request, in seconds
"""
# Store original inputs for filename extraction before conversion
original_document = document
original_document_url = document_url
# Convert local file paths to file parameters
document, document_url = convert_url_to_file_if_local(document, document_url)

Expand All @@ -920,7 +956,7 @@ async def parse(
"runtime_tag": f"ade-python-v{_LIB_VERSION}",
**(extra_headers or {}),
}
return await self.post(
result = await self.post(
"/v1/ade/parse",
body=await async_maybe_transform(body, client_parse_params.ClientParseParams),
files=files,
Expand All @@ -932,6 +968,10 @@ async def parse(
),
cast_to=ParseResponse,
)
if save_to:
filename = _get_input_filename(original_document, original_document_url)
_save_response(save_to, filename, "parse", result)
return result

async def split(
self,
Expand All @@ -940,6 +980,7 @@ async def split(
markdown: Union[FileTypes, str, None] | Omit = omit,
markdown_url: Optional[str] | Omit = omit,
model: Optional[str] | Omit = omit,
save_to: str | Path | None = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand Down Expand Up @@ -967,6 +1008,10 @@ async def split(

model: Model version to use for split classification. Defaults to the latest version.

save_to: Optional output path. Accepts either a directory path (auto-generates
filename as {input_file}_split_output.json) or a full file path ending
in .json (saves to that exact path). Parent directories are created automatically.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -975,6 +1020,9 @@ async def split(

timeout: Override the client-level default timeout for this request, in seconds
"""
# Store original inputs for filename extraction
original_markdown = markdown
original_markdown_url = markdown_url
body = deepcopy_minimal(
{
"split_class": split_class,
Expand All @@ -988,7 +1036,7 @@ async def split(
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return await self.post(
result = await self.post(
"/v1/ade/split",
body=await async_maybe_transform(body, client_split_params.ClientSplitParams),
files=files,
Expand All @@ -997,6 +1045,10 @@ async def split(
),
cast_to=SplitResponse,
)
if save_to:
filename = _get_input_filename(original_markdown, original_markdown_url)
_save_response(save_to, filename, "split", result)
return result

@override
def _make_status_error(
Expand Down
Loading