Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions chandra/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,17 @@ def load_file(filepath: str, config: dict):
page_range = parse_range_str(page_range)

input_type = filetype.guess(filepath)
is_pdf = False

# Prefer header-based detection but fall back to file extension.
if input_type and input_type.extension == "pdf":
is_pdf = True
elif filepath.lower().endswith(".pdf"):
is_pdf = True

if is_pdf:
images = load_pdf_images(filepath, page_range)
else:
# Non-PDF inputs are treated as single images.
images = [load_image(filepath)]
return images
31 changes: 31 additions & 0 deletions tests/test_input_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from chandra import input as input_mod


def test_load_file_uses_pdf_loader_when_extension_pdf(monkeypatch):
    """Check that a ``.pdf`` path reaches ``load_pdf_images`` without header detection.

    ``filetype.guess`` is stubbed to return ``None`` so that only the file
    extension can identify the input as a PDF.  ``load_pdf_images`` is
    replaced by a recorder that returns three fake pages; getting all three
    back from ``load_file`` shows the PDF loader was used rather than the
    single-image path.
    """
    expected_pages = ["page0", "page1", "page2"]
    recorded = {}

    def fake_guess(_):
        # Header sniffing finds nothing.
        return None

    def fake_load_pdf_images(path, page_range):  # pragma: no cover - behavior verified via result
        # Remember what load_file handed over, then pretend three pages were decoded.
        recorded.update(path=path, page_range=page_range)
        return list(expected_pages)

    for target, attr, stub in (
        (input_mod.filetype, "guess", fake_guess),
        (input_mod, "load_pdf_images", fake_load_pdf_images),
    ):
        monkeypatch.setattr(target, attr, stub)

    config = {"page_range": "0-2"}
    images = input_mod.load_file("dummy.pdf", config)

    assert images == ["page0", "page1", "page2"]
    assert recorded["path"].endswith("dummy.pdf")
    # The range string is expected to arrive at the loader already parsed into ints.
    assert recorded["page_range"] == [0, 1, 2]