datalab-to · PiyushInt · Mar 27, 2026
diff --git a/chandra/input.py b/chandra/input.py
@@ -69,8 +69,17 @@ def load_file(filepath: str, config: dict):
         page_range = parse_range_str(page_range)
 
     input_type = filetype.guess(filepath)
+    is_pdf = False
+
+    # Prefer header-based detection; if it does not report a PDF, fall back to the file extension.
     if input_type and input_type.extension == "pdf":
+        is_pdf = True
+    elif filepath.lower().endswith(".pdf"):
+        is_pdf = True
+
+    if is_pdf:
         images = load_pdf_images(filepath, page_range)
     else:
+        # Non-PDF inputs are treated as single images.
         images = [load_image(filepath)]
     return images
diff --git a/tests/test_input_loader.py b/tests/test_input_loader.py
@@ -0,0 +1,31 @@
+from chandra import input as input_mod
+
+
+def test_load_file_uses_pdf_loader_when_extension_pdf(monkeypatch):
+    """Ensure load_file routes .pdf paths to load_pdf_images even if filetype.guess fails.
+
+    This simulates a multi-page PDF where only the first page would be used
+    if we treated the file as a single image.
+    """
+
+    calls = {}  # filled in by the stub loader with the arguments it receives
+
+    def fake_guess(_):
+        # Simulate header-based detection finding nothing: guess() returns None.
+        return None
+
+    def fake_load_pdf_images(path, page_range):  # pragma: no cover - behavior verified via result
+        calls["path"] = path
+        calls["page_range"] = page_range
+        # Stand-in for three decoded pages; plain strings are enough to check routing.
+        return ["page0", "page1", "page2"]
+
+    monkeypatch.setattr(input_mod.filetype, "guess", fake_guess)  # disable header detection
+    monkeypatch.setattr(input_mod, "load_pdf_images", fake_load_pdf_images)  # no real PDF needed
+
+    images = input_mod.load_file("dummy.pdf", {"page_range": "0-2"})
+
+    assert images == ["page0", "page1", "page2"]  # the stub's return value comes back as-is
+    assert calls["path"].endswith("dummy.pdf")
+    # The "0-2" string should reach the PDF loader already parsed into a list of ints.
+    assert calls["page_range"] == [0, 1, 2]