Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions packages/markitdown/src/markitdown/converters/_exiftool.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import locale
import os
import shutil
import subprocess
from typing import Any, BinaryIO, Union

Expand All @@ -17,13 +19,24 @@ def exiftool_metadata(
if not exiftool_path:
return {}

# Resolve the configured exiftool path with shutil.which() to confirm that it
# refers to an existing, executable file before passing it to subprocess.
resolved_path = shutil.which(exiftool_path)
if not resolved_path:
raise RuntimeError(
f"ExifTool executable not found or not executable: {exiftool_path}"
)
# Use the canonical absolute path (symlinks resolved) for all later invocations.
exiftool_path = os.path.realpath(resolved_path)

# Verify exiftool version
try:
version_output = subprocess.run(
[exiftool_path, "-ver"],
capture_output=True,
text=True,
check=True,
timeout=30,
).stdout.strip()
version = _parse_version(version_output)
min_version = (12, 24)
Expand All @@ -32,21 +45,26 @@ def exiftool_metadata(
f"ExifTool version {version_output} is vulnerable to CVE-2021-22204. "
"Please upgrade to version 12.24 or later."
)
except (subprocess.CalledProcessError, ValueError) as e:
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, ValueError) as e:
raise RuntimeError("Failed to verify ExifTool version.") from e

# Run exiftool
# Run exiftool — pass file content via stdin using "-" so that no user-controlled
# file path is ever passed as a command-line argument to ExifTool.
# The "--" separator is added to prevent any argument injection.
cur_pos = file_stream.tell()
try:
output = subprocess.run(
[exiftool_path, "-json", "-"],
[exiftool_path, "-json", "--", "-"],
input=file_stream.read(),
capture_output=True,
text=False,
timeout=120,
).stdout

return json.loads(
output.decode(locale.getpreferredencoding(False)),
)[0]
except subprocess.TimeoutExpired as e:
raise RuntimeError("ExifTool timed out while processing file.") from e
finally:
file_stream.seek(cur_pos)