Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9feb7bc
fix: terminate call-graph alias fixpoint on oscillating rebinds
mldangelo May 22, 2026
467e66b
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 22, 2026
cebd962
fix: preserve propagation when bounding alias cycles
mldangelo-oai May 22, 2026
d78c997
fix: bound assignment alias propagation work
mldangelo-oai May 22, 2026
26655ad
fix: fail closed on cyclic alias propagation
mldangelo-oai May 22, 2026
cbbbd10
fix: converge stable alias rebind states
mldangelo-oai May 22, 2026
21fcf1a
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 23, 2026
c617e8e
fix: preserve findings on incomplete call-graph analysis
mldangelo-oai May 23, 2026
3572995
fix: preserve startup-hook findings on analysis limits
mldangelo-oai May 23, 2026
42e7702
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 23, 2026
3acc6a8
fix: fail closed on conditional alias rebinding
mldangelo-oai May 23, 2026
f670f86
fix: retain aliases and findings across analysis limits
mldangelo-oai May 23, 2026
dbcdffb
fix: preserve deterministic loop-else alias results
mldangelo-oai May 23, 2026
6cd7de3
fix: track ambiguous alias reads before overwrites
mldangelo-oai May 23, 2026
78eb56d
fix: fail closed during torch reference filtering
mldangelo-oai May 23, 2026
e37bce1
fix: preserve findings across alias ambiguity limits
mldangelo-oai May 23, 2026
8f2c0b4
fix: propagate ambiguous aliases in installed packages
mldangelo-oai May 23, 2026
183869a
fix: preserve deterministic alias findings across limits
mldangelo-oai May 23, 2026
95642a4
fix: resolve deterministic alias alternatives safely
mldangelo-oai May 23, 2026
2d00183
fix: retain deterministic aliases before epilogues
mldangelo-oai May 23, 2026
b054f0a
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 23, 2026
8ca3b53
fix: track ambiguous alias calls before overwrites
mldangelo-oai May 23, 2026
3bbeb4d
test: stabilize generic zip raw-scan fixture
mldangelo-oai May 23, 2026
71925b1
fix: preserve same-line terminal alias ordering
mldangelo-oai May 23, 2026
06b7a3c
fix: resolve deterministic terminal alias branches
mldangelo-oai May 23, 2026
66049f2
fix: handle one-sided terminal alias paths
mldangelo-oai May 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- avoid repeatedly scanning sharded model families during directory scans
- keep shard sibling discovery within the requested scan root
- preserve per-shard metadata when aggregating sharded model families
- prevent picklescan call-graph alias cycles from hanging scans
- stop flagging a false-positive ONNX Python operator when tensor weight bytes coincidentally spell `PyOp`
- distinguish ASCII-serialized Torch7 artifacts from plain PyTorch source text

Expand Down
1 change: 1 addition & 0 deletions packages/modelaudit-picklescan/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ and this package adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Bug Fixes

- prevent call-graph alias cycles from hanging scans
- detect nested brace-format lookups that reach tracked `defaultdict` factories
- avoid `str.format` false positives when a `ChainMap` shadows a `defaultdict`
- block `statistics.quantiles` call-iterator consumption in call-graph analysis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
CallGraphFinding,
StartupHookWriteFinding,
UnanalyzedCallGraphReference,
_CallGraphAnalysisLimitError,
find_dangerous_call_graphs,
find_startup_hook_write_call_graphs,
find_unanalyzed_callable_call_graph_references,
Expand Down Expand Up @@ -1012,6 +1013,9 @@ def _with_call_graph_findings(report: PickleReport) -> PickleReport:
with shared_source_sensitive_caches():
try:
call_graph_findings = find_dangerous_call_graphs(import_references, callable_invocations)
except _CallGraphAnalysisLimitError as error:
call_graph_findings = error.partial_findings
enrichment_errors.append(("python_call_graph", error))
except Exception as error:
call_graph_findings = ()
enrichment_errors.append(("python_call_graph", error))
Expand All @@ -1020,6 +1024,9 @@ def _with_call_graph_findings(report: PickleReport) -> PickleReport:
import_references,
callable_invocations,
)
except _CallGraphAnalysisLimitError as error:
startup_hook_write_findings = error.partial_startup_hook_write_findings
enrichment_errors.append(("python_call_graph_startup_hook_write", error))
except Exception as error:
startup_hook_write_findings = ()
enrichment_errors.append(("python_call_graph_startup_hook_write", error))
Expand Down
247 changes: 186 additions & 61 deletions packages/modelaudit-picklescan/src/modelaudit_picklescan/call_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
_MAX_VISITED_FUNCTIONS = 64
_MAX_CALLS_PER_FUNCTION = 128
_MAX_ASSIGNMENT_ALIASES = 128
_MAX_ASSIGNMENT_ALIAS_PASSES = 256
_MAX_FUNCTION_INSTANCE_ALIASES = 32
_MAX_CLASS_INSTANCE_ALIASES = 128
_MAX_INHERITED_CLASS_METHODS = 128
Expand Down Expand Up @@ -66,6 +67,21 @@ def cache_clear(self) -> None:
_SOURCE_SENSITIVE_CACHED_FUNCTIONS: set[_CacheClearable] = set()


class _CallGraphAnalysisLimitError(RuntimeError):
    """Signal that bounded call-graph enrichment stopped before it could finish.

    The exception doubles as a carrier for whatever results the raising code
    chose to attach, so a handler can keep them instead of discarding
    everything.

    Attributes:
        partial_findings: Call-graph findings supplied by the raiser
            (empty tuple when none were supplied).
        partial_startup_hook_write_findings: Startup-hook write findings
            supplied by the raiser (empty tuple when none were supplied).
    """

    def __init__(
        self,
        message: str,
        *,
        partial_findings: tuple[CallGraphFinding, ...] = (),
        partial_startup_hook_write_findings: tuple[StartupHookWriteFinding, ...] = (),
    ) -> None:
        # Only the human-readable message goes to RuntimeError; the partial
        # results are plain attributes and both default to an empty tuple.
        super().__init__(message)
        self.partial_startup_hook_write_findings = partial_startup_hook_write_findings
        self.partial_findings = partial_findings


def _register_source_sensitive_cache(function: _CachedFunctionT) -> _CachedFunctionT:
    """Add *function* to ``_SOURCE_SENSITIVE_CACHED_FUNCTIONS`` and return it unchanged.

    Returning the argument as-is lets the helper be applied inline or as a
    decorator without altering the wrapped callable.
    """
    # The cast is for the type checker only: the registry is typed as a set of
    # cache-clearable objects.
    clearable = cast(_CacheClearable, function)
    _SOURCE_SENSITIVE_CACHED_FUNCTIONS.add(clearable)
    return function
Expand Down Expand Up @@ -320,48 +336,58 @@ def find_dangerous_call_graphs(
if str(reference.get("module", "")) and str(reference.get("name", ""))
}

analysis_limit_error: _CallGraphAnalysisLimitError | None = None
for reference in _iter_call_graph_references(import_references, callable_references, invoked_references):
module = str(reference.get("module", ""))
name = str(reference.get("name", ""))
if not module or not name:
continue
try:
module = str(reference.get("module", ""))
name = str(reference.get("name", ""))
if not module or not name:
continue

entrypoints = _call_graph_entrypoints_for_reference(module, name, reference)
if not entrypoints:
continue
allow_invoked_non_lifecycle_entrypoint = _is_explicit_method_import_reference(name)
sink_path = _first_matching_path(entrypoints, _find_sink_path)
if sink_path is None:
for positional_arg_count in positional_arg_counts.get((module, name), ()):
sink_path = _first_matching_path(
entrypoints,
_invoked_import_execution_path_callback(
positional_arg_count,
allow_non_lifecycle_entrypoint=allow_invoked_non_lifecycle_entrypoint,
),
)
if sink_path is not None:
break
if sink_path is None:
continue
entrypoints = _call_graph_entrypoints_for_reference(module, name, reference)
if not entrypoints:
continue
allow_invoked_non_lifecycle_entrypoint = _is_explicit_method_import_reference(name)
sink_path = _first_matching_path(entrypoints, _find_sink_path)
Comment thread
mldangelo-oai marked this conversation as resolved.
if sink_path is None:
for positional_arg_count in positional_arg_counts.get((module, name), ()):
sink_path = _first_matching_path(
entrypoints,
_invoked_import_execution_path_callback(
positional_arg_count,
allow_non_lifecycle_entrypoint=allow_invoked_non_lifecycle_entrypoint,
),
)
if sink_path is not None:
break
if sink_path is None:
continue

finding_key = (module, name, sink_path)
if finding_key in seen_findings:
continue
seen_findings.add(finding_key)
finding_key = (module, name, sink_path)
if finding_key in seen_findings:
continue
seen_findings.add(finding_key)

sink = sink_path[-1]
findings.append(
CallGraphFinding(
module=module,
name=name,
import_reference=f"{module}.{name}",
sink=sink,
call_path=sink_path,
sink = sink_path[-1]
findings.append(
CallGraphFinding(
module=module,
name=name,
import_reference=f"{module}.{name}",
sink=sink,
call_path=sink_path,
)
)
)
if len(findings) >= _MAX_IMPORT_REFERENCES:
break
if len(findings) >= _MAX_IMPORT_REFERENCES:
break
except _CallGraphAnalysisLimitError as error:
if analysis_limit_error is None:
analysis_limit_error = error
if analysis_limit_error is not None:
raise _CallGraphAnalysisLimitError(
str(analysis_limit_error),
partial_findings=tuple(findings),
) from analysis_limit_error
return tuple(findings)


Expand All @@ -380,6 +406,7 @@ def find_startup_hook_write_call_graphs(
for reference in _iter_callable_invocation_references(callable_invocations)
}
require_invocations = callable_invocations is not None and callable_invocations_complete
analysis_limit_error: _CallGraphAnalysisLimitError | None = None
for reference in _iter_import_references(import_references):
module = str(reference.get("module", ""))
name = str(reference.get("name", ""))
Expand All @@ -389,32 +416,49 @@ def find_startup_hook_write_call_graphs(
continue
seen.add((module, name))

entrypoints = _safe_call_graph_entrypoints(f"{module}.{name}")
if not entrypoints:
continue
if _first_matching_path(entrypoints, _find_sink_path) is not None:
continue
open_path = _first_matching_path(entrypoints, _find_file_open_path)
if open_path is not None:
openers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=open_path,
try:
entrypoints = _safe_call_graph_entrypoints(f"{module}.{name}")
if not entrypoints:
continue
if _first_matching_path(entrypoints, _find_sink_path) is not None:
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
continue
open_path = _first_matching_path(entrypoints, _find_file_open_path)
if open_path is not None:
openers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=open_path,
)
)
)
write_path = _first_matching_path(entrypoints, _find_file_write_path)
if write_path is not None:
writers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=write_path,
write_path = _first_matching_path(entrypoints, _find_file_write_path)
if write_path is not None:
writers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=write_path,
)
)
)

except _CallGraphAnalysisLimitError as error:
if analysis_limit_error is None:
analysis_limit_error = error

findings = _materialize_startup_hook_write_findings(openers, writers)
if analysis_limit_error is not None:
raise _CallGraphAnalysisLimitError(
str(analysis_limit_error),
partial_startup_hook_write_findings=findings,
) from analysis_limit_error
return findings


def _materialize_startup_hook_write_findings(
openers: list[_ImportCallPath],
writers: list[_ImportCallPath],
) -> tuple[StartupHookWriteFinding, ...]:
if not openers or not writers:
return ()

Expand Down Expand Up @@ -497,6 +541,8 @@ def shared_source_sensitive_caches() -> Iterator[None]:
def _safe_call_graph_entrypoints(function_name: str) -> tuple[str, ...]:
    """Resolve call-graph entrypoints for *function_name*, tolerating ordinary failures.

    Returns:
        Whatever ``_call_graph_entrypoints`` returns, or an empty tuple if it
        raises any ``Exception`` other than an analysis-limit error.

    Raises:
        _CallGraphAnalysisLimitError: Propagated untouched so the caller can
            tell "analysis hit a limit" apart from "nothing to analyze".
    """
    try:
        entrypoints = _call_graph_entrypoints(function_name)
    except _CallGraphAnalysisLimitError:
        # Must be listed before the broad handler, which would otherwise
        # swallow it and turn a limit into an empty result.
        raise
    except Exception:
        return ()
    return entrypoints

Expand All @@ -508,6 +554,8 @@ def _first_matching_path(
for entrypoint in entrypoints:
try:
path = path_for(entrypoint)
except _CallGraphAnalysisLimitError:
raise
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
except Exception:
continue
if path is not None:
Expand Down Expand Up @@ -1653,6 +1701,59 @@ def _local_class_node_from_target(
return local_class_nodes.get(class_name)


def _contains_current_loop_break(nodes: Iterable[ast.stmt]) -> bool:
"""Return whether this loop body can break without entering a nested scope or loop."""

def contains_break(node: ast.AST) -> bool:
if isinstance(node, ast.Break):
return True
if isinstance(
node,
ast.For | ast.AsyncFor | ast.While | ast.FunctionDef | ast.AsyncFunctionDef | ast.Lambda | ast.ClassDef,
):
return False
return any(contains_break(child) for child in ast.iter_child_nodes(node))

return any(contains_break(node) for node in nodes)


def _conditionally_rebound_assignment_nodes(nodes: Iterable[ast.AST]) -> dict[str, set[int]]:
    """Group assignment statements by names that are bound on more than one alternative path.

    Each ``if``, ``try``, ``match``, or loop-with-``break`` node in *nodes* is
    split into its alternative bodies. A name assigned in two or more of those
    bodies is recorded together with the ``id()`` of every statement that
    assigns it in any of them.

    Returns:
        Mapping from target name to the ``id()`` values of the competing
        assignment statements. Names bound on at most one path are absent.
    """

    def alternative_bodies(node: ast.AST) -> tuple[Iterable[ast.stmt], ...] | None:
        # Return the alternative statement lists for a branching node, else None.
        if isinstance(node, ast.If):
            return (node.body, node.orelse)
        if isinstance(node, ast.Try):
            # ``body`` and ``else`` are treated as one path; each handler is
            # its own path. ``finalbody`` is not considered.
            normal_path = (*node.body, *node.orelse)
            return (normal_path, *(handler.body for handler in node.handlers))
        if isinstance(node, ast.For | ast.AsyncFor | ast.While):
            # Only a loop that can ``break`` gets its body and ``else`` clause
            # compared as alternatives.
            if _contains_current_loop_break(node.body):
                return (node.body, node.orelse)
            return None
        if isinstance(node, ast.Match):
            return tuple(case.body for case in node.cases)
        return None

    rebound: dict[str, set[int]] = {}
    for node in nodes:
        bodies = alternative_bodies(node)
        if bodies is None:
            continue

        earlier_branches: list[dict[str, set[int]]] = []
        earlier_names: set[str] = set()
        for body in bodies:
            # name -> ids of the statements in this branch that assign it
            current: dict[str, set[int]] = {}
            for statement in _definition_scope_statements(body):
                for name in _assignment_alias_target_names(statement):
                    current.setdefault(name, set()).add(id(statement))

            current_names = set(current)
            for name in earlier_names & current_names:
                # Bound here and in an earlier branch: record every competing
                # statement from this branch and from all earlier ones.
                statement_ids = rebound.setdefault(name, set())
                statement_ids.update(current[name])
                for earlier in earlier_branches:
                    statement_ids.update(earlier.get(name, set()))

            earlier_branches.append(current)
            earlier_names.update(current_names)
    return rebound


def _collect_assignment_aliases(
nodes: Iterable[ast.AST],
module_name: str,
Expand All @@ -1664,10 +1765,20 @@ def _collect_assignment_aliases(
) -> dict[str, str]:
node_list = tuple(nodes)
assignment_aliases: dict[str, str] = {}
conditionally_rebound_node_ids = _conditionally_rebound_assignment_nodes(node_list)
seen_states: set[tuple[tuple[str, str], ...]] = {()}
passes = 0

changed = True
while changed and len(assignment_aliases) < _MAX_ASSIGNMENT_ALIASES:
if passes >= _MAX_ASSIGNMENT_ALIAS_PASSES:
raise _CallGraphAnalysisLimitError(
f"assignment alias analysis exceeded {_MAX_ASSIGNMENT_ALIAS_PASSES} propagation passes"
)
passes += 1
state = tuple(sorted(assignment_aliases.items()))
changed = False
last_changed_node_ids: dict[str, int] = {}
scoped_aliases = {**aliases, **assignment_aliases}
for node in node_list:
resolved = _assignment_alias_value(
Expand All @@ -1685,8 +1796,22 @@ def _collect_assignment_aliases(
continue
assignment_aliases[target_name] = resolved
changed = True
last_changed_node_ids[target_name] = id(node)
if len(assignment_aliases) >= _MAX_ASSIGNMENT_ALIASES:
break
next_state = tuple(sorted(assignment_aliases.items()))
if next_state == state:
if any(
node_id in conditionally_rebound_node_ids.get(target_name, set())
for target_name, node_id in last_changed_node_ids.items()
):
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
raise _CallGraphAnalysisLimitError(
"assignment alias analysis encountered ambiguous conditional rebinding"
)
break
Comment thread
mldangelo-oai marked this conversation as resolved.
if next_state in seen_states:
raise _CallGraphAnalysisLimitError("assignment alias analysis entered a propagation cycle")
seen_states.add(next_state)
return assignment_aliases


Expand Down
Loading
Loading