Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9feb7bc
fix: terminate call-graph alias fixpoint on oscillating rebinds
mldangelo May 22, 2026
467e66b
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 22, 2026
cebd962
fix: preserve propagation when bounding alias cycles
mldangelo-oai May 22, 2026
d78c997
fix: bound assignment alias propagation work
mldangelo-oai May 22, 2026
26655ad
fix: fail closed on cyclic alias propagation
mldangelo-oai May 22, 2026
cbbbd10
fix: converge stable alias rebind states
mldangelo-oai May 22, 2026
21fcf1a
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 23, 2026
c617e8e
fix: preserve findings on incomplete call-graph analysis
mldangelo-oai May 23, 2026
3572995
fix: preserve startup-hook findings on analysis limits
mldangelo-oai May 23, 2026
42e7702
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 23, 2026
3acc6a8
fix: fail closed on conditional alias rebinding
mldangelo-oai May 23, 2026
f670f86
fix: retain aliases and findings across analysis limits
mldangelo-oai May 23, 2026
dbcdffb
fix: preserve deterministic loop-else alias results
mldangelo-oai May 23, 2026
6cd7de3
fix: track ambiguous alias reads before overwrites
mldangelo-oai May 23, 2026
78eb56d
fix: fail closed during torch reference filtering
mldangelo-oai May 23, 2026
e37bce1
fix: preserve findings across alias ambiguity limits
mldangelo-oai May 23, 2026
8f2c0b4
fix: propagate ambiguous aliases in installed packages
mldangelo-oai May 23, 2026
183869a
fix: preserve deterministic alias findings across limits
mldangelo-oai May 23, 2026
95642a4
fix: resolve deterministic alias alternatives safely
mldangelo-oai May 23, 2026
2d00183
fix: retain deterministic aliases before epilogues
mldangelo-oai May 23, 2026
b054f0a
Merge remote-tracking branch 'origin/main' into mdangelo/codex/review…
mldangelo-oai May 23, 2026
8ca3b53
fix: track ambiguous alias calls before overwrites
mldangelo-oai May 23, 2026
3bbeb4d
test: stabilize generic zip raw-scan fixture
mldangelo-oai May 23, 2026
71925b1
fix: preserve same-line terminal alias ordering
mldangelo-oai May 23, 2026
06b7a3c
fix: resolve deterministic terminal alias branches
mldangelo-oai May 23, 2026
66049f2
fix: handle one-sided terminal alias paths
mldangelo-oai May 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- avoid repeatedly scanning sharded model families during directory scans
- keep shard sibling discovery within the requested scan root
- preserve per-shard metadata when aggregating sharded model families
- prevent picklescan call-graph alias cycles from hanging scans
- stop flagging a false-positive ONNX Python operator when tensor weight bytes coincidentally spell `PyOp`
- distinguish ASCII-serialized Torch7 artifacts from plain PyTorch source text

Expand Down
1 change: 1 addition & 0 deletions packages/modelaudit-picklescan/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ and this package adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Bug Fixes

- prevent call-graph alias cycles from hanging scans
- detect nested brace-format lookups that reach tracked `defaultdict` factories
- avoid `str.format` false positives when a `ChainMap` shadows a `defaultdict`
- block `statistics.quantiles` call-iterator consumption in call-graph analysis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
CallGraphFinding,
StartupHookWriteFinding,
UnanalyzedCallGraphReference,
_CallGraphAnalysisLimitError,
find_dangerous_call_graphs,
find_startup_hook_write_call_graphs,
find_unanalyzed_callable_call_graph_references,
Expand Down Expand Up @@ -1012,6 +1013,9 @@ def _with_call_graph_findings(report: PickleReport) -> PickleReport:
with shared_source_sensitive_caches():
try:
call_graph_findings = find_dangerous_call_graphs(import_references, callable_invocations)
except _CallGraphAnalysisLimitError as error:
call_graph_findings = error.partial_findings
enrichment_errors.append(("python_call_graph", error))
except Exception as error:
call_graph_findings = ()
enrichment_errors.append(("python_call_graph", error))
Expand All @@ -1020,6 +1024,9 @@ def _with_call_graph_findings(report: PickleReport) -> PickleReport:
import_references,
callable_invocations,
)
except _CallGraphAnalysisLimitError as error:
startup_hook_write_findings = error.partial_startup_hook_write_findings
enrichment_errors.append(("python_call_graph_startup_hook_write", error))
except Exception as error:
startup_hook_write_findings = ()
enrichment_errors.append(("python_call_graph_startup_hook_write", error))
Expand Down
174 changes: 113 additions & 61 deletions packages/modelaudit-picklescan/src/modelaudit_picklescan/call_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
_MAX_VISITED_FUNCTIONS = 64
_MAX_CALLS_PER_FUNCTION = 128
# Upper bound on the number of aliases _collect_assignment_aliases gathers in one call.
_MAX_ASSIGNMENT_ALIASES = 128
# Upper bound on propagation passes in _collect_assignment_aliases; exceeding it
# raises _CallGraphAnalysisLimitError instead of continuing to iterate.
_MAX_ASSIGNMENT_ALIAS_PASSES = 256
_MAX_FUNCTION_INSTANCE_ALIASES = 32
_MAX_CLASS_INSTANCE_ALIASES = 128
_MAX_INHERITED_CLASS_METHODS = 128
Expand Down Expand Up @@ -66,6 +67,21 @@ def cache_clear(self) -> None:
# Populated by _register_source_sensitive_cache, which adds each function it is given.
_SOURCE_SENSITIVE_CACHED_FUNCTIONS: set[_CacheClearable] = set()


class _CallGraphAnalysisLimitError(RuntimeError):
    """Signal that bounded call-graph enrichment stopped before it could finish.

    Results gathered before the limit was reached are carried on the exception
    so a handler can keep them rather than discard the partial analysis.

    Args:
        message: Human-readable description of the limit that was hit.
        partial_findings: Call-graph findings collected before the limit.
        partial_startup_hook_write_findings: Startup-hook write findings
            collected before the limit.
    """

    def __init__(
        self,
        message: str,
        *,
        partial_findings: tuple[CallGraphFinding, ...] = (),
        partial_startup_hook_write_findings: tuple[StartupHookWriteFinding, ...] = (),
    ) -> None:
        # Both payloads default to empty tuples, so handlers can read either
        # attribute without checking which analysis raised the error.
        self.partial_startup_hook_write_findings = partial_startup_hook_write_findings
        self.partial_findings = partial_findings
        super().__init__(message)


def _register_source_sensitive_cache(function: _CachedFunctionT) -> _CachedFunctionT:
    """Record *function* in the source-sensitive cache registry and return it unchanged.

    Returning the argument lets this be applied as a decorator.
    """
    # The cast only informs the type checker; the registry stores the same object.
    clearable = cast(_CacheClearable, function)
    _SOURCE_SENSITIVE_CACHED_FUNCTIONS.add(clearable)
    return function
Expand Down Expand Up @@ -320,48 +336,51 @@ def find_dangerous_call_graphs(
if str(reference.get("module", "")) and str(reference.get("name", ""))
}

for reference in _iter_call_graph_references(import_references, callable_references, invoked_references):
module = str(reference.get("module", ""))
name = str(reference.get("name", ""))
if not module or not name:
continue
try:
for reference in _iter_call_graph_references(import_references, callable_references, invoked_references):
module = str(reference.get("module", ""))
name = str(reference.get("name", ""))
if not module or not name:
continue

entrypoints = _call_graph_entrypoints_for_reference(module, name, reference)
if not entrypoints:
continue
allow_invoked_non_lifecycle_entrypoint = _is_explicit_method_import_reference(name)
sink_path = _first_matching_path(entrypoints, _find_sink_path)
if sink_path is None:
for positional_arg_count in positional_arg_counts.get((module, name), ()):
sink_path = _first_matching_path(
entrypoints,
_invoked_import_execution_path_callback(
positional_arg_count,
allow_non_lifecycle_entrypoint=allow_invoked_non_lifecycle_entrypoint,
),
)
if sink_path is not None:
break
if sink_path is None:
continue
entrypoints = _call_graph_entrypoints_for_reference(module, name, reference)
if not entrypoints:
continue
allow_invoked_non_lifecycle_entrypoint = _is_explicit_method_import_reference(name)
sink_path = _first_matching_path(entrypoints, _find_sink_path)
Comment thread
mldangelo-oai marked this conversation as resolved.
if sink_path is None:
for positional_arg_count in positional_arg_counts.get((module, name), ()):
sink_path = _first_matching_path(
entrypoints,
_invoked_import_execution_path_callback(
positional_arg_count,
allow_non_lifecycle_entrypoint=allow_invoked_non_lifecycle_entrypoint,
),
)
if sink_path is not None:
break
if sink_path is None:
continue

finding_key = (module, name, sink_path)
if finding_key in seen_findings:
continue
seen_findings.add(finding_key)
finding_key = (module, name, sink_path)
if finding_key in seen_findings:
continue
seen_findings.add(finding_key)

sink = sink_path[-1]
findings.append(
CallGraphFinding(
module=module,
name=name,
import_reference=f"{module}.{name}",
sink=sink,
call_path=sink_path,
sink = sink_path[-1]
findings.append(
CallGraphFinding(
module=module,
name=name,
import_reference=f"{module}.{name}",
sink=sink,
call_path=sink_path,
)
)
)
if len(findings) >= _MAX_IMPORT_REFERENCES:
break
if len(findings) >= _MAX_IMPORT_REFERENCES:
break
except _CallGraphAnalysisLimitError as error:
raise _CallGraphAnalysisLimitError(str(error), partial_findings=tuple(findings)) from error
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
return tuple(findings)


Expand Down Expand Up @@ -389,32 +408,45 @@ def find_startup_hook_write_call_graphs(
continue
seen.add((module, name))

entrypoints = _safe_call_graph_entrypoints(f"{module}.{name}")
if not entrypoints:
continue
if _first_matching_path(entrypoints, _find_sink_path) is not None:
continue
open_path = _first_matching_path(entrypoints, _find_file_open_path)
if open_path is not None:
openers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=open_path,
try:
entrypoints = _safe_call_graph_entrypoints(f"{module}.{name}")
if not entrypoints:
continue
if _first_matching_path(entrypoints, _find_sink_path) is not None:
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
continue
open_path = _first_matching_path(entrypoints, _find_file_open_path)
if open_path is not None:
openers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=open_path,
)
)
)
write_path = _first_matching_path(entrypoints, _find_file_write_path)
if write_path is not None:
writers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=write_path,
write_path = _first_matching_path(entrypoints, _find_file_write_path)
if write_path is not None:
writers.append(
_ImportCallPath(
module=module,
name=name,
import_reference=f"{module}.{name}",
call_path=write_path,
)
)
)
except _CallGraphAnalysisLimitError as error:
raise _CallGraphAnalysisLimitError(
str(error),
partial_startup_hook_write_findings=_materialize_startup_hook_write_findings(openers, writers),
) from error

return _materialize_startup_hook_write_findings(openers, writers)


def _materialize_startup_hook_write_findings(
openers: list[_ImportCallPath],
writers: list[_ImportCallPath],
) -> tuple[StartupHookWriteFinding, ...]:
if not openers or not writers:
return ()

Expand Down Expand Up @@ -497,6 +529,8 @@ def shared_source_sensitive_caches() -> Iterator[None]:
def _safe_call_graph_entrypoints(function_name: str) -> tuple[str, ...]:
    """Resolve call-graph entrypoints for *function_name*, tolerating lookup failures.

    Returns:
        The entrypoints reported by ``_call_graph_entrypoints``, or an empty
        tuple when that lookup raises an ordinary exception.

    Raises:
        _CallGraphAnalysisLimitError: Re-raised untouched so the caller can
            handle an incomplete analysis instead of seeing "no entrypoints".
    """
    try:
        resolved = _call_graph_entrypoints(function_name)
    except _CallGraphAnalysisLimitError:
        # Must be listed before the broad handler below, which would otherwise swallow it.
        raise
    except Exception:
        resolved = ()
    return resolved

Expand All @@ -508,6 +542,8 @@ def _first_matching_path(
for entrypoint in entrypoints:
try:
path = path_for(entrypoint)
except _CallGraphAnalysisLimitError:
raise
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
Comment thread
mldangelo-oai marked this conversation as resolved.
Outdated
except Exception:
continue
if path is not None:
Expand Down Expand Up @@ -1664,9 +1700,19 @@ def _collect_assignment_aliases(
) -> dict[str, str]:
node_list = tuple(nodes)
assignment_aliases: dict[str, str] = {}
# A source-flattened branch may rebind one name during every pass while its
# final state stays stable. Only completed-pass state cycles are incomplete.
seen_states: set[tuple[tuple[str, str], ...]] = {()}
passes = 0

changed = True
while changed and len(assignment_aliases) < _MAX_ASSIGNMENT_ALIASES:
if passes >= _MAX_ASSIGNMENT_ALIAS_PASSES:
raise _CallGraphAnalysisLimitError(
f"assignment alias analysis exceeded {_MAX_ASSIGNMENT_ALIAS_PASSES} propagation passes"
)
passes += 1
state = tuple(sorted(assignment_aliases.items()))
changed = False
scoped_aliases = {**aliases, **assignment_aliases}
for node in node_list:
Expand All @@ -1687,6 +1733,12 @@ def _collect_assignment_aliases(
changed = True
if len(assignment_aliases) >= _MAX_ASSIGNMENT_ALIASES:
break
next_state = tuple(sorted(assignment_aliases.items()))
if next_state == state:
break
Comment thread
mldangelo-oai marked this conversation as resolved.
if next_state in seen_states:
raise _CallGraphAnalysisLimitError("assignment alias analysis entered a propagation cycle")
seen_states.add(next_state)
return assignment_aliases


Expand Down
Loading
Loading