Skip to content
Merged

ci solve #19532

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions exir/memory_planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,18 +1207,21 @@ def _handle(


def _partition_specs_by_device(
all_specs: set[TensorSpec],
all_specs: list[TensorSpec],
enable_non_cpu_memory_planning: bool,
) -> dict[tuple[DeviceType, int], set[TensorSpec]]:
) -> dict[tuple[DeviceType, int], list[TensorSpec]]:
"""Partition specs by (device_type, device_index).

Different device indices on the same device type (e.g. CUDA:0 vs CUDA:1)
get separate memory buffers.

When ``enable_non_cpu_memory_planning`` is False (legacy), all specs are
placed into a single CPU:0 bucket regardless of their device attribute.

Insertion order is preserved within each partition because order-sensitive
algorithms (e.g. greedy with bisect.insort) rely on it for stable tie-breaking.
"""
specs_by_device: dict[tuple[DeviceType, int], set[TensorSpec]] = defaultdict(set)
specs_by_device: dict[tuple[DeviceType, int], list[TensorSpec]] = defaultdict(list)
if not enable_non_cpu_memory_planning:
specs_by_device[_CPU_KEY] = all_specs
return specs_by_device
Expand All @@ -1227,7 +1230,7 @@ def _partition_specs_by_device(
has_pre_assigned_mem_id = False
for spec in all_specs:
device_key = (spec.device, spec.device_index)
specs_by_device[device_key].add(spec)
specs_by_device[device_key].append(spec)
if spec.device != DeviceType.CPU:
has_non_cpu_specs = True
if spec.mem_id is not None:
Expand Down Expand Up @@ -1308,9 +1311,11 @@ def apply_algo(
# Extract the nodes and their lifespans from the graph_module
_ = update_all_tensors_lifetime(graph_module, graph_signature)

# Collect and materialize specs into a set so we can iterate multiple
# times and partition by device.
all_specs: set[TensorSpec] = set(
# Collect specs into an ordered list so we can iterate multiple times and
# partition by device. Order matters: order-sensitive algorithms (e.g.
# greedy with bisect.insort) rely on insertion order for stable tie-breaking,
# and `collect_specs_from_nodes` already deduplicates via its `dedup` flag.
all_specs: list[TensorSpec] = list(
collect_specs_from_nodes(
graph_module.graph.nodes,
graph_signature,
Expand Down
19 changes: 12 additions & 7 deletions exir/print_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,16 +353,21 @@ def _format_graph(graph: torch.fx.Graph, offending_node_idx: int) -> str:

def _stacktrace_to_framelist(stacktrace: str) -> FrameList:
    """Creates a frame list from a stacktrace string.

    Every ``File "<filename>", line <N>, in <name>`` header found in
    ``stacktrace`` yields one ``Frame``. The frame's ``context`` is the
    stripped text of the line that immediately follows the header, or an
    empty string when the header is directly followed by another header
    (i.e. the traceback carries no source line for that frame).

    Args:
        stacktrace: Text in the usual Python traceback layout.

    Returns:
        A ``FrameList`` holding one ``Frame`` per header, in order of
        appearance.
    """
    # Python 3.11+ tracebacks may include extra caret/underline lines (e.g.
    # "^^^^") between frames, so a fixed line offset cannot be used to find a
    # frame's source line. Instead the source line is captured by the same
    # regex that matches the header.
    #
    # The source-line group is optional and guarded by a negative lookahead:
    # without the guard, a frame that has no source line would swallow the
    # *next* frame's header as its context, and that next frame would then be
    # missing from the result because findall() matches do not overlap.
    pattern = re.compile(
        r'File "(.*?)", line (\d+), in (.*?)\n'
        r'(?:(?![ \t]*File ")([^\n]*))?'
    )
    frames = [
        Frame(
            filename=filename,
            lineno=int(lineno),
            name=name,
            # findall() reports a non-participating group as "", so frames
            # without a source line simply get an empty context.
            context=context.strip(),
        )
        for filename, lineno, name, context in pattern.findall(stacktrace)
    ]
    return FrameList(frames)

Expand Down
4 changes: 4 additions & 0 deletions runtime/core/test/device_allocator_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ TEST_F(DeviceAllocatorTest, RegistrySingletonInstance) {
EXPECT_EQ(&instance1, &instance2);
}

// EXPECT_DEATH requires gtest death-test support, which is unavailable on
// platforms without fork() (e.g. iOS). Skip on those platforms.
#if GTEST_HAS_DEATH_TEST
TEST_F(DeviceAllocatorTest, RegisteringSameDeviceTypeTwiceAborts) {
// The fixture has already registered cuda_allocator() for CUDA; attempting
// to register a second allocator for the same device type must abort.
Expand All @@ -239,3 +242,4 @@ TEST_F(DeviceAllocatorTest, RegisteringSameDeviceTypeTwiceAborts) {
register_device_allocator(&another_allocator),
"Allocator already registered");
}
#endif // GTEST_HAS_DEATH_TEST
Loading