diff --git a/exir/memory_planning.py b/exir/memory_planning.py
index b13568a0f93..9b19cbc7770 100644
--- a/exir/memory_planning.py
+++ b/exir/memory_planning.py
@@ -1207,9 +1207,9 @@ def _handle(
 
 
 def _partition_specs_by_device(
-    all_specs: set[TensorSpec],
+    all_specs: list[TensorSpec],
     enable_non_cpu_memory_planning: bool,
-) -> dict[tuple[DeviceType, int], set[TensorSpec]]:
+) -> dict[tuple[DeviceType, int], list[TensorSpec]]:
     """Partition specs by (device_type, device_index).
 
     Different device indices on the same device type (e.g. CUDA:0 vs CUDA:1)
@@ -1217,8 +1217,11 @@ def _partition_specs_by_device(
 
     When ``enable_non_cpu_memory_planning`` is False (legacy), all specs
     are placed into a single CPU:0 bucket regardless of their device attribute.
+
+    Insertion order is preserved within each partition because order-sensitive
+    algorithms (e.g. greedy with bisect.insort) rely on it for stable tie-breaking.
     """
-    specs_by_device: dict[tuple[DeviceType, int], set[TensorSpec]] = defaultdict(set)
+    specs_by_device: dict[tuple[DeviceType, int], list[TensorSpec]] = defaultdict(list)
     if not enable_non_cpu_memory_planning:
         specs_by_device[_CPU_KEY] = all_specs
         return specs_by_device
@@ -1227,7 +1230,7 @@ def _partition_specs_by_device(
     has_pre_assigned_mem_id = False
     for spec in all_specs:
         device_key = (spec.device, spec.device_index)
-        specs_by_device[device_key].add(spec)
+        specs_by_device[device_key].append(spec)
         if spec.device != DeviceType.CPU:
             has_non_cpu_specs = True
         if spec.mem_id is not None:
@@ -1308,9 +1311,11 @@ def apply_algo(
     # Extract the nodes and their lifespans from the graph_module
     _ = update_all_tensors_lifetime(graph_module, graph_signature)
 
-    # Collect and materialize specs into a set so we can iterate multiple
-    # times and partition by device.
-    all_specs: set[TensorSpec] = set(
+    # Collect specs into an ordered list so we can iterate multiple times and
+    # partition by device. Order matters: order-sensitive algorithms (e.g.
+    # greedy with bisect.insort) rely on insertion order for stable tie-breaking,
+    # and `collect_specs_from_nodes` already deduplicates via its `dedup` flag.
+    all_specs: list[TensorSpec] = list(
         collect_specs_from_nodes(
             graph_module.graph.nodes,
             graph_signature,
diff --git a/exir/print_program.py b/exir/print_program.py
index cf2daa2c2d3..c1ec1a0bb8e 100644
--- a/exir/print_program.py
+++ b/exir/print_program.py
@@ -353,16 +353,21 @@ def _format_graph(graph: torch.fx.Graph, offending_node_idx: int) -> str:
 
 def _stacktrace_to_framelist(stacktrace: str) -> FrameList:
     """Creates a frame list from a stacktrace string."""
-    pattern = r'File "(.*?)", line (\d+), in (.*?)\n'
-    matches = re.findall(pattern, stacktrace)
+    # Capture (filename, lineno, name, source-line) in a single regex. Python
+    # 3.11+ tracebacks may include extra caret/underline lines (e.g. "^^^^")
+    # between frames, so we cannot rely on a fixed line offset; instead we pull
+    # the source line directly out of the line that immediately follows each
+    # `File "...", line N, in ` header.
+    pattern = re.compile(r'File "(.*?)", line (\d+), in (.*?)\n([^\n]*)')
+    matches = pattern.findall(stacktrace)
     mapped_frame_list = [
         Frame(
-            filename=match[0],
-            lineno=int(match[1]),
-            name=match[2],
-            context=stacktrace.split("\n")[i * 2 + 1].strip(),
+            filename=m[0],
+            lineno=int(m[1]),
+            name=m[2],
+            context=m[3].strip(),
         )
-        for i, match in enumerate(matches)
+        for m in matches
     ]
     return FrameList(mapped_frame_list)
 
diff --git a/runtime/core/test/device_allocator_test.cpp b/runtime/core/test/device_allocator_test.cpp
index f0bd7c6556e..3c165fcc0d9 100644
--- a/runtime/core/test/device_allocator_test.cpp
+++ b/runtime/core/test/device_allocator_test.cpp
@@ -231,6 +231,9 @@ TEST_F(DeviceAllocatorTest, RegistrySingletonInstance) {
   EXPECT_EQ(&instance1, &instance2);
 }
 
+// EXPECT_DEATH requires gtest death-test support, which is unavailable on
+// platforms without fork() (e.g. iOS). Skip on those platforms.
+#if GTEST_HAS_DEATH_TEST
 TEST_F(DeviceAllocatorTest, RegisteringSameDeviceTypeTwiceAborts) {
   // The fixture has already registered cuda_allocator() for CUDA; attempting
   // to register a second allocator for the same device type must abort.
@@ -239,3 +242,4 @@ TEST_F(DeviceAllocatorTest, RegisteringSameDeviceTypeTwiceAborts) {
       register_device_allocator(&another_allocator),
       "Allocator already registered");
 }
+#endif // GTEST_HAS_DEATH_TEST