diff --git a/lisa/microsoft/testsuites/core/cpu.py b/lisa/microsoft/testsuites/core/cpu.py index 34edc2ab60..a4ad2f23e8 100644 --- a/lisa/microsoft/testsuites/core/cpu.py +++ b/lisa/microsoft/testsuites/core/cpu.py @@ -41,28 +41,40 @@ class CPU(TestSuite): @TestCaseMetadata( description=""" - This test case will check that L3 cache is correctly mapped - to NUMA node. + This test case checks that the L3 cache is correctly mapped to NUMA + nodes, i.e. an L3 cache belongs to the NUMA node / socket it serves + and is not incorrectly mapped to individual CPU cores. + Steps: 1. Check if NUMA is disabled in commandline. If disabled, and kernel version is <= 2.6.37, test is skipped as hyper-v has no support for NUMA : https://t.ly/x8k3 2. Get the mappings using command : `lscpu --extended=cpu,node,socket,cache` - 3. Each line in the mapping corresponds to one CPU core. The L3 - cache of each core must be mapped to the NUMA node that core - belongs to instead of the core itself. - - Example : - Correct mapping: - CPU NODE SOCKET L1d L1i L2 L3 - 8 0 0 8 8 8 0 - 9 1 1 9 9 9 1 - - Incorrect mapping: - CPU NODE SOCKET L1d L1i L2 L3 - 8 0 0 8 8 8 8 - 9 1 1 9 9 9 9 + 3. Validate the L3 cache topology against these invariants + (works for Intel 1:1, AMD EPYC multi-CCD and sub-NUMA clustering): + a. An L3 cache must not span multiple sockets. + b. If an L3 cache is shared across NUMA nodes, those NUMA nodes + must be on the same socket (valid sub-NUMA clustering). + c. L3 cache IDs must not be unique per CPU within a NUMA node + (that indicates L3 is mapped to the CPU id instead of NUMA). + d. In a strict 1:1 NUMA-to-L3 topology, the L3 cache id must + equal the NUMA node id. + + If lscpu does not report a parseable cache mapping (e.g. ARM64 VMs + without an L3 column, or partial cache reporting), the test is + skipped instead of failed. + + Example failure modes (invariant 'd' / 'c'): + Correct 1:1 mapping (L3 id == NUMA id): + CPU NODE SOCKET L1d:L1i:L2:L3 + 8 0 0 8:8:8:0 + 9 1 1 9:9:9:1 + + Incorrect mapping (L3 id == CPU id, not NUMA id): + CPU NODE SOCKET L1d:L1i:L2:L3 + 8 0 0 8:8:8:8 + 9 1 1 9:9:9:9 """, priority=1, # Marking this test unsupported on BSD as the neccessary info is not exposed @@ -118,44 +130,33 @@ def verify_l3_cache( self._verify_node_mapping(node, effective_numa_node_size) return - # For all other cases, check L3 cache mapping with socket awareness - cpu_info = lscpu.get_cpu_info() - - # Build a mapping of socket -> NUMA nodes and socket -> L3 caches - socket_to_numa_nodes: dict[int, set[int]] = {} - socket_to_l3_caches: dict[int, set[int]] = {} - - for cpu in cpu_info: - socket = cpu.socket - numa_node = cpu.numa_node - l3_cache = cpu.l3_cache - - # Track NUMA nodes per socket - if socket not in socket_to_numa_nodes: - socket_to_numa_nodes[socket] = set() - socket_to_numa_nodes[socket].add(numa_node) - - # Track L3 caches per socket - if socket not in socket_to_l3_caches: - socket_to_l3_caches[socket] = set() - socket_to_l3_caches[socket].add(l3_cache) - - # Check if this is a simple 1:1 mapping (traditional case) - all_numa_nodes = set() - all_l3_caches = set() - for numa_nodes in socket_to_numa_nodes.values(): - all_numa_nodes.update(numa_nodes) - for l3_caches in socket_to_l3_caches.values(): - all_l3_caches.update(l3_caches) - - # Check if this is a simple 1:1 mapping or socket-aware mapping - # If NUMA nodes and L3 caches are identical sets, use simple verification - if self._is_one_to_one_mapping(socket_to_numa_nodes, socket_to_l3_caches): - self._verify_one_to_one_mapping(cpu_info, log) - else: - self._verify_socket_aware_mapping( - cpu_info, socket_to_numa_nodes, socket_to_l3_caches, log - ) + # Generic L3 cache topology validation for the remaining processor + # types (after the known model/VM-size early returns above). + # This handles both traditional 1:1 NUMA-to-L3 mapping (e.g. Intel) + # and multi-L3-per-NUMA topologies (e.g. AMD EPYC where a NUMA node + # spans multiple CCDs, each with its own L3 cache). + # + # The universal invariants verified are: + # 1. L3 caches must not be shared across sockets + # 2. Cross-NUMA L3 sharing is only valid within the same socket + # 3. L3 cache IDs must not be unique per CPU (indicates L3=CPU_ID bug) + # 4. In a strict 1:1 NUMA-to-L3 topology, L3 ID must equal NUMA ID + try: + cpu_info = lscpu.get_cpu_info() + except AssertionError as e: + # get_cpu_info() raises AssertionError when the lscpu output + # cannot be parsed into the expected cache mapping format. This + # happens on: + # - VMs where no cache hierarchy is exposed (lscpu shows "-") + # - ARM64 VMs that only have L1d/L1i/L2 (no L3) + # - Partially allocated VMs where some NUMA nodes lack L3 + # - Any other unexpected/empty lscpu output + raise SkippedException( + f"Unable to parse lscpu cache mapping; cannot validate L3 " + f"cache topology. Details: {e}" + ) from e + + self._verify_l3_cache_topology(cpu_info, log) @TestCaseMetadata( description=""" @@ -298,7 +299,13 @@ def _create_stimer_interrupts(self, node: Node, cpu_count: int) -> None: process.kill() def _verify_node_mapping(self, node: Node, numa_node_size: int) -> None: - cpu_info = node.tools[Lscpu].get_cpu_info() + try: + cpu_info = node.tools[Lscpu].get_cpu_info() + except AssertionError as e: + raise SkippedException( + f"Unable to parse lscpu cache mapping; cannot validate L3 " + f"cache topology. Details: {e}" + ) from e cpu_info.sort(key=lambda cpu: cpu.cpu) for i, cpu in enumerate(cpu_info): numa_node_id = i // numa_node_size @@ -308,89 +315,108 @@ def _verify_node_mapping(self, node: Node, numa_node_size: int) -> None: "associated with the core.", ).is_equal_to(numa_node_id) - def _is_one_to_one_mapping( - self, - socket_to_numa_nodes: dict[int, set[int]], - socket_to_l3_caches: dict[int, set[int]], - ) -> bool: - """Check if NUMA nodes and L3 caches have a 1:1 mapping.""" - all_numa_nodes = set() - all_l3_caches = set() - for numa_nodes in socket_to_numa_nodes.values(): - all_numa_nodes.update(numa_nodes) - for l3_caches in socket_to_l3_caches.values(): - all_l3_caches.update(l3_caches) - - return all_numa_nodes == all_l3_caches - - def _verify_one_to_one_mapping(self, cpu_info: list[Any], log: Logger) -> None: - """Verify traditional 1:1 mapping between NUMA nodes and L3 caches.""" - log.debug("Detected 1:1 mapping between NUMA nodes and L3 caches") - for cpu in cpu_info: - assert_that( - cpu.l3_cache, - "L3 cache of each core must be mapped to the NUMA node " - "associated with the core.", - ).is_equal_to(cpu.numa_node) - - def _verify_socket_aware_mapping( + def _verify_l3_cache_topology( self, cpu_info: list[Any], - socket_to_numa_nodes: dict[int, set[int]], - socket_to_l3_caches: dict[int, set[int]], log: Logger, ) -> None: - """Verify shared L3 cache mapping within sockets.""" - log.debug("Detected shared L3 cache within sockets") - - # Verify consistency: all CPUs in same NUMA node should have same L3 cache - self._verify_numa_consistency(cpu_info) - - # Verify isolation: L3 caches should not be shared across sockets - self._verify_socket_isolation(socket_to_numa_nodes, socket_to_l3_caches, log) + """Verify L3 cache topology is correct for any processor type. + + This is a generic validation that works for all topologies: + - Traditional 1:1 NUMA-to-L3 (e.g. Intel, older AMD) + - Multi-L3-per-NUMA (e.g. AMD EPYC where NUMA spans multiple CCDs) + - Multi-NUMA-per-L3 (e.g. large VMs with sub-NUMA clustering where + the hypervisor splits a socket into sub-NUMA domains but the + physical L3 cache spans both NUMAs within the socket) + + The invariants verified are: + 1. L3 caches must not be shared across sockets + 2. If an L3 cache is shared across NUMA nodes, those NUMA nodes + must be on the same socket + 3. L3 cache IDs must not be unique per CPU within a NUMA node + (which would indicate L3 is incorrectly mapped to CPU ID) + 4. In a strict 1:1 NUMA-to-L3 topology, the L3 cache ID must + equal the NUMA node ID + """ + # Build helper mappings + numa_to_l3_caches: dict[int, set[int]] = {} + numa_to_sockets: dict[int, set[int]] = {} + l3_to_numas: dict[int, set[int]] = {} + l3_to_sockets: dict[int, set[int]] = {} + for cpu in cpu_info: + numa_to_l3_caches.setdefault(cpu.numa_node, set()).add(cpu.l3_cache) + numa_to_sockets.setdefault(cpu.numa_node, set()).add(cpu.socket) + l3_to_numas.setdefault(cpu.l3_cache, set()).add(cpu.numa_node) + l3_to_sockets.setdefault(cpu.l3_cache, set()).add(cpu.socket) - def _verify_numa_consistency(self, cpu_info: list[Any]) -> None: - """Verify all CPUs in the same NUMA node have the same L3 cache.""" - numa_to_l3_mapping = {} + # 1. Verify no L3 cache is shared across sockets + for l3_cache, sockets in l3_to_sockets.items(): + assert_that( + len(sockets), + f"L3 cache {l3_cache} must not span multiple sockets, " + f"but is present on sockets {sorted(sockets)}", + ).is_equal_to(1) + + # 2. If an L3 is shared across NUMA nodes, verify those NUMAs + # are on the same socket (sub-NUMA clustering is valid) + for l3_cache, numas in l3_to_numas.items(): + if len(numas) <= 1: + continue + # Get all sockets these NUMA nodes belong to + sockets_for_shared_l3: set[int] = set() + for numa in numas: + sockets_for_shared_l3.update(numa_to_sockets[numa]) + assert_that( + len(sockets_for_shared_l3), + f"L3 cache {l3_cache} is shared across NUMA nodes " + f"{sorted(numas)}, but they span multiple sockets " + f"{sorted(sockets_for_shared_l3)}. L3 sharing across " + f"NUMA nodes is only valid within the same socket.", + ).is_equal_to(1) + + # 3. Sanity check: if every CPU in a NUMA node has a unique L3 + # cache, the L3 IDs are likely incorrectly mapped to CPU IDs + # instead of shared cache IDs. Valid multi-CCD topologies have + # fewer L3 caches than CPUs (e.g. 32 CPUs sharing 4 L3s). + numa_to_cpus: dict[int, list[int]] = {} for cpu in cpu_info: - if cpu.numa_node not in numa_to_l3_mapping: - numa_to_l3_mapping[cpu.numa_node] = cpu.l3_cache - else: - # Verify consistency: all CPUs in same NUMA node should have same L3 + numa_to_cpus.setdefault(cpu.numa_node, []).append(cpu.cpu) + for numa_node, l3_caches in numa_to_l3_caches.items(): + cpu_count = len(numa_to_cpus.get(numa_node, [])) + if cpu_count > 1 and len(l3_caches) == cpu_count: assert_that( - cpu.l3_cache, - f"All CPUs in NUMA node {cpu.numa_node} should have the same " - f"L3 cache mapping, expected " - f"{numa_to_l3_mapping[cpu.numa_node]} " - f"but found {cpu.l3_cache} for CPU {cpu.cpu}", - ).is_equal_to(numa_to_l3_mapping[cpu.numa_node]) - - def _verify_socket_isolation( - self, - socket_to_numa_nodes: dict[int, set[int]], - socket_to_l3_caches: dict[int, set[int]], - log: Logger, - ) -> None: - """Verify L3 caches are not shared across sockets.""" - for socket, numa_nodes in socket_to_numa_nodes.items(): - l3_caches_in_socket = socket_to_l3_caches[socket] - - # Get L3 caches used by other sockets - other_socket_l3_caches = set() - for other_socket, other_l3_caches in socket_to_l3_caches.items(): - if other_socket != socket: - other_socket_l3_caches.update(other_l3_caches) - - # Verify no L3 cache is shared across sockets - shared_l3_caches = l3_caches_in_socket.intersection(other_socket_l3_caches) - assert_that( - len(shared_l3_caches), - f"L3 caches should not be shared across sockets. " - f"Socket {socket} shares L3 cache(s) {shared_l3_caches} with " - f"other sockets", - ).is_equal_to(0) - + len(l3_caches), + f"NUMA node {numa_node} has {cpu_count} CPUs each with " + f"a unique L3 cache ID {sorted(l3_caches)}, which " + f"indicates incorrect cache mapping (L3 should be " + f"shared across cores, not unique per CPU).", + ).is_less_than(cpu_count) + + # 4. Strict 1:1 case: each NUMA owns exactly one L3 and each L3 + # belongs to exactly one NUMA. Then L3 ID must equal NUMA ID. + # Multi-CCD and sub-NUMA clustering are excluded by this guard. + each_numa_has_one_l3 = all(len(l3s) == 1 for l3s in numa_to_l3_caches.values()) + each_l3_has_one_numa = all(len(numas) == 1 for numas in l3_to_numas.values()) + if each_numa_has_one_l3 and each_l3_has_one_numa: + for numa_node, l3_caches in numa_to_l3_caches.items(): + l3_cache = next(iter(l3_caches)) + assert_that( + l3_cache, + f"NUMA node {numa_node} maps 1:1 to a single L3 cache, " + f"so its L3 cache ID must equal the NUMA node ID, but " + f"got L3 cache ID {l3_cache}.", + ).is_equal_to(numa_node) + + # Log the topology for debugging + for numa_node, l3_caches in sorted(numa_to_l3_caches.items()): + sorted_sockets = sorted(numa_to_sockets[numa_node]) log.debug( - f"Socket {socket}: NUMA nodes {sorted(numa_nodes)} use " - f"L3 cache(s) {sorted(l3_caches_in_socket)}" + f"NUMA node {numa_node} (socket {sorted_sockets}): " + f"{len(l3_caches)} L3 cache(s) {sorted(l3_caches)}" ) + for l3_cache, numas in sorted(l3_to_numas.items()): + if len(numas) > 1: + log.debug( + f"L3 cache {l3_cache}: shared across NUMA nodes " + f"{sorted(numas)} (sub-NUMA clustering)" + )