Skip to content
290 changes: 158 additions & 132 deletions lisa/microsoft/testsuites/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,40 @@
class CPU(TestSuite):
@TestCaseMetadata(
description="""
This test case will check that L3 cache is correctly mapped
to NUMA node.
This test case checks that the L3 cache is correctly mapped to NUMA
nodes, i.e. an L3 cache belongs to the NUMA node / socket it serves
and is not incorrectly mapped to individual CPU cores.

Steps:
1. Check if NUMA is disabled in commandline. If disabled,
and kernel version is <= 2.6.37, test is skipped as hyper-v
has no support for NUMA : https://t.ly/x8k3
2. Get the mappings using command :
`lscpu --extended=cpu,node,socket,cache`
3. Each line in the mapping corresponds to one CPU core. The L3
cache of each core must be mapped to the NUMA node that core
belongs to instead of the core itself.

Example :
Correct mapping:
CPU NODE SOCKET L1d L1i L2 L3
8 0 0 8 8 8 0
9 1 1 9 9 9 1

Incorrect mapping:
CPU NODE SOCKET L1d L1i L2 L3
8 0 0 8 8 8 8
9 1 1 9 9 9 9
3. Validate the L3 cache topology against these invariants
(works for Intel 1:1, AMD EPYC multi-CCD and sub-NUMA clustering):
a. An L3 cache must not span multiple sockets.
b. If an L3 cache is shared across NUMA nodes, those NUMA nodes
must be on the same socket (valid sub-NUMA clustering).
c. L3 cache IDs must not be unique per CPU within a NUMA node
(that indicates L3 is mapped to the CPU id instead of NUMA).
d. In a strict 1:1 NUMA-to-L3 topology, the L3 cache id must
equal the NUMA node id.

If lscpu does not report a parseable cache mapping (e.g. ARM64 VMs
without an L3 column, or partial cache reporting), the test is
skipped instead of failed.

Example failure modes (invariant 'd' / 'c'):
Correct 1:1 mapping (L3 id == NUMA id):
CPU NODE SOCKET L1d:L1i:L2:L3
8 0 0 8:8:8:0
9 1 1 9:9:9:1

Comment thread
umfranci marked this conversation as resolved.
Incorrect mapping (L3 id == CPU id, not NUMA id):
CPU NODE SOCKET L1d:L1i:L2:L3
8 0 0 8:8:8:8
9 1 1 9:9:9:9
""",
priority=1,
# Marking this test unsupported on BSD as the neccessary info is not exposed
Expand Down Expand Up @@ -118,44 +130,33 @@ def verify_l3_cache(
self._verify_node_mapping(node, effective_numa_node_size)
return

# For all other cases, check L3 cache mapping with socket awareness
cpu_info = lscpu.get_cpu_info()

# Build a mapping of socket -> NUMA nodes and socket -> L3 caches
socket_to_numa_nodes: dict[int, set[int]] = {}
socket_to_l3_caches: dict[int, set[int]] = {}

for cpu in cpu_info:
socket = cpu.socket
numa_node = cpu.numa_node
l3_cache = cpu.l3_cache

# Track NUMA nodes per socket
if socket not in socket_to_numa_nodes:
socket_to_numa_nodes[socket] = set()
socket_to_numa_nodes[socket].add(numa_node)

# Track L3 caches per socket
if socket not in socket_to_l3_caches:
socket_to_l3_caches[socket] = set()
socket_to_l3_caches[socket].add(l3_cache)

# Check if this is a simple 1:1 mapping (traditional case)
all_numa_nodes = set()
all_l3_caches = set()
for numa_nodes in socket_to_numa_nodes.values():
all_numa_nodes.update(numa_nodes)
for l3_caches in socket_to_l3_caches.values():
all_l3_caches.update(l3_caches)

# Check if this is a simple 1:1 mapping or socket-aware mapping
# If NUMA nodes and L3 caches are identical sets, use simple verification
if self._is_one_to_one_mapping(socket_to_numa_nodes, socket_to_l3_caches):
self._verify_one_to_one_mapping(cpu_info, log)
else:
self._verify_socket_aware_mapping(
cpu_info, socket_to_numa_nodes, socket_to_l3_caches, log
)
# Generic L3 cache topology validation for the remaining processor
# types (after the known model/VM-size early returns above).
# This handles both traditional 1:1 NUMA-to-L3 mapping (e.g. Intel)
# and multi-L3-per-NUMA topologies (e.g. AMD EPYC where a NUMA node
# spans multiple CCDs, each with its own L3 cache).
#
# The universal invariants verified are:
# 1. L3 caches must not be shared across sockets
# 2. Cross-NUMA L3 sharing is only valid within the same socket
# 3. L3 cache IDs must not be unique per CPU (indicates L3=CPU_ID bug)
# 4. In a strict 1:1 NUMA-to-L3 topology, L3 ID must equal NUMA ID
Comment thread
umfranci marked this conversation as resolved.
try:
cpu_info = lscpu.get_cpu_info()
except AssertionError as e:
# get_cpu_info() raises AssertionError when the lscpu output
# cannot be parsed into the expected cache mapping format. This
# happens on:
# - VMs where no cache hierarchy is exposed (lscpu shows "-")
# - ARM64 VMs that only have L1d/L1i/L2 (no L3)
# - Partially allocated VMs where some NUMA nodes lack L3
# - Any other unexpected/empty lscpu output
raise SkippedException(
f"Unable to parse lscpu cache mapping; cannot validate L3 "
f"cache topology. Details: {e}"
) from e

self._verify_l3_cache_topology(cpu_info, log)

@TestCaseMetadata(
description="""
Expand Down Expand Up @@ -298,7 +299,13 @@ def _create_stimer_interrupts(self, node: Node, cpu_count: int) -> None:
process.kill()

def _verify_node_mapping(self, node: Node, numa_node_size: int) -> None:
cpu_info = node.tools[Lscpu].get_cpu_info()
try:
cpu_info = node.tools[Lscpu].get_cpu_info()
except AssertionError as e:
raise SkippedException(
f"Unable to parse lscpu cache mapping; cannot validate L3 "
f"cache topology. Details: {e}"
) from e
cpu_info.sort(key=lambda cpu: cpu.cpu)
for i, cpu in enumerate(cpu_info):
numa_node_id = i // numa_node_size
Expand All @@ -308,89 +315,108 @@ def _verify_node_mapping(self, node: Node, numa_node_size: int) -> None:
"associated with the core.",
).is_equal_to(numa_node_id)

def _is_one_to_one_mapping(
self,
socket_to_numa_nodes: dict[int, set[int]],
socket_to_l3_caches: dict[int, set[int]],
) -> bool:
"""Check if NUMA nodes and L3 caches have a 1:1 mapping."""
all_numa_nodes = set()
all_l3_caches = set()
for numa_nodes in socket_to_numa_nodes.values():
all_numa_nodes.update(numa_nodes)
for l3_caches in socket_to_l3_caches.values():
all_l3_caches.update(l3_caches)

return all_numa_nodes == all_l3_caches

def _verify_one_to_one_mapping(self, cpu_info: list[Any], log: Logger) -> None:
"""Verify traditional 1:1 mapping between NUMA nodes and L3 caches."""
log.debug("Detected 1:1 mapping between NUMA nodes and L3 caches")
for cpu in cpu_info:
assert_that(
cpu.l3_cache,
"L3 cache of each core must be mapped to the NUMA node "
"associated with the core.",
).is_equal_to(cpu.numa_node)

def _verify_socket_aware_mapping(
def _verify_l3_cache_topology(
self,
cpu_info: list[Any],
socket_to_numa_nodes: dict[int, set[int]],
socket_to_l3_caches: dict[int, set[int]],
log: Logger,
) -> None:
"""Verify shared L3 cache mapping within sockets."""
log.debug("Detected shared L3 cache within sockets")

# Verify consistency: all CPUs in same NUMA node should have same L3 cache
self._verify_numa_consistency(cpu_info)

# Verify isolation: L3 caches should not be shared across sockets
self._verify_socket_isolation(socket_to_numa_nodes, socket_to_l3_caches, log)
"""Verify L3 cache topology is correct for any processor type.

This is a generic validation that works for all topologies:
- Traditional 1:1 NUMA-to-L3 (e.g. Intel, older AMD)
- Multi-L3-per-NUMA (e.g. AMD EPYC where NUMA spans multiple CCDs)
- Multi-NUMA-per-L3 (e.g. large VMs with sub-NUMA clustering where
the hypervisor splits a socket into sub-NUMA domains but the
physical L3 cache spans both NUMAs within the socket)

The invariants verified are:
1. L3 caches must not be shared across sockets
2. If an L3 cache is shared across NUMA nodes, those NUMA nodes
must be on the same socket
3. L3 cache IDs must not be unique per CPU within a NUMA node
(which would indicate L3 is incorrectly mapped to CPU ID)
4. In a strict 1:1 NUMA-to-L3 topology, the L3 cache ID must
equal the NUMA node ID
"""
# Build helper mappings
numa_to_l3_caches: dict[int, set[int]] = {}
numa_to_sockets: dict[int, set[int]] = {}
l3_to_numas: dict[int, set[int]] = {}
l3_to_sockets: dict[int, set[int]] = {}
Comment thread
umfranci marked this conversation as resolved.
for cpu in cpu_info:
numa_to_l3_caches.setdefault(cpu.numa_node, set()).add(cpu.l3_cache)
numa_to_sockets.setdefault(cpu.numa_node, set()).add(cpu.socket)
l3_to_numas.setdefault(cpu.l3_cache, set()).add(cpu.numa_node)
l3_to_sockets.setdefault(cpu.l3_cache, set()).add(cpu.socket)

def _verify_numa_consistency(self, cpu_info: list[Any]) -> None:
"""Verify all CPUs in the same NUMA node have the same L3 cache."""
numa_to_l3_mapping = {}
# 1. Verify no L3 cache is shared across sockets
for l3_cache, sockets in l3_to_sockets.items():
assert_that(
len(sockets),
f"L3 cache {l3_cache} must not span multiple sockets, "
f"but is present on sockets {sorted(sockets)}",
).is_equal_to(1)

# 2. If an L3 is shared across NUMA nodes, verify those NUMAs
# are on the same socket (sub-NUMA clustering is valid)
for l3_cache, numas in l3_to_numas.items():
if len(numas) <= 1:
continue
# Get all sockets these NUMA nodes belong to
sockets_for_shared_l3: set[int] = set()
for numa in numas:
sockets_for_shared_l3.update(numa_to_sockets[numa])
assert_that(
len(sockets_for_shared_l3),
f"L3 cache {l3_cache} is shared across NUMA nodes "
f"{sorted(numas)}, but they span multiple sockets "
f"{sorted(sockets_for_shared_l3)}. L3 sharing across "
f"NUMA nodes is only valid within the same socket.",
).is_equal_to(1)

# 3. Sanity check: if every CPU in a NUMA node has a unique L3
# cache, the L3 IDs are likely incorrectly mapped to CPU IDs
# instead of shared cache IDs. Valid multi-CCD topologies have
# fewer L3 caches than CPUs (e.g. 32 CPUs sharing 4 L3s).
numa_to_cpus: dict[int, list[int]] = {}
for cpu in cpu_info:
if cpu.numa_node not in numa_to_l3_mapping:
numa_to_l3_mapping[cpu.numa_node] = cpu.l3_cache
else:
# Verify consistency: all CPUs in same NUMA node should have same L3
numa_to_cpus.setdefault(cpu.numa_node, []).append(cpu.cpu)
for numa_node, l3_caches in numa_to_l3_caches.items():
cpu_count = len(numa_to_cpus.get(numa_node, []))
if cpu_count > 1 and len(l3_caches) == cpu_count:
assert_that(
cpu.l3_cache,
f"All CPUs in NUMA node {cpu.numa_node} should have the same "
f"L3 cache mapping, expected "
f"{numa_to_l3_mapping[cpu.numa_node]} "
f"but found {cpu.l3_cache} for CPU {cpu.cpu}",
).is_equal_to(numa_to_l3_mapping[cpu.numa_node])

def _verify_socket_isolation(
self,
socket_to_numa_nodes: dict[int, set[int]],
socket_to_l3_caches: dict[int, set[int]],
log: Logger,
) -> None:
"""Verify L3 caches are not shared across sockets."""
for socket, numa_nodes in socket_to_numa_nodes.items():
l3_caches_in_socket = socket_to_l3_caches[socket]

# Get L3 caches used by other sockets
other_socket_l3_caches = set()
for other_socket, other_l3_caches in socket_to_l3_caches.items():
if other_socket != socket:
other_socket_l3_caches.update(other_l3_caches)

# Verify no L3 cache is shared across sockets
shared_l3_caches = l3_caches_in_socket.intersection(other_socket_l3_caches)
assert_that(
len(shared_l3_caches),
f"L3 caches should not be shared across sockets. "
f"Socket {socket} shares L3 cache(s) {shared_l3_caches} with "
f"other sockets",
).is_equal_to(0)

len(l3_caches),
f"NUMA node {numa_node} has {cpu_count} CPUs each with "
f"a unique L3 cache ID {sorted(l3_caches)}, which "
f"indicates incorrect cache mapping (L3 should be "
f"shared across cores, not unique per CPU).",
).is_less_than(cpu_count)

# 4. Strict 1:1 case: each NUMA owns exactly one L3 and each L3
# belongs to exactly one NUMA. Then L3 ID must equal NUMA ID.
# Multi-CCD and sub-NUMA clustering are excluded by this guard.
each_numa_has_one_l3 = all(len(l3s) == 1 for l3s in numa_to_l3_caches.values())
each_l3_has_one_numa = all(len(numas) == 1 for numas in l3_to_numas.values())
if each_numa_has_one_l3 and each_l3_has_one_numa:
for numa_node, l3_caches in numa_to_l3_caches.items():
l3_cache = next(iter(l3_caches))
assert_that(
l3_cache,
f"NUMA node {numa_node} maps 1:1 to a single L3 cache, "
f"so its L3 cache ID must equal the NUMA node ID, but "
f"got L3 cache ID {l3_cache}.",
).is_equal_to(numa_node)

# Log the topology for debugging
for numa_node, l3_caches in sorted(numa_to_l3_caches.items()):
sorted_sockets = sorted(numa_to_sockets[numa_node])
log.debug(
f"Socket {socket}: NUMA nodes {sorted(numa_nodes)} use "
f"L3 cache(s) {sorted(l3_caches_in_socket)}"
f"NUMA node {numa_node} (socket {sorted_sockets}): "
f"{len(l3_caches)} L3 cache(s) {sorted(l3_caches)}"
)
for l3_cache, numas in sorted(l3_to_numas.items()):
if len(numas) > 1:
log.debug(
f"L3 cache {l3_cache}: shared across NUMA nodes "
f"{sorted(numas)} (sub-NUMA clustering)"
)
Loading