Skip to content

Commit c7b7ca2

Browse files
GH-126910: Add gdb support for unwinding JIT frames (#146071)
Co-authored-by: Pablo Galindo Salgado <pablogsal@gmail.com>
1 parent da09ef8 commit c7b7ca2

32 files changed

Lines changed: 1993 additions & 810 deletions

Doc/c-api/perfmaps.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ Note that holding an :term:`attached thread state` is not required for these API
3131
or ``-2`` on failure to create a lock. Check ``errno`` for more information
3232
about the cause of a failure.
3333

34-
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
34+
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, size_t code_size, const char *entry_name)
3535
3636
Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
3737
thread safe. Here is what an example entry looks like::

Include/cpython/ceval.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ typedef struct {
3838
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
3939
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
4040
const void *code_addr,
41-
unsigned int code_size,
41+
size_t code_size,
4242
const char *entry_name);
4343
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
4444
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);

Include/internal/pycore_ceval.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ typedef struct {
9494
void* (*init_state)(void);
9595
// Callback to register every trampoline being created
9696
void (*write_state)(void* state, const void *code_addr,
97-
unsigned int code_size, PyCodeObject* code);
97+
size_t code_size, PyCodeObject* code);
9898
// Callback to free the trampoline state
9999
int (*free_state)(void* state);
100100
} _PyPerf_Callbacks;
@@ -108,6 +108,10 @@ extern PyStatus _PyPerfTrampoline_AfterFork_Child(void);
108108
#ifdef PY_HAVE_PERF_TRAMPOLINE
109109
extern _PyPerf_Callbacks _Py_perfmap_callbacks;
110110
extern _PyPerf_Callbacks _Py_perfmap_jit_callbacks;
111+
extern void _PyPerfJit_WriteNamedCode(const void *code_addr,
112+
size_t code_size,
113+
const char *entry,
114+
const char *filename);
111115
#endif
112116

113117
static inline PyObject*

Include/internal/pycore_interp_structs.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ struct code_arena_st;
6969
struct trampoline_api_st {
7070
void* (*init_state)(void);
7171
void (*write_state)(void* state, const void *code_addr,
72-
unsigned int code_size, PyCodeObject* code);
72+
size_t code_size, PyCodeObject* code);
7373
int (*free_state)(void* state);
7474
void *state;
7575
Py_ssize_t code_padding;

Include/internal/pycore_jit.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ typedef _Py_CODEUNIT *(*jit_func)(
2323
_PyStackRef _tos_cache0, _PyStackRef _tos_cache1, _PyStackRef _tos_cache2
2424
);
2525

26-
_Py_CODEUNIT *_PyJIT(
26+
_Py_CODEUNIT *_PyJIT_Entry(
2727
_PyExecutorObject *executor, _PyInterpreterFrame *frame,
2828
_PyStackRef *stack_pointer, PyThreadState *tstate
2929
);
Include/internal/pycore_jit_unwind.h

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
#ifndef Py_INTERNAL_JIT_UNWIND_H
2+
#define Py_INTERNAL_JIT_UNWIND_H
3+
4+
#ifndef Py_BUILD_CORE
5+
# error "this header requires Py_BUILD_CORE define"
6+
#endif
7+
8+
#include <stddef.h>
9+
#include <stdint.h>
10+
11+
#if defined(_Py_JIT) && defined(__linux__) && defined(__ELF__)
12+
# define PY_HAVE_JIT_GDB_UNWIND
13+
#endif
14+
15+
#if defined(PY_HAVE_PERF_TRAMPOLINE) || defined(PY_HAVE_JIT_GDB_UNWIND)
16+
17+
#if defined(PY_HAVE_JIT_GDB_UNWIND)
18+
extern PyMutex _Py_jit_debug_mutex;
19+
#endif
20+
21+
/* DWARF exception-handling pointer encodings shared by JIT unwind users. */
22+
enum {
23+
DWRF_EH_PE_absptr = 0x00,
24+
DWRF_EH_PE_omit = 0xff,
25+
26+
/* Data type encodings */
27+
DWRF_EH_PE_uleb128 = 0x01,
28+
DWRF_EH_PE_udata2 = 0x02,
29+
DWRF_EH_PE_udata4 = 0x03,
30+
DWRF_EH_PE_udata8 = 0x04,
31+
DWRF_EH_PE_sleb128 = 0x09,
32+
DWRF_EH_PE_sdata2 = 0x0a,
33+
DWRF_EH_PE_sdata4 = 0x0b,
34+
DWRF_EH_PE_sdata8 = 0x0c,
35+
DWRF_EH_PE_signed = 0x08,
36+
37+
/* Reference type encodings */
38+
DWRF_EH_PE_pcrel = 0x10,
39+
DWRF_EH_PE_textrel = 0x20,
40+
DWRF_EH_PE_datarel = 0x30,
41+
DWRF_EH_PE_funcrel = 0x40,
42+
DWRF_EH_PE_aligned = 0x50,
43+
DWRF_EH_PE_indirect = 0x80
44+
};
45+
46+
/* Return the size of the generated .eh_frame data for the given encoding. */
47+
size_t _PyJitUnwind_EhFrameSize(int absolute_addr);
48+
49+
/*
50+
* Build DWARF .eh_frame data for JIT code; returns size written or 0 on error.
51+
* absolute_addr selects the FDE address encoding:
52+
* - 0: PC-relative offsets (perf jitdump synthesized DSO).
53+
* - nonzero: absolute addresses (GDB JIT in-memory ELF).
54+
*/
55+
size_t _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
56+
const void *code_addr, size_t code_size,
57+
int absolute_addr);
58+
59+
void *_PyJitUnwind_GdbRegisterCode(const void *code_addr,
60+
size_t code_size,
61+
const char *entry,
62+
const char *filename);
63+
64+
void _PyJitUnwind_GdbUnregisterCode(void *handle);
65+
66+
#endif // defined(PY_HAVE_PERF_TRAMPOLINE) || defined(PY_HAVE_JIT_GDB_UNWIND)
67+
68+
#endif // Py_INTERNAL_JIT_UNWIND_H

Include/internal/pycore_optimizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ typedef struct _PyExecutorObject {
198198
uint32_t code_size;
199199
size_t jit_size;
200200
void *jit_code;
201+
void *jit_gdb_handle;
201202
_PyExitData exits[1];
202203
} _PyExecutorObject;
203204

Lib/test/test_gdb/gdb_jit_sample.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
# Sample script for use by test_gdb.test_jit
2+
3+
import _testinternalcapi
4+
import operator
5+
6+
7+
WARMUP_ITERATIONS = _testinternalcapi.TIER2_THRESHOLD + 10
8+
9+
10+
def jit_bt_hot(depth, warming_up_caller=False):
11+
if depth == 0:
12+
if not warming_up_caller:
13+
id(42)
14+
return
15+
16+
for iteration in range(WARMUP_ITERATIONS):
17+
operator.call(
18+
jit_bt_hot,
19+
depth - 1,
20+
warming_up_caller or iteration + 1 != WARMUP_ITERATIONS,
21+
)
22+
23+
24+
# Warm the shared shim once without hitting builtin_id so the real run uses
25+
# the steady-state shim path when GDB breaks inside id(42).
26+
jit_bt_hot(1, warming_up_caller=True)
27+
jit_bt_hot(1)

Lib/test/test_gdb/test_jit.py

Lines changed: 201 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,201 @@
1+
import os
2+
import platform
3+
import re
4+
import sys
5+
import unittest
6+
7+
from .util import setup_module, DebuggerTests
8+
9+
10+
JIT_SAMPLE_SCRIPT = os.path.join(os.path.dirname(__file__), "gdb_jit_sample.py")
11+
# In batch GDB, break in builtin_id() while it is running under JIT,
12+
# then repeatedly "finish" until the selected frame is the JIT executor.
13+
# That gives a deterministic backtrace starting with py::jit:executor.
14+
#
15+
# builtin_id() sits only a few helper frames above the JIT entry on this path.
16+
# This bound is just a generous upper limit so the test fails clearly if the
17+
# expected stack shape changes.
18+
MAX_FINISH_STEPS = 20
19+
# After landing on the JIT entry frame, single-step a bounded number of
20+
# instructions further into the blob so the backtrace is taken from JIT code
21+
# itself rather than the immediate helper-return site. The exact number of
22+
# steps is not significant: each step is cross-checked against the selected
23+
# frame's symbol so the test fails loudly if stepping escapes the registered
24+
# JIT region, instead of asserting against a misleading backtrace.
25+
MAX_JIT_ENTRY_STEPS = 4
26+
EVAL_FRAME_RE = r"(_PyEval_EvalFrameDefault|_PyEval_Vector)"
27+
JIT_EXECUTOR_FRAME = "py::jit:executor"
28+
JIT_ENTRY_SYMBOL = "_PyJIT_Entry"
29+
BACKTRACE_FRAME_RE = re.compile(r"^#\d+\s+.*$", re.MULTILINE)
30+
31+
FINISH_TO_JIT_EXECUTOR = (
32+
"python exec(\"import gdb\\n"
33+
f"target = {JIT_EXECUTOR_FRAME!r}\\n"
34+
f"for _ in range({MAX_FINISH_STEPS}):\\n"
35+
" frame = gdb.selected_frame()\\n"
36+
" if frame is not None and frame.name() == target:\\n"
37+
" break\\n"
38+
" gdb.execute('finish')\\n"
39+
"else:\\n"
40+
" raise RuntimeError('did not reach %s' % target)\\n\")"
41+
)
42+
STEP_INSIDE_JIT_EXECUTOR = (
43+
"python exec(\"import gdb\\n"
44+
f"target = {JIT_EXECUTOR_FRAME!r}\\n"
45+
f"for _ in range({MAX_JIT_ENTRY_STEPS}):\\n"
46+
" frame = gdb.selected_frame()\\n"
47+
" if frame is None or frame.name() != target:\\n"
48+
" raise RuntimeError('left JIT region during stepping: '\\n"
49+
" + repr(frame and frame.name()))\\n"
50+
" gdb.execute('si')\\n"
51+
"frame = gdb.selected_frame()\\n"
52+
"if frame is None or frame.name() != target:\\n"
53+
" raise RuntimeError('stepped out of JIT region after si')\\n\")"
54+
)
55+
56+
57+
def setUpModule():
58+
setup_module()
59+
60+
61+
# The GDB JIT interface registration is gated on __linux__ && __ELF__ in
62+
# Python/jit_unwind.c, and the synthetic EH-frame is only implemented for
63+
# x86_64 and AArch64 (a #error fires otherwise). Skip cleanly on other
64+
# platforms or architectures instead of producing timeouts / empty backtraces.
65+
# is_enabled() implies is_available() and also implies that the runtime has
66+
# JIT execution active; interpreter-only tier 2 builds don't hit this path.
67+
@unittest.skipUnless(sys.platform == "linux",
68+
"GDB JIT interface is only implemented for Linux + ELF")
69+
@unittest.skipUnless(platform.machine() in ("x86_64", "aarch64"),
70+
"GDB JIT CFI emitter only supports x86_64 and AArch64")
71+
@unittest.skipUnless(hasattr(sys, "_jit") and sys._jit.is_enabled(),
72+
"requires a JIT-enabled build with JIT execution active")
73+
class JitBacktraceTests(DebuggerTests):
74+
def get_stack_trace(self, **kwargs):
75+
# These tests validate the JIT-relevant part of the backtrace via
76+
# _assert_jit_backtrace_shape, so an unrelated "?? ()" frame below
77+
# the JIT/eval segment (e.g. libc without debug info) is tolerable.
78+
kwargs.setdefault("skip_on_truncation", False)
79+
return super().get_stack_trace(**kwargs)
80+
81+
def _extract_backtrace_frames(self, gdb_output):
82+
frames = BACKTRACE_FRAME_RE.findall(gdb_output)
83+
self.assertGreater(
84+
len(frames), 0,
85+
f"expected at least one GDB backtrace frame in output:\n{gdb_output}",
86+
)
87+
return frames
88+
89+
def _assert_jit_backtrace_shape(self, gdb_output, *, anchor_at_top):
90+
# Shape assertions applied to every JIT backtrace we produce:
91+
# 1. The synthetic JIT symbol appears exactly once. A second
92+
# py::jit:executor frame would mean the unwinder is
93+
# materializing two native frames for a single logical JIT
94+
# region, or failing to unwind out of the region entirely.
95+
# 2. The unwinder must climb directly back out of the JIT region
96+
# into the eval loop. _PyJIT_Entry only exists to establish the
97+
# physical frame; the synthetic executor FDE collapses it away.
98+
# 3. For tests that assert a specific entry PC, the JIT frame
99+
# is also at #0.
100+
frames = self._extract_backtrace_frames(gdb_output)
101+
backtrace = "\n".join(frames)
102+
103+
jit_frames = [frame for frame in frames if JIT_EXECUTOR_FRAME in frame]
104+
jit_count = len(jit_frames)
105+
self.assertEqual(
106+
jit_count, 1,
107+
f"expected exactly 1 {JIT_EXECUTOR_FRAME} frame, got {jit_count}\n"
108+
f"backtrace:\n{backtrace}",
109+
)
110+
eval_frames = [frame for frame in frames if re.search(EVAL_FRAME_RE, frame)]
111+
eval_count = len(eval_frames)
112+
self.assertGreaterEqual(
113+
eval_count, 1,
114+
f"expected at least one _PyEval_* frame, got {eval_count}\n"
115+
f"backtrace:\n{backtrace}",
116+
)
117+
jit_frame_index = next(
118+
i for i, frame in enumerate(frames) if JIT_EXECUTOR_FRAME in frame
119+
)
120+
frames_after_jit = frames[jit_frame_index + 1:]
121+
first_eval_offset = next(
122+
(
123+
i for i, frame in enumerate(frames_after_jit)
124+
if re.search(EVAL_FRAME_RE, frame)
125+
),
126+
None,
127+
)
128+
self.assertIsNotNone(
129+
first_eval_offset,
130+
f"expected an eval frame after the JIT frame\n"
131+
f"backtrace:\n{backtrace}",
132+
)
133+
unexpected_between = frames_after_jit[:first_eval_offset]
134+
self.assertFalse(
135+
unexpected_between,
136+
"expected the executor frame to unwind directly into eval\n"
137+
f"backtrace:\n{backtrace}",
138+
)
139+
relevant_end = max(
140+
i
141+
for i, frame in enumerate(frames)
142+
if (
143+
JIT_EXECUTOR_FRAME in frame
144+
or re.search(EVAL_FRAME_RE, frame)
145+
)
146+
)
147+
truncated_frames = [
148+
frame for frame in frames[: relevant_end + 1]
149+
if " ?? ()" in frame
150+
]
151+
self.assertFalse(
152+
truncated_frames,
153+
"unexpected truncated frame before the validated JIT/eval segment\n"
154+
f"backtrace:\n{backtrace}",
155+
)
156+
if anchor_at_top:
157+
self.assertRegex(
158+
frames[0],
159+
re.compile(rf"^#0\s+{re.escape(JIT_EXECUTOR_FRAME)}"),
160+
)
161+
162+
def test_bt_unwinds_through_jit_frames(self):
163+
gdb_output = self.get_stack_trace(
164+
script=JIT_SAMPLE_SCRIPT,
165+
cmds_after_breakpoint=["bt"],
166+
PYTHON_JIT="1",
167+
)
168+
# The executor should appear as a named JIT frame and unwind back into
169+
# the eval loop.
170+
self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)
171+
172+
def test_bt_handoff_from_jit_entry_to_executor(self):
173+
gdb_output = self.get_stack_trace(
174+
script=JIT_SAMPLE_SCRIPT,
175+
breakpoint=JIT_ENTRY_SYMBOL,
176+
cmds_after_breakpoint=[
177+
"delete 1",
178+
"tbreak builtin_id",
179+
"continue",
180+
"bt",
181+
],
182+
PYTHON_JIT="1",
183+
)
184+
# If we stop first in the shim and then continue into the real JIT
185+
# workload, the final backtrace should match the architecture's
186+
# executor unwind contract.
187+
self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)
188+
189+
def test_bt_unwinds_from_inside_jit_executor(self):
190+
gdb_output = self.get_stack_trace(
191+
script=JIT_SAMPLE_SCRIPT,
192+
cmds_after_breakpoint=[
193+
FINISH_TO_JIT_EXECUTOR,
194+
STEP_INSIDE_JIT_EXECUTOR,
195+
"bt",
196+
],
197+
PYTHON_JIT="1",
198+
)
199+
# Once the selected PC is inside the JIT executor, we require that GDB
200+
# identifies the JIT frame at #0 and keeps unwinding into _PyEval_*.
201+
self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=True)

0 commit comments

Comments
 (0)