python · ambv · Apr 7, 2026 · Dec 10, 2025 · Dec 10, 2025 · Jan 28, 2026
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import builtins
-import functools
 import keyword
 import re
 import token as T
@@ -11,12 +10,12 @@
 from collections import deque
 from io import StringIO
 from tokenize import TokenInfo as TI
+from traceback import _str_width as str_width, _wlen as wlen
 from typing import Iterable, Iterator, Match, NamedTuple, Self
 
 from .types import CharBuffer, CharWidths
 from .trace import trace
 
-ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
 ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
 ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
 IDENTIFIERS_AFTER = {"def", "class"}
@@ -59,32 +58,6 @@ class ColorSpan(NamedTuple):
     tag: str
 
 
-@functools.cache
-def str_width(c: str) -> int:
-    if ord(c) < 128:
-        return 1
-    # gh-139246 for zero-width joiner and combining characters
-    if unicodedata.combining(c):
-        return 0
-    category = unicodedata.category(c)
-    if category == "Cf" and c != "\u00ad":
-        return 0
-    w = unicodedata.east_asian_width(c)
-    if w in ("N", "Na", "H", "A"):
-        return 1
-    return 2
-
-
-def wlen(s: str) -> int:
-    if len(s) == 1 and s != "\x1a":
-        return str_width(s)
-    length = sum(str_width(i) for i in s)
-    # remove lengths of any escape sequences
-    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
-    ctrl_z_cnt = s.count("\x1a")
-    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
-
-
 def unbracket(s: str, including_content: bool = False) -> str:
     r"""Return `s` with \001 and \002 characters removed.
 

diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py
@@ -1,12 +1,13 @@
 from code import InteractiveConsole
 from functools import partial
+from traceback import ANSI_ESCAPE_SEQUENCE
 from typing import Iterable
 from unittest.mock import MagicMock
 
 from _pyrepl.console import Console, Event
 from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
 from _pyrepl.simple_interact import _strip_final_indent
-from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
+from _pyrepl.utils import unbracket
 
 
 class ScreenEqualMixin:

diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py
@@ -1,52 +1,9 @@
 from unittest import TestCase
 
-from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
+from _pyrepl.utils import prev_next_window, gen_colors
 
 
 class TestUtils(TestCase):
-    def test_str_width(self):
-        characters = [
-            'a',
-            '1',
-            '_',
-            '!',
-            '\x1a',
-            '\u263A',
-            '\uffb9',
-            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
-            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
-            '\u00ad',
-        ]
-        for c in characters:
-            self.assertEqual(str_width(c), 1)
-
-        zero_width_characters = [
-            '\N{COMBINING ACUTE ACCENT}',
-            '\N{ZERO WIDTH JOINER}',
-        ]
-        for c in zero_width_characters:
-            with self.subTest(character=c):
-                self.assertEqual(str_width(c), 0)
-
-        characters = [chr(99989), chr(99999)]
-        for c in characters:
-            self.assertEqual(str_width(c), 2)
-
-    def test_wlen(self):
-        for c in ['a', 'b', '1', '!', '_']:
-            self.assertEqual(wlen(c), 1)
-        self.assertEqual(wlen('\x1a'), 2)
-
-        char_east_asian_width_N = chr(3800)
-        self.assertEqual(wlen(char_east_asian_width_N), 1)
-        char_east_asian_width_W = chr(4352)
-        self.assertEqual(wlen(char_east_asian_width_W), 2)
-
-        self.assertEqual(wlen('hello'), 5)
-        self.assertEqual(wlen('hello' + '\x1a'), 7)
-        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
-        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
-
     def test_prev_next_window(self):
         def gen_normal():
             yield 1

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
@@ -28,6 +28,7 @@
 import traceback
 from functools import partial
 from pathlib import Path
+from traceback import _str_width, _wlen
 import _colorize
 
 MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
@@ -1787,6 +1788,50 @@ def f():
         ]
         self.assertEqual(result_lines, expected)
 
+    def test_str_width(self):
+        # Characters expected to be exactly one column wide.  The list
+        # deliberately includes the Ctrl-Z control character and the soft
+        # hyphen (U+00AD), which _str_width() does not treat as zero width.
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
+        for c in characters:
+            self.assertEqual(_str_width(c), 1)
+
+        # gh-139246: combining marks and joiners take up no column at all.
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(_str_width(c), 0)
+
+        # Code points expected to be reported as double width.
+        characters = [chr(99989), chr(99999)]
+        for c in characters:
+            self.assertEqual(_str_width(c), 2)
+
+    def test_wlen(self):
+        # Single ASCII characters are one column wide.
+        for c in ['a', 'b', '1', '!', '_']:
+            self.assertEqual(_wlen(c), 1)
+        # _wlen() counts Ctrl-Z as two columns.
+        self.assertEqual(_wlen('\x1a'), 2)
+
+        # One sample each of a narrow and a wide East Asian width class.
+        char_east_asian_width_N = chr(3800)
+        self.assertEqual(_wlen(char_east_asian_width_N), 1)
+        char_east_asian_width_W = chr(4352)
+        self.assertEqual(_wlen(char_east_asian_width_W), 2)
+
+        # Multi-character strings: widths add up, Ctrl-Z still counts as
+        # two, and combining marks / joiners add nothing.
+        self.assertEqual(_wlen('hello'), 5)
+        self.assertEqual(_wlen('hello' + '\x1a'), 7)
+        self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2)
+
+
 class TestKeywordTypoSuggestions(unittest.TestCase):
     TYPO_CASES = [
         ("with block ad something:\n  pass", "and"),
@@ -5321,6 +5366,32 @@ def expected(t, m, fn, l, f, E, e, z):
         ]
         self.assertEqual(actual, expected(**colors))
 
+    def test_colorized_traceback_unicode(self):
+        # Wide characters: the two-character name is expected to sit above
+        # four "~" carets, so source characters and carets have to be
+        # paired by display width rather than by character index.  The
+        # trailing "####" is part of the source line and is expected to
+        # appear uncolored after the highlighted expression.
+        try:
+            啊哈=1; 啊哈/0####
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        # Only the source line and the caret line beneath it are compared.
+        # NOTE(review): E/e presumably are the highlight/range colors and z
+        # the reset sequence from the module-level ``colors`` mapping --
+        # confirm against its definition.
+        def expected(t, m, fn, l, f, E, e, z):
+            return [
+                f"    啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
+                f"            {e}~~~~{z}{E}^{z}{e}~{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
+
+        # Accented identifier: the whole name is expected to be highlighted,
+        # with five "^" carets beneath it.
+        try:
+            ééééé/0
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z):
+            return [
+                f"    {E}ééééé{z}/0",
+                f"    {E}^^^^^{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
 
 class TestLazyImportSuggestions(unittest.TestCase):
     """Test that lazy imports are not reified when computing AttributeError suggestions."""

diff --git a/Lib/traceback.py b/Lib/traceback.py
@@ -1,8 +1,10 @@
 """Extract, format and print information about Python stack traces."""
 
 import collections.abc
+import functools
 import itertools
 import linecache
+import re
 import sys
 import textwrap
 import types
@@ -681,12 +683,12 @@ def output_line(lineno):
                         colorized_line_parts = []
                         colorized_carets_parts = []
 
-                        for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
+                        for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
                             caret_group = list(group)
-                            if color == "^":
+                            if "^" in color:
                                 colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
                                 colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
-                            elif color == "~":
+                            elif "~" in color:
                                 colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
                                 colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
                             else:
@@ -968,7 +970,46 @@ def setup_positions(expr, force_valid=True):
 
     return None
 
-_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _zip_display_width(line, carets):
+    """Pair each grapheme of *line* with the carets drawn beneath it.
+
+    Yields ``(grapheme, caret_chunk)`` tuples, one per item produced by
+    ``unicodedata.iter_graphemes(line)``.  ``caret_chunk`` is the next
+    ``_display_width(grapheme)`` characters taken from *carets*; it is
+    shorter (possibly empty) once *carets* runs out.  Carets left over
+    after the last grapheme are not yielded.
+    """
+    import unicodedata
+    # One shared iterator, so every islice() below resumes where the
+    # previous one stopped.
+    carets = iter(carets)
+    for char in unicodedata.iter_graphemes(line):
+        char = str(char)
+        char_width = _display_width(char)
+        yield char, "".join(itertools.islice(carets, char_width))
+
+
+@functools.cache
+def _str_width(c: str) -> int:
+    """Return the display width (0, 1 or 2) of the single character *c*.
+
+    ASCII characters, control characters included, are reported as 1.
+    Combining characters and format (``Cf``) characters other than the
+    soft hyphen U+00AD are reported as 0.  Everything else is 1, unless
+    its East Asian width is wide or fullwidth, in which case it is 2.
+
+    *c* must be exactly one character long, since it is passed to
+    ``ord()``.  Results are memoized per character by ``functools.cache``.
+    """
+    import unicodedata
+    if ord(c) < 128:
+        return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":
+        return 0
+    w = unicodedata.east_asian_width(c)
+    # Neutral, narrow, halfwidth and ambiguous all count as one column;
+    # the remaining classes (wide, fullwidth) count as two.
+    if w in ("N", "Na", "H", "A"):
+        return 1
+    return 2
+
+
+# Matches an escape sequence of the form ESC "[" followed by any number of
+# characters in the range " ".."@" and one final character in "A".."~".
+# Every character such a match can contain is ASCII.
+ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+
+
+def _wlen(s: str) -> int:
+    """Return the display width of the string *s*.
+
+    A single character other than Ctrl-Z (U+001A) is measured directly
+    with ``_str_width()``.  Otherwise the per-character widths are summed,
+    the characters of every ``ANSI_ESCAPE_SEQUENCE`` match are discounted
+    so that the sequence contributes nothing, and each Ctrl-Z is counted
+    as two columns instead of one.
+    """
+    if len(s) == 1 and s != "\x1a":
+        return _str_width(s)
+    length = sum(_str_width(i) for i in s)
+    # remove lengths of any escape sequences
+    # (each of their characters is ASCII and was counted as 1 above)
+    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
+    # NOTE(review): the extra column per Ctrl-Z presumably reflects a
+    # two-character rendering such as "^Z" -- confirm with the callers.
+    ctrl_z_cnt = s.count("\x1a")
+    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
+
+
 
 def _display_width(line, offset=None):
     """Calculate the extra amount of width space the given source
@@ -982,13 +1023,7 @@ def _display_width(line, offset=None):
     if line.isascii():
         return offset
 
-    import unicodedata
-
-    return sum(
-        2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
-        for char in line[:offset]
-    )
-
+    return _wlen(line[:offset])
 
 
 class _ExceptionPrintContext:

diff --git a/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst
@@ -0,0 +1 @@
+Fix misplaced color highlighting in tracebacks whose source lines contain Unicode characters that are not one column wide.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix misplaced color highlighting in tracebacks whose source lines contain Unicode characters that are not one column wide.
ambv marked this conversation as resolved. Outdated Show resolved Hide resolved