Skip to content
29 changes: 1 addition & 28 deletions Lib/_pyrepl/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations
import builtins
import functools
import keyword
import re
import token as T
Expand All @@ -11,12 +10,12 @@
from collections import deque
from io import StringIO
from tokenize import TokenInfo as TI
from traceback import _str_width as str_width, _wlen as wlen
from typing import Iterable, Iterator, Match, NamedTuple, Self

from .types import CharBuffer, CharWidths
from .trace import trace

ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
IDENTIFIERS_AFTER = {"def", "class"}
Expand Down Expand Up @@ -59,32 +58,6 @@ class ColorSpan(NamedTuple):
tag: str


@functools.cache
def str_width(c: str) -> int:
    """Return the display width (0, 1 or 2) of the single character `c`.

    ASCII characters are always width 1.  Combining characters and
    format ("Cf") characters other than U+00AD are width 0.  Any other
    character is width 1 when its East Asian Width is "N", "Na", "H" or
    "A", and width 2 otherwise.
    """
    # Fast path: every ASCII character counts as one column.
    if ord(c) <= 127:
        return 1
    # gh-139246 for zero-width joiner and combining characters
    is_format_char = unicodedata.category(c) == "Cf" and c != "\u00ad"
    if unicodedata.combining(c) or is_format_char:
        return 0
    is_narrow = unicodedata.east_asian_width(c) in ("N", "Na", "H", "A")
    return 1 if is_narrow else 2


def wlen(s: str) -> int:
    """Return the display width of `s`, not counting ANSI escape sequences.

    Each "\x1a" character contributes two columns instead of one.
    """
    # A lone character (other than "\x1a") needs no further adjustment.
    if s != "\x1a" and len(s) == 1:
        return str_width(s)
    raw_width = sum(map(str_width, s))
    # remove lengths of any escape sequences
    escape_chars = sum(len(seq) for seq in ANSI_ESCAPE_SEQUENCE.findall(s))
    # str_width() already counted one column per "\x1a"; add the second one.
    return raw_width - escape_chars + s.count("\x1a")


def unbracket(s: str, including_content: bool = False) -> str:
r"""Return `s` with \001 and \002 characters removed.

Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_pyrepl/support.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from code import InteractiveConsole
from functools import partial
from traceback import ANSI_ESCAPE_SEQUENCE
from typing import Iterable
from unittest.mock import MagicMock

from _pyrepl.console import Console, Event
from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
from _pyrepl.simple_interact import _strip_final_indent
from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
from _pyrepl.utils import unbracket


class ScreenEqualMixin:
Expand Down
45 changes: 1 addition & 44 deletions Lib/test/test_pyrepl/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,9 @@
from unittest import TestCase

from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
from _pyrepl.utils import prev_next_window, gen_colors


class TestUtils(TestCase):
def test_str_width(self):
    # str_width() must report one column for each of these characters.
    single_width = (
        'a',
        '1',
        '_',
        '!',
        '\x1a',
        '\u263A',
        '\uffb9',
        '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
        '\N{LATIN SMALL LETTER E WITH CEDILLA}',  # ȩ
        '\u00ad',
    )
    for char in single_width:
        self.assertEqual(str_width(char), 1)

    # Combining marks and the zero width joiner take no columns.
    zero_width = (
        '\N{COMBINING ACUTE ACCENT}',
        '\N{ZERO WIDTH JOINER}',
    )
    for char in zero_width:
        with self.subTest(character=char):
            self.assertEqual(str_width(char), 0)

    # These code points are expected to take two columns.
    for code_point in (99989, 99999):
        self.assertEqual(str_width(chr(code_point)), 2)

def test_wlen(self):
    # Plain ASCII characters are one column each.
    for char in 'ab1!_':
        self.assertEqual(wlen(char), 1)
    # "\x1a" is counted as two columns.
    self.assertEqual(wlen('\x1a'), 2)

    narrow_char = chr(3800)  # East Asian Width "N"
    wide_char = chr(4352)  # East Asian Width "W"
    self.assertEqual(wlen(narrow_char), 1)
    self.assertEqual(wlen(wide_char), 2)

    # Multi-character strings, including zero-width characters.
    expected_widths = (
        ('hello', 5),
        ('hello' + '\x1a', 7),
        ('e\N{COMBINING ACUTE ACCENT}', 1),
        ('a\N{ZERO WIDTH JOINER}b', 2),
    )
    for text, width in expected_widths:
        self.assertEqual(wlen(text), width)

def test_prev_next_window(self):
def gen_normal():
yield 1
Expand Down
71 changes: 71 additions & 0 deletions Lib/test/test_traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import traceback
from functools import partial
from pathlib import Path
from traceback import _str_width, _wlen
import _colorize

MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
Expand Down Expand Up @@ -1787,6 +1788,50 @@ def f():
]
self.assertEqual(result_lines, expected)

def test_str_width(self):
    # _str_width() must report one column for each of these characters.
    one_column = (
        'a',
        '1',
        '_',
        '!',
        '\x1a',
        '\u263A',
        '\uffb9',
        '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
        '\N{LATIN SMALL LETTER E WITH CEDILLA}',  # ȩ
        '\u00ad',
    )
    for ch in one_column:
        self.assertEqual(_str_width(ch), 1)

    # Combining marks and the zero width joiner take no columns.
    no_columns = (
        '\N{COMBINING ACUTE ACCENT}',
        '\N{ZERO WIDTH JOINER}',
    )
    for ch in no_columns:
        with self.subTest(character=ch):
            self.assertEqual(_str_width(ch), 0)

    # These code points are expected to take two columns.
    for ordinal in (99989, 99999):
        self.assertEqual(_str_width(chr(ordinal)), 2)

def test_wlen(self):
    # Plain ASCII characters are one column each.
    for ch in 'ab1!_':
        self.assertEqual(_wlen(ch), 1)
    # "\x1a" is counted as two columns.
    self.assertEqual(_wlen('\x1a'), 2)

    narrow = chr(3800)  # East Asian Width "N"
    wide = chr(4352)  # East Asian Width "W"
    self.assertEqual(_wlen(narrow), 1)
    self.assertEqual(_wlen(wide), 2)

    # Multi-character strings, including zero-width characters.
    cases = (
        ('hello', 5),
        ('hello' + '\x1a', 7),
        ('e\N{COMBINING ACUTE ACCENT}', 1),
        ('a\N{ZERO WIDTH JOINER}b', 2),
    )
    for text, expected_width in cases:
        self.assertEqual(_wlen(text), expected_width)


class TestKeywordTypoSuggestions(unittest.TestCase):
TYPO_CASES = [
("with block ad something:\n pass", "and"),
Expand Down Expand Up @@ -5321,6 +5366,32 @@ def expected(t, m, fn, l, f, E, e, z):
]
self.assertEqual(actual, expected(**colors))

def test_colorized_traceback_unicode(self):
    # Check the colorized source line and caret line of a formatted
    # traceback when the failing source line contains non-ASCII text.
    #
    # Case 1: the failing expression uses the two-character name 啊哈.
    # NOTE: the expected output below echoes the next statement's own
    # source text, including its trailing "####", so that line must stay
    # exactly as written.
    try:
        啊哈=1; 啊哈/0####
    except Exception as e:
        exc = traceback.TracebackException.from_exception(e)

    actual = "".join(exc.format(colorize=True)).splitlines()
    # `expected` takes the color codes as keyword arguments from `colors`
    # (defined outside this method); only E, e and z are used here.
    def expected(t, m, fn, l, f, E, e, z):
        return [
            f" 啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
            # Four "~" are expected under the two-character name 啊哈.
            f" {e}~~~~{z}{E}^{z}{e}~{z}",
        ]
    # presumably actual[2:4] is the echoed source line plus its caret
    # line -- confirm against the full formatted output
    self.assertEqual(actual[2:4], expected(**colors))

    # Case 2: a failing name made of accented "e" characters; five "^"
    # are expected on the caret line.
    try:
        ééééé/0
    except Exception as e:
        exc = traceback.TracebackException.from_exception(e)

    actual = "".join(exc.format(colorize=True)).splitlines()
    def expected(t, m, fn, l, f, E, e, z):
        return [
            f" {E}ééééé{z}/0",
            f" {E}^^^^^{z}",
        ]
    self.assertEqual(actual[2:4], expected(**colors))

class TestLazyImportSuggestions(unittest.TestCase):
"""Test that lazy imports are not reified when computing AttributeError suggestions."""
Expand Down
57 changes: 46 additions & 11 deletions Lib/traceback.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Extract, format and print information about Python stack traces."""

import collections.abc
import functools
import itertools
import linecache
import re
import sys
import textwrap
import types
Expand Down Expand Up @@ -681,12 +683,12 @@ def output_line(lineno):
colorized_line_parts = []
colorized_carets_parts = []

for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
caret_group = list(group)
if color == "^":
if "^" in color:
colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
elif color == "~":
elif "~" in color:
colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
else:
Expand Down Expand Up @@ -968,7 +970,46 @@ def setup_positions(expr, force_valid=True):

return None

_WIDE_CHAR_SPECIFIERS = "WF"

def _zip_display_width(line, carets):
    """Yield ``(cluster, caret_chunk)`` pairs for `line` and `carets`.

    `line` is split with ``unicodedata.iter_graphemes()``.  For each
    piece, as many items are consumed from `carets` as
    ``_display_width()`` reports for that piece, and they are joined
    into one string.  The chunk is shorter (possibly empty) once
    `carets` is exhausted.
    """
    import unicodedata
    remaining_carets = iter(carets)
    for grapheme in unicodedata.iter_graphemes(line):
        cluster = str(grapheme)
        columns = _display_width(cluster)
        caret_chunk = "".join(itertools.islice(remaining_carets, columns))
        yield cluster, caret_chunk


@functools.cache
def _str_width(c: str) -> int:
import unicodedata
if ord(c) < 128:
return 1
# gh-139246 for zero-width joiner and combining characters
if unicodedata.combining(c):
return 0
category = unicodedata.category(c)
if category == "Cf" and c != "\u00ad":
return 0
w = unicodedata.east_asian_width(c)
if w in ("N", "Na", "H", "A"):
return 1
return 2


ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")


def _wlen(s: str) -> int:
    """Return the display width of `s`, not counting ANSI escape sequences.

    Each "\x1a" character contributes two columns instead of one.
    """
    # A lone character (other than "\x1a") needs no further adjustment.
    if s != "\x1a" and len(s) == 1:
        return _str_width(s)
    raw_width = sum(map(_str_width, s))
    # remove lengths of any escape sequences
    escape_chars = sum(len(seq) for seq in ANSI_ESCAPE_SEQUENCE.findall(s))
    # _str_width() already counted one column per "\x1a"; add the second one.
    return raw_width - escape_chars + s.count("\x1a")



def _display_width(line, offset=None):
"""Calculate the extra amount of width space the given source
Expand All @@ -982,13 +1023,7 @@ def _display_width(line, offset=None):
if line.isascii():
return offset

import unicodedata

return sum(
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
for char in line[:offset]
)

return _wlen(line[:offset])


class _ExceptionPrintContext:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix traceback color output with Unicode characters.
Loading