From aca534c67dea7eb0fcddf194bc64d65bc3e07c8b Mon Sep 17 00:00:00 2001 From: Ran Benita Date: Tue, 26 May 2020 14:59:16 +0300 Subject: [PATCH] Improve our own wcwidth implementation and remove dependency on wcwidth package `TerminalWriter`, imported recently from `py`, contains its own incomplete wcwidth (`char_width`/`get_line_width`) implementation. The `TerminalReporter` also needs this, but uses the external `wcwidth` package. This commit brings the `TerminalWriter` implementation up to par with `wcwidth`, moves the implementation to a new file `_pytest._io.wcwidth` which is used everywhere, and removes the dependency. The differences compared to the `wcwidth` package are: - Normalizes the string before counting. - Uses Python's `unicodedata` instead of vendored Unicode tables. This means the data corresponds to the Unicode version of the running Python instead of the Unicode version of the installed `wcwidth` package. - Applies some optimizations. --- changelog/7264.improvement.rst | 1 + setup.py | 1 - src/_pytest/_io/terminalwriter.py | 17 ++-------- src/_pytest/_io/wcwidth.py | 55 +++++++++++++++++++++++++++++++ src/_pytest/terminal.py | 3 +- testing/io/test_wcwidth.py | 38 +++++++++++++++++++++ testing/test_terminal.py | 27 ++++++++------- 7 files changed, 111 insertions(+), 31 deletions(-) create mode 100644 changelog/7264.improvement.rst create mode 100644 src/_pytest/_io/wcwidth.py create mode 100644 testing/io/test_wcwidth.py diff --git a/changelog/7264.improvement.rst b/changelog/7264.improvement.rst new file mode 100644 index 000000000..035745c4d --- /dev/null +++ b/changelog/7264.improvement.rst @@ -0,0 +1 @@ +The dependency on the ``wcwidth`` package has been removed. 
diff --git a/setup.py b/setup.py index 6ebfd67fb..cd2ecbe07 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,6 @@ INSTALL_REQUIRES = [ 'colorama;sys_platform=="win32"', "pluggy>=0.12,<1.0", 'importlib-metadata>=0.12;python_version<"3.8"', - "wcwidth", ] diff --git a/src/_pytest/_io/terminalwriter.py b/src/_pytest/_io/terminalwriter.py index 4f22f5a7a..a285cf4fc 100644 --- a/src/_pytest/_io/terminalwriter.py +++ b/src/_pytest/_io/terminalwriter.py @@ -2,12 +2,12 @@ import os import shutil import sys -import unicodedata -from functools import lru_cache from typing import Optional from typing import Sequence from typing import TextIO +from .wcwidth import wcswidth + # This code was initially copied from py 1.8.1, file _io/terminalwriter.py. @@ -22,17 +22,6 @@ def get_terminal_width() -> int: return width -@lru_cache(100) -def char_width(c: str) -> int: - # Fullwidth and Wide -> 2, all else (including Ambiguous) -> 1. - return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1 - - -def get_line_width(text: str) -> int: - text = unicodedata.normalize("NFC", text) - return sum(char_width(c) for c in text) - - def should_do_markup(file: TextIO) -> bool: if os.environ.get("PY_COLORS") == "1": return True @@ -99,7 +88,7 @@ class TerminalWriter: @property def width_of_current_line(self) -> int: """Return an estimate of the width so far in the current line.""" - return get_line_width(self._current_line) + return wcswidth(self._current_line) def markup(self, text: str, **markup: bool) -> str: for name in markup: diff --git a/src/_pytest/_io/wcwidth.py b/src/_pytest/_io/wcwidth.py new file mode 100644 index 000000000..e5c7bf4d8 --- /dev/null +++ b/src/_pytest/_io/wcwidth.py @@ -0,0 +1,55 @@ +import unicodedata +from functools import lru_cache + + +@lru_cache(100) +def wcwidth(c: str) -> int: + """Determine how many columns are needed to display a character in a terminal. + + Returns -1 if the character is not printable. + Returns 0, 1 or 2 for other characters. 
+ """ + o = ord(c) + + # ASCII fast path. + if 0x20 <= o < 0x07F: + return 1 + + # Some Cf/Zp/Zl characters which should be zero-width. + if ( + o == 0x0000 + or 0x200B <= o <= 0x200F + or 0x2028 <= o <= 0x202E + or 0x2060 <= o <= 0x2063 + ): + return 0 + + category = unicodedata.category(c) + + # Control characters. + if category == "Cc": + return -1 + + # Combining characters with zero width. + if category in ("Me", "Mn"): + return 0 + + # Full/Wide east asian characters. + if unicodedata.east_asian_width(c) in ("F", "W"): + return 2 + + return 1 + + +def wcswidth(s: str) -> int: + """Determine how many columns are needed to display a string in a terminal. + + Returns -1 if the string contains non-printable characters. + """ + width = 0 + for c in unicodedata.normalize("NFC", s): + wc = wcwidth(c) + if wc < 0: + return -1 + width += wc + return width diff --git a/src/_pytest/terminal.py b/src/_pytest/terminal.py index 8ecb5a16b..646fe4cca 100644 --- a/src/_pytest/terminal.py +++ b/src/_pytest/terminal.py @@ -27,6 +27,7 @@ from more_itertools import collapse import pytest from _pytest import nodes from _pytest._io import TerminalWriter +from _pytest._io.wcwidth import wcswidth from _pytest.compat import order_preserving_dict from _pytest.config import Config from _pytest.config import ExitCode @@ -1122,8 +1123,6 @@ def _get_pos(config, rep): def _get_line_with_reprcrash_message(config, rep, termwidth): """Get summary line for a report, trying to add reprcrash message.""" - from wcwidth import wcswidth - verbose_word = rep._get_verbose_word(config) pos = _get_pos(config, rep) diff --git a/testing/io/test_wcwidth.py b/testing/io/test_wcwidth.py new file mode 100644 index 000000000..7cc74df5d --- /dev/null +++ b/testing/io/test_wcwidth.py @@ -0,0 +1,38 @@ +import pytest +from _pytest._io.wcwidth import wcswidth +from _pytest._io.wcwidth import wcwidth + + +@pytest.mark.parametrize( + ("c", "expected"), + [ + ("\0", 0), + ("\n", -1), + ("a", 1), + ("1", 1), + ("א", 
1), + ("\u200B", 0), + ("\u1ABE", 0), + ("\u0591", 0), + ("🉐", 2), + ("＄", 2), + ], +) +def test_wcwidth(c: str, expected: int) -> None: + assert wcwidth(c) == expected + + +@pytest.mark.parametrize( + ("s", "expected"), + [ + ("", 0), + ("hello, world!", 13), + ("hello, world!\n", -1), + ("0123456789", 10), + ("שלום, עולם!", 11), + ("שְבֻגָיים", 6), + ("🉐🉐🉐", 6), + ], +) +def test_wcswidth(s: str, expected: int) -> None: + assert wcswidth(s) == expected diff --git a/testing/test_terminal.py b/testing/test_terminal.py index 0f5b4cb68..17fd29238 100644 --- a/testing/test_terminal.py +++ b/testing/test_terminal.py @@ -14,7 +14,9 @@ import pluggy import py import _pytest.config +import _pytest.terminal import pytest +from _pytest._io.wcwidth import wcswidth from _pytest.config import ExitCode from _pytest.pytester import Testdir from _pytest.reports import BaseReport @@ -2027,9 +2029,6 @@ def test_skip_reasons_folding(): def test_line_with_reprcrash(monkeypatch): - import _pytest.terminal - from wcwidth import wcswidth - mocked_verbose_word = "FAILED" mocked_pos = "some::nodeid" @@ -2079,19 +2078,19 @@ def test_line_with_reprcrash(monkeypatch): check("some\nmessage", 80, "FAILED some::nodeid - some") # Test unicode safety. 
- check("😄😄😄😄😄\n2nd line", 25, "FAILED some::nodeid - ...") - check("😄😄😄😄😄\n2nd line", 26, "FAILED some::nodeid - ...") - check("😄😄😄😄😄\n2nd line", 27, "FAILED some::nodeid - 😄...") - check("😄😄😄😄😄\n2nd line", 28, "FAILED some::nodeid - 😄...") - check("😄😄😄😄😄\n2nd line", 29, "FAILED some::nodeid - 😄😄...") + check("🉐🉐🉐🉐🉐\n2nd line", 25, "FAILED some::nodeid - ...") + check("🉐🉐🉐🉐🉐\n2nd line", 26, "FAILED some::nodeid - ...") + check("🉐🉐🉐🉐🉐\n2nd line", 27, "FAILED some::nodeid - 🉐...") + check("🉐🉐🉐🉐🉐\n2nd line", 28, "FAILED some::nodeid - 🉐...") + check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED some::nodeid - 🉐🉐...") # NOTE: constructed, not sure if this is supported. - mocked_pos = "nodeid::😄::withunicode" - check("😄😄😄😄😄\n2nd line", 29, "FAILED nodeid::😄::withunicode") - check("😄😄😄😄😄\n2nd line", 40, "FAILED nodeid::😄::withunicode - 😄😄...") - check("😄😄😄😄😄\n2nd line", 41, "FAILED nodeid::😄::withunicode - 😄😄...") - check("😄😄😄😄😄\n2nd line", 42, "FAILED nodeid::😄::withunicode - 😄😄😄...") - check("😄😄😄😄😄\n2nd line", 80, "FAILED nodeid::😄::withunicode - 😄😄😄😄😄") + mocked_pos = "nodeid::🉐::withunicode" + check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED nodeid::🉐::withunicode") + check("🉐🉐🉐🉐🉐\n2nd line", 40, "FAILED nodeid::🉐::withunicode - 🉐🉐...") + check("🉐🉐🉐🉐🉐\n2nd line", 41, "FAILED nodeid::🉐::withunicode - 🉐🉐...") + check("🉐🉐🉐🉐🉐\n2nd line", 42, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐...") + check("🉐🉐🉐🉐🉐\n2nd line", 80, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐🉐🉐") @pytest.mark.parametrize(