Improve our own wcwidth implementation and remove dependency on wcwidth package

`TerminalWriter`, imported recently from `py`, contains its own incomplete wcwidth implementation (`char_width`/`get_line_width`). The `TerminalReporter` also needs this, but uses the external `wcwidth` package. This commit brings the `TerminalWriter` implementation up to par with `wcwidth`, moves the implementation to a new module, `_pytest._io.wcwidth`, which is used everywhere, and removes the dependency. The differences compared to the `wcwidth` package are: - Normalizes the string before counting. - Uses Python's `unicodedata` instead of vendored Unicode tables. This means the data corresponds to the Unicode version of the running Python rather than the one bundled with the `wcwidth` package. - Applies some optimizations.
2020-05-26 14:59:16 +03:00 · 2020-05-26 14:59:16 +03:00 · aca534c67d
parent 54ae27f081
commit aca534c67d
7 changed files with 111 additions and 31 deletions
--- a/changelog/7264.improvement.rst
+++ b/changelog/7264.improvement.rst
@ -0,0 +1 @@
+The dependency on the ``wcwidth`` package has been removed.
--- a/setup.py
+++ b/setup.py
@ -12,7 +12,6 @@ INSTALL_REQUIRES = [
    'colorama;sys_platform=="win32"',
    "pluggy>=0.12,<1.0",
    'importlib-metadata>=0.12;python_version<"3.8"',
-    "wcwidth",
 ]


--- a/src/_pytest/_io/terminalwriter.py
+++ b/src/_pytest/_io/terminalwriter.py
@ -2,12 +2,12 @@
 import os
 import shutil
 import sys
-import unicodedata
-from functools import lru_cache
 from typing import Optional
 from typing import Sequence
 from typing import TextIO

+from .wcwidth import wcswidth
+

 # This code was initially copied from py 1.8.1, file _io/terminalwriter.py.

@ -22,17 +22,6 @@ def get_terminal_width() -> int:
    return width


-@lru_cache(100)
-def char_width(c: str) -> int:
-    # Fullwidth and Wide -> 2, all else (including Ambiguous) -> 1.
-    return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1
-
-
-def get_line_width(text: str) -> int:
-    text = unicodedata.normalize("NFC", text)
-    return sum(char_width(c) for c in text)
-
-
 def should_do_markup(file: TextIO) -> bool:
    if os.environ.get("PY_COLORS") == "1":
        return True
@ -99,7 +88,7 @@ class TerminalWriter:
    @property
    def width_of_current_line(self) -> int:
        """Return an estimate of the width so far in the current line."""
-        return get_line_width(self._current_line)
+        return wcswidth(self._current_line)

    def markup(self, text: str, **markup: bool) -> str:
        for name in markup:
--- a/src/_pytest/_io/wcwidth.py
+++ b/src/_pytest/_io/wcwidth.py
@ -0,0 +1,55 @@
+import unicodedata
+from functools import lru_cache
+
+
+@lru_cache(100)
+def wcwidth(c: str) -> int:
+    """Determine how many columns are needed to display a character in a terminal.
+
+    Returns -1 if the character is not printable.
+    Returns 0, 1 or 2 for other characters.
+    """
+    o = ord(c)
+
+    # ASCII fast path.
+    if 0x20 <= o < 0x07F:
+        return 1
+
+    # Some Cf/Zp/Zl characters which should be zero-width.
+    if (
+        o == 0x0000
+        or 0x200B <= o <= 0x200F
+        or 0x2028 <= o <= 0x202E
+        or 0x2060 <= o <= 0x2063
+    ):
+        return 0
+
+    category = unicodedata.category(c)
+
+    # Control characters.
+    if category == "Cc":
+        return -1
+
+    # Combining characters with zero width.
+    if category in ("Me", "Mn"):
+        return 0
+
+    # Full/Wide east asian characters.
+    if unicodedata.east_asian_width(c) in ("F", "W"):
+        return 2
+
+    return 1
+
+
+def wcswidth(s: str) -> int:
+    """Determine how many columns are needed to display a string in a terminal.
+
+    Returns -1 if the string contains non-printable characters.
+    """
+    width = 0
+    for c in unicodedata.normalize("NFC", s):
+        wc = wcwidth(c)
+        if wc < 0:
+            return -1
+        width += wc
+    return width
--- a/src/_pytest/terminal.py
+++ b/src/_pytest/terminal.py
@ -27,6 +27,7 @@ from more_itertools import collapse
 import pytest
 from _pytest import nodes
 from _pytest._io import TerminalWriter
+from _pytest._io.wcwidth import wcswidth
 from _pytest.compat import order_preserving_dict
 from _pytest.config import Config
 from _pytest.config import ExitCode
@ -1122,8 +1123,6 @@ def _get_pos(config, rep):

 def _get_line_with_reprcrash_message(config, rep, termwidth):
    """Get summary line for a report, trying to add reprcrash message."""
-    from wcwidth import wcswidth
-
    verbose_word = rep._get_verbose_word(config)
    pos = _get_pos(config, rep)

--- a/testing/io/test_wcwidth.py
+++ b/testing/io/test_wcwidth.py
@ -0,0 +1,38 @@
+import pytest
+from _pytest._io.wcwidth import wcswidth
+from _pytest._io.wcwidth import wcwidth
+
+
+@pytest.mark.parametrize(
+    ("c", "expected"),
+    [
+        ("\0", 0),
+        ("\n", -1),
+        ("a", 1),
+        ("1", 1),
+        ("א", 1),
+        ("\u200B", 0),
+        ("\u1ABE", 0),
+        ("\u0591", 0),
+        ("🉐", 2),
+        ("＄", 2),
+    ],
+)
+def test_wcwidth(c: str, expected: int) -> None:
+    assert wcwidth(c) == expected
+
+
+@pytest.mark.parametrize(
+    ("s", "expected"),
+    [
+        ("", 0),
+        ("hello, world!", 13),
+        ("hello, world!\n", -1),
+        ("0123456789", 10),
+        ("שלום, עולם!", 11),
+        ("שְבֻעָיים", 6),
+        ("🉐🉐🉐", 6),
+    ],
+)
+def test_wcswidth(s: str, expected: int) -> None:
+    assert wcswidth(s) == expected
--- a/testing/test_terminal.py
+++ b/testing/test_terminal.py
@ -14,7 +14,9 @@ import pluggy
 import py

 import _pytest.config
+import _pytest.terminal
 import pytest
+from _pytest._io.wcwidth import wcswidth
 from _pytest.config import ExitCode
 from _pytest.pytester import Testdir
 from _pytest.reports import BaseReport
@ -2027,9 +2029,6 @@ def test_skip_reasons_folding():


 def test_line_with_reprcrash(monkeypatch):
-    import _pytest.terminal
-    from wcwidth import wcswidth
-
    mocked_verbose_word = "FAILED"

    mocked_pos = "some::nodeid"
@ -2079,19 +2078,19 @@ def test_line_with_reprcrash(monkeypatch):
    check("some\nmessage", 80, "FAILED some::nodeid - some")

    # Test unicode safety.
-    check("😄😄😄😄😄\n2nd line", 25, "FAILED some::nodeid - ...")
-    check("😄😄😄😄😄\n2nd line", 26, "FAILED some::nodeid - ...")
-    check("😄😄😄😄😄\n2nd line", 27, "FAILED some::nodeid - 😄...")
-    check("😄😄😄😄😄\n2nd line", 28, "FAILED some::nodeid - 😄...")
-    check("😄😄😄😄😄\n2nd line", 29, "FAILED some::nodeid - 😄😄...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 25, "FAILED some::nodeid - ...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 26, "FAILED some::nodeid - ...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 27, "FAILED some::nodeid - 🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 28, "FAILED some::nodeid - 🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED some::nodeid - 🉐🉐...")

    # NOTE: constructed, not sure if this is supported.
-    mocked_pos = "nodeid::😄::withunicode"
-    check("😄😄😄😄😄\n2nd line", 29, "FAILED nodeid::😄::withunicode")
-    check("😄😄😄😄😄\n2nd line", 40, "FAILED nodeid::😄::withunicode - 😄😄...")
-    check("😄😄😄😄😄\n2nd line", 41, "FAILED nodeid::😄::withunicode - 😄😄...")
-    check("😄😄😄😄😄\n2nd line", 42, "FAILED nodeid::😄::withunicode - 😄😄😄...")
-    check("😄😄😄😄😄\n2nd line", 80, "FAILED nodeid::😄::withunicode - 😄😄😄😄😄")
+    mocked_pos = "nodeid::🉐::withunicode"
+    check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED nodeid::🉐::withunicode")
+    check("🉐🉐🉐🉐🉐\n2nd line", 40, "FAILED nodeid::🉐::withunicode - 🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 41, "FAILED nodeid::🉐::withunicode - 🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 42, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 80, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐🉐🉐")


@pytest.mark.parametrize(
				`@ -0,0 +1 @@`
				The dependency on the ``wcwidth`` package has been removed.