From aca534c67dea7eb0fcddf194bc64d65bc3e07c8b Mon Sep 17 00:00:00 2001
From: Ran Benita <ran@unusedvar.com>
Date: Tue, 26 May 2020 14:59:16 +0300
Subject: [PATCH] Improve our own wcwidth implementation and remove dependency
 on wcwidth package

`TerminalWriter`, imported recently from `py`, contains its own
incomplete wcwidth (`char_width`/`get_line_width`) implementation. The
`TerminalReporter` also needs this, but uses the external `wcwidth`
package.

This commit brings the `TerminalWriter` implementation up-to-par with
`wcwidth`, moves the implementation to a new file `_pytest._io.wcwidth`
which is used everywhere, and removes the dependency.

The differences compared to the `wcwidth` package are:

- Normalizes the string before counting.

- Uses Python's `unicodedata` instead of vendored Unicode tables. This
  means the data corresponds to the Python version's Unicode version
  instead of the `wcwidth` package's version.

- Applies some optimizations.
---
 changelog/7264.improvement.rst    |  1 +
 setup.py                          |  1 -
 src/_pytest/_io/terminalwriter.py | 17 ++--------
 src/_pytest/_io/wcwidth.py        | 55 +++++++++++++++++++++++++++++++
 src/_pytest/terminal.py           |  3 +-
 testing/io/test_wcwidth.py        | 38 +++++++++++++++++++++
 testing/test_terminal.py          | 27 ++++++++-------
 7 files changed, 111 insertions(+), 31 deletions(-)
 create mode 100644 changelog/7264.improvement.rst
 create mode 100644 src/_pytest/_io/wcwidth.py
 create mode 100644 testing/io/test_wcwidth.py

diff --git a/changelog/7264.improvement.rst b/changelog/7264.improvement.rst
new file mode 100644
index 000000000..035745c4d
--- /dev/null
+++ b/changelog/7264.improvement.rst
@@ -0,0 +1 @@
+The dependency on the ``wcwidth`` package has been removed.
diff --git a/setup.py b/setup.py
index 6ebfd67fb..cd2ecbe07 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,6 @@ INSTALL_REQUIRES = [
     'colorama;sys_platform=="win32"',
     "pluggy>=0.12,<1.0",
     'importlib-metadata>=0.12;python_version<"3.8"',
-    "wcwidth",
 ]
 
 
diff --git a/src/_pytest/_io/terminalwriter.py b/src/_pytest/_io/terminalwriter.py
index 4f22f5a7a..a285cf4fc 100644
--- a/src/_pytest/_io/terminalwriter.py
+++ b/src/_pytest/_io/terminalwriter.py
@@ -2,12 +2,12 @@
 import os
 import shutil
 import sys
-import unicodedata
-from functools import lru_cache
 from typing import Optional
 from typing import Sequence
 from typing import TextIO
 
+from .wcwidth import wcswidth
+
 
 # This code was initially copied from py 1.8.1, file _io/terminalwriter.py.
 
@@ -22,17 +22,6 @@ def get_terminal_width() -> int:
     return width
 
 
-@lru_cache(100)
-def char_width(c: str) -> int:
-    # Fullwidth and Wide -> 2, all else (including Ambiguous) -> 1.
-    return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1
-
-
-def get_line_width(text: str) -> int:
-    text = unicodedata.normalize("NFC", text)
-    return sum(char_width(c) for c in text)
-
-
 def should_do_markup(file: TextIO) -> bool:
     if os.environ.get("PY_COLORS") == "1":
         return True
@@ -99,7 +88,7 @@ class TerminalWriter:
     @property
     def width_of_current_line(self) -> int:
         """Return an estimate of the width so far in the current line."""
-        return get_line_width(self._current_line)
+        return wcswidth(self._current_line)
 
     def markup(self, text: str, **markup: bool) -> str:
         for name in markup:
diff --git a/src/_pytest/_io/wcwidth.py b/src/_pytest/_io/wcwidth.py
new file mode 100644
index 000000000..e5c7bf4d8
--- /dev/null
+++ b/src/_pytest/_io/wcwidth.py
@@ -0,0 +1,55 @@
+import unicodedata
+from functools import lru_cache
+
+
+@lru_cache(100)
+def wcwidth(c: str) -> int:
+    """Determine how many columns are needed to display a character in a terminal.
+
+    Returns -1 if the character is not printable.
+    Returns 0, 1 or 2 for other characters.
+    """
+    o = ord(c)
+
+    # ASCII fast path.
+    if 0x20 <= o < 0x07F:
+        return 1
+
+    # Some Cf/Zp/Zl characters which should be zero-width.
+    if (
+        o == 0x0000
+        or 0x200B <= o <= 0x200F
+        or 0x2028 <= o <= 0x202E
+        or 0x2060 <= o <= 0x2063
+    ):
+        return 0
+
+    category = unicodedata.category(c)
+
+    # Control characters.
+    if category == "Cc":
+        return -1
+
+    # Combining characters with zero width.
+    if category in ("Me", "Mn"):
+        return 0
+
+    # Full/Wide east asian characters.
+    if unicodedata.east_asian_width(c) in ("F", "W"):
+        return 2
+
+    return 1
+
+
+def wcswidth(s: str) -> int:
+    """Determine how many columns are needed to display a string in a terminal.
+
+    Returns -1 if the string contains non-printable characters.
+    """
+    width = 0
+    for c in unicodedata.normalize("NFC", s):
+        wc = wcwidth(c)
+        if wc < 0:
+            return -1
+        width += wc
+    return width
diff --git a/src/_pytest/terminal.py b/src/_pytest/terminal.py
index 8ecb5a16b..646fe4cca 100644
--- a/src/_pytest/terminal.py
+++ b/src/_pytest/terminal.py
@@ -27,6 +27,7 @@ from more_itertools import collapse
 import pytest
 from _pytest import nodes
 from _pytest._io import TerminalWriter
+from _pytest._io.wcwidth import wcswidth
 from _pytest.compat import order_preserving_dict
 from _pytest.config import Config
 from _pytest.config import ExitCode
@@ -1122,8 +1123,6 @@ def _get_pos(config, rep):
 
 def _get_line_with_reprcrash_message(config, rep, termwidth):
     """Get summary line for a report, trying to add reprcrash message."""
-    from wcwidth import wcswidth
-
     verbose_word = rep._get_verbose_word(config)
     pos = _get_pos(config, rep)
 
diff --git a/testing/io/test_wcwidth.py b/testing/io/test_wcwidth.py
new file mode 100644
index 000000000..7cc74df5d
--- /dev/null
+++ b/testing/io/test_wcwidth.py
@@ -0,0 +1,38 @@
+import pytest
+from _pytest._io.wcwidth import wcswidth
+from _pytest._io.wcwidth import wcwidth
+
+
+@pytest.mark.parametrize(
+    ("c", "expected"),
+    [
+        ("\0", 0),
+        ("\n", -1),
+        ("a", 1),
+        ("1", 1),
+        ("א", 1),
+        ("\u200B", 0),
+        ("\u1ABE", 0),
+        ("\u0591", 0),
+        ("🉐", 2),
+        ("＄", 2),
+    ],
+)
+def test_wcwidth(c: str, expected: int) -> None:
+    assert wcwidth(c) == expected
+
+
+@pytest.mark.parametrize(
+    ("s", "expected"),
+    [
+        ("", 0),
+        ("hello, world!", 13),
+        ("hello, world!\n", -1),
+        ("0123456789", 10),
+        ("שלום, עולם!", 11),
+        ("שְבֻעָיים", 6),
+        ("🉐🉐🉐", 6),
+    ],
+)
+def test_wcswidth(s: str, expected: int) -> None:
+    assert wcswidth(s) == expected
diff --git a/testing/test_terminal.py b/testing/test_terminal.py
index 0f5b4cb68..17fd29238 100644
--- a/testing/test_terminal.py
+++ b/testing/test_terminal.py
@@ -14,7 +14,9 @@ import pluggy
 import py
 
 import _pytest.config
+import _pytest.terminal
 import pytest
+from _pytest._io.wcwidth import wcswidth
 from _pytest.config import ExitCode
 from _pytest.pytester import Testdir
 from _pytest.reports import BaseReport
@@ -2027,9 +2029,6 @@ def test_skip_reasons_folding():
 
 
 def test_line_with_reprcrash(monkeypatch):
-    import _pytest.terminal
-    from wcwidth import wcswidth
-
     mocked_verbose_word = "FAILED"
 
     mocked_pos = "some::nodeid"
@@ -2079,19 +2078,19 @@ def test_line_with_reprcrash(monkeypatch):
     check("some\nmessage", 80, "FAILED some::nodeid - some")
 
     # Test unicode safety.
-    check("😄😄😄😄😄\n2nd line", 25, "FAILED some::nodeid - ...")
-    check("😄😄😄😄😄\n2nd line", 26, "FAILED some::nodeid - ...")
-    check("😄😄😄😄😄\n2nd line", 27, "FAILED some::nodeid - 😄...")
-    check("😄😄😄😄😄\n2nd line", 28, "FAILED some::nodeid - 😄...")
-    check("😄😄😄😄😄\n2nd line", 29, "FAILED some::nodeid - 😄😄...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 25, "FAILED some::nodeid - ...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 26, "FAILED some::nodeid - ...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 27, "FAILED some::nodeid - 🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 28, "FAILED some::nodeid - 🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED some::nodeid - 🉐🉐...")
 
     # NOTE: constructed, not sure if this is supported.
-    mocked_pos = "nodeid::😄::withunicode"
-    check("😄😄😄😄😄\n2nd line", 29, "FAILED nodeid::😄::withunicode")
-    check("😄😄😄😄😄\n2nd line", 40, "FAILED nodeid::😄::withunicode - 😄😄...")
-    check("😄😄😄😄😄\n2nd line", 41, "FAILED nodeid::😄::withunicode - 😄😄...")
-    check("😄😄😄😄😄\n2nd line", 42, "FAILED nodeid::😄::withunicode - 😄😄😄...")
-    check("😄😄😄😄😄\n2nd line", 80, "FAILED nodeid::😄::withunicode - 😄😄😄😄😄")
+    mocked_pos = "nodeid::🉐::withunicode"
+    check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED nodeid::🉐::withunicode")
+    check("🉐🉐🉐🉐🉐\n2nd line", 40, "FAILED nodeid::🉐::withunicode - 🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 41, "FAILED nodeid::🉐::withunicode - 🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 42, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 80, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐🉐🉐")
 
 
 @pytest.mark.parametrize(