From 78c2c48c67f594235f63c927fd1fc0760ac79ade Mon Sep 17 00:00:00 2001 From: itxasos23 <115184221+itxasos23@users.noreply.github.com> Date: Mon, 10 Oct 2022 02:09:33 +0200 Subject: [PATCH] Handle NFC/NFD strings that normalize to the same string. (#10355) Co-authored-by: Zac Hatfield-Dodds --- AUTHORS | 1 + changelog/3426.improvement.rst | 1 + src/_pytest/_io/saferepr.py | 20 +++++++++++++++----- src/_pytest/assertion/util.py | 23 ++++++++++++++++++----- testing/test_assertion.py | 18 ++++++++++++++++++ 5 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 changelog/3426.improvement.rst diff --git a/AUTHORS b/AUTHORS index 4e15caf8e..55b0237ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -154,6 +154,7 @@ Ian Bicking Ian Lesperance Ilya Konstantinov Ionuț Turturică +Itxaso Aizpurua Iwan Briquemont Jaap Broekhuizen Jakob van Santen diff --git a/changelog/3426.improvement.rst b/changelog/3426.improvement.rst new file mode 100644 index 000000000..e232d56aa --- /dev/null +++ b/changelog/3426.improvement.rst @@ -0,0 +1 @@ +Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their utf-8 representation is expressed instead. diff --git a/src/_pytest/_io/saferepr.py b/src/_pytest/_io/saferepr.py index a27e8c2a6..c70187223 100644 --- a/src/_pytest/_io/saferepr.py +++ b/src/_pytest/_io/saferepr.py @@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr): information on exceptions raised during the call. """ - def __init__(self, maxsize: Optional[int]) -> None: + def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None: """ :param maxsize: If not None, will truncate the resulting repr to that specific size, using ellipsis @@ -54,10 +54,15 @@ class SafeRepr(reprlib.Repr): # truncation. 
self.maxstring = maxsize if maxsize is not None else 1_000_000_000 self.maxsize = maxsize + self.use_ascii = use_ascii def repr(self, x: object) -> str: try: - s = super().repr(x) + if self.use_ascii: + s = ascii(x) + else: + s = super().repr(x) + except (KeyboardInterrupt, SystemExit): raise except BaseException as exc: @@ -94,7 +99,9 @@ def safeformat(obj: object) -> str: DEFAULT_REPR_MAX_SIZE = 240 -def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str: +def saferepr( + obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False +) -> str: """Return a size-limited safe repr-string for the given object. Failing __repr__ functions of user instances will be represented @@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str This function is a wrapper around the Repr/reprlib functionality of the stdlib. """ - return SafeRepr(maxsize).repr(obj) + + return SafeRepr(maxsize, use_ascii).repr(obj) -def saferepr_unlimited(obj: object) -> str: +def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str: """Return an unlimited-size safe repr-string for the given object. As with saferepr, failing __repr__ functions of user instances @@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str: when maxsize=None, but that might affect some other code. 
""" try: + if use_ascii: + return ascii(obj) return repr(obj) except Exception as exc: return _format_repr_exception(exc, obj) diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py index 0c34b83ea..fc5dfdbd5 100644 --- a/src/_pytest/assertion/util.py +++ b/src/_pytest/assertion/util.py @@ -10,6 +10,7 @@ from typing import List from typing import Mapping from typing import Optional from typing import Sequence +from unicodedata import normalize import _pytest._code from _pytest import outcomes @@ -156,20 +157,32 @@ def has_default_eq( return True -def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]: +def assertrepr_compare( + config, op: str, left: Any, right: Any, use_ascii: bool = False +) -> Optional[List[str]]: """Return specialised explanations for some operators/operands.""" verbose = config.getoption("verbose") + + # Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier. + # See issue #3426. + use_ascii = ( + isinstance(left, str) + and isinstance(right, str) + and normalize("NFD", left) == normalize("NFD", right) + ) + if verbose > 1: - left_repr = saferepr_unlimited(left) - right_repr = saferepr_unlimited(right) + left_repr = saferepr_unlimited(left, use_ascii=use_ascii) + right_repr = saferepr_unlimited(right, use_ascii=use_ascii) else: # XXX: "15 chars indentation" is wrong # ("E AssertionError: assert "); should use term width. 
maxsize = ( 80 - 15 - len(op) - 2 ) // 2 # 15 chars indentation, 1 space around op - left_repr = saferepr(left, maxsize=maxsize) - right_repr = saferepr(right, maxsize=maxsize) + + left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii) + right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii) summary = f"{left_repr} {op} {right_repr}" diff --git a/testing/test_assertion.py b/testing/test_assertion.py index 2bc06d65a..d8844f2e4 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -776,6 +776,24 @@ class TestAssert_reprcompare: msg = "\n".join(expl) assert msg + def test_nfc_nfd_same_string(self) -> None: + # issue 3426 + left = "hyv\xe4" + right = "hyva\u0308" + expl = callequal(left, right) + assert expl == [ + r"'hyv\xe4' == 'hyva\u0308'", + f"- {str(right)}", + f"+ {str(left)}", + ] + + expl = callequal(left, right, verbose=2) + assert expl == [ + r"'hyv\xe4' == 'hyva\u0308'", + f"- {str(right)}", + f"+ {str(left)}", + ] + class TestAssert_reprcompare_dataclass: def test_dataclasses(self, pytester: Pytester) -> None: