From 78c2c48c67f594235f63c927fd1fc0760ac79ade Mon Sep 17 00:00:00 2001
From: itxasos23 <115184221+itxasos23@users.noreply.github.com>
Date: Mon, 10 Oct 2022 02:09:33 +0200
Subject: [PATCH] Handle NFC/NFD strings that normalize to the same string.
 (#10355)

Co-authored-by: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
---
 AUTHORS                        |  1 +
 changelog/3426.improvement.rst |  1 +
 src/_pytest/_io/saferepr.py    | 20 +++++++++++++++-----
 src/_pytest/assertion/util.py  | 23 ++++++++++++++++++-----
 testing/test_assertion.py      | 18 ++++++++++++++++++
 5 files changed, 53 insertions(+), 10 deletions(-)
 create mode 100644 changelog/3426.improvement.rst

diff --git a/AUTHORS b/AUTHORS
index 4e15caf8e..55b0237ea 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -154,6 +154,7 @@ Ian Bicking
 Ian Lesperance
 Ilya Konstantinov
 Ionuț Turturică
+Itxaso Aizpurua
 Iwan Briquemont
 Jaap Broekhuizen
 Jakob van Santen
diff --git a/changelog/3426.improvement.rst b/changelog/3426.improvement.rst
new file mode 100644
index 000000000..e232d56aa
--- /dev/null
+++ b/changelog/3426.improvement.rst
@@ -0,0 +1 @@
+Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their ASCII-escaped representation (as produced by ``ascii()``) is shown instead.
diff --git a/src/_pytest/_io/saferepr.py b/src/_pytest/_io/saferepr.py
index a27e8c2a6..c70187223 100644
--- a/src/_pytest/_io/saferepr.py
+++ b/src/_pytest/_io/saferepr.py
@@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
     information on exceptions raised during the call.
     """
 
-    def __init__(self, maxsize: Optional[int]) -> None:
+    def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
         """
         :param maxsize:
             If not None, will truncate the resulting repr to that specific size, using ellipsis
@@ -54,10 +54,15 @@ class SafeRepr(reprlib.Repr):
         # truncation.
         self.maxstring = maxsize if maxsize is not None else 1_000_000_000
         self.maxsize = maxsize
+        self.use_ascii = use_ascii
 
     def repr(self, x: object) -> str:
         try:
-            s = super().repr(x)
+            if self.use_ascii:
+                s = ascii(x)
+            else:
+                s = super().repr(x)
+
         except (KeyboardInterrupt, SystemExit):
             raise
         except BaseException as exc:
@@ -94,7 +99,9 @@ def safeformat(obj: object) -> str:
 DEFAULT_REPR_MAX_SIZE = 240
 
 
-def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str:
+def saferepr(
+    obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
+) -> str:
     """Return a size-limited safe repr-string for the given object.
 
     Failing __repr__ functions of user instances will be represented
@@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
     This function is a wrapper around the Repr/reprlib functionality of the
     stdlib.
     """
-    return SafeRepr(maxsize).repr(obj)
+
+    return SafeRepr(maxsize, use_ascii).repr(obj)
 
 
-def saferepr_unlimited(obj: object) -> str:
+def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
     """Return an unlimited-size safe repr-string for the given object.
 
     As with saferepr, failing __repr__ functions of user instances
@@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str:
     when maxsize=None, but that might affect some other code.
     """
     try:
+        if use_ascii:
+            return ascii(obj)
         return repr(obj)
     except Exception as exc:
         return _format_repr_exception(exc, obj)
diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py
index 0c34b83ea..fc5dfdbd5 100644
--- a/src/_pytest/assertion/util.py
+++ b/src/_pytest/assertion/util.py
@@ -10,6 +10,7 @@ from typing import List
 from typing import Mapping
 from typing import Optional
 from typing import Sequence
+from unicodedata import normalize
 
 import _pytest._code
 from _pytest import outcomes
@@ -156,20 +157,32 @@ def has_default_eq(
     return True
 
 
-def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]:
+def assertrepr_compare(
+    config, op: str, left: Any, right: Any, use_ascii: bool = False
+) -> Optional[List[str]]:
     """Return specialised explanations for some operators/operands."""
     verbose = config.getoption("verbose")
+
+    # Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
+    # See issue #3426.
+    use_ascii = (
+        isinstance(left, str)
+        and isinstance(right, str)
+        and normalize("NFD", left) == normalize("NFD", right)
+    )
+
     if verbose > 1:
-        left_repr = saferepr_unlimited(left)
-        right_repr = saferepr_unlimited(right)
+        left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
+        right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
     else:
         # XXX: "15 chars indentation" is wrong
         #      ("E       AssertionError: assert "); should use term width.
         maxsize = (
             80 - 15 - len(op) - 2
         ) // 2  # 15 chars indentation, 1 space around op
-        left_repr = saferepr(left, maxsize=maxsize)
-        right_repr = saferepr(right, maxsize=maxsize)
+
+        left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
+        right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)
 
     summary = f"{left_repr} {op} {right_repr}"
 
diff --git a/testing/test_assertion.py b/testing/test_assertion.py
index 2bc06d65a..d8844f2e4 100644
--- a/testing/test_assertion.py
+++ b/testing/test_assertion.py
@@ -776,6 +776,24 @@ class TestAssert_reprcompare:
         msg = "\n".join(expl)
         assert msg
 
+    def test_nfc_nfd_same_string(self) -> None:
+        # issue 3426
+        left = "hyv\xe4"
+        right = "hyva\u0308"
+        expl = callequal(left, right)
+        assert expl == [
+            r"'hyv\xe4' == 'hyva\u0308'",
+            f"- {str(right)}",
+            f"+ {str(left)}",
+        ]
+
+        expl = callequal(left, right, verbose=2)
+        assert expl == [
+            r"'hyv\xe4' == 'hyva\u0308'",
+            f"- {str(right)}",
+            f"+ {str(left)}",
+        ]
+
 
 class TestAssert_reprcompare_dataclass:
     def test_dataclasses(self, pytester: Pytester) -> None: