Handle NFC/NFD strings that normalize to the same string. (#10355)

Co-authored-by: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
This commit is contained in:
itxasos23 2022-10-10 02:09:33 +02:00 committed by GitHub
parent 8a40fc5315
commit 78c2c48c67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 10 deletions

View File

@ -154,6 +154,7 @@ Ian Bicking
Ian Lesperance Ian Lesperance
Ilya Konstantinov Ilya Konstantinov
Ionuț Turturică Ionuț Turturică
Itxaso Aizpurua
Iwan Briquemont Iwan Briquemont
Jaap Broekhuizen Jaap Broekhuizen
Jakob van Santen Jakob van Santen

View File

@ -0,0 +1 @@
Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their ASCII-escaped representation is shown instead.

View File

@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
information on exceptions raised during the call. information on exceptions raised during the call.
""" """
def __init__(self, maxsize: Optional[int]) -> None: def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
""" """
:param maxsize: :param maxsize:
If not None, will truncate the resulting repr to that specific size, using ellipsis If not None, will truncate the resulting repr to that specific size, using ellipsis
@ -54,10 +54,15 @@ class SafeRepr(reprlib.Repr):
# truncation. # truncation.
self.maxstring = maxsize if maxsize is not None else 1_000_000_000 self.maxstring = maxsize if maxsize is not None else 1_000_000_000
self.maxsize = maxsize self.maxsize = maxsize
self.use_ascii = use_ascii
def repr(self, x: object) -> str: def repr(self, x: object) -> str:
try: try:
s = super().repr(x) if self.use_ascii:
s = ascii(x)
else:
s = super().repr(x)
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
raise raise
except BaseException as exc: except BaseException as exc:
@ -94,7 +99,9 @@ def safeformat(obj: object) -> str:
DEFAULT_REPR_MAX_SIZE = 240 DEFAULT_REPR_MAX_SIZE = 240
def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str: def saferepr(
obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
) -> str:
"""Return a size-limited safe repr-string for the given object. """Return a size-limited safe repr-string for the given object.
Failing __repr__ functions of user instances will be represented Failing __repr__ functions of user instances will be represented
@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
This function is a wrapper around the Repr/reprlib functionality of the This function is a wrapper around the Repr/reprlib functionality of the
stdlib. stdlib.
""" """
return SafeRepr(maxsize).repr(obj)
return SafeRepr(maxsize, use_ascii).repr(obj)
def saferepr_unlimited(obj: object) -> str: def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
"""Return an unlimited-size safe repr-string for the given object. """Return an unlimited-size safe repr-string for the given object.
As with saferepr, failing __repr__ functions of user instances As with saferepr, failing __repr__ functions of user instances
@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str:
when maxsize=None, but that might affect some other code. when maxsize=None, but that might affect some other code.
""" """
try: try:
if use_ascii:
return ascii(obj)
return repr(obj) return repr(obj)
except Exception as exc: except Exception as exc:
return _format_repr_exception(exc, obj) return _format_repr_exception(exc, obj)

View File

@ -10,6 +10,7 @@ from typing import List
from typing import Mapping from typing import Mapping
from typing import Optional from typing import Optional
from typing import Sequence from typing import Sequence
from unicodedata import normalize
import _pytest._code import _pytest._code
from _pytest import outcomes from _pytest import outcomes
@ -156,20 +157,32 @@ def has_default_eq(
return True return True
def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]: def assertrepr_compare(
config, op: str, left: Any, right: Any, use_ascii: bool = False
) -> Optional[List[str]]:
"""Return specialised explanations for some operators/operands.""" """Return specialised explanations for some operators/operands."""
verbose = config.getoption("verbose") verbose = config.getoption("verbose")
# Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
# See issue #3426.
use_ascii = (
isinstance(left, str)
and isinstance(right, str)
and normalize("NFD", left) == normalize("NFD", right)
)
if verbose > 1: if verbose > 1:
left_repr = saferepr_unlimited(left) left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
right_repr = saferepr_unlimited(right) right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
else: else:
# XXX: "15 chars indentation" is wrong # XXX: "15 chars indentation" is wrong
# ("E AssertionError: assert "); should use term width. # ("E AssertionError: assert "); should use term width.
maxsize = ( maxsize = (
80 - 15 - len(op) - 2 80 - 15 - len(op) - 2
) // 2 # 15 chars indentation, 1 space around op ) // 2 # 15 chars indentation, 1 space around op
left_repr = saferepr(left, maxsize=maxsize)
right_repr = saferepr(right, maxsize=maxsize) left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)
summary = f"{left_repr} {op} {right_repr}" summary = f"{left_repr} {op} {right_repr}"

View File

@ -776,6 +776,24 @@ class TestAssert_reprcompare:
msg = "\n".join(expl) msg = "\n".join(expl)
assert msg assert msg
def test_nfc_nfd_same_string(self) -> None:
# issue 3426
left = "hyv\xe4"
right = "hyva\u0308"
expl = callequal(left, right)
assert expl == [
r"'hyv\xe4' == 'hyva\u0308'",
f"- {str(right)}",
f"+ {str(left)}",
]
expl = callequal(left, right, verbose=2)
assert expl == [
r"'hyv\xe4' == 'hyva\u0308'",
f"- {str(right)}",
f"+ {str(left)}",
]
class TestAssert_reprcompare_dataclass: class TestAssert_reprcompare_dataclass:
def test_dataclasses(self, pytester: Pytester) -> None: def test_dataclasses(self, pytester: Pytester) -> None: