Handle NFC/NFD strings that normalize to the same string. (#10355)

Co-authored-by: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
This commit is contained in:
itxasos23 2022-10-10 02:09:33 +02:00 committed by GitHub
parent 8a40fc5315
commit 78c2c48c67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 10 deletions

View File

@ -154,6 +154,7 @@ Ian Bicking
Ian Lesperance
Ilya Konstantinov
Ionuț Turturică
Itxaso Aizpurua
Iwan Briquemont
Jaap Broekhuizen
Jakob van Santen

View File

@ -0,0 +1 @@
Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their ASCII-escaped representation (as produced by ``ascii()``) is shown instead.

View File

@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
information on exceptions raised during the call.
"""
def __init__(self, maxsize: Optional[int]) -> None:
def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
"""
:param maxsize:
If not None, will truncate the resulting repr to that specific size, using ellipsis
@ -54,10 +54,15 @@ class SafeRepr(reprlib.Repr):
# truncation.
self.maxstring = maxsize if maxsize is not None else 1_000_000_000
self.maxsize = maxsize
self.use_ascii = use_ascii
def repr(self, x: object) -> str:
try:
if self.use_ascii:
s = ascii(x)
else:
s = super().repr(x)
except (KeyboardInterrupt, SystemExit):
raise
except BaseException as exc:
@ -94,7 +99,9 @@ def safeformat(obj: object) -> str:
DEFAULT_REPR_MAX_SIZE = 240
def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str:
def saferepr(
obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
) -> str:
"""Return a size-limited safe repr-string for the given object.
Failing __repr__ functions of user instances will be represented
@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
This function is a wrapper around the Repr/reprlib functionality of the
stdlib.
"""
return SafeRepr(maxsize).repr(obj)
return SafeRepr(maxsize, use_ascii).repr(obj)
def saferepr_unlimited(obj: object) -> str:
def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
"""Return an unlimited-size safe repr-string for the given object.
As with saferepr, failing __repr__ functions of user instances
@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str:
when maxsize=None, but that might affect some other code.
"""
try:
if use_ascii:
return ascii(obj)
return repr(obj)
except Exception as exc:
return _format_repr_exception(exc, obj)

View File

@ -10,6 +10,7 @@ from typing import List
from typing import Mapping
from typing import Optional
from typing import Sequence
from unicodedata import normalize
import _pytest._code
from _pytest import outcomes
@ -156,20 +157,32 @@ def has_default_eq(
return True
def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]:
def assertrepr_compare(
config, op: str, left: Any, right: Any, use_ascii: bool = False
) -> Optional[List[str]]:
"""Return specialised explanations for some operators/operands."""
verbose = config.getoption("verbose")
# Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
# See issue #3426.
use_ascii = (
isinstance(left, str)
and isinstance(right, str)
and normalize("NFD", left) == normalize("NFD", right)
)
if verbose > 1:
left_repr = saferepr_unlimited(left)
right_repr = saferepr_unlimited(right)
left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
else:
# XXX: "15 chars indentation" is wrong
# ("E AssertionError: assert "); should use term width.
maxsize = (
80 - 15 - len(op) - 2
) // 2 # 15 chars indentation, 1 space around op
left_repr = saferepr(left, maxsize=maxsize)
right_repr = saferepr(right, maxsize=maxsize)
left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)
summary = f"{left_repr} {op} {right_repr}"

View File

@ -776,6 +776,24 @@ class TestAssert_reprcompare:
msg = "\n".join(expl)
assert msg
def test_nfc_nfd_same_string(self) -> None:
    """Strings differing only in NFC/NFD form are shown escaped (issue 3426)."""
    composed = "hyv\xe4"
    decomposed = "hyva\u0308"
    expected = [
        r"'hyv\xe4' == 'hyva\u0308'",
        f"- {decomposed}",
        f"+ {composed}",
    ]

    # The explanation is expected to be the same at default and high verbosity.
    assert callequal(composed, decomposed) == expected
    assert callequal(composed, decomposed, verbose=2) == expected
class TestAssert_reprcompare_dataclass:
def test_dataclasses(self, pytester: Pytester) -> None: