Handle NFC/NFD strings that normalize to the same string. (#10355)
Co-authored-by: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
This commit is contained in:
parent
8a40fc5315
commit
78c2c48c67
1
AUTHORS
1
AUTHORS
|
@ -154,6 +154,7 @@ Ian Bicking
|
|||
Ian Lesperance
|
||||
Ilya Konstantinov
|
||||
Ionuț Turturică
|
||||
Itxaso Aizpurua
|
||||
Iwan Briquemont
|
||||
Jaap Broekhuizen
|
||||
Jakob van Santen
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their utf-8 representation is expresed instead.
|
|
@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
|
|||
information on exceptions raised during the call.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize: Optional[int]) -> None:
|
||||
def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
|
||||
"""
|
||||
:param maxsize:
|
||||
If not None, will truncate the resulting repr to that specific size, using ellipsis
|
||||
|
@ -54,10 +54,15 @@ class SafeRepr(reprlib.Repr):
|
|||
# truncation.
|
||||
self.maxstring = maxsize if maxsize is not None else 1_000_000_000
|
||||
self.maxsize = maxsize
|
||||
self.use_ascii = use_ascii
|
||||
|
||||
def repr(self, x: object) -> str:
|
||||
try:
|
||||
s = super().repr(x)
|
||||
if self.use_ascii:
|
||||
s = ascii(x)
|
||||
else:
|
||||
s = super().repr(x)
|
||||
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
raise
|
||||
except BaseException as exc:
|
||||
|
@ -94,7 +99,9 @@ def safeformat(obj: object) -> str:
|
|||
DEFAULT_REPR_MAX_SIZE = 240
|
||||
|
||||
|
||||
def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str:
|
||||
def saferepr(
|
||||
obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
|
||||
) -> str:
|
||||
"""Return a size-limited safe repr-string for the given object.
|
||||
|
||||
Failing __repr__ functions of user instances will be represented
|
||||
|
@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
|
|||
This function is a wrapper around the Repr/reprlib functionality of the
|
||||
stdlib.
|
||||
"""
|
||||
return SafeRepr(maxsize).repr(obj)
|
||||
|
||||
return SafeRepr(maxsize, use_ascii).repr(obj)
|
||||
|
||||
|
||||
def saferepr_unlimited(obj: object) -> str:
|
||||
def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
|
||||
"""Return an unlimited-size safe repr-string for the given object.
|
||||
|
||||
As with saferepr, failing __repr__ functions of user instances
|
||||
|
@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str:
|
|||
when maxsize=None, but that might affect some other code.
|
||||
"""
|
||||
try:
|
||||
if use_ascii:
|
||||
return ascii(obj)
|
||||
return repr(obj)
|
||||
except Exception as exc:
|
||||
return _format_repr_exception(exc, obj)
|
||||
|
|
|
@ -10,6 +10,7 @@ from typing import List
|
|||
from typing import Mapping
|
||||
from typing import Optional
|
||||
from typing import Sequence
|
||||
from unicodedata import normalize
|
||||
|
||||
import _pytest._code
|
||||
from _pytest import outcomes
|
||||
|
@ -156,20 +157,32 @@ def has_default_eq(
|
|||
return True
|
||||
|
||||
|
||||
def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]:
|
||||
def assertrepr_compare(
|
||||
config, op: str, left: Any, right: Any, use_ascii: bool = False
|
||||
) -> Optional[List[str]]:
|
||||
"""Return specialised explanations for some operators/operands."""
|
||||
verbose = config.getoption("verbose")
|
||||
|
||||
# Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
|
||||
# See issue #3246.
|
||||
use_ascii = (
|
||||
isinstance(left, str)
|
||||
and isinstance(right, str)
|
||||
and normalize("NFD", left) == normalize("NFD", right)
|
||||
)
|
||||
|
||||
if verbose > 1:
|
||||
left_repr = saferepr_unlimited(left)
|
||||
right_repr = saferepr_unlimited(right)
|
||||
left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
|
||||
right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
|
||||
else:
|
||||
# XXX: "15 chars indentation" is wrong
|
||||
# ("E AssertionError: assert "); should use term width.
|
||||
maxsize = (
|
||||
80 - 15 - len(op) - 2
|
||||
) // 2 # 15 chars indentation, 1 space around op
|
||||
left_repr = saferepr(left, maxsize=maxsize)
|
||||
right_repr = saferepr(right, maxsize=maxsize)
|
||||
|
||||
left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
|
||||
right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)
|
||||
|
||||
summary = f"{left_repr} {op} {right_repr}"
|
||||
|
||||
|
|
|
@ -776,6 +776,24 @@ class TestAssert_reprcompare:
|
|||
msg = "\n".join(expl)
|
||||
assert msg
|
||||
|
||||
def test_nfc_nfd_same_string(self) -> None:
|
||||
# issue 3426
|
||||
left = "hyv\xe4"
|
||||
right = "hyva\u0308"
|
||||
expl = callequal(left, right)
|
||||
assert expl == [
|
||||
r"'hyv\xe4' == 'hyva\u0308'",
|
||||
f"- {str(right)}",
|
||||
f"+ {str(left)}",
|
||||
]
|
||||
|
||||
expl = callequal(left, right, verbose=2)
|
||||
assert expl == [
|
||||
r"'hyv\xe4' == 'hyva\u0308'",
|
||||
f"- {str(right)}",
|
||||
f"+ {str(left)}",
|
||||
]
|
||||
|
||||
|
||||
class TestAssert_reprcompare_dataclass:
|
||||
def test_dataclasses(self, pytester: Pytester) -> None:
|
||||
|
|
Loading…
Reference in New Issue