Handle NFC/NFD strings that normalize to the same string. (#10355)
Co-authored-by: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
This commit is contained in:
parent
8a40fc5315
commit
78c2c48c67
1
AUTHORS
1
AUTHORS
|
@ -154,6 +154,7 @@ Ian Bicking
|
||||||
Ian Lesperance
|
Ian Lesperance
|
||||||
Ilya Konstantinov
|
Ilya Konstantinov
|
||||||
Ionuț Turturică
|
Ionuț Turturică
|
||||||
|
Itxaso Aizpurua
|
||||||
Iwan Briquemont
|
Iwan Briquemont
|
||||||
Jaap Broekhuizen
|
Jaap Broekhuizen
|
||||||
Jakob van Santen
|
Jakob van Santen
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Assertion failures with strings in NFC and NFD forms that normalize to the same string now have a dedicated error message detailing the issue, and their utf-8 representation is expressed instead.
|
|
@ -41,7 +41,7 @@ class SafeRepr(reprlib.Repr):
|
||||||
information on exceptions raised during the call.
|
information on exceptions raised during the call.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, maxsize: Optional[int]) -> None:
|
def __init__(self, maxsize: Optional[int], use_ascii: bool = False) -> None:
|
||||||
"""
|
"""
|
||||||
:param maxsize:
|
:param maxsize:
|
||||||
If not None, will truncate the resulting repr to that specific size, using ellipsis
|
If not None, will truncate the resulting repr to that specific size, using ellipsis
|
||||||
|
@ -54,10 +54,15 @@ class SafeRepr(reprlib.Repr):
|
||||||
# truncation.
|
# truncation.
|
||||||
self.maxstring = maxsize if maxsize is not None else 1_000_000_000
|
self.maxstring = maxsize if maxsize is not None else 1_000_000_000
|
||||||
self.maxsize = maxsize
|
self.maxsize = maxsize
|
||||||
|
self.use_ascii = use_ascii
|
||||||
|
|
||||||
def repr(self, x: object) -> str:
|
def repr(self, x: object) -> str:
|
||||||
try:
|
try:
|
||||||
|
if self.use_ascii:
|
||||||
|
s = ascii(x)
|
||||||
|
else:
|
||||||
s = super().repr(x)
|
s = super().repr(x)
|
||||||
|
|
||||||
except (KeyboardInterrupt, SystemExit):
|
except (KeyboardInterrupt, SystemExit):
|
||||||
raise
|
raise
|
||||||
except BaseException as exc:
|
except BaseException as exc:
|
||||||
|
@ -94,7 +99,9 @@ def safeformat(obj: object) -> str:
|
||||||
DEFAULT_REPR_MAX_SIZE = 240
|
DEFAULT_REPR_MAX_SIZE = 240
|
||||||
|
|
||||||
|
|
||||||
def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str:
|
def saferepr(
|
||||||
|
obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE, use_ascii: bool = False
|
||||||
|
) -> str:
|
||||||
"""Return a size-limited safe repr-string for the given object.
|
"""Return a size-limited safe repr-string for the given object.
|
||||||
|
|
||||||
Failing __repr__ functions of user instances will be represented
|
Failing __repr__ functions of user instances will be represented
|
||||||
|
@ -104,10 +111,11 @@ def saferepr(obj: object, maxsize: Optional[int] = DEFAULT_REPR_MAX_SIZE) -> str
|
||||||
This function is a wrapper around the Repr/reprlib functionality of the
|
This function is a wrapper around the Repr/reprlib functionality of the
|
||||||
stdlib.
|
stdlib.
|
||||||
"""
|
"""
|
||||||
return SafeRepr(maxsize).repr(obj)
|
|
||||||
|
return SafeRepr(maxsize, use_ascii).repr(obj)
|
||||||
|
|
||||||
|
|
||||||
def saferepr_unlimited(obj: object) -> str:
|
def saferepr_unlimited(obj: object, use_ascii: bool = True) -> str:
|
||||||
"""Return an unlimited-size safe repr-string for the given object.
|
"""Return an unlimited-size safe repr-string for the given object.
|
||||||
|
|
||||||
As with saferepr, failing __repr__ functions of user instances
|
As with saferepr, failing __repr__ functions of user instances
|
||||||
|
@ -119,6 +127,8 @@ def saferepr_unlimited(obj: object) -> str:
|
||||||
when maxsize=None, but that might affect some other code.
|
when maxsize=None, but that might affect some other code.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
if use_ascii:
|
||||||
|
return ascii(obj)
|
||||||
return repr(obj)
|
return repr(obj)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return _format_repr_exception(exc, obj)
|
return _format_repr_exception(exc, obj)
|
||||||
|
|
|
@ -10,6 +10,7 @@ from typing import List
|
||||||
from typing import Mapping
|
from typing import Mapping
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from typing import Sequence
|
from typing import Sequence
|
||||||
|
from unicodedata import normalize
|
||||||
|
|
||||||
import _pytest._code
|
import _pytest._code
|
||||||
from _pytest import outcomes
|
from _pytest import outcomes
|
||||||
|
@ -156,20 +157,32 @@ def has_default_eq(
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def assertrepr_compare(config, op: str, left: Any, right: Any) -> Optional[List[str]]:
|
def assertrepr_compare(
|
||||||
|
config, op: str, left: Any, right: Any, use_ascii: bool = False
|
||||||
|
) -> Optional[List[str]]:
|
||||||
"""Return specialised explanations for some operators/operands."""
|
"""Return specialised explanations for some operators/operands."""
|
||||||
verbose = config.getoption("verbose")
|
verbose = config.getoption("verbose")
|
||||||
|
|
||||||
|
# Strings which normalize equal are often hard to distinguish when printed; use ascii() to make this easier.
|
||||||
|
# See issue #3426.
|
||||||
|
use_ascii = (
|
||||||
|
isinstance(left, str)
|
||||||
|
and isinstance(right, str)
|
||||||
|
and normalize("NFD", left) == normalize("NFD", right)
|
||||||
|
)
|
||||||
|
|
||||||
if verbose > 1:
|
if verbose > 1:
|
||||||
left_repr = saferepr_unlimited(left)
|
left_repr = saferepr_unlimited(left, use_ascii=use_ascii)
|
||||||
right_repr = saferepr_unlimited(right)
|
right_repr = saferepr_unlimited(right, use_ascii=use_ascii)
|
||||||
else:
|
else:
|
||||||
# XXX: "15 chars indentation" is wrong
|
# XXX: "15 chars indentation" is wrong
|
||||||
# ("E AssertionError: assert "); should use term width.
|
# ("E AssertionError: assert "); should use term width.
|
||||||
maxsize = (
|
maxsize = (
|
||||||
80 - 15 - len(op) - 2
|
80 - 15 - len(op) - 2
|
||||||
) // 2 # 15 chars indentation, 1 space around op
|
) // 2 # 15 chars indentation, 1 space around op
|
||||||
left_repr = saferepr(left, maxsize=maxsize)
|
|
||||||
right_repr = saferepr(right, maxsize=maxsize)
|
left_repr = saferepr(left, maxsize=maxsize, use_ascii=use_ascii)
|
||||||
|
right_repr = saferepr(right, maxsize=maxsize, use_ascii=use_ascii)
|
||||||
|
|
||||||
summary = f"{left_repr} {op} {right_repr}"
|
summary = f"{left_repr} {op} {right_repr}"
|
||||||
|
|
||||||
|
|
|
@ -776,6 +776,24 @@ class TestAssert_reprcompare:
|
||||||
msg = "\n".join(expl)
|
msg = "\n".join(expl)
|
||||||
assert msg
|
assert msg
|
||||||
|
|
||||||
|
def test_nfc_nfd_same_string(self) -> None:
|
||||||
|
# issue 3426
|
||||||
|
left = "hyv\xe4"
|
||||||
|
right = "hyva\u0308"
|
||||||
|
expl = callequal(left, right)
|
||||||
|
assert expl == [
|
||||||
|
r"'hyv\xe4' == 'hyva\u0308'",
|
||||||
|
f"- {str(right)}",
|
||||||
|
f"+ {str(left)}",
|
||||||
|
]
|
||||||
|
|
||||||
|
expl = callequal(left, right, verbose=2)
|
||||||
|
assert expl == [
|
||||||
|
r"'hyv\xe4' == 'hyva\u0308'",
|
||||||
|
f"- {str(right)}",
|
||||||
|
f"+ {str(left)}",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class TestAssert_reprcompare_dataclass:
|
class TestAssert_reprcompare_dataclass:
|
||||||
def test_dataclasses(self, pytester: Pytester) -> None:
|
def test_dataclasses(self, pytester: Pytester) -> None:
|
||||||
|
|
Loading…
Reference in New Issue