From 48235ba807483fe349d2dc66aaeddc0d03f8b0d4 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Thu, 9 May 2019 06:55:32 -0700 Subject: [PATCH] Refs #30399 -- Made assertHTMLEqual normalize character and entity references. --- django/test/html.py | 15 ++++++--------- docs/releases/3.0.txt | 5 +++++ docs/topics/testing/tools.txt | 6 ++++-- tests/test_utils/tests.py | 25 +++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 11 deletions(-) diff --git a/django/test/html.py b/django/test/html.py index 8b064529b0..911872bb69 100644 --- a/django/test/html.py +++ b/django/test/html.py @@ -3,11 +3,14 @@ import re from html.parser import HTMLParser -WHITESPACE = re.compile(r'\s+') +# ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 +# SPACE. +# https://infra.spec.whatwg.org/#ascii-whitespace +ASCII_WHITESPACE = re.compile(r'[\t\n\f\r ]+') def normalize_whitespace(string): - return WHITESPACE.sub(' ', string) + return ASCII_WHITESPACE.sub(' ', string) class Element: @@ -144,7 +147,7 @@ class Parser(HTMLParser): ) def __init__(self): - super().__init__(convert_charrefs=False) + super().__init__() self.root = RootElement() self.open_tags = [] self.element_positions = {} @@ -202,12 +205,6 @@ class Parser(HTMLParser): def handle_data(self, data): self.current.append(data) - def handle_charref(self, name): - self.current.append('&%s;' % name) - - def handle_entityref(self, name): - self.current.append('&%s;' % name) - def parse_html(html): """ diff --git a/docs/releases/3.0.txt b/docs/releases/3.0.txt index 2b9c5c5ea0..335ab2c0d5 100644 --- a/docs/releases/3.0.txt +++ b/docs/releases/3.0.txt @@ -246,6 +246,11 @@ Tests * Tests and test cases to run can be selected by test name pattern using the new :option:`test -k` option. +* HTML comparison, as used by + :meth:`~django.test.SimpleTestCase.assertHTMLEqual`, now treats text, character + references, and entity references that refer to the same character as + equivalent. 
+ URLs ~~~~ diff --git a/docs/topics/testing/tools.txt b/docs/topics/testing/tools.txt index d9f508023c..6d37a7421d 100644 --- a/docs/topics/testing/tools.txt +++ b/docs/topics/testing/tools.txt @@ -1603,14 +1603,16 @@ your test suite. * The ordering of attributes of an HTML element is not significant. * Attributes without an argument are equal to attributes that equal in name and value (see the examples). + * Text, character references, and entity references that refer to the same + character are equivalent. The following examples are valid tests and don't raise any ``AssertionError``:: self.assertHTMLEqual( - '

<p>Hello <strong>world!</p>', + '<p>Hello <strong>&#x27;world&#x27;!</p>', '''<p> - Hello <strong>world! </strong> + Hello <strong>&#39;world&#39;! </strong> </p>
''' ) self.assertHTMLEqual( diff --git a/tests/test_utils/tests.py b/tests/test_utils/tests.py index 69a99d47d2..5b84bbd383 100644 --- a/tests/test_utils/tests.py +++ b/tests/test_utils/tests.py @@ -612,6 +612,31 @@ class HTMLEqualTests(SimpleTestCase): '', '') + def test_normalize_refs(self): + pairs = [ + ('&#39;', '&#x27;'), + ('&#39;', "'"), + ('&#x27;', '&#39;'), + ('&#x27;', "'"), + ("'", '&#39;'), + ("'", '&#x27;'), + ('&amp;', '&#38;'), + ('&amp;', '&#x26;'), + ('&amp;', '&'), + ('&#38;', '&amp;'), + ('&#38;', '&#x26;'), + ('&#38;', '&'), + ('&#x26;', '&amp;'), + ('&#x26;', '&#38;'), + ('&#x26;', '&'), + ('&', '&amp;'), + ('&', '&#38;'), + ('&', '&#x26;'), + ] + for pair in pairs: + with self.subTest(repr(pair)): + self.assertHTMLEqual(*pair) + def test_complex_examples(self): self.assertHTMLEqual( """