Refs #30399 -- Made assertHTMLEqual normalize character and entity references.

This commit is contained in:
Jon Dufresne 2019-05-09 06:55:32 -07:00 committed by Carlton Gibson
parent af5ec222cc
commit 48235ba807
4 changed files with 40 additions and 11 deletions

View File

@@ -3,11 +3,14 @@
import re
from html.parser import HTMLParser
WHITESPACE = re.compile(r'\s+')
# ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020
# SPACE.
# https://infra.spec.whatwg.org/#ascii-whitespace
ASCII_WHITESPACE = re.compile(r'[\t\n\f\r ]+')
def normalize_whitespace(string):
return WHITESPACE.sub(' ', string)
return ASCII_WHITESPACE.sub(' ', string)
class Element:
@@ -144,7 +147,7 @@ class Parser(HTMLParser):
)
def __init__(self):
super().__init__(convert_charrefs=False)
super().__init__()
self.root = RootElement()
self.open_tags = []
self.element_positions = {}
@@ -202,12 +205,6 @@ class Parser(HTMLParser):
def handle_data(self, data):
self.current.append(data)
def handle_charref(self, name):
self.current.append('&%s;' % name)
def handle_entityref(self, name):
self.current.append('&%s;' % name)
def parse_html(html):
"""

View File

@@ -246,6 +246,11 @@ Tests
* Tests and test cases to run can be selected by test name pattern using the
new :option:`test -k` option.
* HTML comparison, as used by
:meth:`~django.test.SimpleTestCase.assertHTMLEqual`, now treats text, character
references, and entity references that refer to the same character as
equivalent.
URLs
~~~~

View File

@@ -1603,14 +1603,16 @@ your test suite.
* The ordering of attributes of an HTML element is not significant.
* Attributes without an argument are equal to attributes that equal in
name and value (see the examples).
* Text, character references, and entity references that refer to the same
character are equivalent.
The following examples are valid tests and don't raise any
``AssertionError``::
self.assertHTMLEqual(
'<p>Hello <b>world!</p>',
'<p>Hello <b>&#x27;world&#x27;!</p>',
'''<p>
Hello <b>world! </b>
Hello <b>&#39;world&#39;! </b>
</p>'''
)
self.assertHTMLEqual(

View File

@@ -612,6 +612,31 @@ class HTMLEqualTests(SimpleTestCase):
'<input type="text" id="id_name" />',
'<input type="password" id="id_name" />')
def test_normalize_refs(self):
pairs = [
('&#39;', '&#x27;'),
('&#39;', "'"),
('&#x27;', '&#39;'),
('&#x27;', "'"),
("'", '&#39;'),
("'", '&#x27;'),
('&amp;', '&#38;'),
('&amp;', '&#x26;'),
('&amp;', '&'),
('&#38;', '&amp;'),
('&#38;', '&#x26;'),
('&#38;', '&'),
('&#x26;', '&amp;'),
('&#x26;', '&#38;'),
('&#x26;', '&'),
('&', '&amp;'),
('&', '&#38;'),
('&', '&#x26;'),
]
for pair in pairs:
with self.subTest(repr(pair)):
self.assertHTMLEqual(*pair)
def test_complex_examples(self):
self.assertHTMLEqual(
"""<tr><th><label for="id_first_name">First name:</label></th>