Refs #30399 -- Made assertHTMLEqual normalize character and entity references.

This commit is contained in:
Jon Dufresne 2019-05-09 06:55:32 -07:00 committed by Carlton Gibson
parent af5ec222cc
commit 48235ba807
4 changed files with 40 additions and 11 deletions

View File

@@ -3,11 +3,14 @@
import re import re
from html.parser import HTMLParser from html.parser import HTMLParser
WHITESPACE = re.compile(r'\s+') # ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020
# SPACE.
# https://infra.spec.whatwg.org/#ascii-whitespace
ASCII_WHITESPACE = re.compile(r'[\t\n\f\r ]+')
def normalize_whitespace(string): def normalize_whitespace(string):
return WHITESPACE.sub(' ', string) return ASCII_WHITESPACE.sub(' ', string)
class Element: class Element:
@@ -144,7 +147,7 @@ class Parser(HTMLParser):
) )
def __init__(self): def __init__(self):
super().__init__(convert_charrefs=False) super().__init__()
self.root = RootElement() self.root = RootElement()
self.open_tags = [] self.open_tags = []
self.element_positions = {} self.element_positions = {}
@@ -202,12 +205,6 @@ class Parser(HTMLParser):
def handle_data(self, data): def handle_data(self, data):
self.current.append(data) self.current.append(data)
def handle_charref(self, name):
self.current.append('&%s;' % name)
def handle_entityref(self, name):
self.current.append('&%s;' % name)
def parse_html(html): def parse_html(html):
""" """

View File

@@ -246,6 +246,11 @@ Tests
* Tests and test cases to run can be selected by test name pattern using the * Tests and test cases to run can be selected by test name pattern using the
new :option:`test -k` option. new :option:`test -k` option.
* HTML comparison, as used by
:meth:`~django.test.SimpleTestCase.assertHTMLEqual`, now treats text, character
references, and entity references that refer to the same character as
equivalent.
URLs URLs
~~~~ ~~~~

View File

@@ -1603,14 +1603,16 @@ your test suite.
* The ordering of attributes of an HTML element is not significant. * The ordering of attributes of an HTML element is not significant.
* Attributes without an argument are equal to attributes that are equal in * Attributes without an argument are equal to attributes that are equal in
name and value (see the examples). name and value (see the examples).
* Text, character references, and entity references that refer to the same
character are equivalent.
The following examples are valid tests and don't raise any The following examples are valid tests and don't raise any
``AssertionError``:: ``AssertionError``::
self.assertHTMLEqual( self.assertHTMLEqual(
'<p>Hello <b>world!</p>', '<p>Hello <b>&#x27;world&#x27;!</p>',
'''<p> '''<p>
Hello <b>world! </b> Hello <b>&#39;world&#39;! </b>
</p>''' </p>'''
) )
self.assertHTMLEqual( self.assertHTMLEqual(

View File

@@ -612,6 +612,31 @@ class HTMLEqualTests(SimpleTestCase):
'<input type="text" id="id_name" />', '<input type="text" id="id_name" />',
'<input type="password" id="id_name" />') '<input type="password" id="id_name" />')
def test_normalize_refs(self):
pairs = [
('&#39;', '&#x27;'),
('&#39;', "'"),
('&#x27;', '&#39;'),
('&#x27;', "'"),
("'", '&#39;'),
("'", '&#x27;'),
('&amp;', '&#38;'),
('&amp;', '&#x26;'),
('&amp;', '&'),
('&#38;', '&amp;'),
('&#38;', '&#x26;'),
('&#38;', '&'),
('&#x26;', '&amp;'),
('&#x26;', '&#38;'),
('&#x26;', '&'),
('&', '&amp;'),
('&', '&#38;'),
('&', '&#x26;'),
]
for pair in pairs:
with self.subTest(repr(pair)):
self.assertHTMLEqual(*pair)
def test_complex_examples(self): def test_complex_examples(self):
self.assertHTMLEqual( self.assertHTMLEqual(
"""<tr><th><label for="id_first_name">First name:</label></th> """<tr><th><label for="id_first_name">First name:</label></th>