diff --git a/django/test/html.py b/django/test/html.py
new file mode 100644
index 0000000000..ff073ba728
--- /dev/null
+++ b/django/test/html.py
@@ -0,0 +1,221 @@
+"""
+Comparing two html documents.
+"""
+import re
+from HTMLParser import HTMLParseError
+from django.utils.encoding import force_unicode
+from django.utils.htmlparser import HTMLParser
+
+
+# Matches any run of whitespace. Written as a raw string so that the regex
+# escape is never interpreted as a string escape.
+WHITESPACE = re.compile(r'\s+')
+
+
+def normalize_whitespace(string):
+    """Collapse every run of whitespace in ``string`` into a single space."""
+    return WHITESPACE.sub(' ', string)
+
+
+class Element(object):
+    """
+    A node of the simplified DOM used to compare HTML documents.
+
+    ``name`` is the tag name, ``attributes`` is a sorted list of
+    ``(name, value)`` pairs and ``children`` is a list that holds text
+    strings and nested elements.
+    """
+    def __init__(self, name, attributes):
+        self.name = name
+        # Sorted so that the attribute order of the markup does not matter
+        # when two elements are compared.
+        self.attributes = sorted(attributes)
+        self.children = []
+
+    def append(self, element):
+        """
+        Add a text string or a child element.
+
+        Text is converted to unicode, whitespace-normalized and merged into
+        a directly preceding text child. Empty text is dropped.
+        """
+        if isinstance(element, basestring):
+            element = force_unicode(element)
+            element = normalize_whitespace(element)
+            if self.children:
+                if isinstance(self.children[-1], basestring):
+                    self.children[-1] += element
+                    self.children[-1] = normalize_whitespace(self.children[-1])
+                    return
+        elif self.children:
+            # Remove the last child if it is only whitespace. This can
+            # result in incorrect DOM representations since whitespace
+            # between inline tags like <span> is significant.
+            if isinstance(self.children[-1], basestring):
+                if self.children[-1].isspace():
+                    self.children.pop()
+        if element:
+            self.children.append(element)
+
+    def finalize(self):
+        """
+        Strip surrounding whitespace from all text children, drop trailing
+        text children that are empty after stripping, and finalize nested
+        elements recursively.
+        """
+        def rstrip_last_element(children):
+            # Right-strip the last text child; while that leaves an empty
+            # string, remove it and look at the new last child.
+            while children and isinstance(children[-1], basestring):
+                children[-1] = children[-1].rstrip()
+                if children[-1]:
+                    break
+                children.pop()
+            return children
+
+        rstrip_last_element(self.children)
+        for i, child in enumerate(self.children):
+            if isinstance(child, basestring):
+                self.children[i] = child.strip()
+            elif hasattr(child, 'finalize'):
+                child.finalize()
+
+    def __eq__(self, element):
+        """
+        Two elements are equal if name, attributes and children are equal.
+        Anything without a ``name`` attribute (e.g. a text string) is never
+        equal to an element.
+        """
+        if not hasattr(element, 'name'):
+            return False
+        if self.name != element.name:
+            return False
+        if len(self.attributes) != len(element.attributes):
+            return False
+        if self.attributes != element.attributes:
+            # An attribute without a value is the same as an attribute whose
+            # value equals the attribute's name:
+            # <input checked> == <input checked="checked">
+            pairs = zip(self.attributes, element.attributes)
+            for (attr, value), (other_attr, other_value) in pairs:
+                if value is None:
+                    value = attr
+                if other_value is None:
+                    other_value = other_attr
+                if attr != other_attr or value != other_value:
+                    return False
+        if self.children != element.children:
+            return False
+        return True
+
+    def __ne__(self, element):
+        return not self.__eq__(element)
+
+    def _count(self, element, count=True):
+        """
+        Return how often ``element`` (text or element) occurs in this
+        subtree. With ``count=False`` the search stops at the first hit and
+        the result is only meaningful as a truth value.
+        """
+        if not isinstance(element, basestring):
+            if self == element:
+                return 1
+        i = 0
+        for child in self.children:
+            # If child is text content and element is also text content,
+            # do a simple "text" in "text" test.
+            if isinstance(child, basestring):
+                if isinstance(element, basestring):
+                    if count:
+                        i += child.count(element)
+                    elif element in child:
+                        return 1
+            else:
+                i += child._count(element, count=count)
+                if not count and i:
+                    return i
+        return i
+
+    def __contains__(self, element):
+        return self._count(element, count=False) > 0
+
+    def count(self, element):
+        """Return the number of occurrences of ``element`` in this subtree."""
+        return self._count(element, count=True)
+
+    def __getitem__(self, key):
+        return self.children[key]
+
+    def __unicode__(self):
+        """Render the element as markup, e.g. for diffs in failure output."""
+        output = u'<%s' % self.name
+        for key, value in self.attributes:
+            if value:
+                output += u' %s="%s"' % (key, value)
+            else:
+                output += u' %s' % key
+        if self.children:
+            output += u'>\n'
+            output += u''.join(unicode(c) for c in self.children)
+            # The closing tag needs its leading "</"; without it the output
+            # would end in a bare "name>".
+            output += u'\n</%s>' % self.name
+        else:
+            output += u' />'
+        return output
+
+    def __repr__(self):
+        return unicode(self)
+
+
+class RootElement(Element):
+    """Container without name and attributes for the top-level nodes."""
+    def __init__(self):
+        super(RootElement, self).__init__(None, ())
+
+    def __unicode__(self):
+        # The root has no tag of its own, so only its children are rendered.
+        rendered = [unicode(child) for child in self.children]
+        return u''.join(rendered)
+
+
+class Parser(HTMLParser):
+    """
+    HTML parser that builds a tree of ``Element`` objects below
+    ``self.root``.
+    """
+    # Tags that are never pushed onto the stack of open tags, i.e. that are
+    # treated as complete as soon as their start tag has been seen.
+    SELF_CLOSING_TAGS = ('br', 'hr', 'input', 'img', 'meta', 'spacer',
+        'link', 'frame', 'base', 'col')
+
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.root = RootElement()
+        # Stack of elements whose end tag has not been seen yet.
+        self.open_tags = []
+        # Maps an element to the getpos() value at its start tag.
+        self.element_positions = {}
+
+    def error(self, msg):
+        """Raise ``HTMLParseError`` with ``msg`` and the current position."""
+        raise HTMLParseError(msg, self.getpos())
+
+    def format_position(self, position=None, element=None):
+        """
+        Return a "Line X, Column Y" string for ``position``, for the
+        recorded position of ``element``, or for the current parser position
+        if neither is given.
+        """
+        if not position and element:
+            position = self.element_positions[element]
+        if position is None:
+            position = self.getpos()
+        if hasattr(position, 'lineno'):
+            position = position.lineno, position.offset
+        return 'Line %d, Column %d' % position
+
+    @property
+    def current(self):
+        """The innermost open element, or the root if none is open."""
+        if self.open_tags:
+            return self.open_tags[-1]
+        else:
+            return self.root
+
+    def handle_startendtag(self, tag, attrs):
+        self.handle_starttag(tag, attrs)
+        # Self-closing tags were never opened, so there is nothing to close.
+        if tag not in self.SELF_CLOSING_TAGS:
+            self.handle_endtag(tag)
+
+    def handle_starttag(self, tag, attrs):
+        element = Element(tag, attrs)
+        self.current.append(element)
+        if tag not in self.SELF_CLOSING_TAGS:
+            self.open_tags.append(element)
+        self.element_positions[element] = self.getpos()
+
+    def handle_endtag(self, tag):
+        """
+        Close ``tag`` and, implicitly, every element opened after it. Raises
+        ``HTMLParseError`` (via ``error()``) if no matching element is open.
+        """
+        if not self.open_tags:
+            self.error("Unexpected end tag `%s` (%s)" % (
+                tag, self.format_position()))
+        element = self.open_tags.pop()
+        while element.name != tag:
+            if not self.open_tags:
+                self.error("Unexpected end tag `%s` (%s)" % (
+                    tag, self.format_position()))
+            element = self.open_tags.pop()
+
+    def handle_data(self, data):
+        self.current.append(data)
+
+    def handle_charref(self, name):
+        # ``name`` is the reference without its "&#" prefix (e.g. "39" or
+        # "x27"), so the "#" has to be restored here. Otherwise "&#39;"
+        # would be stored as "&39;" and compare equal to that literal text.
+        self.current.append('&#%s;' % name)
+
+    def handle_entityref(self, name):
+        self.current.append('&%s;' % name)
+
+
+def parse_html(html):
+    """
+    Parse a string that contains *valid* HTML into a structure of Python
+    objects that can easily be compared with other parsed HTML for semantic
+    equivalence. Purely syntactical differences, like the kind of quotes
+    used around attribute values, are ignored.
+    """
+    parser = Parser()
+    parser.feed(html)
+    parser.close()
+    root = parser.root
+    root.finalize()
+    # Unwrap the root when all it holds is one element (not bare text).
+    top_level = root.children
+    if len(top_level) == 1 and not isinstance(top_level[0], basestring):
+        return top_level[0]
+    return root
diff --git a/django/test/testcases.py b/django/test/testcases.py
index 2e011e4cea..0110d5e24d 100644
--- a/django/test/testcases.py
+++ b/django/test/testcases.py
@@ -1,5 +1,6 @@
from __future__ import with_statement
+import difflib
import os
import re
import sys
@@ -29,12 +30,14 @@ from django.forms.fields import CharField
from django.http import QueryDict
from django.test import _doctest as doctest
from django.test.client import Client
+from django.test.html import HTMLParseError, parse_html
from django.test.signals import template_rendered
from django.test.utils import (get_warnings_state, restore_warnings_state,
override_settings)
from django.test.utils import ContextList
from django.utils import simplejson, unittest as ut2
from django.utils.encoding import smart_str, force_unicode
+from django.utils.unittest.util import safe_repr
from django.views.static import serve
__all__ = ('DocTestRunner', 'OutputChecker', 'TestCase', 'TransactionTestCase',
@@ -78,6 +81,16 @@ def restore_transaction_methods():
transaction.leave_transaction_management = real_leave_transaction_management
transaction.managed = real_managed
+
+def assert_and_parse_html(self, html, user_msg, msg):
+    """
+    Parse ``html`` and return the resulting DOM. If the markup cannot be
+    parsed, fail the test case ``self`` with ``msg`` followed by the
+    parser's own message.
+    """
+    try:
+        dom = parse_html(html)
+    except HTMLParseError, parse_error:
+        failure_text = u'%s\n%s' % (msg, parse_error.msg)
+        self.fail(self._formatMessage(user_msg, failure_text))
+    return dom
+
+
class OutputChecker(doctest.OutputChecker):
def check_output(self, want, got, optionflags):
"""
@@ -396,6 +409,39 @@ class SimpleTestCase(ut2.TestCase):
self.assertTrue(isinstance(fieldclass(*field_args, **field_kwargs),
fieldclass))
+    def assertHTMLEqual(self, html1, html2, msg=None):
+        """
+        Assert that two HTML snippets are semantically the same: whitespace
+        is ignored in most cases and the order of attributes is not
+        significant. Both arguments must be valid HTML.
+        """
+        dom1 = assert_and_parse_html(self, html1, msg,
+            u'First argument is not valid html:')
+        dom2 = assert_and_parse_html(self, html2, msg,
+            u'Second argument is not valid html:')
+
+        if dom1 == dom2:
+            return
+        summary = '%s != %s' % (
+            safe_repr(dom1, True), safe_repr(dom2, True))
+        # Line-by-line diff of both rendered DOMs, appended to the summary.
+        delta = difflib.ndiff(
+            unicode(dom1).splitlines(),
+            unicode(dom2).splitlines())
+        diff = '\n' + '\n'.join(delta)
+        summary = self._truncateMessage(summary, diff)
+        self.fail(self._formatMessage(msg, summary))
+
+    def assertHTMLNotEqual(self, html1, html2, msg=None):
+        """
+        Assert that two HTML snippets are not semantically equivalent. Both
+        arguments must be valid HTML.
+        """
+        dom1 = assert_and_parse_html(self, html1, msg,
+            u'First argument is not valid html:')
+        dom2 = assert_and_parse_html(self, html2, msg,
+            u'Second argument is not valid html:')
+
+        if not dom1 == dom2:
+            return
+        summary = '%s == %s' % (
+            safe_repr(dom1, True), safe_repr(dom2, True))
+        self.fail(self._formatMessage(msg, summary))
+
class TransactionTestCase(SimpleTestCase):
# The class we'll use for the test client self.client.
@@ -554,7 +600,7 @@ class TransactionTestCase(SimpleTestCase):
(url, expected_url))
def assertContains(self, response, text, count=None, status_code=200,
- msg_prefix=''):
+ msg_prefix='', html=False):
"""
Asserts that a response indicates that some content was retrieved
successfully, (i.e., the HTTP status code was as expected), and that
@@ -576,7 +622,13 @@ class TransactionTestCase(SimpleTestCase):
msg_prefix + "Couldn't retrieve content: Response code was %d"
" (expected %d)" % (response.status_code, status_code))
text = smart_str(text, response._charset)
- real_count = response.content.count(text)
+ content = response.content
+ if html:
+ content = assert_and_parse_html(self, content, None,
+ u"Response's content is not valid html:")
+ text = assert_and_parse_html(self, text, None,
+ u"Second argument is not valid html:")
+ real_count = content.count(text)
if count is not None:
self.assertEqual(real_count, count,
msg_prefix + "Found %d instances of '%s' in response"
@@ -586,7 +638,7 @@ class TransactionTestCase(SimpleTestCase):
msg_prefix + "Couldn't find '%s' in response" % text)
def assertNotContains(self, response, text, status_code=200,
- msg_prefix=''):
+ msg_prefix='', html=False):
"""
Asserts that a response indicates that some content was retrieved
successfully, (i.e., the HTTP status code was as expected), and that
@@ -606,7 +658,13 @@ class TransactionTestCase(SimpleTestCase):
msg_prefix + "Couldn't retrieve content: Response code was %d"
" (expected %d)" % (response.status_code, status_code))
text = smart_str(text, response._charset)
- self.assertEqual(response.content.count(text), 0,
+ content = response.content
+ if html:
+ content = assert_and_parse_html(self, content, None,
+ u'Response\'s content is no valid html:')
+ text = assert_and_parse_html(self, text, None,
+ u'Second argument is no valid html:')
+ self.assertEqual(content.count(text), 0,
msg_prefix + "Response should not contain '%s'" % text)
def assertFormError(self, response, form, field, errors, msg_prefix=''):
diff --git a/django/utils/htmlparser.py b/django/utils/htmlparser.py
new file mode 100644
index 0000000000..ed743f5679
--- /dev/null
+++ b/django/utils/htmlparser.py
@@ -0,0 +1,94 @@
+import HTMLParser as _HTMLParser
+
+
+class HTMLParser(_HTMLParser.HTMLParser):
+    """
+    Patched version of stdlib's HTMLParser with patch from:
+    http://bugs.python.org/issue670664
+
+    While inside an element listed in ``CDATA_CONTENT_ELEMENTS`` the name of
+    that element is remembered in ``cdata_tag``; end tags that do not close
+    it are passed to ``handle_data()`` instead of ``handle_endtag()``.
+    """
+    def __init__(self):
+        _HTMLParser.HTMLParser.__init__(self)
+        # Lower-cased name of the CDATA element being parsed, else None.
+        self.cdata_tag = None
+
+    def set_cdata_mode(self, tag):
+        self.interesting = _HTMLParser.interesting_cdata
+        self.cdata_tag = tag.lower()
+
+    def clear_cdata_mode(self):
+        self.interesting = _HTMLParser.interesting_normal
+        self.cdata_tag = None
+
+    # Internal -- handle starttag, return end or -1 if not terminated
+    def parse_starttag(self, i):
+        # NOTE(review): ``__starttag_text`` is name-mangled with this class's
+        # name; presumably this is meant to line up with the attribute used
+        # by the base class of the same name -- confirm.
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and j into a tag and attrs
+        attrs = []
+        match = _HTMLParser.tagfind.match(rawdata, i + 1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = rawdata[i + 1:k].lower()
+
+        while k < endpos:
+            m = _HTMLParser.attrfind.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:
+                # Attribute without a value.
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                # Strip the surrounding quotes before unescaping.
+                attrvalue = attrvalue[1:-1]
+                attrvalue = self.unescape(attrvalue)
+            attrs.append((attrname.lower(), attrvalue))
+            k = m.end()
+
+        end = rawdata[k:endpos].strip()
+        if end not in (">", "/>"):
+            # lineno/offset are computed but not used by the error() call
+            # below; the statements are kept as they are in this block.
+            lineno, offset = self.getpos()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + self.__starttag_text.count("\n")
+                offset = len(self.__starttag_text) \
+                         - self.__starttag_text.rfind("\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            self.error("junk characters in start tag: %r"
+                       % (rawdata[k:endpos][:20],))
+        if end.endswith('/>'):
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            self.handle_starttag(tag, attrs)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)  # <--------------------------- Changed
+        return endpos
+
+    # Internal -- parse endtag, return end or -1 if incomplete
+    def parse_endtag(self, i):
+        rawdata = self.rawdata
+        # An end tag starts with "</". Comparing against the empty string
+        # would make this sanity check fail for every real end tag.
+        assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag"
+        match = _HTMLParser.endendtag.search(rawdata, i + 1)  # >
+        if not match:
+            return -1
+        j = match.end()
+        match = _HTMLParser.endtagfind.match(rawdata, i)  # </ + tag + >
+        if not match:
+            if self.cdata_tag is not None:  # *** add ***
+                self.handle_data(rawdata[i:j])  # *** add ***
+                return j  # *** add ***
+            self.error("bad end tag: %r" % (rawdata[i:j],))
+        # --- changed start ---------------------------------------------------
+        tag = match.group(1).strip()
+        if self.cdata_tag is not None:
+            # Inside a CDATA element only its own end tag counts as markup;
+            # every other end tag is plain data.
+            if tag.lower() != self.cdata_tag:
+                self.handle_data(rawdata[i:j])
+                return j
+        # --- changed end -----------------------------------------------------
+        self.handle_endtag(tag.lower())
+        self.clear_cdata_mode()
+        return j
diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt
index f2c97f603a..cb0f01ed7d 100644
--- a/docs/releases/1.4.txt
+++ b/docs/releases/1.4.txt
@@ -475,6 +475,21 @@ Time zone support is enabled by default in new projects created with
:djadmin:`startproject`. If you want to use this feature in an existing
project, read the :ref:`migration guide `.
+HTML comparisons in tests
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The :class:`~django.test.testcase.TestCase` base class now has some helpers to
+compare HTML without tripping over irrelevant differences in whitespace,
+argument quoting and ordering, and closing of self-closing tags. HTML can
+either be compared directly with the new
+:meth:`~django.test.testcase.TestCase.assertHTMLEqual` and
+:meth:`~django.test.testcase.TestCase.assertHTMLNotEqual` assertions, or use
+the ``html=True`` flag with
+:meth:`~django.test.testcase.TestCase.assertContains` and
+:meth:`~django.test.testcase.TestCase.assertNotContains` to test if the test
+client's response contains a given HTML fragment. See the :ref:`assertion
+documentation<assertions>` for more information.
+
Minor features
~~~~~~~~~~~~~~
diff --git a/docs/topics/testing.txt b/docs/topics/testing.txt
index f0f0b445f9..ebc9f1ab28 100644
--- a/docs/topics/testing.txt
+++ b/docs/topics/testing.txt
@@ -1542,17 +1542,33 @@ your test suite.
self.assertFieldOutput(EmailField, {'a@a.com': 'a@a.com'}, {'aaa': [u'Enter a valid e-mail address.']})
-.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='')
+.. method:: TestCase.assertContains(response, text, count=None, status_code=200, msg_prefix='', html=False)
Asserts that a ``Response`` instance produced the given ``status_code`` and
that ``text`` appears in the content of the response. If ``count`` is
provided, ``text`` must occur exactly ``count`` times in the response.
-.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='')
+ .. versionadded:: 1.4
+
+ Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
+ the response content will be based on HTML semantics instead of
+ character-by-character equality. Whitespace is ignored in most cases,
+ attribute ordering is not significant. See
+ :func:`~TestCase.assertHTMLEqual` for more details.
+
+.. method:: TestCase.assertNotContains(response, text, status_code=200, msg_prefix='', html=False)
Asserts that a ``Response`` instance produced the given ``status_code`` and
that ``text`` does not appears in the content of the response.
+ .. versionadded:: 1.4
+
+ Set ``html`` to ``True`` to handle ``text`` as HTML. The comparison with
+ the response content will be based on HTML semantics instead of
+ character-by-character equality. Whitespace is ignored in most cases,
+ attribute ordering is not significant. See
+ :func:`~TestCase.assertHTMLEqual` for more details.
+
.. method:: TestCase.assertFormError(response, form, field, errors, msg_prefix='')
Asserts that a field on a form raises the provided list of errors when
@@ -1656,6 +1672,48 @@ your test suite.
Person.objects.create(name="Aaron")
Person.objects.create(name="Daniel")
+.. method:: TestCase.assertHTMLEqual(html1, html2, msg=None)
+
+ .. versionadded:: 1.4
+
+ Asserts that the strings ``html1`` and ``html2`` are equal. The comparison
+ is based on HTML semantics. The comparison takes the following things into
+ account:
+
+ * Whitespace before and after HTML tags is ignored
+ * All types of whitespace are considered equivalent
+ * All open tags are closed implicitly, i.e. when a surrounding tag is
+ closed or the HTML document ends
+ * Empty tags are equivalent to their self-closing version
+ * The ordering of attributes of an HTML element is not significant
+ * Attributes without a value are equal to attributes whose value equals
+ their name (see the examples)
+
+ The following examples are valid tests and don't raise any
+ ``AssertionError``::
+
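+     self.assertHTMLEqual('<p>Hello <b>world!</p>',
+         '''<p>
+             Hello   <b>world! </b>
+         </p>''')
+     self.assertHTMLEqual(
+         '<input type="checkbox" checked="checked" id="id_accept_terms" />',
+         '<input id="id_accept_terms" type="checkbox" checked>')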
+
+ ``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
+ raised if one of them cannot be parsed.
+
+.. method:: TestCase.assertHTMLNotEqual(html1, html2, msg=None)
+
+ .. versionadded:: 1.4
+
+ Asserts that the strings ``html1`` and ``html2`` are *not* equal. The
+ comparison is based on HTML semantics. See
+ :func:`~TestCase.assertHTMLEqual` for details.
+
+ ``html1`` and ``html2`` must be valid HTML. An ``AssertionError`` will be
+ raised if one of them cannot be parsed.
+
.. _topics-testing-email:
diff --git a/tests/modeltests/generic_relations/tests.py b/tests/modeltests/generic_relations/tests.py
index adfba82cd6..0ac552cf77 100644
--- a/tests/modeltests/generic_relations/tests.py
+++ b/tests/modeltests/generic_relations/tests.py
@@ -200,11 +200,11 @@ class GenericRelationsTests(TestCase):
def test_generic_inline_formsets(self):
GenericFormSet = generic_inlineformset_factory(TaggedItem, extra=1)
formset = GenericFormSet()
- self.assertEqual(u''.join(form.as_p() for form in formset.forms), u"""
+ self.assertHTMLEqual(u''.join(form.as_p() for form in formset.forms), u"""
""")
formset = GenericFormSet(instance=Animal())
- self.assertEqual(u''.join(form.as_p() for form in formset.forms), u"""
+ self.assertHTMLEqual(u''.join(form.as_p() for form in formset.forms), u"""
""")
platypus = Animal.objects.create(
@@ -216,13 +216,13 @@ class GenericRelationsTests(TestCase):
tagged_item_id = TaggedItem.objects.get(
tag='shiny', object_id=platypus.id
).id
- self.assertEqual(u''.join(form.as_p() for form in formset.forms), u"""
+ self.assertHTMLEqual(u''.join(form.as_p() for form in formset.forms), u"""
""" % tagged_item_id)
lion = Animal.objects.create(common_name="Lion", latin_name="Panthera leo")
formset = GenericFormSet(instance=lion, prefix='x')
- self.assertEqual(u''.join(form.as_p() for form in formset.forms), u"""
+ self.assertHTMLEqual(u''.join(form.as_p() for form in formset.forms), u"""
@@ -653,7 +653,7 @@ class OldFormForXTests(TestCase):
# ManyToManyFields are represented by a MultipleChoiceField, ForeignKeys and any
# fields with the 'choices' attribute are represented by a ChoiceField.
f = ArticleForm(auto_id=False)
- self.assertEqual(unicode(f), '''