django1/django/test/html.py

"""
Comparing two html documents.
"""

from __future__ import unicode_literals

import re
from django.utils.encoding import force_text
from django.utils.html_parser import HTMLParser, HTMLParseError
from django.utils import six
from django.utils.encoding import python_2_unicode_compatible


# Raw string: '\s' is not a valid escape in a normal string literal and
# triggers a warning on recent Python versions.
WHITESPACE = re.compile(r'\s+')


def normalize_whitespace(string):
    """
    Return ``string`` with every run of whitespace characters collapsed
    into a single space.

    Leading and trailing whitespace is collapsed as well, not removed.
    """
    return WHITESPACE.sub(' ', string)


@python_2_unicode_compatible
class Element(object):
    """
    A node of the simplified DOM tree used to compare HTML documents.

    ``name`` is the tag name, ``attributes`` is a sorted list of
    ``(name, value)`` pairs (``value`` is ``None`` when the attribute was
    given without a value) and ``children`` is a list whose items are
    either text strings or nested ``Element`` instances.
    """

    def __init__(self, name, attributes):
        self.name = name
        # Sorted so that the order of attributes in the markup does not
        # influence the index-wise comparison done in __eq__.
        self.attributes = sorted(attributes)
        self.children = []

    @staticmethod
    def _normalized(attributes):
        """
        Return ``attributes`` as a tuple of pairs in which a missing value
        (``None``) is replaced by the attribute's own name, so that
        ``checked`` and ``checked="checked"`` normalize identically.
        """
        return tuple(
            (attr, attr if value is None else value)
            for attr, value in attributes
        )

    def append(self, element):
        """
        Add a child, which is either a text string or an ``Element``.

        Text is whitespace-normalized and merged into a directly preceding
        text child. Empty text is not stored.
        """
        last_is_text = (bool(self.children) and
                        isinstance(self.children[-1], six.string_types))
        if isinstance(element, six.string_types):
            element = normalize_whitespace(force_text(element))
            if last_is_text:
                # Merge adjacent text nodes and re-normalize the seam.
                self.children[-1] = normalize_whitespace(
                    self.children[-1] + element)
                return
        elif last_is_text and self.children[-1].isspace():
            # removing last children if it is only whitespace
            # this can result in incorrect dom representations since
            # whitespace between inline tags like <span> is significant
            self.children.pop()
        if element:
            self.children.append(element)

    def finalize(self):
        """
        Strip surrounding whitespace from all text children, drop trailing
        text children that become empty, and finalize nested elements.
        """
        children = self.children
        # Remove trailing text nodes that consist of whitespace only; stop
        # at the first trailing node that is an element or non-blank text.
        while children and isinstance(children[-1], six.string_types):
            stripped = children[-1].rstrip()
            if stripped:
                children[-1] = stripped
                break
            children.pop()
        for i, child in enumerate(children):
            if isinstance(child, six.string_types):
                children[i] = child.strip()
            elif hasattr(child, 'finalize'):
                child.finalize()

    def __eq__(self, element):
        """
        Two elements are equal when their names, attributes and children
        match. An attribute without a value equals the same attribute whose
        value is its own name. Objects that do not expose ``name``,
        ``attributes`` and ``children`` compare unequal.
        """
        if not all(hasattr(element, field)
                   for field in ('name', 'attributes', 'children')):
            return False
        if self.name != element.name:
            return False
        if len(self.attributes) != len(element.attributes):
            return False
        if self.attributes != element.attributes:
            # attributes without a value is same as attribute with value that
            # equals the attributes name:
            # <input checked> == <input checked="checked">
            if (self._normalized(self.attributes) !=
                    self._normalized(element.attributes)):
                return False
        return self.children == element.children

    def __hash__(self):
        # Hash the normalized attributes so that elements which compare
        # equal (e.g. ``checked`` vs ``checked="checked"``) hash equal too.
        return hash((self.name,) + self._normalized(self.attributes))

    def __ne__(self, element):
        return not self.__eq__(element)

    def _count(self, element, count=True):
        """
        Count occurrences of ``element`` in this subtree.

        ``element`` is either an ``Element`` (matched by equality against
        this node and its descendants) or a string (matched as a substring
        of text children). With ``count=False`` the search stops at the
        first hit and a truthy number is returned.
        """
        searching_text = isinstance(element, six.string_types)
        if not searching_text and self == element:
            return 1
        total = 0
        for child in self.children:
            # child is text content and element is also text content, then
            # make a simple "text" in "text"
            if isinstance(child, six.string_types):
                if searching_text:
                    if count:
                        total += child.count(element)
                    elif element in child:
                        return 1
            else:
                total += child._count(element, count=count)
                if not count and total:
                    return total
        return total

    def __contains__(self, element):
        return self._count(element, count=False) > 0

    def count(self, element):
        """Return how many times ``element`` occurs in this subtree."""
        return self._count(element, count=True)

    def __getitem__(self, key):
        """Return the child (or slice of children) at ``key``."""
        return self.children[key]

    def __str__(self):
        output = '<%s' % self.name
        for key, value in self.attributes:
            if value:
                output += ' %s="%s"' % (key, value)
            else:
                output += ' %s' % key
        if self.children:
            output += '>\n'
            output += ''.join(six.text_type(c) for c in self.children)
            output += '\n</%s>' % self.name
        else:
            output += ' />'
        return output

    def __repr__(self):
        return six.text_type(self)


@python_2_unicode_compatible
class RootElement(Element):
    """
    Container element without a name or attributes that holds the
    top-level nodes of a parsed document.
    """

    def __init__(self):
        super(RootElement, self).__init__(None, ())

    def __str__(self):
        # Only the children are rendered; the root itself has no tag.
        rendered = [six.text_type(child) for child in self.children]
        return ''.join(rendered)


class Parser(HTMLParser):
    """
    HTML parser that builds a tree of ``Element`` objects.

    The tree hangs off ``self.root``. ``self.open_tags`` is the stack of
    elements whose start tag has been seen but not yet closed, and
    ``self.element_positions`` maps each element to the parser position
    recorded while its start tag was handled.
    """

    # Tags that are never pushed onto the open-tag stack: they take no
    # children and need no end tag.
    SELF_CLOSING_TAGS = ('br', 'hr', 'input', 'img', 'meta', 'spacer',
        'link', 'frame', 'base', 'col')

    def __init__(self):
        HTMLParser.__init__(self)
        self.root = RootElement()
        self.open_tags = []
        self.element_positions = {}

    def error(self, msg):
        """Raise ``HTMLParseError`` for ``msg`` at the current position."""
        raise HTMLParseError(msg, self.getpos())

    def format_position(self, position=None, element=None):
        """
        Return a ``'Line X, Column Y'`` string.

        Uses ``position`` when given, otherwise the position recorded for
        ``element``, otherwise the parser's current position.
        """
        if not position and element:
            position = self.element_positions[element]
        if position is None:
            position = self.getpos()
        if hasattr(position, 'lineno'):
            position = position.lineno, position.offset
        return 'Line %d, Column %d' % position

    @property
    def current(self):
        """The innermost open element, or the root when none is open."""
        if self.open_tags:
            return self.open_tags[-1]
        return self.root

    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        # Self-closing tags were never opened, so there is nothing to close.
        if tag not in self.SELF_CLOSING_TAGS:
            self.handle_endtag(tag)

    def handle_starttag(self, tag, attrs):
        # Special case handling of 'class' attribute, so that comparisons of DOM
        # instances are not sensitive to ordering of classes.
        normalized = []
        for name, value in attrs:
            # A value-less ``class`` attribute arrives as None and is left
            # untouched. split() without an argument also ignores repeated
            # or non-space whitespace between class names.
            if name == "class" and value is not None:
                value = " ".join(sorted(value.split()))
            normalized.append((name, value))
        element = Element(tag, normalized)
        self.current.append(element)
        if tag not in self.SELF_CLOSING_TAGS:
            self.open_tags.append(element)
        self.element_positions[element] = self.getpos()

    def handle_endtag(self, tag):
        """
        Close ``tag`` by popping open elements until one with that name is
        popped; report an error if no such element is open.
        """
        while True:
            if not self.open_tags:
                self.error("Unexpected end tag `%s` (%s)" % (
                    tag, self.format_position()))
            if self.open_tags.pop().name == tag:
                break

    def handle_data(self, data):
        self.current.append(data)

    def handle_charref(self, name):
        # ``name`` is the part after '&#' (e.g. '62' or 'x3E'), so the '#'
        # must be restored to rebuild the original reference.
        self.current.append('&#%s;' % name)

    def handle_entityref(self, name):
        self.current.append('&%s;' % name)


def parse_html(html):
    """
    Parse a string of *valid* HTML into a tree of Python objects that can
    be compared with another parsed document for semantic equivalence.

    Purely syntactical differences, such as the kind of quotes used around
    attribute values, do not affect the comparison. When the document has
    exactly one top-level node and it is an element, that element is
    returned directly; otherwise the root container is returned.
    """
    parser = Parser()
    parser.feed(html)
    parser.close()
    root = parser.root
    root.finalize()
    # Removing ROOT element if it's not necessary
    top_level = root.children
    if len(top_level) == 1 and not isinstance(top_level[0], six.string_types):
        return top_level[0]
    return root
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`"""`
			`Comparing two html documents.`
			`"""`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00
			`from __future__ import unicode_literals`

Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`import re`
[py3] Ported django.utils.encoding. * Renamed smart_unicode to smart_text (but kept the old name under Python 2 for backwards compatibility). * Renamed smart_str to smart_bytes. * Re-introduced smart_str as an alias for smart_text under Python 3 and smart_bytes under Python 2 (which is backwards compatible). Thus smart_str always returns a str objects. * Used the new smart_str in a few places where both Python 2 and 3 want a str. 2012-07-21 16:00:10 +08:00			`from django.utils.encoding import force_text`
[py3] Switched to Python 3-compatible imports. xrange/range will be dealt with in a separate commit due to the huge number of changes. 2012-07-20 22:16:57 +08:00			`from django.utils.html_parser import HTMLParser, HTMLParseError`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`from django.utils import six`
[py3] Refactored __unicode__ to __str__. * Renamed the __unicode__ methods * Applied the python_2_unicode_compatible decorator * Removed the StrAndUnicode mix-in that is superseded by python_2_unicode_compatible * Kept the __unicode__ methods in classes that specifically test it under Python 2 2012-08-12 18:32:08 +08:00			`from django.utils.encoding import python_2_unicode_compatible`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00

			`WHITESPACE = re.compile('\s+')`


			`def normalize_whitespace(string):`
			`return WHITESPACE.sub(' ', string)`


[py3] Refactored __unicode__ to __str__. * Renamed the __unicode__ methods * Applied the python_2_unicode_compatible decorator * Removed the StrAndUnicode mix-in that is superseded by python_2_unicode_compatible * Kept the __unicode__ methods in classes that specifically test it under Python 2 2012-08-12 18:32:08 +08:00			`@python_2_unicode_compatible`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`class Element(object):`
			`def __init__(self, name, attributes):`
			`self.name = name`
			`self.attributes = sorted(attributes)`
			`self.children = []`

			`def append(self, element):`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if isinstance(element, six.string_types):`
[py3] Ported django.utils.encoding. * Renamed smart_unicode to smart_text (but kept the old name under Python 2 for backwards compatibility). * Renamed smart_str to smart_bytes. * Re-introduced smart_str as an alias for smart_text under Python 3 and smart_bytes under Python 2 (which is backwards compatible). Thus smart_str always returns a str objects. * Used the new smart_str in a few places where both Python 2 and 3 want a str. 2012-07-21 16:00:10 +08:00			`element = force_text(element)`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`element = normalize_whitespace(element)`
			`if self.children:`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if isinstance(self.children[-1], six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`self.children[-1] += element`
			`self.children[-1] = normalize_whitespace(self.children[-1])`
			`return`
			`elif self.children:`
			`# removing last children if it is only whitespace`
			`# this can result in incorrect dom representations since`
			`# whitespace between inline tags like <span> is significant`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if isinstance(self.children[-1], six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`if self.children[-1].isspace():`
			`self.children.pop()`
			`if element:`
			`self.children.append(element)`

			`def finalize(self):`
			`def rstrip_last_element(children):`
			`if children:`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if isinstance(children[-1], six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`children[-1] = children[-1].rstrip()`
			`if not children[-1]:`
			`children.pop()`
			`children = rstrip_last_element(children)`
			`return children`

			`rstrip_last_element(self.children)`
			`for i, child in enumerate(self.children):`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if isinstance(child, six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`self.children[i] = child.strip()`
			`elif hasattr(child, 'finalize'):`
			`child.finalize()`

			`def __eq__(self, element):`
			`if not hasattr(element, 'name'):`
			`return False`
			`if hasattr(element, 'name') and self.name != element.name:`
			`return False`
			`if len(self.attributes) != len(element.attributes):`
			`return False`
			`if self.attributes != element.attributes:`
			`# attributes without a value is same as attribute with value that`
			`# equals the attributes name:`
			`# <input checked> == <input checked="checked">`
			`for i in range(len(self.attributes)):`
			`attr, value = self.attributes[i]`
			`other_attr, other_value = element.attributes[i]`
			`if value is None:`
			`value = attr`
			`if other_value is None:`
			`other_value = other_attr`
			`if attr != other_attr or value != other_value:`
			`return False`
			`if self.children != element.children:`
			`return False`
			`return True`

Fixed #19634 -- Added proper __hash__ methods. Classes overriding __eq__ need a __hash__ such that equal objects have the same hash. Thanks akaariai for the report and regebro for the patch. 2013-02-26 05:53:08 +08:00			`def __hash__(self):`
			`return hash((self.name,) + tuple(a for a in self.attributes))`
[py3] Made Element instances hashable 2012-08-09 05:13:33 +08:00
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`def __ne__(self, element):`
			`return not self.__eq__(element)`

			`def _count(self, element, count=True):`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if not isinstance(element, six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`if self == element:`
			`return 1`
			`i = 0`
			`for child in self.children:`
			`# child is text content and element is also text content, then`
			`# make a simple "text" in "text"`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if isinstance(child, six.string_types):`
			`if isinstance(element, six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`if count:`
			`i += child.count(element)`
			`elif element in child:`
			`return 1`
			`else:`
			`i += child._count(element, count=count)`
			`if not count and i:`
			`return i`
			`return i`

			`def __contains__(self, element):`
			`return self._count(element, count=False) > 0`

			`def count(self, element):`
			`return self._count(element, count=True)`

			`def __getitem__(self, key):`
			`return self.children[key]`

[py3] Refactored __unicode__ to __str__. * Renamed the __unicode__ methods * Applied the python_2_unicode_compatible decorator * Removed the StrAndUnicode mix-in that is superseded by python_2_unicode_compatible * Kept the __unicode__ methods in classes that specifically test it under Python 2 2012-08-12 18:32:08 +08:00			`def __str__(self):`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`output = '<%s' % self.name`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`for key, value in self.attributes:`
			`if value:`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`output += ' %s="%s"' % (key, value)`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`else:`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`output += ' %s' % key`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`if self.children:`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`output += '>\n'`
[py3] Replaced unicode/str by six.text_type/bytes. 2012-07-20 20:48:51 +08:00			`output += ''.join(six.text_type(c) for c in self.children)`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`output += '\n</%s>' % self.name`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`else:`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`output += ' />'`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`return output`

			`def __repr__(self):`
[py3] Replaced unicode/str by six.text_type/bytes. 2012-07-20 20:48:51 +08:00			`return six.text_type(self)`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00

[py3] Refactored __unicode__ to __str__. * Renamed the __unicode__ methods * Applied the python_2_unicode_compatible decorator * Removed the StrAndUnicode mix-in that is superseded by python_2_unicode_compatible * Kept the __unicode__ methods in classes that specifically test it under Python 2 2012-08-12 18:32:08 +08:00			`@python_2_unicode_compatible`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`class RootElement(Element):`
			`def __init__(self):`
			`super(RootElement, self).__init__(None, ())`

[py3] Refactored __unicode__ to __str__. * Renamed the __unicode__ methods * Applied the python_2_unicode_compatible decorator * Removed the StrAndUnicode mix-in that is superseded by python_2_unicode_compatible * Kept the __unicode__ methods in classes that specifically test it under Python 2 2012-08-12 18:32:08 +08:00			`def __str__(self):`
[py3] Replaced unicode/str by six.text_type/bytes. 2012-07-20 20:48:51 +08:00			`return ''.join(six.text_type(c) for c in self.children)`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00

			`class Parser(HTMLParser):`
			`SELF_CLOSING_TAGS = ('br' , 'hr', 'input', 'img', 'meta', 'spacer',`
			`'link', 'frame', 'base', 'col')`

			`def __init__(self):`
			`HTMLParser.__init__(self)`
			`self.root = RootElement()`
			`self.open_tags = []`
			`self.element_positions = {}`

			`def error(self, msg):`
			`raise HTMLParseError(msg, self.getpos())`

			`def format_position(self, position=None, element=None):`
			`if not position and element:`
			`position = self.element_positions[element]`
			`if position is None:`
			`position = self.getpos()`
			`if hasattr(position, 'lineno'):`
			`position = position.lineno, position.offset`
			`return 'Line %d, Column %d' % position`

			`@property`
			`def current(self):`
			`if self.open_tags:`
			`return self.open_tags[-1]`
			`else:`
			`return self.root`

			`def handle_startendtag(self, tag, attrs):`
			`self.handle_starttag(tag, attrs)`
			`if tag not in self.SELF_CLOSING_TAGS:`
			`self.handle_endtag(tag)`

			`def handle_starttag(self, tag, attrs):`
Fixed HTML comparisons of class="foo bar" and class="bar foo" in tests Refs #17758 2012-12-24 10:11:32 +08:00			`# Special case handling of 'class' attribute, so that comparisons of DOM`
			`# instances are not sensitive to ordering of classes.`
			`attrs = [`
			`(name, " ".join(sorted(value.split(" "))))`
			`if name == "class"`
			`else (name, value)`
			`for name, value in attrs`
			`]`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`element = Element(tag, attrs)`
			`self.current.append(element)`
			`if tag not in self.SELF_CLOSING_TAGS:`
			`self.open_tags.append(element)`
			`self.element_positions[element] = self.getpos()`

			`def handle_endtag(self, tag):`
			`if not self.open_tags:`
			self.error("Unexpected end tag `%s` (%s)" % (
			`tag, self.format_position()))`
			`element = self.open_tags.pop()`
			`while element.name != tag:`
			`if not self.open_tags:`
			self.error("Unexpected end tag `%s` (%s)" % (
			`tag, self.format_position()))`
			`element = self.open_tags.pop()`

			`def handle_data(self, data):`
			`self.current.append(data)`

			`def handle_charref(self, name):`
			`self.current.append('&%s;' % name)`

			`def handle_entityref(self, name):`
			`self.current.append('&%s;' % name)`


			`def parse_html(html):`
			`"""`
			`Takes a string that contains valid HTML and turns it into a Python object`
			`structure that can be easily compared against other HTML on semantic`
			`equivilance. Syntactical differences like which quotation is used on`
			`arguments will be ignored.`

			`"""`
			`parser = Parser()`
			`parser.feed(html)`
			`parser.close()`
			`document = parser.root`
			`document.finalize()`
			`# Removing ROOT element if it's not necessary`
			`if len(document.children) == 1:`
[py3] Replaced basestring by six.string_types. 2012-07-20 20:22:00 +08:00			`if not isinstance(document.children[0], six.string_types):`
Fixed #16921 -- Added assertHTMLEqual and assertHTMLNotEqual assertions, and converted Django tests to use them where appropriate. Thanks Greg Müllegger. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17414 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-02-01 04:36:11 +08:00			`document = document.children[0]`
			`return document`