Fixed #19237 (again) - Made strip_tags consistent between Python versions

This commit is contained in:
Claude Paroz 2013-05-23 14:00:17 +02:00
parent 8c2fd050f8
commit b664cb818d
3 changed files with 18 additions and 9 deletions

View File

@@ -16,7 +16,7 @@ from django.utils.functional import allow_lazy
from django.utils import six from django.utils import six
from django.utils.text import normalize_newlines from django.utils.text import normalize_newlines
from .html_parser import HTMLParser from .html_parser import HTMLParser, HTMLParseError
# Configuration for urlize() function. # Configuration for urlize() function.
@@ -136,13 +136,13 @@ class MLStripper(HTMLParser):
def strip_tags(value): def strip_tags(value):
"""Returns the given HTML with all tags stripped.""" """Returns the given HTML with all tags stripped."""
s = MLStripper() s = MLStripper()
s.feed(value)
data = s.get_data()
try: try:
res = s.close() s.feed(value)
except Exception as e: s.close()
data += s.rawdata except HTMLParseError:
return data return value
else:
return s.get_data()
strip_tags = allow_lazy(strip_tags) strip_tags = allow_lazy(strip_tags)
def remove_tags(html, tags): def remove_tags(html, tags):

View File

@@ -490,7 +490,7 @@ Atom1Feed
Usually you should build up HTML using Django's templates to make use of its Usually you should build up HTML using Django's templates to make use of its
autoescape mechanism, using the utilities in :mod:`django.utils.safestring` autoescape mechanism, using the utilities in :mod:`django.utils.safestring`
where appropriate. This module provides some additional low level utilitiesfor where appropriate. This module provides some additional low level utilities for
escaping HTML. escaping HTML.
.. function:: escape(text) .. function:: escape(text)
@@ -564,7 +564,13 @@ escaping HTML.
strip_tags(value) strip_tags(value)
If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the
return value will be ``"Joel is a slug"``. return value will be ``"Joel is a slug"``. Note that the result of ``strip_tags``
may still contain unsafe HTML content, so you should pass it through
:func:`~django.utils.html.escape` before treating it as a safe string.
.. versionchanged:: 1.6
For improved safety, ``strip_tags`` is now parser-based.
.. function:: remove_tags(value, tags) .. function:: remove_tags(value, tags)

View File

@@ -70,6 +70,9 @@ class TestUtilsHtml(TestCase):
('</adf>a', 'a'), ('</adf>a', 'a'),
('<asdf><asdf>e', 'e'), ('<asdf><asdf>e', 'e'),
('hi, <f x', 'hi, <f x'), ('hi, <f x', 'hi, <f x'),
('234<235, right?', '234<235, right?'),
('a4<a5 right?', 'a4<a5 right?'),
('b7>b2!', 'b7>b2!'),
('</fe', '</fe'), ('</fe', '</fe'),
('<x>b<y>', 'b'), ('<x>b<y>', 'b'),
('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'), ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),