diff --git a/django/utils/html.py b/django/utils/html.py index 573235092d..0d28c77a61 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -16,7 +16,7 @@ from django.utils.functional import allow_lazy from django.utils import six from django.utils.text import normalize_newlines -from .html_parser import HTMLParser +from .html_parser import HTMLParser, HTMLParseError # Configuration for urlize() function. @@ -136,13 +136,13 @@ class MLStripper(HTMLParser): def strip_tags(value): """Returns the given HTML with all tags stripped.""" s = MLStripper() - s.feed(value) - data = s.get_data() try: - res = s.close() - except Exception as e: - data += s.rawdata - return data + s.feed(value) + s.close() + except HTMLParseError: + return value + else: + return s.get_data() strip_tags = allow_lazy(strip_tags) def remove_tags(html, tags): diff --git a/docs/ref/utils.txt b/docs/ref/utils.txt index 14ae9aa9b8..bf14af0855 100644 --- a/docs/ref/utils.txt +++ b/docs/ref/utils.txt @@ -490,7 +490,7 @@ Atom1Feed Usually you should build up HTML using Django's templates to make use of its autoescape mechanism, using the utilities in :mod:`django.utils.safestring` -where appropriate. This module provides some additional low level utilitiesfor +where appropriate. This module provides some additional low level utilities for escaping HTML. .. function:: escape(text) @@ -564,7 +564,13 @@ escaping HTML. strip_tags(value) If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the - return value will be ``"Joel is a slug"``. + return value will be ``"Joel is a slug"``. Note that ``strip_tags`` result + may still contain unsafe HTML content, so you might use + :func:`~django.utils.html.escape` to make it a safe string. + + .. versionchanged:: 1.6 + + For improved safety, ``strip_tags`` is now parser-based. ..
function:: remove_tags(value, tags) diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py index c3e9f7c878..b973f1c64f 100644 --- a/tests/utils_tests/test_html.py +++ b/tests/utils_tests/test_html.py @@ -70,6 +70,9 @@ class TestUtilsHtml(TestCase): ('a', 'a'), ('e', 'e'), ('hi, b2!', 'b7>b2!'), ('b', 'b'), ('a

b

c', 'abc'),