From bf1871d874a371ad0ae6c7e098e7665a468dca16 Mon Sep 17 00:00:00 2001 From: Chris Khoo Date: Sat, 24 Nov 2012 12:10:25 +0100 Subject: [PATCH] Fixed #19237 -- Improved strip_tags utility The previous pattern didn't properly address cases where '>' was present inside quoted tag content. --- django/utils/html.py | 3 ++- tests/regressiontests/utils/html.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/django/utils/html.py b/django/utils/html.py index 9816b9accb..d914234d60 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -33,6 +33,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') html_gunk_re = re.compile(r'(?:
<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z') +strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE) def escape(text): @@ -117,7 +118,7 @@ linebreaks = allow_lazy(linebreaks, six.text_type) def strip_tags(value): """Returns the given HTML with all tags stripped.""" - return re.sub(r'<[^>]*?>', '', force_text(value)) + return strip_tags_re.sub('', force_text(value)) strip_tags = allow_lazy(strip_tags) def remove_tags(html, tags): diff --git a/tests/regressiontests/utils/html.py b/tests/regressiontests/utils/html.py index 6a93dff85e..a0226c4765 100644 --- a/tests/regressiontests/utils/html.py +++ b/tests/regressiontests/utils/html.py @@ -65,6 +65,9 @@ class TestUtilsHtml(unittest.TestCase): ('<f', '<f'), ('</fe', '</fe'), ('<x>b<y>', 'b'), + ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'), + ('a<p a >b</p>c', 'abc'), + ('d<a:b c:d>e</p>f', 'def'), ) for value, output in items: self.check_output(f, value, output)