From 9efe1a7210ee161d5688f66a759bcd8d89d33142 Mon Sep 17 00:00:00 2001
From: Chris Khoo <chris.khoo@gmail.com>
Date: Sat, 24 Nov 2012 12:10:25 +0100
Subject: [PATCH] [1.5.x] Fixed #19237 -- Improved strip_tags utility

The previous pattern didn't properly address cases where '>'
was present inside quoted tag content.
Backport of bf1871d87 from master.
---
 django/utils/html.py                | 3 ++-
 tests/regressiontests/utils/html.py | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/django/utils/html.py b/django/utils/html.py
index 9816b9accb..d914234d60 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -33,6 +33,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
 html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
 hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
 trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
+strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)
 
 
 def escape(text):
@@ -117,7 +118,7 @@ linebreaks = allow_lazy(linebreaks, six.text_type)
 
 def strip_tags(value):
     """Returns the given HTML with all tags stripped."""
-    return re.sub(r'<[^>]*?>', '', force_text(value))
+    return strip_tags_re.sub('', force_text(value))
 strip_tags = allow_lazy(strip_tags)
 
 def remove_tags(html, tags):
diff --git a/tests/regressiontests/utils/html.py b/tests/regressiontests/utils/html.py
index 6a93dff85e..a0226c4765 100644
--- a/tests/regressiontests/utils/html.py
+++ b/tests/regressiontests/utils/html.py
@@ -65,6 +65,9 @@ class TestUtilsHtml(unittest.TestCase):
             ('<f', '<f'),
             ('</fe', '</fe'),
             ('<x>b<y>', 'b'),
+            ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
+            ('a<p a >b</p>c', 'abc'),
+            ('d<a:b c:d>e</p>f', 'def'),
         )
         for value, output in items:
             self.check_output(f, value, output)