Fixed #20568 -- truncatewords_html no longer splits words containing HTML entities.

Thanks yann0 at hotmail.com for the report.
2013-07-18 10:45:34 +02:00 · 2013-07-18 10:45:34 +02:00 · 40b95a24ae
parent 58d555caf5
commit 40b95a24ae
3 changed files with 13 additions and 1 deletion
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -22,7 +22,7 @@ capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:]
 capfirst = allow_lazy(capfirst, six.text_type)

 # Set up regular expressions
-re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U|re.S)
+re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U|re.S)
 re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)


--- a/tests/defaultfilters/tests.py
+++ b/tests/defaultfilters/tests.py
@@ -184,6 +184,9 @@ class DefaultFiltersTests(TestCase):
            '<p>one <a href="#">two - three <br>four</a> five</p>')
        self.assertEqual(truncatewords_html(
            '\xc5ngstr\xf6m was here', 1), '\xc5ngstr\xf6m ...')
+        self.assertEqual(truncatewords_html('<i>Buenos d&iacute;as! '
+            '&#x00bf;C&oacute;mo est&aacute;?</i>', 3),
+            '<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo ...</i>')

    def test_upper(self):
        self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT')
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -82,6 +82,15 @@ class TestUtilsText(SimpleTestCase):
        self.assertEqual('<br>The <hr/>quick <em>brown...</em>',
            truncator.words(3, '...', html=True ))

+        # Test html entities
+        truncator = text.Truncator('<i>Buenos d&iacute;as!'
+            ' &#x00bf;C&oacute;mo est&aacute;?</i>')
+        self.assertEqual('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo...</i>',
+            truncator.words(3, '...', html=True))
+        truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
+        self.assertEqual('<p>I &lt;3 python...</p>',
+            truncator.words(3, '...', html=True))
+
    def test_wrap(self):
        digits = '1234 67 9'
        self.assertEqual(text.wrap(digits, 100), '1234 67 9')