Fixed #20568 -- truncatewords_html no longer splits words containing HTML entities.

Thanks yann0 at hotmail.com for the report.
This commit is contained in:
Jaap Roes 2013-07-18 10:45:34 +02:00 committed by Tim Graham
parent 58d555caf5
commit 40b95a24ae
3 changed files with 13 additions and 1 deletions

View File

@ -22,7 +22,7 @@ capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:]
capfirst = allow_lazy(capfirst, six.text_type)
# Set up regular expressions
re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U|re.S)
re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U|re.S)
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)

View File

@ -184,6 +184,9 @@ class DefaultFiltersTests(TestCase):
'<p>one <a href="#">two - three <br>four</a> five</p>')
self.assertEqual(truncatewords_html(
'\xc5ngstr\xf6m was here', 1), '\xc5ngstr\xf6m ...')
self.assertEqual(truncatewords_html('<i>Buenos d&iacute;as! '
'&#x00bf;C&oacute;mo est&aacute;?</i>', 3),
'<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo ...</i>')
def test_upper(self):
self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT')

View File

@ -82,6 +82,15 @@ class TestUtilsText(SimpleTestCase):
self.assertEqual('<br>The <hr/>quick <em>brown...</em>',
truncator.words(3, '...', html=True ))
# Test html entities
truncator = text.Truncator('<i>Buenos d&iacute;as!'
' &#x00bf;C&oacute;mo est&aacute;?</i>')
self.assertEqual('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo...</i>',
truncator.words(3, '...', html=True))
truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
self.assertEqual('<p>I &lt;3 python...</p>',
truncator.words(3, '...', html=True))
def test_wrap(self):
digits = '1234 67 9'
self.assertEqual(text.wrap(digits, 100), '1234 67 9')