django/tests/utils_tests/test_text.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import warnings

from django.test import SimpleTestCase
from django.utils import text

class TestUtilsText(SimpleTestCase):
    """Tests for helpers in ``django.utils.text``.

    Exercises ``Truncator`` (character, word and HTML-aware word
    truncation), ``wrap``, ``slugify`` and ``unescape_entities``.
    """

    def test_truncate_chars(self):
        """``Truncator.chars()`` shortens text to a character budget."""
        truncator = text.Truncator(
            'The quick brown fox jumped over the lazy dog.'
        )
        # Text that already fits within the limit comes back unchanged.
        self.assertEqual('The quick brown fox jumped over the lazy dog.',
                         truncator.chars(100))
        # The truncation text is counted as part of the requested length.
        self.assertEqual('The quick brown fox ...',
                         truncator.chars(23))
        self.assertEqual('The quick brown fo.....',
                         truncator.chars(23, '.....'))

        # Ensure that we normalize our unicode data first
        nfc = text.Truncator('o\xfco\xfco\xfco\xfc')
        nfd = text.Truncator('ou\u0308ou\u0308ou\u0308ou\u0308')
        self.assertEqual('oüoüoüoü', nfc.chars(8))
        self.assertEqual('oüoüoüoü', nfd.chars(8))
        self.assertEqual('oü...', nfc.chars(5))
        self.assertEqual('oü...', nfd.chars(5))

        # Ensure the final length is calculated correctly when there are
        # combining characters with no precomposed form, and that combining
        # characters are not split up.
        truncator = text.Truncator('-B\u030AB\u030A----8')
        self.assertEqual('-B\u030A...', truncator.chars(5))
        self.assertEqual('-B\u030AB\u030A-...', truncator.chars(7))
        self.assertEqual('-B\u030AB\u030A----8', truncator.chars(8))

        # Ensure the length of the end text is correctly calculated when it
        # contains combining characters with no precomposed form.
        truncator = text.Truncator('-----')
        self.assertEqual('---B\u030A', truncator.chars(4, 'B\u030A'))
        self.assertEqual('-----', truncator.chars(5, 'B\u030A'))

        # Make a best effort to shorten to the desired length, but requesting
        # a length shorter than the ellipsis shouldn't break
        self.assertEqual('...', text.Truncator('asdf').chars(1))

    def test_truncate_words(self):
        """``Truncator.words()`` keeps at most the requested number of words."""
        truncator = text.Truncator(
            'The quick brown fox jumped over the lazy dog.'
        )
        # Asking for more words than the text contains leaves it untouched.
        self.assertEqual('The quick brown fox jumped over the lazy dog.',
                         truncator.words(10))
        # Default truncation text, then a caller-supplied one.
        self.assertEqual('The quick brown fox...', truncator.words(4))
        self.assertEqual('The quick brown fox[snip]',
                         truncator.words(4, '[snip]'))

    def test_truncate_html_words(self):
        """``Truncator.words(html=True)`` truncates words and closes open tags."""
        truncator = text.Truncator(
            '<p id="par"><strong><em>The quick brown fox'
            ' jumped over the lazy dog.</em></strong></p>'
        )
        self.assertEqual(
            '<p id="par"><strong><em>The quick brown fox jumped over'
            ' the lazy dog.</em></strong></p>',
            truncator.words(10, html=True))
        self.assertEqual(
            '<p id="par"><strong><em>The quick brown fox...</em>'
            '</strong></p>',
            truncator.words(4, html=True))
        self.assertEqual(
            '<p id="par"><strong><em>The quick brown fox....</em>'
            '</strong></p>',
            truncator.words(4, '....', html=True))
        # An empty truncation text still yields properly closed tags.
        self.assertEqual(
            '<p id="par"><strong><em>The quick brown fox</em>'
            '</strong></p>',
            truncator.words(4, '', html=True))

        # Test with new line inside tag
        truncator = text.Truncator(
            '<p>The quick <a href="xyz.html"\n'
            'id="mylink">brown fox</a> jumped over the lazy dog.</p>'
        )
        self.assertEqual(
            '<p>The quick <a href="xyz.html"\n'
            'id="mylink">brown...</a></p>',
            truncator.words(3, '...', html=True))

        # Test self-closing tags
        truncator = text.Truncator(
            '<br/>The <hr />quick brown fox jumped over'
            ' the lazy dog.'
        )
        self.assertEqual('<br/>The <hr />quick brown...',
                         truncator.words(3, '...', html=True))
        truncator = text.Truncator(
            '<br>The <hr/>quick <em>brown fox</em> '
            'jumped over the lazy dog.'
        )
        self.assertEqual('<br>The <hr/>quick <em>brown...</em>',
                         truncator.words(3, '...', html=True))

    def test_wrap(self):
        """``wrap()`` breaks lines at spaces without splitting long words."""
        digits = '1234 67 9'
        self.assertEqual(text.wrap(digits, 100), '1234 67 9')
        self.assertEqual(text.wrap(digits, 9), '1234 67 9')
        self.assertEqual(text.wrap(digits, 8), '1234 67\n9')

        # Newlines already present in the input are kept.
        self.assertEqual(text.wrap('short\na long line', 7),
                         'short\na long\nline')

        # A word longer than the width is left intact on its own line.
        self.assertEqual(text.wrap('do-not-break-long-words please? ok', 8),
                         'do-not-break-long-words\nplease?\nok')

        long_word = 'l%sng' % ('o' * 20)
        self.assertEqual(text.wrap(long_word, 20), long_word)
        self.assertEqual(text.wrap('a %s word' % long_word, 10),
                         'a\n%s\nword' % long_word)

    def test_slugify(self):
        """``slugify()`` maps each sample value to its expected slug."""
        items = (
            ('Hello, World!', 'hello-world'),
            ('spam & eggs', 'spam-eggs'),
        )
        for value, output in items:
            self.assertEqual(text.slugify(value), output)

    def test_unescape_entities(self):
        """``unescape_entities()`` decodes named, hex and decimal entities."""
        items = [
            ('', ''),
            ('foo', 'foo'),
            ('&amp;', '&'),
            ('&#x26;', '&'),
            ('&#38;', '&'),
            ('foo &amp; bar', 'foo & bar'),
            # A bare ampersand that is not part of an entity is left alone.
            ('foo & bar', 'foo & bar'),
        ]
        for value, output in items:
            self.assertEqual(text.unescape_entities(value), output)
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`# -*- coding: utf-8 -*-`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`from __future__ import unicode_literals`

Fixed #18042 -- Advanced deprecation warnings. Thanks Ramiro for the patch. 2012-05-03 21:27:01 +08:00			`import warnings`
Reorganized utils tests so it's all in separate modules. Thanks to Stephan Jaekel. git-svn-id: http://code.djangoproject.com/svn/django/trunk@13889 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-09-27 23:15:04 +08:00
Fixed #18042 -- Advanced deprecation warnings. Thanks Ramiro for the patch. 2012-05-03 21:27:01 +08:00			`from django.test import SimpleTestCase`
Reorganized utils tests so it's all in separate modules. Thanks to Stephan Jaekel. git-svn-id: http://code.djangoproject.com/svn/django/trunk@13889 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-09-27 23:15:04 +08:00			`from django.utils import text`

Fixed #18042 -- Advanced deprecation warnings. Thanks Ramiro for the patch. 2012-05-03 21:27:01 +08:00			`class TestUtilsText(SimpleTestCase):`

Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`def test_truncate_chars(self):`
			`truncator = text.Truncator(`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`'The quick brown fox jumped over the lazy dog.'`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`)`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('The quick brown fox jumped over the lazy dog.',`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`truncator.chars(100)),`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('The quick brown fox ...',`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`truncator.chars(23)),`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('The quick brown fo.....',`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`truncator.chars(23, '.....')),`

			`# Ensure that we normalize our unicode data first`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`nfc = text.Truncator('o\xfco\xfco\xfco\xfc')`
			`nfd = text.Truncator('ou\u0308ou\u0308ou\u0308ou\u0308')`
			`self.assertEqual('oüoüoüoü', nfc.chars(8))`
			`self.assertEqual('oüoüoüoü', nfd.chars(8))`
			`self.assertEqual('oü...', nfc.chars(5))`
			`self.assertEqual('oü...', nfd.chars(5))`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00
			`# Ensure the final length is calculated correctly when there are`
			`# combining characters with no precomposed form, and that combining`
			`# characters are not split up.`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`truncator = text.Truncator('-B\u030AB\u030A----8')`
			`self.assertEqual('-B\u030A...', truncator.chars(5))`
			`self.assertEqual('-B\u030AB\u030A-...', truncator.chars(7))`
			`self.assertEqual('-B\u030AB\u030A----8', truncator.chars(8))`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00
			`# Ensure the length of the end text is correctly calculated when it`
			`# contains combining characters with no precomposed form.`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`truncator = text.Truncator('-----')`
			`self.assertEqual('---B\u030A', truncator.chars(4, 'B\u030A'))`
			`self.assertEqual('-----', truncator.chars(5, 'B\u030A'))`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00
			`# Make a best effort to shorten to the desired length, but requesting`
			`# a length shorter than the ellipsis shouldn't break`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('...', text.Truncator('asdf').chars(1))`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00
Reorganized utils tests so it's all in separate modules. Thanks to Stephan Jaekel. git-svn-id: http://code.djangoproject.com/svn/django/trunk@13889 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-09-27 23:15:04 +08:00			`def test_truncate_words(self):`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`truncator = text.Truncator('The quick brown fox jumped over the lazy '`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`'dog.')`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('The quick brown fox jumped over the lazy dog.',`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`truncator.words(10))`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('The quick brown fox...', truncator.words(4))`
			`self.assertEqual('The quick brown fox[snip]',`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`truncator.words(4, '[snip]'))`

			`def test_truncate_html_words(self):`
Fixed #19693 -- Made truncatewords_html handle self-closing tags Thanks sneawo for the report and Jonathan Loy for the patch. 2013-02-14 01:24:49 +08:00			`truncator = text.Truncator('<p id="par"><strong><em>The quick brown fox'`
			`' jumped over the lazy dog.</em></strong></p>')`
			`self.assertEqual('<p id="par"><strong><em>The quick brown fox jumped over'`
			`' the lazy dog.</em></strong></p>', truncator.words(10, html=True))`
			`self.assertEqual('<p id="par"><strong><em>The quick brown fox...</em>'`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`'</strong></p>', truncator.words(4, html=True))`
Fixed #19693 -- Made truncatewords_html handle self-closing tags Thanks sneawo for the report and Jonathan Loy for the patch. 2013-02-14 01:24:49 +08:00			`self.assertEqual('<p id="par"><strong><em>The quick brown fox....</em>'`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00			`'</strong></p>', truncator.words(4, '....', html=True))`
Fixed #19693 -- Made truncatewords_html handle self-closing tags Thanks sneawo for the report and Jonathan Loy for the patch. 2013-02-14 01:24:49 +08:00			`self.assertEqual('<p id="par"><strong><em>The quick brown fox</em>'`
			`'</strong></p>', truncator.words(4, '', html=True))`

Fixed #10931 -- Made `Truncator` handle newlines properly. Thanks to gsong and Claude Paroz. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17329 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-01-03 02:47:18 +08:00			`# Test with new line inside tag`
			`truncator = text.Truncator('<p>The quick <a href="xyz.html"\n'`
			`'id="mylink">brown fox</a> jumped over the lazy dog.</p>')`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual('<p>The quick <a href="xyz.html"\n'`
Fixed #10931 -- Made `Truncator` handle newlines properly. Thanks to gsong and Claude Paroz. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17329 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2012-01-03 02:47:18 +08:00			`'id="mylink">brown...</a></p>', truncator.words(3, '...', html=True))`
Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2011-07-14 21:47:10 +08:00
Fixed #19693 -- Made truncatewords_html handle self-closing tags Thanks sneawo for the report and Jonathan Loy for the patch. 2013-02-14 01:24:49 +08:00			`# Test self-closing tags`
			`truncator = text.Truncator('<br/>The <hr />quick brown fox jumped over'`
			`' the lazy dog.')`
			`self.assertEqual('<br/>The <hr />quick brown...',`
			`truncator.words(3, '...', html=True ))`
			`truncator = text.Truncator('<br>The <hr/>quick <em>brown fox</em> '`
			`'jumped over the lazy dog.')`
			`self.assertEqual('<br>The <hr/>quick <em>brown...</em>',`
			`truncator.words(3, '...', html=True ))`

Tests for utils.text.wrap git-svn-id: http://code.djangoproject.com/svn/django/trunk@14752 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-12-01 05:21:37 +08:00			`def test_wrap(self):`
			`digits = '1234 67 9'`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`self.assertEqual(text.wrap(digits, 100), '1234 67 9')`
			`self.assertEqual(text.wrap(digits, 9), '1234 67 9')`
			`self.assertEqual(text.wrap(digits, 8), '1234 67\n9')`
Tests for utils.text.wrap git-svn-id: http://code.djangoproject.com/svn/django/trunk@14752 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-12-01 05:21:37 +08:00
			`self.assertEqual(text.wrap('short\na long line', 7),`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`'short\na long\nline')`
Tests for utils.text.wrap git-svn-id: http://code.djangoproject.com/svn/django/trunk@14752 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-12-01 05:21:37 +08:00
			`self.assertEqual(text.wrap('do-not-break-long-words please? ok', 8),`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`'do-not-break-long-words\nplease?\nok')`
Tests for utils.text.wrap git-svn-id: http://code.djangoproject.com/svn/django/trunk@14752 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2010-12-01 05:21:37 +08:00
			`long_word = 'l%sng' % ('o' * 20)`
			`self.assertEqual(text.wrap(long_word, 20), long_word)`
			`self.assertEqual(text.wrap('a %s word' % long_word, 10),`
Fixed #18269 -- Applied unicode_literals for Python 3 compatibility. Thanks Vinay Sajip for the support of his django3 branch and Jannis Leidel for the review. 2012-06-08 00:08:47 +08:00			`'a\n%s\nword' % long_word)`
Fixed #14516 -- Extract methods from removetags and slugify template filters Patch by @jphalip updated to apply, documentation and release notes added. I've documented strip_tags as well as remove_tags as the difference between the two wouldn't be immediately obvious. 2012-08-18 20:53:22 +08:00
			`def test_slugify(self):`
			`items = (`
[py3] Made 212b9826bd Python 3-friendly 2012-08-18 23:47:21 +08:00			`('Hello, World!', 'hello-world'),`
			`('spam & eggs', 'spam-eggs'),`
Fixed #14516 -- Extract methods from removetags and slugify template filters Patch by @jphalip updated to apply, documentation and release notes added. I've documented strip_tags as well as remove_tags as the difference between the two wouldn't be immediately obvious. 2012-08-18 20:53:22 +08:00			`)`
			`for value, output in items:`
			`self.assertEqual(text.slugify(value), output)`
Fix #21185: Added tests for unescape_entities. Also fixed a py3 incompatibility. Thanks to brutasse for the report. 2013-09-27 23:00:42 +08:00
			`def test_unescape_entities(self):`
			`items = [`
			`('', ''),`
			`('foo', 'foo'),`
			`('&amp;', '&'),`
			`('&#x26;', '&'),`
			`('&#38;', '&'),`
			`('foo &amp; bar', 'foo & bar'),`
			`('foo & bar', 'foo & bar'),`
			`]`
			`for value, output in items:`
			`self.assertEqual(text.unescape_entities(value), output)`