2011-07-14 21:47:10 +08:00
|
|
|
|
# -*- coding: utf-8 -*-
|
2012-06-08 00:08:47 +08:00
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
2014-02-19 01:07:07 +08:00
|
|
|
|
from unittest import skipUnless
|
2014-02-21 21:46:23 +08:00
|
|
|
|
import warnings
|
2014-02-19 01:07:07 +08:00
|
|
|
|
|
2012-05-03 21:27:01 +08:00
|
|
|
|
from django.test import SimpleTestCase
|
2013-12-12 22:58:14 +08:00
|
|
|
|
from django.utils import six, text
|
2010-09-27 23:15:04 +08:00
|
|
|
|
|
2014-02-19 01:07:07 +08:00
|
|
|
|
# True when the astral code point U+1F4A9 is seen as a single character.
# Tests that need that behaviour are skipped otherwise.
IS_WIDE_BUILD = len('\U0001F4A9') == 1
|
|
|
|
|
|
2013-11-03 05:34:05 +08:00
|
|
|
|
|
2012-05-03 21:27:01 +08:00
|
|
|
|
class TestUtilsText(SimpleTestCase):
|
|
|
|
|
|
2011-07-14 21:47:10 +08:00
|
|
|
|
def test_truncate_chars(self):
    """Check Truncator.chars() against a range of length budgets.

    The cases below show that the budget includes the truncation marker,
    that a combining character is kept with its base character, and that
    a budget shorter than the marker still returns the marker.
    """
    truncator = text.Truncator(
        'The quick brown fox jumped over the lazy dog.'
    )
    # Each assertion is a plain statement; a trailing comma here would
    # silently wrap the call in a discarded one-element tuple.
    self.assertEqual('The quick brown fox jumped over the lazy dog.',
        truncator.chars(100))
    self.assertEqual('The quick brown fox ...',
        truncator.chars(23))
    self.assertEqual('The quick brown fo.....',
        truncator.chars(23, '.....'))

    # Ensure that we normalize our unicode data first
    nfc = text.Truncator('o\xfco\xfco\xfco\xfc')
    nfd = text.Truncator('ou\u0308ou\u0308ou\u0308ou\u0308')
    self.assertEqual('oüoüoüoü', nfc.chars(8))
    self.assertEqual('oüoüoüoü', nfd.chars(8))
    self.assertEqual('oü...', nfc.chars(5))
    self.assertEqual('oü...', nfd.chars(5))

    # Ensure the final length is calculated correctly when there are
    # combining characters with no precomposed form, and that combining
    # characters are not split up.
    truncator = text.Truncator('-B\u030AB\u030A----8')
    self.assertEqual('-B\u030A...', truncator.chars(5))
    self.assertEqual('-B\u030AB\u030A-...', truncator.chars(7))
    self.assertEqual('-B\u030AB\u030A----8', truncator.chars(8))

    # Ensure the length of the end text is correctly calculated when it
    # contains combining characters with no precomposed form.
    truncator = text.Truncator('-----')
    self.assertEqual('---B\u030A', truncator.chars(4, 'B\u030A'))
    self.assertEqual('-----', truncator.chars(5, 'B\u030A'))

    # Make a best effort to shorten to the desired length, but requesting
    # a length shorter than the ellipsis shouldn't break
    self.assertEqual('...', text.Truncator('asdf').chars(1))
|
2011-07-14 21:47:10 +08:00
|
|
|
|
|
2010-09-27 23:15:04 +08:00
|
|
|
|
def test_truncate_words(self):
    """Check Truncator.words() with the default and a custom marker."""
    sentence = 'The quick brown fox jumped over the lazy dog.'
    truncator = text.Truncator(sentence)

    # A limit above the number of words returns the sentence unchanged.
    untouched = truncator.words(10)
    self.assertEqual(sentence, untouched)

    # Four words kept, default marker appended.
    default_marker = truncator.words(4)
    self.assertEqual('The quick brown fox...', default_marker)

    # Four words kept, caller-supplied marker appended.
    custom_marker = truncator.words(4, '[snip]')
    self.assertEqual('The quick brown fox[snip]', custom_marker)
|
|
|
|
|
|
|
|
|
|
def test_truncate_html_words(self):
    """Check Truncator.words(..., html=True) on marked-up input.

    Covers nested tags, a newline inside a tag, self-closing tags and
    HTML entities. In every truncated case the expected output still
    closes the tags that were open at the cut.
    """
    truncator = text.Truncator('<p id="par"><strong><em>The quick brown fox'
        ' jumped over the lazy dog.</em></strong></p>')
    self.assertEqual('<p id="par"><strong><em>The quick brown fox jumped over'
        ' the lazy dog.</em></strong></p>', truncator.words(10, html=True))
    self.assertEqual('<p id="par"><strong><em>The quick brown fox...</em>'
        '</strong></p>', truncator.words(4, html=True))
    self.assertEqual('<p id="par"><strong><em>The quick brown fox....</em>'
        '</strong></p>', truncator.words(4, '....', html=True))
    self.assertEqual('<p id="par"><strong><em>The quick brown fox</em>'
        '</strong></p>', truncator.words(4, '', html=True))

    # Test with new line inside tag
    truncator = text.Truncator('<p>The quick <a href="xyz.html"\n'
        'id="mylink">brown fox</a> jumped over the lazy dog.</p>')
    self.assertEqual('<p>The quick <a href="xyz.html"\n'
        'id="mylink">brown...</a></p>', truncator.words(3, '...', html=True))

    # Test self-closing tags
    truncator = text.Truncator('<br/>The <hr />quick brown fox jumped over'
        ' the lazy dog.')
    self.assertEqual('<br/>The <hr />quick brown...',
        truncator.words(3, '...', html=True))
    truncator = text.Truncator('<br>The <hr/>quick <em>brown fox</em> '
        'jumped over the lazy dog.')
    self.assertEqual('<br>The <hr/>quick <em>brown...</em>',
        truncator.words(3, '...', html=True))

    # Test html entities
    # The inputs must contain real entity references: a literal "<" in
    # the text would be indistinguishable from the start of a tag.
    truncator = text.Truncator('<i>Buenos d&iacute;as!'
        ' &#x00bf;C&oacute;mo est&aacute;?</i>')
    self.assertEqual('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo...</i>',
        truncator.words(3, '...', html=True))
    truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
    self.assertEqual('<p>I &lt;3 python...</p>',
        truncator.words(3, '...', html=True))
|
|
|
|
|
|
2010-12-01 05:21:37 +08:00
|
|
|
|
def test_wrap(self):
    """Check text.wrap() at several widths, including over-long words."""
    digits = '1234 67 9'
    width_cases = (
        (100, '1234 67 9'),
        (9, '1234 67 9'),
        (8, '1234 67\n9'),
    )
    for width, wrapped in width_cases:
        self.assertEqual(text.wrap(digits, width), wrapped)

    # Existing newlines are kept and each line is wrapped separately.
    multiline = text.wrap('short\na long line', 7)
    self.assertEqual(multiline, 'short\na long\nline')

    # A word longer than the width stays in one piece.
    hyphenated = text.wrap('do-not-break-long-words please? ok', 8)
    self.assertEqual(hyphenated, 'do-not-break-long-words\nplease?\nok')

    long_word = 'l%sng' % ('o' * 20)
    self.assertEqual(text.wrap(long_word, 20), long_word)

    embedded = text.wrap('a %s word' % long_word, 10)
    self.assertEqual(embedded, 'a\n%s\nword' % long_word)
|
2012-08-18 20:53:22 +08:00
|
|
|
|
|
2013-12-07 16:28:22 +08:00
|
|
|
|
def test_normalize_newlines(self):
    """Check that normalize_newlines() turns \\r and \\r\\n into \\n."""
    cases = [
        ("abc\ndef\rghi\r\n", "abc\ndef\nghi\n"),
        ("\n\r\r\n\r", "\n\n\n\n"),
        # Text without line breaks, and the empty string, pass through.
        ("abcdefghi", "abcdefghi"),
        ("", ""),
    ]
    for raw, normalized in cases:
        self.assertEqual(text.normalize_newlines(raw), normalized)
|
|
|
|
|
|
2013-12-12 22:58:14 +08:00
|
|
|
|
def test_normalize_newlines_bytes(self):
    """normalize_newlines() accepts bytes and returns a text string."""
    raw_bytes = b"abc\ndef\rghi\r\n"
    result = text.normalize_newlines(raw_bytes)
    # Both the content and the returned type are checked.
    self.assertEqual(result, "abc\ndef\nghi\n")
    self.assertIsInstance(result, six.text_type)
|
|
|
|
|
|
2012-08-18 20:53:22 +08:00
|
|
|
|
def test_slugify(self):
    """Check slugify() output for a couple of sample phrases."""
    expected_slugs = (
        ('Hello, World!', 'hello-world'),
        ('spam & eggs', 'spam-eggs'),
    )
    for phrase, slug in expected_slugs:
        self.assertEqual(text.slugify(phrase), slug)
|
2013-09-27 23:00:42 +08:00
|
|
|
|
|
|
|
|
|
def test_unescape_entities(self):
    """Check unescape_entities() on plain text and entity references.

    Each pair is (input, expected output). The inputs cover text with
    no entities, a bare ampersand, and the named, hexadecimal and
    decimal spellings of the ampersand entity.
    """
    items = [
        ('', ''),
        ('foo', 'foo'),
        # Named, hexadecimal and decimal references to "&".
        ('&amp;', '&'),
        ('&#x26;', '&'),
        ('&#38;', '&'),
        ('foo &amp; bar', 'foo & bar'),
        # A bare ampersand that is not part of an entity is left alone.
        ('foo & bar', 'foo & bar'),
    ]
    for value, output in items:
        self.assertEqual(text.unescape_entities(value), output)
|
2013-11-24 16:10:21 +08:00
|
|
|
|
|
|
|
|
|
def test_get_valid_filename(self):
    """Check which characters get_valid_filename() keeps from a noisy name."""
    cleaned = text.get_valid_filename("^&'@{}[],$=!-#()%+~_123.txt")
    self.assertEqual(cleaned, "-_123.txt")
|
2014-01-04 12:57:50 +08:00
|
|
|
|
|
|
|
|
|
def test_javascript_quote(self):
    """Check javascript_quote() escaping, with and without double quotes.

    The first case covers quotes, backslashes, control characters and a
    closing script tag. The second covers the ``quote_double_quotes``
    keyword, which is expected to replace ``"`` with ``&quot;``.
    """
    source = "<script>alert('Hello \\xff.\n Welcome\there\r');</script>"
    expected = r"<script>alert(\'Hello \\xff.\n Welcome\there\r\');<\/script>"
    # The warning filter is scoped to this block so that every call to
    # javascript_quote() below runs with PendingDeprecationWarning ignored.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PendingDeprecationWarning)
        self.assertEqual(text.javascript_quote(source), expected)

        # Exercising quote_double_quotes keyword argument
        quoted = '"Text"'
        # Default: double quotes pass through untouched.
        self.assertEqual(text.javascript_quote(quoted), '"Text"')
        # Opt-in: the expectation must differ from the default case,
        # otherwise the keyword is not actually being verified.
        self.assertEqual(text.javascript_quote(quoted, quote_double_quotes=True),
                         '&quot;Text&quot;')
|
2014-02-16 01:55:33 +08:00
|
|
|
|
|
2014-02-21 21:46:23 +08:00
|
|
|
|
@skipUnless(IS_WIDE_BUILD, 'Not running in a wide build of Python')
def test_javascript_quote_unicode(self):
    """Check javascript_quote() on input holding a non-BMP character."""
    source = "<script>alert('Hello \\xff.\n Wel𝕃come\there\r');</script>"
    expected = r"<script>alert(\'Hello \\xff.\n Wel𝕃come\there\r\');<\/script>"
    with warnings.catch_warnings():
        # Ignore PendingDeprecationWarning while the call runs.
        warnings.simplefilter("ignore", PendingDeprecationWarning)
        quoted = text.javascript_quote(source)
        self.assertEqual(quoted, expected)
|
|
|
|
|
|
|
|
|
|
def test_deprecation(self):
    """Check that javascript_quote() warns and points at escapejs()."""
    with warnings.catch_warnings(record=True) as caught:
        # "always" stops repeated warnings from being filtered out.
        warnings.simplefilter("always")
        text.javascript_quote('thingy')
        # Exactly one warning, and its message names the replacement.
        self.assertEqual(len(caught), 1)
        only_warning = caught[0]
        self.assertIn('escapejs()', repr(only_warning.message))
|