From 3b774583711e39dae7a5cfde314288f8019f59c6 Mon Sep 17 00:00:00 2001 From: Jannis Leidel Date: Thu, 14 Jul 2011 13:47:10 +0000 Subject: [PATCH] Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/contrib/admin/widgets.py | 4 +- django/template/defaultfilters.py | 20 ++- django/utils/text.py | 250 +++++++++++++++++++--------- docs/internals/deprecation.txt | 6 + docs/ref/templates/builtins.txt | 18 ++ docs/releases/1.4.txt | 8 + tests/regressiontests/utils/text.py | 61 ++++++- 7 files changed, 281 insertions(+), 86 deletions(-) diff --git a/django/contrib/admin/widgets.py b/django/contrib/admin/widgets.py index 1f672954e4..d0269a95f2 100644 --- a/django/contrib/admin/widgets.py +++ b/django/contrib/admin/widgets.py @@ -9,7 +9,7 @@ from django.forms.widgets import RadioFieldRenderer from django.forms.util import flatatt from django.templatetags.static import static from django.utils.html import escape -from django.utils.text import truncate_words +from django.utils.text import Truncator from django.utils.translation import ugettext as _ from django.utils.safestring import mark_safe from django.utils.encoding import force_unicode @@ -152,7 +152,7 @@ class ForeignKeyRawIdWidget(forms.TextInput): key = self.rel.get_related_field().name try: obj = self.rel.to._default_manager.using(self.db).get(**{key: value}) - return ' %s' % escape(truncate_words(obj, 14)) + return ' %s' % escape(Truncator(obj).words(14, truncate='...')) except (ValueError, self.rel.to.DoesNotExist): return '' diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py index 74ae849314..d52f901be4 100644 --- a/django/template/defaultfilters.py +++ b/django/template/defaultfilters.py @@ -15,7 +15,7 @@ from django.utils.encoding import force_unicode, iri_to_uri from django.utils.html import (conditional_escape, escapejs, fix_ampersands, 
escape, urlize as urlize_impl, linebreaks, strip_tags) from django.utils.http import urlquote -from django.utils.text import truncate_words, truncate_html_words, wrap, phone2numeric +from django.utils.text import Truncator, wrap, phone2numeric from django.utils.safestring import mark_safe, SafeData, mark_for_escaping from django.utils.timesince import timesince, timeuntil from django.utils.translation import ugettext, ungettext @@ -244,6 +244,20 @@ def title(value): title.is_safe = True title = stringfilter(title) +def truncatechars(value, arg): + """ + Truncates a string after a certain number of characters. + + Argument: Number of characters to truncate after. + """ + try: + length = int(arg) + except ValueError: # Invalid literal for int(). + return value # Fail silently. + return Truncator(value).chars(value, length) +truncatechars.is_safe = True +truncatechars = stringfilter(truncatechars) + def truncatewords(value, arg): """ Truncates a string after a certain number of words. @@ -256,7 +270,7 @@ def truncatewords(value, arg): length = int(arg) except ValueError: # Invalid literal for int(). return value # Fail silently. - return truncate_words(value, length) + return Truncator(value).words(length, truncate=' ...') truncatewords.is_safe = True truncatewords = stringfilter(truncatewords) @@ -272,7 +286,7 @@ def truncatewords_html(value, arg): length = int(arg) except ValueError: # invalid literal for int() return value # Fail silently. 
- return truncate_html_words(value, length) + return Truncator(value).words(length, html=True, truncate=' ...') truncatewords_html.is_safe = True truncatewords_html = stringfilter(truncatewords_html) diff --git a/django/utils/text.py b/django/utils/text.py index 6c574b4541..14555ddd0e 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -1,4 +1,6 @@ import re +import unicodedata +import warnings from gzip import GzipFile from htmlentitydefs import name2codepoint @@ -8,14 +10,18 @@ except ImportError: from StringIO import StringIO from django.utils.encoding import force_unicode -from django.utils.functional import allow_lazy -from django.utils.translation import ugettext_lazy, ugettext as _ - +from django.utils.functional import allow_lazy, SimpleLazyObject +from django.utils.translation import ugettext_lazy, ugettext as _, pgettext # Capitalizes the first letter of a string. capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:] capfirst = allow_lazy(capfirst, unicode) +# Set up regular expressions +re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) +re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') + + def wrap(text, width): """ A word-wrap function that preserves existing line breaks and most spaces in @@ -44,88 +50,172 @@ def wrap(text, width): return u''.join(_generator()) wrap = allow_lazy(wrap, unicode) -def truncate_words(s, num, end_text='...'): - """Truncates a string after a certain number of words. Takes an optional - argument of what should be used to notify that the string has been - truncated, defaulting to ellipsis (...) - Newlines in the string will be stripped. +class Truncator(SimpleLazyObject): """ - s = force_unicode(s) - length = int(num) - words = s.split() - if len(words) > length: - words = words[:length] - if not words[-1].endswith(end_text): - words.append(end_text) - return u' '.join(words) + An object used to truncate text, either by characters or words. 
+ """ + def __init__(self, text): + super(Truncator, self).__init__(lambda: force_unicode(text)) + + def add_truncation_text(self, text, truncate=None): + if truncate is None: + truncate = pgettext( + 'String to return when truncating text', + u'%(truncated_text)s...') + truncate = force_unicode(truncate) + if '%(truncated_text)s' in truncate: + return truncate % {'truncated_text': text} + # The truncation text didn't contain the %(truncated_text)s string + # replacement argument so just append it to the text. + if text.endswith(truncate): + # But don't append the truncation text if the current text already + # ends in this. + return text + return '%s%s' % (text, truncate) + + def chars(self, num, truncate=None): + """ + Returns the text truncated to be no longer than the specified number + of characters. + + Takes an optional argument of what should be used to notify that the + string has been truncated, defaulting to a translatable string of an + ellipsis (...). + """ + length = int(num) + text = unicodedata.normalize('NFC', self._wrapped) + + # Calculate the length to truncate to (max length - end_text length) + truncate_len = length + for char in self.add_truncation_text('', truncate): + if not unicodedata.combining(char): + truncate_len -= 1 + if truncate_len == 0: + break + + s_len = 0 + end_index = None + for i, char in enumerate(text): + if unicodedata.combining(char): + # Don't consider combining characters + # as adding to the string length + continue + s_len += 1 + if end_index is None and s_len > truncate_len: + end_index = i + if s_len > length: + # Return the truncated string + return self.add_truncation_text(text[:end_index or 0], + truncate) + + # Return the original string since no truncation was necessary + return text + chars = allow_lazy(chars) + + def words(self, num, truncate=None, html=False): + """ + Truncates a string after a certain number of words. 
Takes an optional + argument of what should be used to notify that the string has been + truncated, defaulting to ellipsis (...). + """ + length = int(num) + if html: + return self._html_words(length, truncate) + return self._text_words(length, truncate) + words = allow_lazy(words) + + def _text_words(self, length, truncate): + """ + Truncates a string after a certain number of words. + + Newlines in the string will be stripped. + """ + words = self._wrapped.split() + if len(words) > length: + words = words[:length] + return self.add_truncation_text(u' '.join(words), truncate) + return u' '.join(words) + + def _html_words(self, length, truncate): + """ + Truncates HTML to a certain number of words (not counting tags and + comments). Closes opened tags if they were correctly closed in the + given HTML. + + Newlines in the HTML are preserved. + """ + if length <= 0: + return u'' + html4_singlets = ( + 'br', 'col', 'link', 'base', 'img', + 'param', 'area', 'hr', 'input' + ) + # Count non-HTML words and keep note of open tags + pos = 0 + end_text_pos = 0 + words = 0 + open_tags = [] + while words <= length: + m = re_words.search(self._wrapped, pos) + if not m: + # Checked through whole string + break + pos = m.end(0) + if m.group(1): + # It's an actual non-HTML word + words += 1 + if words == length: + end_text_pos = pos + continue + # Check for tag + tag = re_tag.match(m.group(0)) + if not tag or end_text_pos: + # Don't worry about non tags or tags after our truncate point + continue + closing_tag, tagname, self_closing = tag.groups() + # Element names are always case-insensitive + tagname = tagname.lower() + if self_closing or tagname in html4_singlets: + pass + elif closing_tag: + # Check for match in open tags list + try: + i = open_tags.index(tagname) + except ValueError: + pass + else: + # SGML: An end tag closes, back to the matching start tag, + # all unclosed intervening start tags with omitted end tags + open_tags = open_tags[i + 1:] + else: + # Add it to the 
start of the open tags list + open_tags.insert(0, tagname) + if words <= length: + # Don't try to close tags if we don't need to truncate + return self._wrapped + out = self._wrapped[:end_text_pos] + truncate_text = self.add_truncation_text('', truncate) + if truncate_text: + out += truncate_text + # Close any tags still open + for tag in open_tags: + out += '' % tag + # Return string + return out + +def truncate_words(s, num, end_text='...'): + warnings.warn('This function has been deprecated. Use the Truncator class ' + 'in django.utils.text instead.', category=PendingDeprecationWarning) + truncate = end_text and ' %s' % end_text or '' + return Truncator(s).words(num, truncate=truncate) truncate_words = allow_lazy(truncate_words, unicode) def truncate_html_words(s, num, end_text='...'): - """Truncates HTML to a certain number of words (not counting tags and - comments). Closes opened tags if they were correctly closed in the given - html. Takes an optional argument of what should be used to notify that the - string has been truncated, defaulting to ellipsis (...). - - Newlines in the HTML are preserved. 
- """ - s = force_unicode(s) - length = int(num) - if length <= 0: - return u'' - html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') - # Set up regular expressions - re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) - re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') - # Count non-HTML words and keep note of open tags - pos = 0 - end_text_pos = 0 - words = 0 - open_tags = [] - while words <= length: - m = re_words.search(s, pos) - if not m: - # Checked through whole string - break - pos = m.end(0) - if m.group(1): - # It's an actual non-HTML word - words += 1 - if words == length: - end_text_pos = pos - continue - # Check for tag - tag = re_tag.match(m.group(0)) - if not tag or end_text_pos: - # Don't worry about non tags or tags after our truncate point - continue - closing_tag, tagname, self_closing = tag.groups() - tagname = tagname.lower() # Element names are always case-insensitive - if self_closing or tagname in html4_singlets: - pass - elif closing_tag: - # Check for match in open tags list - try: - i = open_tags.index(tagname) - except ValueError: - pass - else: - # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags - open_tags = open_tags[i+1:] - else: - # Add it to the start of the open tags list - open_tags.insert(0, tagname) - if words <= length: - # Don't try to close tags if we don't need to truncate - return s - out = s[:end_text_pos] - if end_text: - out += ' ' + end_text - # Close any tags still open - for tag in open_tags: - out += '' % tag - # Return string - return out + warnings.warn('This function has been deprecated. 
Use the Truncator class ' + 'in django.utils.text instead.', category=PendingDeprecationWarning) + truncate = end_text and ' %s' % end_text or '' + return Truncator(s).words(num, truncate=truncate, html=True) truncate_html_words = allow_lazy(truncate_html_words, unicode) def get_valid_filename(s): diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index c20d5bec82..3639cc2fd0 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -220,6 +220,12 @@ their deprecation, as per the :ref:`Django deprecation policy was deprecated since Django 1.4 and will be removed in favor of the generic static files handling. + * The built-in truncation functions + :func:`django.utils.text.truncate_words` and + :func:`django.utils.text.truncate_html_words` + were deprecated since Django 1.4 and will be removed in favor + of the ``django.utils.text.Truncator`` class. + * 2.0 * ``django.views.defaults.shortcut()``. This function has been moved to ``django.contrib.contenttypes.views.shortcut()`` as part of the diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt index bdac556652..5c08c66c21 100644 --- a/docs/ref/templates/builtins.txt +++ b/docs/ref/templates/builtins.txt @@ -2055,6 +2055,24 @@ For example:: If ``value`` is ``"my first post"``, the output will be ``"My First Post"``. +.. templatefilter:: truncatechars + +truncatechars +^^^^^^^^^^^^^ + +.. versionadded:: 1.4 + +Truncates a string if it is longer than the specified number of characters. +Truncated strings will end with a translatable ellipsis sequence ("..."). + +**Argument:** Number of characters to truncate to + +For example:: + + {{ value|truncatechars:9 }} + +If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``. + ..
templatefilter:: truncatewords truncatewords diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt index 55d845305c..43c9ae25c5 100644 --- a/docs/releases/1.4.txt +++ b/docs/releases/1.4.txt @@ -145,6 +145,14 @@ A new helper function, ``template.Library`` to ease the creation of template tags that store some data in a specified context variable. +``truncatechars`` template filter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Added a filter which truncates a string to be no longer than the specified +number of characters. Truncated strings end with a translatable ellipsis +sequence ("..."). See the :tfilter:`truncatechars docs <truncatechars>` for +more details. + CSRF improvements ~~~~~~~~~~~~~~~~~ diff --git a/tests/regressiontests/utils/text.py b/tests/regressiontests/utils/text.py index f565d87b1d..d4aa53fba9 100644 --- a/tests/regressiontests/utils/text.py +++ b/tests/regressiontests/utils/text.py @@ -1,10 +1,69 @@ +# -*- coding: utf-8 -*- import unittest from django.utils import text class TestUtilsText(unittest.TestCase): + def test_truncate_chars(self): + truncator = text.Truncator( + u'The quick brown fox jumped over the lazy dog.' + ) + self.assertEqual(u'The quick brown fox jumped over the lazy dog.', + truncator.chars(100)), + self.assertEqual(u'The quick brown fox ...', + truncator.chars(23)), + self.assertEqual(u'The quick brown fo.....', + truncator.chars(23, '.....')), + + # Ensure that we normalize our unicode data first + nfc = text.Truncator(u'o\xfco\xfco\xfco\xfc') + nfd = text.Truncator(u'ou\u0308ou\u0308ou\u0308ou\u0308') + self.assertEqual(u'oüoüoüoü', nfc.chars(8)) + self.assertEqual(u'oüoüoüoü', nfd.chars(8)) + self.assertEqual(u'oü...', nfc.chars(5)) + self.assertEqual(u'oü...', nfd.chars(5)) + + # Ensure the final length is calculated correctly when there are + # combining characters with no precomposed form, and that combining + # characters are not split up.
+ truncator = text.Truncator(u'-B\u030AB\u030A----8') + self.assertEqual(u'-B\u030A...', truncator.chars(5)) + self.assertEqual(u'-B\u030AB\u030A-...', truncator.chars(7)) + self.assertEqual(u'-B\u030AB\u030A----8', truncator.chars(8)) + + # Ensure the length of the end text is correctly calculated when it + # contains combining characters with no precomposed form. + truncator = text.Truncator(u'-----') + self.assertEqual(u'---B\u030A', truncator.chars(4, u'B\u030A')) + self.assertEqual(u'-----', truncator.chars(5, u'B\u030A')) + + # Make a best effort to shorten to the desired length, but requesting + # a length shorter than the ellipsis shouldn't break + self.assertEqual(u'...', text.Truncator(u'asdf').chars(1)) + def test_truncate_words(self): + truncator = text.Truncator(u'The quick brown fox jumped over the lazy ' + 'dog.') + self.assertEqual(u'The quick brown fox jumped over the lazy dog.', + truncator.words(10)) + self.assertEqual(u'The quick brown fox...', truncator.words(4)) + self.assertEqual(u'The quick brown fox[snip]', + truncator.words(4, '[snip]')) + + def test_truncate_html_words(self): + truncator = text.Truncator('

The quick brown fox jumped ' + 'over the lazy dog.

') + self.assertEqual(u'

The quick brown fox jumped over the ' + 'lazy dog.

', truncator.words(10, html=True)) + self.assertEqual(u'

The quick brown fox...' + '

', truncator.words(4, html=True)) + self.assertEqual(u'

The quick brown fox....' + '

', truncator.words(4, '....', html=True)) + self.assertEqual(u'

The quick brown fox' + '

', truncator.words(4, '', html=True)) + + def test_old_truncate_words(self): self.assertEqual(u'The quick brown fox jumped over the lazy dog.', text.truncate_words(u'The quick brown fox jumped over the lazy dog.', 10)) self.assertEqual(u'The quick brown fox ...', @@ -12,7 +71,7 @@ class TestUtilsText(unittest.TestCase): self.assertEqual(u'The quick brown fox ....', text.truncate_words('The quick brown fox jumped over the lazy dog.', 4, '....')) - def test_truncate_html_words(self): + def test_old_truncate_html_words(self): self.assertEqual(u'

The quick brown fox jumped over the lazy dog.

', text.truncate_html_words('

The quick brown fox jumped over the lazy dog.

', 10)) self.assertEqual(u'

The quick brown fox ...

',