Fixed #5025 -- Add a "truncatechars" template filter. Many thanks to Chris Beaven.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16542 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2011-07-14 13:47:10 +00:00 · 2011-07-14 13:47:10 +00:00 · 3b77458371
parent 12b7c2a702
commit 3b77458371
7 changed files with 281 additions and 86 deletions
--- a/django/contrib/admin/widgets.py
+++ b/django/contrib/admin/widgets.py
@ -9,7 +9,7 @@ from django.forms.widgets import RadioFieldRenderer
 from django.forms.util import flatatt
 from django.templatetags.static import static
 from django.utils.html import escape
-from django.utils.text import truncate_words
+from django.utils.text import Truncator
 from django.utils.translation import ugettext as _
 from django.utils.safestring import mark_safe
 from django.utils.encoding import force_unicode
@ -152,7 +152,7 @@ class ForeignKeyRawIdWidget(forms.TextInput):
        key = self.rel.get_related_field().name
        try:
            obj = self.rel.to._default_manager.using(self.db).get(**{key: value})
-            return '&nbsp;<strong>%s</strong>' % escape(truncate_words(obj, 14))
+            return '&nbsp;<strong>%s</strong>' % escape(Truncator(obj).words(14, truncate='...'))
        except (ValueError, self.rel.to.DoesNotExist):
            return ''
--- a/django/template/defaultfilters.py
+++ b/django/template/defaultfilters.py
@ -15,7 +15,7 @@ from django.utils.encoding import force_unicode, iri_to_uri
 from django.utils.html import (conditional_escape, escapejs, fix_ampersands,
    escape, urlize as urlize_impl, linebreaks, strip_tags)
 from django.utils.http import urlquote
-from django.utils.text import truncate_words, truncate_html_words, wrap, phone2numeric
+from django.utils.text import Truncator, wrap, phone2numeric
 from django.utils.safestring import mark_safe, SafeData, mark_for_escaping
 from django.utils.timesince import timesince, timeuntil
 from django.utils.translation import ugettext, ungettext
@ -244,6 +244,20 @@ def title(value):
 title.is_safe = True
 title = stringfilter(title)
 def truncatechars(value, arg):
    """
    Truncates a string after a certain number of characters.
    Argument: Number of characters to truncate after.
    If the argument cannot be converted with int(), the value is returned
    unchanged.
    """
    try:
        length = int(arg)
    except ValueError: # Invalid literal for int().
        return value # Fail silently.
    # The text is already bound to the Truncator instance; chars() takes the
    # maximum length as its first argument (and the truncation text second).
    return Truncator(value).chars(length)
 truncatechars.is_safe = True
 truncatechars = stringfilter(truncatechars)
 def truncatewords(value, arg):
    """
    Truncates a string after a certain number of words.
@ -256,7 +270,7 @@ def truncatewords(value, arg):
        length = int(arg)
    except ValueError: # Invalid literal for int().
        return value # Fail silently.
-    return truncate_words(value, length)
+    return Truncator(value).words(length, truncate=' ...')
 truncatewords.is_safe = True
 truncatewords = stringfilter(truncatewords)
@ -272,7 +286,7 @@ def truncatewords_html(value, arg):
        length = int(arg)
    except ValueError: # invalid literal for int()
        return value # Fail silently.
-    return truncate_html_words(value, length)
+    return Truncator(value).words(length, html=True, truncate=' ...')
 truncatewords_html.is_safe = True
 truncatewords_html = stringfilter(truncatewords_html)
--- a/django/utils/text.py
+++ b/django/utils/text.py
@ -1,4 +1,6 @@
 import re
 import unicodedata
 import warnings
 from gzip import GzipFile
 from htmlentitydefs import name2codepoint
@ -8,14 +10,18 @@ except ImportError:
    from StringIO import StringIO
 from django.utils.encoding import force_unicode
-from django.utils.functional import allow_lazy
+from django.utils.functional import allow_lazy, SimpleLazyObject
-from django.utils.translation import ugettext_lazy, ugettext as _
+from django.utils.translation import ugettext_lazy, ugettext as _, pgettext
 # Capitalizes the first letter of a string.
 capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
 capfirst = allow_lazy(capfirst, unicode)
 # Set up regular expressions
 re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks and most spaces in
@ -44,88 +50,172 @@ def wrap(text, width):
    return u''.join(_generator())
 wrap = allow_lazy(wrap, unicode)
 def truncate_words(s, num, end_text='...'):
    """Truncates a string after a certain number of words. Takes an optional
    argument of what should be used to notify that the string has been
    truncated, defaulting to ellipsis (...)
-    Newlines in the string will be stripped.
+class Truncator(SimpleLazyObject):
    """
-    s = force_unicode(s)
+    An object used to truncate text, either by characters or words.
-    length = int(num)
+    """
-    words = s.split()
+    def __init__(self, text):
-    if len(words) > length:
+        super(Truncator, self).__init__(lambda: force_unicode(text))
-        words = words[:length]
+
-        if not words[-1].endswith(end_text):
+    def add_truncation_text(self, text, truncate=None):
-            words.append(end_text)
+        if truncate is None:
-    return u' '.join(words)
+            truncate = pgettext(
                'String to return when truncating text',
                u'%(truncated_text)s...')
        truncate = force_unicode(truncate)
        if '%(truncated_text)s' in truncate:
            return truncate % {'truncated_text': text}
        # The truncation text didn't contain the %(truncated_text)s string
        # replacement argument so just append it to the text.
        if text.endswith(truncate):
            # But don't append the truncation text if the current text already
            # ends in this.
            return text
        return '%s%s' % (text, truncate)
    def chars(self, num, truncate=None):
        """
        Returns the text truncated to be no longer than the specified number
        of characters.
        Takes an optional argument of what should be used to notify that the
        string has been truncated, defaulting to a translatable string of an
        ellipsis (...).
        ``num`` is converted with ``int()``. The text is NFC-normalized before
        it is measured, and Unicode combining characters are not counted
        towards the length. When the normalized text already fits, it is
        returned without any truncation text.
        """
        length = int(num)
        text = unicodedata.normalize('NFC', self._wrapped)
        # Calculate the length to truncate to (max length - end_text length)
        # The truncation text is rendered against an empty string so that only
        # its own (non-combining) characters are subtracted. The countdown
        # stops as soon as it hits zero.
        truncate_len = length
        for char in self.add_truncation_text('', truncate):
            if not unicodedata.combining(char):
                truncate_len -= 1
                if truncate_len == 0:
                    break
        # s_len counts the non-combining characters seen so far; end_index is
        # the position in ``text`` at which the kept part would be cut.
        s_len = 0
        end_index = None
        for i, char in enumerate(text):
            if unicodedata.combining(char):
                # Don't consider combining characters
                # as adding to the string length
                continue
            s_len += 1
            # Remember the first index where the kept text would no longer
            # leave room for the truncation text.
            if end_index is None and s_len > truncate_len:
                end_index = i
            if s_len > length:
                # Return the truncated string
                # (``end_index or 0`` yields 0 if end_index is 0 or None).
                return self.add_truncation_text(text[:end_index or 0],
                                                truncate)
        # Return the original string since no truncation was necessary
        return text
    chars = allow_lazy(chars)
    def words(self, num, truncate=None, html=False):
        """
        Returns the text cut off after ``num`` words (``num`` is converted
        with ``int()``).
        ``truncate`` is the optional text used to mark that the string has
        been shortened. With ``html=True`` the HTML-aware implementation is
        used; otherwise the plain-text one is.
        """
        limit = int(num)
        if not html:
            return self._text_words(limit, truncate)
        return self._html_words(limit, truncate)
    words = allow_lazy(words)
    def _text_words(self, length, truncate):
        """
        Keeps at most ``length`` whitespace-separated words of the text.
        The words are re-joined with single spaces, so newlines and repeated
        whitespace do not survive. The truncation text is only applied when
        words were actually dropped.
        """
        tokens = self._wrapped.split()
        if len(tokens) <= length:
            # Nothing to cut; still normalise the whitespace.
            return u' '.join(tokens)
        kept = u' '.join(tokens[:length])
        return self.add_truncation_text(kept, truncate)
    def _html_words(self, length, truncate):
        """
        Truncates HTML to a certain number of words (not counting tags and
        comments). Closes opened tags if they were correctly closed in the
        given HTML.
        Newlines in the HTML are preserved.
        Returns an empty string when ``length`` is not positive, and the
        original text untouched when it has no more than ``length`` words.
        """
        if length <= 0:
            return u''
        # Tags treated as never needing a closing tag; they are not tracked
        # in ``open_tags`` below.
        html4_singlets = (
            'br', 'col', 'link', 'base', 'img',
            'param', 'area', 'hr', 'input'
        )
        # Count non-HTML words and keep note of open tags
        pos = 0
        # Offset just past the ``length``-th word; stays 0 until it is found.
        end_text_pos = 0
        words = 0
        # Most recently opened tag first.
        open_tags = []
        # Scan until one word beyond the limit has been seen, so that we know
        # whether truncation is needed at all.
        while words <= length:
            m = re_words.search(self._wrapped, pos)
            if not m:
                # Checked through whole string
                break
            pos = m.end(0)
            # Group 1 is only set when the match is a word rather than an
            # entity or a tag.
            if m.group(1):
                # It's an actual non-HTML word
                words += 1
                if words == length:
                    end_text_pos = pos
                continue
            # Check for tag
            tag = re_tag.match(m.group(0))
            if not tag or end_text_pos:
                # Don't worry about non tags or tags after our truncate point
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are always case-insensitive
            tagname = tagname.lower()
            if self_closing or tagname in html4_singlets:
                pass
            elif closing_tag:
                # Check for match in open tags list
                try:
                    i = open_tags.index(tagname)
                except ValueError:
                    # Closing tag without a tracked opening tag: ignore it.
                    pass
                else:
                    # SGML: An end tag closes, back to the matching start tag,
                    # all unclosed intervening start tags with omitted end tags
                    open_tags = open_tags[i + 1:]
            else:
                # Add it to the start of the open tags list
                open_tags.insert(0, tagname)
        if words <= length:
            # Don't try to close tags if we don't need to truncate
            return self._wrapped
        out = self._wrapped[:end_text_pos]
        # Render the truncation text on its own (against empty text) so it can
        # be placed before the closing tags.
        truncate_text = self.add_truncation_text('', truncate)
        if truncate_text:
            out += truncate_text
        # Close any tags still open
        for tag in open_tags:
            out += '</%s>' % tag
        # Return string
        return out
 def truncate_words(s, num, end_text='...'):
    """
    Deprecated wrapper around ``Truncator(s).words(num, ...)``.
    Emits a PendingDeprecationWarning. A non-empty ``end_text`` is appended
    after a single space; an empty one adds nothing.
    """
    message = ('This function has been deprecated. Use the Truncator class '
        'in django.utils.text instead.')
    warnings.warn(message, category=PendingDeprecationWarning)
    if end_text:
        # Keep the historical space between the last word and the end text.
        suffix = ' %s' % end_text
    else:
        suffix = ''
    return Truncator(s).words(num, truncate=suffix)
 truncate_words = allow_lazy(truncate_words, unicode)
 def truncate_html_words(s, num, end_text='...'):
-    """Truncates HTML to a certain number of words (not counting tags and
+    warnings.warn('This function has been deprecated. Use the Truncator class '
-    comments). Closes opened tags if they were correctly closed in the given
+        'in django.utils.text instead.', category=PendingDeprecationWarning)
-    html. Takes an optional argument of what should be used to notify that the
+    truncate = end_text and ' %s' % end_text or ''
-    string has been truncated, defaulting to ellipsis (...).
+    return Truncator(s).words(num, truncate=truncate, html=True)
    Newlines in the HTML are preserved.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0
    words = 0
    open_tags = []
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += ' ' + end_text
    # Close any tags still open
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 truncate_html_words = allow_lazy(truncate_html_words, unicode)
 def get_valid_filename(s):
--- a/docs/internals/deprecation.txt
+++ b/docs/internals/deprecation.txt
@ -220,6 +220,12 @@ their deprecation, as per the :ref:`Django deprecation policy
          was deprecated since Django 1.4 and will be removed in favor of the
          generic static files handling.
        * The builtin truncation functions
          :func:`django.utils.text.truncate_words` and
          :func:`django.utils.text.truncate_html_words`
          were deprecated since Django 1.4 and will be removed in favor
          of the ``django.utils.text.Truncator`` class.
    * 2.0
        * ``django.views.defaults.shortcut()``. This function has been moved
          to ``django.contrib.contenttypes.views.shortcut()`` as part of the
--- a/docs/ref/templates/builtins.txt
+++ b/docs/ref/templates/builtins.txt
@ -2055,6 +2055,24 @@ For example::
 If ``value`` is ``"my first post"``, the output will be ``"My First Post"``.
 .. templatefilter:: truncatechars
 truncatechars
 ^^^^^^^^^^^^^
 .. versionadded:: 1.4
 Truncates a string if it is longer than the specified number of characters.
 Truncated strings will end with a translatable ellipsis sequence ("...").
 **Argument:** Number of characters to truncate to
 For example::
    {{ value|truncatechars:9 }}
 If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
 .. templatefilter:: truncatewords
 truncatewords
--- a/docs/releases/1.4.txt
+++ b/docs/releases/1.4.txt
@ -145,6 +145,14 @@ A new helper function,
 ``template.Library`` to ease the creation of template tags that store some
 data in a specified context variable.
 ``truncatechars`` template filter
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Added a filter which truncates a string to be no longer than the specified
 number of characters. Truncated strings end with a translatable ellipsis
 sequence ("..."). See the :tfilter:`truncatechars docs <truncatechars>` for
 more details.
 CSRF improvements
 ~~~~~~~~~~~~~~~~~
--- a/tests/regressiontests/utils/text.py
+++ b/tests/regressiontests/utils/text.py
@ -1,10 +1,69 @@
 # -*- coding: utf-8 -*-
 import unittest
 from django.utils import text
 class TestUtilsText(unittest.TestCase):
    def test_truncate_chars(self):
        # Exercises Truncator.chars(): text that already fits, the default and
        # a custom truncation text, and the handling of Unicode normalization
        # and combining characters.
        truncator = text.Truncator(
            u'The quick brown fox jumped over the lazy dog.'
        )
        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
            truncator.chars(100))
        self.assertEqual(u'The quick brown fox ...',
            truncator.chars(23))
        self.assertEqual(u'The quick brown fo.....',
            truncator.chars(23, '.....'))
        # Ensure that we normalize our unicode data first
        nfc = text.Truncator(u'o\xfco\xfco\xfco\xfc')
        nfd = text.Truncator(u'ou\u0308ou\u0308ou\u0308ou\u0308')
        self.assertEqual(u'oüoüoüoü', nfc.chars(8))
        self.assertEqual(u'oüoüoüoü', nfd.chars(8))
        self.assertEqual(u'oü...', nfc.chars(5))
        self.assertEqual(u'oü...', nfd.chars(5))
        # Ensure the final length is calculated correctly when there are
        # combining characters with no precomposed form, and that combining
        # characters are not split up.
        truncator = text.Truncator(u'-B\u030AB\u030A----8')
        self.assertEqual(u'-B\u030A...', truncator.chars(5))
        self.assertEqual(u'-B\u030AB\u030A-...', truncator.chars(7))
        self.assertEqual(u'-B\u030AB\u030A----8', truncator.chars(8))
        # Ensure the length of the end text is correctly calculated when it
        # contains combining characters with no precomposed form.
        truncator = text.Truncator(u'-----')
        self.assertEqual(u'---B\u030A', truncator.chars(4, u'B\u030A'))
        self.assertEqual(u'-----', truncator.chars(5, u'B\u030A'))
        # Make a best effort to shorten to the desired length, but requesting
        # a length shorter than the ellipsis shouldn't break
        self.assertEqual(u'...', text.Truncator(u'asdf').chars(1))
    def test_truncate_words(self):
        # Word-based truncation: text within the limit, the default
        # truncation text, and a custom truncation text.
        sentence = u'The quick brown fox jumped over the lazy dog.'
        truncator = text.Truncator(sentence)
        untouched = truncator.words(10)
        self.assertEqual(sentence, untouched)
        default_marker = truncator.words(4)
        self.assertEqual(u'The quick brown fox...', default_marker)
        custom_marker = truncator.words(4, '[snip]')
        self.assertEqual(u'The quick brown fox[snip]', custom_marker)
    def test_truncate_html_words(self):
        # HTML-aware word truncation: markup within the limit is returned
        # as-is; otherwise the open tags are closed after the truncation text
        # (default, custom, or empty).
        markup = ('<p><strong><em>The quick brown fox jumped over the lazy '
            'dog.</em></strong></p>')
        truncator = text.Truncator(markup)
        closing = u'</em></strong></p>'
        kept = u'<p><strong><em>The quick brown fox'
        self.assertEqual(
            u'<p><strong><em>The quick brown fox jumped over the lazy dog.'
            u'</em></strong></p>',
            truncator.words(10, html=True))
        self.assertEqual(kept + u'...' + closing,
            truncator.words(4, html=True))
        self.assertEqual(kept + u'....' + closing,
            truncator.words(4, '....', html=True))
        self.assertEqual(kept + closing,
            truncator.words(4, '', html=True))
    def test_old_truncate_words(self):
        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
            text.truncate_words(u'The quick brown fox jumped over the lazy dog.', 10))
        self.assertEqual(u'The quick brown fox ...',
@ -12,7 +71,7 @@ class TestUtilsText(unittest.TestCase):
        self.assertEqual(u'The quick brown fox ....',
            text.truncate_words('The quick brown fox jumped over the lazy dog.', 4, '....'))
-    def test_truncate_html_words(self):
+    def test_old_truncate_html_words(self):
        self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>',
            text.truncate_html_words('<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>', 10))
        self.assertEqual(u'<p><strong><em>The quick brown fox ...</em></strong></p>',