From f94f466cd3461527fb76a3e8951039a3c2388829 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Thu, 10 Jan 2013 17:27:20 +0800 Subject: [PATCH] Fixed #19496 -- Added truncatechars_html filter. Thanks esevece for the suggestion and Nick Sandford and Martin Warne for the initial work on the patch. --- django/template/defaultfilters.py | 17 ++++++++++ django/utils/text.py | 53 +++++++++++++++++++------------ docs/ref/templates/builtins.txt | 20 ++++++++++++ docs/releases/1.7.txt | 5 ++- tests/defaultfilters/tests.py | 21 ++++++++++-- 5 files changed, 93 insertions(+), 23 deletions(-) diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py index 865f5fe7f31..64f8e28190d 100644 --- a/django/template/defaultfilters.py +++ b/django/template/defaultfilters.py @@ -279,6 +279,23 @@ def truncatechars(value, arg): return Truncator(value).chars(length) +@register.filter(is_safe=True) +@stringfilter +def truncatechars_html(value, arg): + """ + Truncates HTML after a certain number of chars. + + Argument: Number of chars to truncate after. + + Newlines in the HTML are preserved. + """ + try: + length = int(arg) + except ValueError: # invalid literal for int() + return value # Fail silently. 
+ return Truncator(value).chars(length, html=True) + + @register.filter(is_safe=True) @stringfilter def truncatewords(value, arg): diff --git a/django/utils/text.py b/django/utils/text.py index c2da31a554e..d277cee56b4 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -24,6 +24,7 @@ capfirst = allow_lazy(capfirst, six.text_type) # Set up regular expressions re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S) +re_chars = re.compile(r'<.*?>|(.)', re.U | re.S) re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S) re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))') @@ -82,7 +83,7 @@ class Truncator(SimpleLazyObject): return text return '%s%s' % (text, truncate) - def chars(self, num, truncate=None): + def chars(self, num, truncate=None, html=False): """ Returns the text truncated to be no longer than the specified number of characters. @@ -101,7 +102,15 @@ class Truncator(SimpleLazyObject): truncate_len -= 1 if truncate_len == 0: break + if html: + return self._truncate_html(length, truncate, text, truncate_len, False) + return self._text_chars(length, truncate, text, truncate_len) + chars = allow_lazy(chars) + def _text_chars(self, length, truncate, text, truncate_len): + """ + Truncates a string after a certain number of chars. 
+ """ s_len = 0 end_index = None for i, char in enumerate(text): @@ -119,7 +128,6 @@ class Truncator(SimpleLazyObject): # Return the original string since no truncation was necessary return text - chars = allow_lazy(chars) def words(self, num, truncate=None, html=False): """ @@ -129,7 +137,7 @@ class Truncator(SimpleLazyObject): """ length = int(num) if html: - return self._html_words(length, truncate) + return self._truncate_html(length, truncate, self._wrapped, length, True) return self._text_words(length, truncate) words = allow_lazy(words) @@ -145,40 +153,45 @@ class Truncator(SimpleLazyObject): return self.add_truncation_text(' '.join(words), truncate) return ' '.join(words) - def _html_words(self, length, truncate): + def _truncate_html(self, length, truncate, text, truncate_len, words): """ - Truncates HTML to a certain number of words (not counting tags and - comments). Closes opened tags if they were correctly closed in the - given HTML. + Truncates HTML to a certain number of chars (not counting tags and + comments), or, if words is True, then to a certain number of words. + Closes opened tags if they were correctly closed in the given HTML. Newlines in the HTML are preserved. 
""" - if length <= 0: + if words and length <= 0: return '' + html4_singlets = ( 'br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input' ) - # Count non-HTML words and keep note of open tags + + # Count non-HTML chars/words and keep note of open tags pos = 0 end_text_pos = 0 - words = 0 + current_len = 0 open_tags = [] - while words <= length: - m = re_words.search(self._wrapped, pos) + + regex = re_words if words else re_chars + + while current_len <= length: + m = regex.search(text, pos) if not m: # Checked through whole string break pos = m.end(0) if m.group(1): - # It's an actual non-HTML word - words += 1 - if words == length: + # It's an actual non-HTML word or char + current_len += 1 + if current_len == truncate_len: end_text_pos = pos continue # Check for tag tag = re_tag.match(m.group(0)) - if not tag or end_text_pos: + if not tag or current_len >= truncate_len: # Don't worry about non tags or tags after our truncate point continue closing_tag, tagname, self_closing = tag.groups() @@ -199,10 +212,10 @@ class Truncator(SimpleLazyObject): else: # Add it to the start of the open tags list open_tags.insert(0, tagname) - if words <= length: - # Don't try to close tags if we don't need to truncate - return self._wrapped - out = self._wrapped[:end_text_pos] + + if current_len <= length: + return text + out = text[:end_text_pos] truncate_text = self.add_truncation_text('', truncate) if truncate_text: out += truncate_text diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt index 737224794dc..93ec2adab5a 100644 --- a/docs/ref/templates/builtins.txt +++ b/docs/ref/templates/builtins.txt @@ -2172,6 +2172,26 @@ For example:: If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``. +.. templatefilter:: truncatechars_html + +truncatechars_html +^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 1.7 + +Similar to :tfilter:`truncatechars`, except that it is aware of HTML tags. 
Any +tags that are opened in the string and not closed before the truncation point +are closed immediately after the truncation. + +For example:: + + {{ value|truncatechars_html:9 }} + +If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be +``"<p>Joel i...</p>"``. + +Newlines in the HTML content will be preserved. + .. templatefilter:: truncatewords truncatewords diff --git a/docs/releases/1.7.txt b/docs/releases/1.7.txt index 3519ac5bdaf..8d2f329fed7 100644 --- a/docs/releases/1.7.txt +++ b/docs/releases/1.7.txt @@ -657,7 +657,7 @@ Templates * :func:`django.shortcuts.render()` * :func:`django.shortcuts.render_to_response()` -* The :tfilter:`time` filter now accepts timzone-related :ref:`format +* The :tfilter:`time` filter now accepts timezone-related :ref:`format specifiers ` ``'e'``, ``'O'`` , ``'T'`` and ``'Z'`` and is able to digest :ref:`time-zone-aware ` ``datetime`` instances performing the expected @@ -668,6 +668,9 @@ Templates otherwise. It also now accepts an optional ``using`` keyword argument to control which cache it uses. +* The new :tfilter:`truncatechars_html` filter truncates a string to be no + longer than the specified number of characters, taking HTML into account. + Requests ^^^^^^^^ diff --git a/tests/defaultfilters/tests.py b/tests/defaultfilters/tests.py index 6e309dc97a2..f10b1c33050 100644 --- a/tests/defaultfilters/tests.py +++ b/tests/defaultfilters/tests.py @@ -13,8 +13,8 @@ from django.template.defaultfilters import ( linebreaks_filter, linenumbers, ljust, lower, make_list, phone2numeric_filter, pluralize, removetags, rjust, slice_filter, slugify, stringformat, striptags, time, timesince_filter, timeuntil_filter, title, - truncatewords, truncatewords_html, unordered_list, upper, urlencode, - urlize, urlizetrunc, wordcount, wordwrap, yesno, + truncatechars_html, truncatewords, truncatewords_html, unordered_list, + upper, urlencode, urlize, urlizetrunc, wordcount, wordwrap, yesno, ) from django.test import TestCase from django.utils import six @@ -195,6 +195,23 @@ class DefaultFiltersTests(TestCase): '¿Cómo está?', 3), 'Buenos días! ¿Cómo ...') + def test_truncatechars_html(self): + self.assertEqual(truncatechars_html( + '

one two - three
four
five

', 0), '...') + self.assertEqual(truncatechars_html('

one two - ' + 'three
four
five

', 6), + '

one...

') + self.assertEqual(truncatechars_html( + '

one two - three
four
five

', 11), + '

one two ...

') + self.assertEqual(truncatechars_html( + '

one two - three
four
five

', 100), + '

one two - three
four
five

') + self.assertEqual(truncatechars_html( + '\xc5ngstr\xf6m was here', 5), '\xc5n...') + self.assertEqual(truncatechars_html( + 'abc', 3), 'abc') + def test_upper(self): self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT') # lowercase e umlaut