Fixed #19496 -- Added truncatechars_html filter.

Thanks esevece for the suggestion and Nick Sandford and Martin Warne
for the initial work on the patch.
This commit is contained in:
Jeremy 2013-01-10 17:27:20 +08:00 committed by Tim Graham
parent 390001ce52
commit f94f466cd3
5 changed files with 93 additions and 23 deletions

View File

@ -279,6 +279,23 @@ def truncatechars(value, arg):
return Truncator(value).chars(length) return Truncator(value).chars(length)
@register.filter(is_safe=True)
@stringfilter
def truncatechars_html(value, arg):
    """
    Truncates HTML after a certain number of chars.

    Argument: Number of chars to truncate after.

    If the argument cannot be parsed as an integer, the value is returned
    unchanged. Newlines in the HTML are preserved.
    """
    try:
        max_chars = int(arg)
    except ValueError:
        # invalid literal for int(): fail silently and hand back the input.
        return value
    else:
        # Delegate to Truncator in HTML mode.
        return Truncator(value).chars(max_chars, html=True)
@register.filter(is_safe=True) @register.filter(is_safe=True)
@stringfilter @stringfilter
def truncatewords(value, arg): def truncatewords(value, arg):

View File

@ -24,6 +24,7 @@ capfirst = allow_lazy(capfirst, six.text_type)
# Set up regular expressions # Set up regular expressions
re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S) re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S)
re_chars = re.compile(r'<.*?>|(.)', re.U | re.S)
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S) re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines
re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))') re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
@ -82,7 +83,7 @@ class Truncator(SimpleLazyObject):
return text return text
return '%s%s' % (text, truncate) return '%s%s' % (text, truncate)
def chars(self, num, truncate=None): def chars(self, num, truncate=None, html=False):
""" """
Returns the text truncated to be no longer than the specified number Returns the text truncated to be no longer than the specified number
of characters. of characters.
@ -101,7 +102,15 @@ class Truncator(SimpleLazyObject):
truncate_len -= 1 truncate_len -= 1
if truncate_len == 0: if truncate_len == 0:
break break
if html:
return self._truncate_html(length, truncate, text, truncate_len, False)
return self._text_chars(length, truncate, text, truncate_len)
chars = allow_lazy(chars)
def _text_chars(self, length, truncate, text, truncate_len):
"""
Truncates a string after a certain number of chars.
"""
s_len = 0 s_len = 0
end_index = None end_index = None
for i, char in enumerate(text): for i, char in enumerate(text):
@ -119,7 +128,6 @@ class Truncator(SimpleLazyObject):
# Return the original string since no truncation was necessary # Return the original string since no truncation was necessary
return text return text
chars = allow_lazy(chars)
def words(self, num, truncate=None, html=False): def words(self, num, truncate=None, html=False):
""" """
@ -129,7 +137,7 @@ class Truncator(SimpleLazyObject):
""" """
length = int(num) length = int(num)
if html: if html:
return self._html_words(length, truncate) return self._truncate_html(length, truncate, self._wrapped, length, True)
return self._text_words(length, truncate) return self._text_words(length, truncate)
words = allow_lazy(words) words = allow_lazy(words)
@ -145,40 +153,45 @@ class Truncator(SimpleLazyObject):
return self.add_truncation_text(' '.join(words), truncate) return self.add_truncation_text(' '.join(words), truncate)
return ' '.join(words) return ' '.join(words)
def _html_words(self, length, truncate): def _truncate_html(self, length, truncate, text, truncate_len, words):
""" """
Truncates HTML to a certain number of words (not counting tags and Truncates HTML to a certain number of chars (not counting tags and
comments). Closes opened tags if they were correctly closed in the comments), or, if words is True, then to a certain number of words.
given HTML. Closes opened tags if they were correctly closed in the given HTML.
Newlines in the HTML are preserved. Newlines in the HTML are preserved.
""" """
if length <= 0: if words and length <= 0:
return '' return ''
html4_singlets = ( html4_singlets = (
'br', 'col', 'link', 'base', 'img', 'br', 'col', 'link', 'base', 'img',
'param', 'area', 'hr', 'input' 'param', 'area', 'hr', 'input'
) )
# Count non-HTML words and keep note of open tags
# Count non-HTML chars/words and keep note of open tags
pos = 0 pos = 0
end_text_pos = 0 end_text_pos = 0
words = 0 current_len = 0
open_tags = [] open_tags = []
while words <= length:
m = re_words.search(self._wrapped, pos) regex = re_words if words else re_chars
while current_len <= length:
m = regex.search(text, pos)
if not m: if not m:
# Checked through whole string # Checked through whole string
break break
pos = m.end(0) pos = m.end(0)
if m.group(1): if m.group(1):
# It's an actual non-HTML word # It's an actual non-HTML word or char
words += 1 current_len += 1
if words == length: if current_len == truncate_len:
end_text_pos = pos end_text_pos = pos
continue continue
# Check for tag # Check for tag
tag = re_tag.match(m.group(0)) tag = re_tag.match(m.group(0))
if not tag or end_text_pos: if not tag or current_len >= truncate_len:
# Don't worry about non tags or tags after our truncate point # Don't worry about non tags or tags after our truncate point
continue continue
closing_tag, tagname, self_closing = tag.groups() closing_tag, tagname, self_closing = tag.groups()
@ -199,10 +212,10 @@ class Truncator(SimpleLazyObject):
else: else:
# Add it to the start of the open tags list # Add it to the start of the open tags list
open_tags.insert(0, tagname) open_tags.insert(0, tagname)
if words <= length:
# Don't try to close tags if we don't need to truncate if current_len <= length:
return self._wrapped return text
out = self._wrapped[:end_text_pos] out = text[:end_text_pos]
truncate_text = self.add_truncation_text('', truncate) truncate_text = self.add_truncation_text('', truncate)
if truncate_text: if truncate_text:
out += truncate_text out += truncate_text

View File

@ -2172,6 +2172,26 @@ For example::
If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``. If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
.. templatefilter:: truncatechars_html
truncatechars_html
^^^^^^^^^^^^^^^^^^
.. versionadded:: 1.7
Similar to :tfilter:`truncatechars`, except that it is aware of HTML tags. Any
tags that are opened in the string and not closed before the truncation point
are closed immediately after the truncation.
For example::
{{ value|truncatechars_html:9 }}
If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
``"<p>Joel i...</p>"``.
Newlines in the HTML content will be preserved.
.. templatefilter:: truncatewords .. templatefilter:: truncatewords
truncatewords truncatewords

View File

@ -657,7 +657,7 @@ Templates
* :func:`django.shortcuts.render()` * :func:`django.shortcuts.render()`
* :func:`django.shortcuts.render_to_response()` * :func:`django.shortcuts.render_to_response()`
* The :tfilter:`time` filter now accepts timzone-related :ref:`format * The :tfilter:`time` filter now accepts timezone-related :ref:`format
specifiers <date-and-time-formatting-specifiers>` ``'e'``, ``'O'`` , ``'T'`` specifiers <date-and-time-formatting-specifiers>` ``'e'``, ``'O'`` , ``'T'``
and ``'Z'`` and is able to digest :ref:`time-zone-aware and ``'Z'`` and is able to digest :ref:`time-zone-aware
<naive_vs_aware_datetimes>` ``datetime`` instances performing the expected <naive_vs_aware_datetimes>` ``datetime`` instances performing the expected
@ -668,6 +668,9 @@ Templates
otherwise. It also now accepts an optional ``using`` keyword argument to otherwise. It also now accepts an optional ``using`` keyword argument to
control which cache it uses. control which cache it uses.
* The new :tfilter:`truncatechars_html` filter truncates a string to be no
longer than the specified number of characters, taking HTML into account.
Requests Requests
^^^^^^^^ ^^^^^^^^

View File

@ -13,8 +13,8 @@ from django.template.defaultfilters import (
linebreaks_filter, linenumbers, ljust, lower, make_list, linebreaks_filter, linenumbers, ljust, lower, make_list,
phone2numeric_filter, pluralize, removetags, rjust, slice_filter, slugify, phone2numeric_filter, pluralize, removetags, rjust, slice_filter, slugify,
stringformat, striptags, time, timesince_filter, timeuntil_filter, title, stringformat, striptags, time, timesince_filter, timeuntil_filter, title,
truncatewords, truncatewords_html, unordered_list, upper, urlencode, truncatechars_html, truncatewords, truncatewords_html, unordered_list,
urlize, urlizetrunc, wordcount, wordwrap, yesno, upper, urlencode, urlize, urlizetrunc, wordcount, wordwrap, yesno,
) )
from django.test import TestCase from django.test import TestCase
from django.utils import six from django.utils import six
@ -195,6 +195,23 @@ class DefaultFiltersTests(TestCase):
'&#x00bf;C&oacute;mo est&aacute;?</i>', 3), '&#x00bf;C&oacute;mo est&aacute;?</i>', 3),
'<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo ...</i>') '<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo ...</i>')
def test_truncatechars_html(self):
    """
    Checks truncatechars_html against a table of
    (input, length, expected output) cases.
    """
    # Shared fixture containing nested tags.
    markup = '<p>one <a href="#">two - three <br>four</a> five</p>'
    cases = (
        # A length of 0 is expected to leave only the truncation text.
        (markup, 0, '...'),
        (markup, 6, '<p>one...</p>'),
        # Tags still open at the cut are expected to be closed afterwards.
        (markup, 11, '<p>one <a href="#">two ...</a></p>'),
        # A length beyond the text is expected to return the input as is.
        (markup, 100, markup),
        # Non-ASCII characters.
        ('<b>\xc5ngstr\xf6m</b> was here', 5, '<b>\xc5n...</b>'),
        # Exactly three non-tag characters with a length of 3: unchanged.
        ('a<b>b</b>c', 3, 'a<b>b</b>c'),
    )
    for html, limit, expected in cases:
        self.assertEqual(truncatechars_html(html, limit), expected)
def test_upper(self): def test_upper(self):
self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT') self.assertEqual(upper('Mixed case input'), 'MIXED CASE INPUT')
# lowercase e umlaut # lowercase e umlaut