Fixed #14516 -- Extract methods from removetags and slugify template filters

Patch by @jphalip, updated to apply cleanly; documentation and release notes added. I've documented strip_tags as well as remove_tags, as the difference between the two wouldn't be immediately obvious.
2012-08-18 13:53:22 +01:00 · 2012-08-18 13:53:22 +01:00 · 212b9826bd
parent 58683e9c82
commit 212b9826bd
7 changed files with 94 additions and 5 deletions
--- a/django/template/defaultfilters.py
+++ b/django/template/defaultfilters.py
@ -231,12 +231,12 @@ def make_list(value):
@stringfilter
 def slugify(value):
    """
-    Normalizes string, converts to lowercase, removes non-alpha characters,
-    and converts spaces to hyphens.
+    Converts to lowercase, removes non-word characters (alphanumerics and
+    underscores) and converts spaces to hyphens. Also strips leading and
+    trailing whitespace.
    """
-    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
-    value = re.sub('[^\w\s-]', '', value).strip().lower()
-    return mark_safe(re.sub('[-\s]+', '-', value))
+    from django.utils.text import slugify
+    return slugify(value)

@register.filter(is_safe=True)
 def stringformat(value, arg):
--- a/django/utils/html.py
+++ b/django/utils/html.py
@ -123,6 +123,17 @@ def strip_tags(value):
    return re.sub(r'<[^>]*?>', '', force_text(value))
 strip_tags = allow_lazy(strip_tags)

+def remove_tags(html, tags):
+    """
+    Returns ``html`` with the opening and closing tags named in ``tags`` (a
+    space-separated string) removed; tag contents are kept. Case-sensitive.
+    """
+    tags_re = u'(%s)' % u'|'.join(re.escape(tag) for tag in tags.split())
+    starttag_re = re.compile(u'<%s(/?>|(\\s+[^>]*>))' % tags_re, re.U)  # no ur'': invalid on Python 3
+    endtag_re = re.compile(u'</%s>' % tags_re)
+    return endtag_re.sub(u'', starttag_re.sub(u'', html))
+remove_tags = allow_lazy(remove_tags, type(u''))  # the text type on both Python 2 and 3
+
 def strip_spaces_between_tags(value):
    """Returns the given HTML with spaces between tags removed."""
    return re.sub(r'>\s+<', '><', force_text(value))
--- a/django/utils/text.py
+++ b/django/utils/text.py
@ -16,6 +16,7 @@ if not six.PY3:
 from django.utils.functional import allow_lazy, SimpleLazyObject
 from django.utils import six
 from django.utils.translation import ugettext_lazy, ugettext as _, pgettext
+from django.utils.safestring import mark_safe

 # Capitalizes the first letter of a string.
 capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:]
@ -383,3 +384,14 @@ def unescape_string_literal(s):
    quote = s[0]
    return s[1:-1].replace(r'\%s' % quote, quote).replace(r'\\', '\\')
 unescape_string_literal = allow_lazy(unescape_string_literal)
+
+def slugify(value):
+    """
+    Converts to ASCII and lowercase, removes characters other than
+    alphanumerics, underscores, hyphens and whitespace, strips the ends, then
+    collapses runs of whitespace/hyphens into one hyphen. Marked safe.
+    """
+    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
+    value = re.sub(r'[^\w\s-]', '', value).strip().lower()  # text in, text out on Python 2 and 3
+    return mark_safe(re.sub(r'[-\s]+', '-', value))
+slugify = allow_lazy(slugify, six.text_type)
--- a/docs/ref/utils.txt
+++ b/docs/ref/utils.txt
@ -486,6 +486,33 @@ escaping HTML.
    through :func:`conditional_escape` which (ultimately) calls
    :func:`~django.utils.encoding.force_text` on the values.

+.. function:: strip_tags(value)
+
+    Removes anything that looks like an HTML tag from the string, that is,
+    anything contained within ``<>``.
+
+    For example::
+
+        strip_tags(value)
+
+    If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the
+    return value will be ``"Joel is a slug"``.
+
+.. function:: remove_tags(value, tags)
+
+    Removes a space-separated list of [X]HTML tag names from the output.
+
+    For example::
+
+        remove_tags(value, "b span")
+
+    If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the
+    return value will be ``"Joel <button>is</button> a slug"``.
+
+    Note that this function is case-sensitive.
+
+    If ``value`` is ``"<B>Joel</B> <button>is</button> a <span>slug</span>"`` the
+    return value will be ``"<B>Joel</B> <button>is</button> a slug"``.

 .. _str.format: http://docs.python.org/library/stdtypes.html#str.format

@ -599,6 +626,24 @@ appropriate entities.
    Can be called multiple times on a single string (the resulting escaping is
    only applied once).

+``django.utils.text``
+=====================
+
+.. module:: django.utils.text
+    :synopsis: Text manipulation.
+
+.. function:: slugify(value)
+
+    Converts to lowercase, removes non-word characters (anything other than
+    alphanumerics, underscores, hyphens and whitespace) and converts spaces to
+    hyphens. Also strips leading and trailing whitespace.
+
+    For example::
+
+        slugify(value)
+
+    If ``value`` is ``"Joel is a slug"``, the output will be ``"joel-is-a-slug"``.
+
 ``django.utils.translation``
 ============================

--- a/docs/releases/1.5.txt
+++ b/docs/releases/1.5.txt
@ -267,6 +267,10 @@ Miscellaneous
 * :func:`~django.utils.http.int_to_base36` properly raises a :exc:`TypeError`
  instead of :exc:`ValueError` for non-integer inputs.

+* The ``slugify`` template filter is now available as a standard Python
+  function at :func:`django.utils.text.slugify`. Similarly, ``removetags`` is
+  available at :func:`django.utils.html.remove_tags`.
+
 Features deprecated in 1.5
 ==========================

--- a/tests/regressiontests/utils/html.py
+++ b/tests/regressiontests/utils/html.py
@ -146,3 +146,12 @@ class TestUtilsHtml(unittest.TestCase):
        )
        for value, output in items:
            self.check_output(f, value, output)
+
+    def test_remove_tags(self):
+        """remove_tags() strips only the listed tags and keeps their content."""
+        items = (
+            ("<b><i>Yes</i></b>", "b i", "Yes"),
+            ("<a>x</a> <p><b>y</b></p>", "a b", "x <p>y</p>"),
+        )
+        for value, tags, output in items:
+            self.assertEqual(html.remove_tags(value, tags), output)
--- a/tests/regressiontests/utils/text.py
+++ b/tests/regressiontests/utils/text.py
@ -113,3 +113,11 @@ class TestUtilsText(SimpleTestCase):
        self.assertEqual(text.wrap(long_word, 20), long_word)
        self.assertEqual(text.wrap('a %s word' % long_word, 10),
                         'a\n%s\nword' % long_word)
+
+    def test_slugify(self):
+        """slugify() lowercases, drops punctuation and hyphenates spaces."""
+        # (input, expected slug) pairs
+        cases = ((u'Hello, World!', 'hello-world'),
+                 (u'spam & eggs', 'spam-eggs'))
+        for raw, expected in cases:
+            self.assertEqual(text.slugify(raw), expected)