Fixed #16656 -- Changed the urlize filter to accept more top-level domains.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17359 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Aymeric Augustin 2012-01-08 16:08:43 +00:00
parent 27508918fb
commit 05a3ecbf96
3 changed files with 25 additions and 11 deletions

View File

@ -24,7 +24,7 @@ punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)
('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
'|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
simple_url_re = re.compile(r'^https?://\w') simple_url_re = re.compile(r'^https?://\w')
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|net|org)$') simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org|[a-z]{2})$')
simple_email_re = re.compile(r'^\S+@\S+\.\S+$') simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
@ -127,10 +127,11 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
""" """
Converts any URLs in text into clickable links. Converts any URLs in text into clickable links.
Works on http://, https://, www. links and links ending in .org, .net or Works on http://, https://, www. links, and also on links ending in one of
.com. Links can have trailing punctuation (periods, commas, close-parens) the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org) or
and leading punctuation (opening parens) and it'll still do the right a two-letter ccTLD. Links can have trailing punctuation (periods, commas,
thing. close-parens) and leading punctuation (opening parens) and it'll still do
the right thing.
If trim_url_limit is not None, the URLs in link text longer than this limit If trim_url_limit is not None, the URLs in link text longer than this limit
will be truncated to trim_url_limit-3 characters and appended with an ellipsis. will be truncated to trim_url_limit-3 characters and appended with an ellipsis.

View File

@ -2226,13 +2226,20 @@ urlize
Converts URLs in text into clickable links. Converts URLs in text into clickable links.
Works on links prefixed with ``http://``, ``https://``, or ``www.``. For This template filter works on several kinds of links:
example, ``http://goo.gl/aia1t`` will get converted but ``goo.gl/aia1t``
won't.
Also works on domain-only links ending in one of the common ``.com``, ``.net``, - links prefixed with ``http://``, ``https://``, or ``www.``. For example,
or ``.org`` top level domains. For example, ``djangoproject.com`` will still ``http://goo.gl/aia1t`` will get converted but ``goo.gl/aia1t`` won't.
get converted. - domain-only links ending in one of the original top level domains
(``.com``, ``.edu``, ``.gov``, ``.int``, ``.mil``, ``.net``, and
``.org``). For example, ``djangoproject.com`` also gets converted.
- domain-only links ending in a dot followed by two letters. This covers
most country codes. For example, ``djangocon.eu`` still gets converted.
.. versionchanged:: 1.4
Until Django 1.4, only the ``.com``, ``.net`` and ``.org`` suffixes were
supported for domain-only links.
Links can have trailing punctuation (periods, commas, close-parens) and leading Links can have trailing punctuation (periods, commas, close-parens) and leading
punctuation (opening parens) and ``urlize`` will still do the right thing. punctuation (opening parens) and ``urlize`` will still do the right thing.

View File

@ -276,6 +276,12 @@ class DefaultFiltersTests(TestCase):
self.assertEqual(urlize('http://@foo.com'), self.assertEqual(urlize('http://@foo.com'),
u'http://@foo.com') u'http://@foo.com')
# Check urlize accepts more TLDs - see #16656
self.assertEqual(urlize('usa.gov'),
u'<a href="http://usa.gov" rel="nofollow">usa.gov</a>')
self.assertEqual(urlize('europa.eu'),
u'<a href="http://europa.eu" rel="nofollow">europa.eu</a>')
def test_wordcount(self): def test_wordcount(self):
self.assertEqual(wordcount(''), 0) self.assertEqual(wordcount(''), 0)
self.assertEqual(wordcount(u'oneword'), 1) self.assertEqual(wordcount(u'oneword'), 1)