Simplified django.utils.html.urlize().

This commit is contained in:
Tim Graham 2018-10-12 12:15:26 -04:00 committed by GitHub
parent 910548634a
commit 1e3cd51163
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 13 additions and 23 deletions

View File

@ -259,23 +259,14 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
return x
return '%s' % x[:max(0, limit - 1)]
def unescape(text, trail):
def unescape(text):
"""
If input URL is HTML-escaped, unescape it so that it can be safely fed
to smart_urlquote. For example:
http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2>
"""
unescaped = (text + trail).replace(
'&amp;', '&').replace('&lt;', '<').replace(
return text.replace('&amp;', '&').replace('&lt;', '<').replace(
'&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")
if trail and unescaped.endswith(trail):
# Remove trail for unescaped if it was not consumed by unescape
unescaped = unescaped[:-len(trail)]
elif trail == ';':
# Trail was consumed by unescape (as end-of-entity marker), move it to text
text += trail
trail = ''
return text, unescaped, trail
def trim_punctuation(lead, middle, trail):
"""
@ -286,14 +277,6 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed_something = True
while trimmed_something:
trimmed_something = False
# Trim trailing punctuation.
stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
if middle != stripped:
trail = middle[len(stripped):] + trail
middle = stripped
trimmed_something = True
# Trim wrapping punctuation.
for opening, closing in WRAPPING_PUNCTUATION:
if middle.startswith(opening):
@ -306,6 +289,15 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
middle = middle[:-len(closing)]
trail = closing + trail
trimmed_something = True
# Trim trailing punctuation (after trimming wrapping punctuation,
# as encoded entities contain ';'). Unescape entites to avoid
# breaking them by removing ';'.
middle_unescaped = unescape(middle)
stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
if middle_unescaped != stripped:
trail = middle[len(stripped):] + trail
middle = middle[:len(stripped) - len(middle_unescaped)]
trimmed_something = True
return lead, middle, trail
def is_email_simple(value):
@ -337,11 +329,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
url = None
nofollow_attr = ' rel="nofollow"' if nofollow else ''
if simple_url_re.match(middle):
middle, middle_unescaped, trail = unescape(middle, trail)
url = smart_urlquote(middle_unescaped)
url = smart_urlquote(unescape(middle))
elif simple_url_2_re.match(middle):
middle, middle_unescaped, trail = unescape(middle, trail)
url = smart_urlquote('http://%s' % middle_unescaped)
url = smart_urlquote('http://%s' % unescape(middle))
elif ':' not in middle and is_email_simple(middle):
local, domain = middle.rsplit('@', 1)
try: