Simplified django.utils.html.urlize().
This commit is contained in:
parent
910548634a
commit
1e3cd51163
|
@ -259,23 +259,14 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|||
return x
|
||||
return '%s…' % x[:max(0, limit - 1)]
|
||||
|
||||
def unescape(text, trail):
|
||||
def unescape(text):
|
||||
"""
|
||||
If input URL is HTML-escaped, unescape it so that it can be safely fed
|
||||
to smart_urlquote. For example:
|
||||
http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2>
|
||||
"""
|
||||
unescaped = (text + trail).replace(
|
||||
'&amp;', '&').replace('&lt;', '<').replace(
|
||||
return text.replace('&amp;', '&').replace('&lt;', '<').replace(
|
||||
'&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")
|
||||
if trail and unescaped.endswith(trail):
|
||||
# Remove trail for unescaped if it was not consumed by unescape
|
||||
unescaped = unescaped[:-len(trail)]
|
||||
elif trail == ';':
|
||||
# Trail was consumed by unescape (as end-of-entity marker), move it to text
|
||||
text += trail
|
||||
trail = ''
|
||||
return text, unescaped, trail
|
||||
|
||||
def trim_punctuation(lead, middle, trail):
|
||||
"""
|
||||
|
@ -286,14 +277,6 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|||
trimmed_something = True
|
||||
while trimmed_something:
|
||||
trimmed_something = False
|
||||
|
||||
# Trim trailing punctuation.
|
||||
stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
|
||||
if middle != stripped:
|
||||
trail = middle[len(stripped):] + trail
|
||||
middle = stripped
|
||||
trimmed_something = True
|
||||
|
||||
# Trim wrapping punctuation.
|
||||
for opening, closing in WRAPPING_PUNCTUATION:
|
||||
if middle.startswith(opening):
|
||||
|
@ -306,6 +289,15 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|||
middle = middle[:-len(closing)]
|
||||
trail = closing + trail
|
||||
trimmed_something = True
|
||||
# Trim trailing punctuation (after trimming wrapping punctuation,
|
||||
# as encoded entities contain ';'). Unescape entities to avoid
|
||||
# breaking them by removing ';'.
|
||||
middle_unescaped = unescape(middle)
|
||||
stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
|
||||
if middle_unescaped != stripped:
|
||||
trail = middle[len(stripped):] + trail
|
||||
middle = middle[:len(stripped) - len(middle_unescaped)]
|
||||
trimmed_something = True
|
||||
return lead, middle, trail
|
||||
|
||||
def is_email_simple(value):
|
||||
|
@ -337,11 +329,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|||
url = None
|
||||
nofollow_attr = ' rel="nofollow"' if nofollow else ''
|
||||
if simple_url_re.match(middle):
|
||||
middle, middle_unescaped, trail = unescape(middle, trail)
|
||||
url = smart_urlquote(middle_unescaped)
|
||||
url = smart_urlquote(unescape(middle))
|
||||
elif simple_url_2_re.match(middle):
|
||||
middle, middle_unescaped, trail = unescape(middle, trail)
|
||||
url = smart_urlquote('http://%s' % middle_unescaped)
|
||||
url = smart_urlquote('http://%s' % unescape(middle))
|
||||
elif ':' not in middle and is_email_simple(middle):
|
||||
local, domain = middle.rsplit('@', 1)
|
||||
try:
|
||||
|
|
Loading…
Reference in New Issue