Fixed #26193 -- Made urlize() trim multiple trailing punctuation.
This commit is contained in:
parent
fcd08c1757
commit
dec334cb66
|
@@ -17,7 +17,12 @@ from django.utils.text import normalize_newlines
|
|||
from .html_parser import HTMLParseError, HTMLParser
|
||||
|
||||
# Configuration for urlize() function.
|
||||
TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', '\'', '!']
|
||||
TRAILING_PUNCTUATION_RE = re.compile(
|
||||
'^' # Beginning of word
|
||||
'(.*?)' # The URL in word
|
||||
'([.,:;!]+)' # Allowed non-wrapping, trailing punctuation
|
||||
'$' # End of word
|
||||
)
|
||||
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
|
||||
|
||||
# List of possible strings used for bullets in bulleted lists.
|
||||
|
@@ -268,24 +273,46 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|||
trail = ''
|
||||
return text, unescaped, trail
|
||||
|
||||
words = word_split_re.split(force_text(text))
|
||||
for i, word in enumerate(words):
|
||||
if '.' in word or '@' in word or ':' in word:
|
||||
# Deal with punctuation.
|
||||
lead, middle, trail = '', word, ''
|
||||
for punctuation in TRAILING_PUNCTUATION:
|
||||
if middle.endswith(punctuation):
|
||||
middle = middle[:-len(punctuation)]
|
||||
trail = punctuation + trail
|
||||
def trim_punctuation(lead, middle, trail):
|
||||
"""
|
||||
Trim trailing and wrapping punctuation from `middle`. Return the items
|
||||
of the new state.
|
||||
"""
|
||||
# Continue trimming until middle remains unchanged.
|
||||
trimmed_something = True
|
||||
while trimmed_something:
|
||||
trimmed_something = False
|
||||
|
||||
# Trim trailing punctuation.
|
||||
match = TRAILING_PUNCTUATION_RE.match(middle)
|
||||
if match:
|
||||
middle = match.group(1)
|
||||
trail = match.group(2) + trail
|
||||
trimmed_something = True
|
||||
|
||||
# Trim wrapping punctuation.
|
||||
for opening, closing in WRAPPING_PUNCTUATION:
|
||||
if middle.startswith(opening):
|
||||
middle = middle[len(opening):]
|
||||
lead = lead + opening
|
||||
lead += opening
|
||||
trimmed_something = True
|
||||
# Keep parentheses at the end only if they're balanced.
|
||||
if (middle.endswith(closing)
|
||||
and middle.count(closing) == middle.count(opening) + 1):
|
||||
if (middle.endswith(closing) and
|
||||
middle.count(closing) == middle.count(opening) + 1):
|
||||
middle = middle[:-len(closing)]
|
||||
trail = closing + trail
|
||||
trimmed_something = True
|
||||
return lead, middle, trail
|
||||
|
||||
words = word_split_re.split(force_text(text))
|
||||
for i, word in enumerate(words):
|
||||
if '.' in word or '@' in word or ':' in word:
|
||||
# lead: Current punctuation trimmed from the beginning of the word.
|
||||
# middle: Current state of the word.
|
||||
# trail: Current punctuation trimmed from the end of the word.
|
||||
lead, middle, trail = '', word, ''
|
||||
# Deal with punctuation.
|
||||
lead, middle, trail = trim_punctuation(lead, middle, trail)
|
||||
|
||||
# Make URL we want to point to.
|
||||
url = None
|
||||
|
|
|
@@ -246,6 +246,24 @@ class FunctionTests(SimpleTestCase):
|
|||
'(Go to <a href="http://www.example.com/foo" rel="nofollow">http://www.example.com/foo</a>.)',
|
||||
)
|
||||
|
||||
def test_trailing_multiple_punctuation(self):
|
||||
self.assertEqual(
|
||||
urlize('A test http://testing.com/example..'),
|
||||
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>..'
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize('A test http://testing.com/example!!'),
|
||||
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>!!'
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize('A test http://testing.com/example!!!'),
|
||||
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>!!!'
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize('A test http://testing.com/example.,:;)"!'),
|
||||
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>.,:;)&quot;!'
|
||||
)
|
||||
|
||||
def test_brackets(self):
|
||||
"""
|
||||
#19070 - Check urlize handles brackets properly
|
||||
|
|
Loading…
Reference in New Issue