Fixed #26193 -- Made urlize() trim multiple trailing punctuation characters.
This commit is contained in:
parent
fcd08c1757
commit
dec334cb66
|
@ -17,7 +17,12 @@ from django.utils.text import normalize_newlines
|
||||||
from .html_parser import HTMLParseError, HTMLParser
|
from .html_parser import HTMLParseError, HTMLParser
|
||||||
|
|
||||||
# Configuration for urlize() function.
|
# Configuration for urlize() function.
|
||||||
TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', '\'', '!']
|
TRAILING_PUNCTUATION_RE = re.compile(
|
||||||
|
'^' # Beginning of word
|
||||||
|
'(.*?)' # The URL in word
|
||||||
|
'([.,:;!]+)' # Allowed non-wrapping, trailing punctuation
|
||||||
|
'$' # End of word
|
||||||
|
)
|
||||||
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
|
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
|
||||||
|
|
||||||
# List of possible strings used for bullets in bulleted lists.
|
# List of possible strings used for bullets in bulleted lists.
|
||||||
|
@ -268,24 +273,46 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
||||||
trail = ''
|
trail = ''
|
||||||
return text, unescaped, trail
|
return text, unescaped, trail
|
||||||
|
|
||||||
words = word_split_re.split(force_text(text))
|
def trim_punctuation(lead, middle, trail):
|
||||||
for i, word in enumerate(words):
|
"""
|
||||||
if '.' in word or '@' in word or ':' in word:
|
Trim trailing and wrapping punctuation from `middle`. Return the items
|
||||||
# Deal with punctuation.
|
of the new state.
|
||||||
lead, middle, trail = '', word, ''
|
"""
|
||||||
for punctuation in TRAILING_PUNCTUATION:
|
# Continue trimming until middle remains unchanged.
|
||||||
if middle.endswith(punctuation):
|
trimmed_something = True
|
||||||
middle = middle[:-len(punctuation)]
|
while trimmed_something:
|
||||||
trail = punctuation + trail
|
trimmed_something = False
|
||||||
|
|
||||||
|
# Trim trailing punctuation.
|
||||||
|
match = TRAILING_PUNCTUATION_RE.match(middle)
|
||||||
|
if match:
|
||||||
|
middle = match.group(1)
|
||||||
|
trail = match.group(2) + trail
|
||||||
|
trimmed_something = True
|
||||||
|
|
||||||
|
# Trim wrapping punctuation.
|
||||||
for opening, closing in WRAPPING_PUNCTUATION:
|
for opening, closing in WRAPPING_PUNCTUATION:
|
||||||
if middle.startswith(opening):
|
if middle.startswith(opening):
|
||||||
middle = middle[len(opening):]
|
middle = middle[len(opening):]
|
||||||
lead = lead + opening
|
lead += opening
|
||||||
|
trimmed_something = True
|
||||||
# Keep parentheses at the end only if they're balanced.
|
# Keep parentheses at the end only if they're balanced.
|
||||||
if (middle.endswith(closing)
|
if (middle.endswith(closing) and
|
||||||
and middle.count(closing) == middle.count(opening) + 1):
|
middle.count(closing) == middle.count(opening) + 1):
|
||||||
middle = middle[:-len(closing)]
|
middle = middle[:-len(closing)]
|
||||||
trail = closing + trail
|
trail = closing + trail
|
||||||
|
trimmed_something = True
|
||||||
|
return lead, middle, trail
|
||||||
|
|
||||||
|
words = word_split_re.split(force_text(text))
|
||||||
|
for i, word in enumerate(words):
|
||||||
|
if '.' in word or '@' in word or ':' in word:
|
||||||
|
# lead: Current punctuation trimmed from the beginning of the word.
|
||||||
|
# middle: Current state of the word.
|
||||||
|
# trail: Current punctuation trimmed from the end of the word.
|
||||||
|
lead, middle, trail = '', word, ''
|
||||||
|
# Deal with punctuation.
|
||||||
|
lead, middle, trail = trim_punctuation(lead, middle, trail)
|
||||||
|
|
||||||
# Make URL we want to point to.
|
# Make URL we want to point to.
|
||||||
url = None
|
url = None
|
||||||
|
|
|
@ -246,6 +246,24 @@ class FunctionTests(SimpleTestCase):
|
||||||
'(Go to <a href="http://www.example.com/foo" rel="nofollow">http://www.example.com/foo</a>.)',
|
'(Go to <a href="http://www.example.com/foo" rel="nofollow">http://www.example.com/foo</a>.)',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_trailing_multiple_punctuation(self):
|
||||||
|
self.assertEqual(
|
||||||
|
urlize('A test http://testing.com/example..'),
|
||||||
|
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>..'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
urlize('A test http://testing.com/example!!'),
|
||||||
|
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>!!'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
urlize('A test http://testing.com/example!!!'),
|
||||||
|
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>!!!'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
urlize('A test http://testing.com/example.,:;)"!'),
|
||||||
|
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>.,:;)"!'
|
||||||
|
)
|
||||||
|
|
||||||
def test_brackets(self):
|
def test_brackets(self):
|
||||||
"""
|
"""
|
||||||
#19070 - Check urlize handles brackets properly
|
#19070 - Check urlize handles brackets properly
|
||||||
|
|
Loading…
Reference in New Issue