diff --git a/django/utils/regex_helper.py b/django/utils/regex_helper.py deleted file mode 100644 index 064890a34d1..00000000000 --- a/django/utils/regex_helper.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Functions for reversing a regular expression (used in reverse URL resolving). - -This is not, and is not intended to be, a complete reg-exp decompiler. It -should be good enough for almost all sane URLs. -""" - -import re -from bisect import bisect - -GROUP_CLASS = re.compile(r'''\((?: - (?P[^?])| # Unnamed (positional) capturing group. - \?(?: - P<(?P[\w]+)>(?P.*)| # Named capturing group. - P=(?P.+)| # Repeat of a previous named group. - (?P:)| # Non-capturing grouping parens. - (?P\#)| # Comment group - (?P.) # Anything else (which will be an error) - ) - ).*\)''', re.VERBOSE) - -def normalize(pattern): - """ - Given a reg-exp pattern, normalizes it to a list of forms that suffice for - reverse matching. This does the following: - - (1) For any repeating sections, keeps the minimum number of occurrences - permitted (this means zero for optional groups). - (2) If an optional group includes parameters, include one occurrence of - that group (along with the zero occurrence case from step (1)). - (3) Select the first (essentially an arbitrary) element from any character - class. Select an arbitrary character for any unordered class (e.g. '.' or - '\w') in the pattern. - (4) Take the first alternative in any '|' division, unless other - alternatives would involve different parameters. - (5) Ignore comments. Error on all other non-capturing (?...) forms (e.g. - look-ahead and look-behind matches). - - Returns a list of tuples, each tuple containing (a) a pattern, (b) the - number of parameters, (c) the names of the parameters. Any unnamed - parameters are called '_0', '_1', etc. - """ - # Do a linear scan to work out the special features of this pattern. The - # idea is that we scan once here and collect all the information we need to - # make future decisions. - groups = [] # (start, end) - quantifiers = [] # start pos - ranges = [] # (start, end) - eols = [] # pos - disjunctions = [] # pos - unclosed_groups = [] - unclosed_ranges = [] - escaped = False - quantify = False - in_range = False - for pos, c in enumerate(pattern): - if in_range and c != ']' or (c == ']' and - unclosed_ranges[-1] == pos - 1): - continue - elif c == '[': - unclosed_ranges.append(pos) - elif c == ']': - ranges.append((unclosed_ranges.pop(), pos + 1)) - in_range = False - elif c == '.': - # Treat this as a one-character long range: - ranges.append((pos, pos + 1)) - elif escaped or c == '\\': - escaped = not escaped - elif c == '(': - unclosed_groups.append(pos) - elif c == ')': - groups.append((unclosed_groups.pop(), pos + 1)) - elif quantify and c == '?': - quantify = False - elif c in '?*+{': - quantifiers.append(pos) - quantify = True - elif c == '$': - eols.append(pos) - elif c == '|': - disjunctions.append(pos) - - # Now classify each of the parenthetical groups to work out which ones take - # parameters. Only the outer-most of a set of nested capturing groups is - # important. - groups.sort() - params = [] - comments = [] - last_end = 0 - for start, end in groups: - if start < last_end: - # Skip over inner nested capturing groups. - continue - m = GROUP_CLASS.match(pattern, start) - if m.group('positional'): - params.append((start, end, '_%d' % len(params), start + 1)) - elif m.group('named'): - params.append((start, end, m.group('named'), m.start('contents'))) - elif m.group('repeat'): - params.append((start, end, m.group('repeat'), start + 1)) - elif m.group('illegal'): - raise ValueError('The pattern construct %r is not valid here.' - % pattern[start:end]) - elif m.group('comment'): - comments.extend([start, end]) - else: - # This is a non-capturing set, so nesting prohibitions don't apply - # to any inner groups. - continue - last_end = end - - # XXX: Got to here! - results = [] - end = groups[0][0] - # The first bit, before the first group starts. - if end == 0: - # FIXME: don't want to handle this case just yet. - raise Exception - - quant_end = bisect(quantifiers, end) - range_end = bisect(ranges, end) - dis_end = bisect(disjunctions, end) diff --git a/tests/regressiontests/urlpatterns_reverse/tests.py b/tests/regressiontests/urlpatterns_reverse/tests.py index de0441ea400..610c3f7bc7e 100644 --- a/tests/regressiontests/urlpatterns_reverse/tests.py +++ b/tests/regressiontests/urlpatterns_reverse/tests.py @@ -1,32 +1,19 @@ -""" -Unit tests for reverse URL lookups. -""" - -import re -import unittest +"Unit tests for reverse URL lookup" from django.core.urlresolvers import reverse_helper, NoReverseMatch +import re, unittest test_data = ( ('^places/(\d+)/$', 'places/3/', [3], {}), ('^places/(\d+)/$', 'places/3/', ['3'], {}), ('^places/(\d+)/$', NoReverseMatch, ['a'], {}), ('^places/(\d+)/$', NoReverseMatch, [], {}), - ('^places?/$', '/', [], {}), - ('^places+/$', 'places/', [], {}), - ('^places*/$', '/', [], {}), - (r'^places/(\d+|[a-z_]+)/', 'places/4/', [4], {}), - (r'^places/(\d+|[a-z_]+)/', 'places/harlem/', ['harlem'], {}), - (r'^places/(\d+|[a-z_]+)/', NoReverseMatch, ['harlem64'], {}), ('^places/(?P\d+)/$', 'places/3/', [], {'id': 3}), - ('^people/(?P\w+)/$', NoReverseMatch, [], {}), ('^people/(?P\w+)/$', 'people/adrian/', ['adrian'], {}), ('^people/(?P\w+)/$', 'people/adrian/', [], {'name': 'adrian'}), ('^people/(?P\w+)/$', NoReverseMatch, ['name with spaces'], {}), ('^people/(?P\w+)/$', NoReverseMatch, [], {'name': 'name with spaces'}), - ('^people/(?:name/)', 'people/name/', [], {}), - ('^people/(?:name/)?', 'people/', [], {}), - ('^people/(?:name/(\w+)/)?', 'people/name/fred/', ['fred'], {}), + ('^people/(?P\w+)/$', NoReverseMatch, [], {}), ('^hardcoded/$', 'hardcoded/', [], {}), ('^hardcoded/$', 'hardcoded/', ['any arg'], {}), ('^hardcoded/$', 'hardcoded/', [], {'kwarg': 'foo'}), @@ -37,31 +24,17 @@ test_data = ( ('^people/(?P\w\w)/(?P\w+)/$', NoReverseMatch, [], {'name': 'adrian'}), ('^people/(?P\w\w)/(\w+)/$', NoReverseMatch, ['il'], {'name': 'adrian'}), ('^people/(?P\w\w)/(\w+)/$', 'people/il/adrian/', ['adrian'], {'state': 'il'}), - (r'^people/((?P\w\w)/test)?/(\w+)/$', 'people/il/test/adrian/', ['adrian'], {'state': 'il'}), - (r'^people/((?P\w\w)/test)?/(\w+)/$', NoReverseMatch, ['adrian'], {}), - ('^character_set/[abcdef0-9]/$', 'character_set/a/', [], {}), - ('^character_set/[\w]/$', 'character_set/a/', [], {}), - (r'^price/\$(\d+)/$', 'price/$10/', ['10'], {}), - (r'^price/[$](\d+)/$', 'price/$10/', ['10'], {}), - (r'^price/[\$](\d+)/$', 'price/$10/', ['10'], {}), - (r'^product/(?P\w+)\+\(\$(?P\d+(\.\d+)?)\)/$', 'product/chocolate+($2.00)/', ['2.00'], {'product': 'chocolate'}), - (r'^headlines/(?P\d+)\.(?P\d+)\.(?P\d+)/$', 'headlines/2007.5.21/', [], dict(year=2007, month=5, day=21)), - (r'^windows_path/(?P[A-Z]):\\(?P.+)/$', r'windows_path/C:\Documents and Settings\spam/', [], dict(drive_name='C', path=r'Documents and Settings\spam')), - (r'^special_chars/(.+)/$', r'special_chars/+\$*/', [r'+\$*'], {}), - (r'^special_chars/(.+)/$', NoReverseMatch, [''], {}), - (r'^(?P.+)/\d+/$', NoReverseMatch, [], {'name': 'john'}), - (r'^repeats/a{1,2}/$', 'repeats/a/', [], {}), - (r'^repeats/a{2,4}/$', 'repeats/aa/', [], {}), - (r'^people/(?:(?:wilma|fred)/)$', '/people/wilma', [], {}), ) class URLPatternReverse(unittest.TestCase): def test_urlpattern_reverse(self): for regex, expected, args, kwargs in test_data: try: - got = reverse_helper(regex, *args, **kwargs) + got = reverse_helper(re.compile(regex), *args, **kwargs) except NoReverseMatch, e: self.assertEqual(expected, NoReverseMatch) else: self.assertEquals(got, expected) +if __name__ == "__main__": + run_tests(1)