Fixed #26621 -- Corrected simplify_regex()'s handling of named capture groups.
This commit is contained in:
parent
fd2f7e4767
commit
f0ef0c49e9
|
@ -144,3 +144,94 @@ if docutils_is_available:
|
||||||
|
|
||||||
for name, urlbase in ROLES.items():
|
for name, urlbase in ROLES.items():
|
||||||
create_reference_role(name, urlbase)
|
create_reference_role(name, urlbase)
|
||||||
|
|
||||||
|
# Match the beginning of a named or unnamed group.
named_group_matcher = re.compile(r'\(\?P(<\w+>)')
unnamed_group_matcher = re.compile(r'\(')


def _find_group_end(pattern, body_start):
    """
    Return the index just past the `)` that closes a group.

    `body_start` is the index of the first character after the group's
    opening token; the group itself counts as one already-open bracket.
    Return None if the brackets never balance before `pattern` ends.
    """
    unmatched_open_brackets = 1
    escaped = False
    for idx in range(body_start, len(pattern)):
        char = pattern[idx]
        if escaped:
            # The previous character was an unescaped backslash, so this
            # character is a literal (this also covers `\\(`, where the
            # backslash itself is escaped and the parenthesis is real).
            escaped = False
        elif char == '\\':
            escaped = True
        elif char == '(':
            unmatched_open_brackets += 1
        elif char == ')':
            unmatched_open_brackets -= 1
            if unmatched_open_brackets == 0:
                # Report the end as soon as the brackets balance so a group
                # that closes on the very last character is still found.
                return idx + 1
    return None


def replace_named_groups(pattern):
    r"""
    Find named groups in `pattern` and replace them with the group name. E.g.,
    1. ^(?P<a>\w+)/b/(\w+)$ ==> ^<a>/b/(\w+)$
    2. ^(?P<a>\w+)/b/(?P<c>\w+)/$ ==> ^<a>/b/<c>/$
    3. ^(?P<a>\w+)/b/(?P<c>\w+) ==> ^<a>/b/<c>

    Nested parentheses inside a named group, e.g. '^(?P<a>(x|y))/b', are
    replaced together with the group. Groups whose brackets never balance are
    left untouched.
    """
    # Tuples of (named capture group pattern, group name).
    group_pattern_and_name = []
    for match in named_group_matcher.finditer(pattern):
        end = _find_group_end(pattern, match.end(0))
        if end is not None:
            group_pattern_and_name.append(
                (pattern[match.start(0):end], match.group(1))
            )

    # Replace the string for named capture groups with their group names.
    for group_pattern, group_name in group_pattern_and_name:
        pattern = pattern.replace(group_pattern, group_name)
    return pattern


def replace_unnamed_groups(pattern):
    r"""
    Find groups opened by `(` in `pattern` and replace each outermost one with
    '<var>'. E.g.,
    1. ^<a>/b/(\w+)$ ==> ^<a>/b/<var>$
    2. ^<a>/b/((x|y)\w+)$ ==> ^<a>/b/<var>$
    3. ^(\w+)/b/(\w+) ==> ^<var>/b/<var>

    Every `(` is treated as a group start, including `(?P<name>`, so run
    replace_named_groups() first (as simplify_regex() does) if named groups
    should keep their names. Groups whose brackets never balance are left
    untouched.
    """
    final_pattern = []
    # Index just past the last replaced group; text before it is already
    # accounted for in final_pattern.
    prev_end = 0
    for match in unnamed_group_matcher.finditer(pattern):
        start = match.start(0)
        if start < prev_end:
            # This bracket lies inside a group that was already replaced.
            continue
        end = _find_group_end(pattern, start + 1)
        if end is None:
            # Unbalanced, e.g. an escaped `\(` picked up by the matcher.
            continue
        # Copy only the text since the previous group, then the placeholder,
        # so earlier parts of the pattern are never emitted twice.
        final_pattern.append(pattern[prev_end:start])
        final_pattern.append('<var>')
        prev_end = end
    final_pattern.append(pattern[prev_end:])
    return ''.join(final_pattern)
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import inspect
|
import inspect
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
from django.apps import apps
|
from django.apps import apps
|
||||||
|
@ -8,6 +7,9 @@ from django.conf import settings
|
||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from django.contrib.admin.views.decorators import staff_member_required
|
from django.contrib.admin.views.decorators import staff_member_required
|
||||||
from django.contrib.admindocs import utils
|
from django.contrib.admindocs import utils
|
||||||
|
from django.contrib.admindocs.utils import (
|
||||||
|
replace_named_groups, replace_unnamed_groups,
|
||||||
|
)
|
||||||
from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist
|
from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.http import Http404
|
from django.http import Http404
|
||||||
|
@ -426,24 +428,16 @@ def extract_views_from_urlpatterns(urlpatterns, base='', namespace=None):
|
||||||
return views
|
return views
|
||||||
|
|
||||||
|
|
||||||
named_group_matcher = re.compile(r'\(\?P(<\w+>).+?\)')
|
|
||||||
non_named_group_matcher = re.compile(r'\(.*?\)')
|
|
||||||
|
|
||||||
|
|
||||||
def simplify_regex(pattern):
|
def simplify_regex(pattern):
|
||||||
r"""
|
r"""
|
||||||
Clean up urlpattern regexes into something more readable by humans. For
|
Clean up urlpattern regexes into something more readable by humans. For
|
||||||
example, turn "^(?P<sport_slug>\w+)/athletes/(?P<athlete_slug>\w+)/$"
|
example, turn "^(?P<sport_slug>\w+)/athletes/(?P<athlete_slug>\w+)/$"
|
||||||
into "/<sport_slug>/athletes/<athlete_slug>/".
|
into "/<sport_slug>/athletes/<athlete_slug>/".
|
||||||
"""
|
"""
|
||||||
# handle named groups first
|
pattern = replace_named_groups(pattern)
|
||||||
pattern = named_group_matcher.sub(lambda m: m.group(1), pattern)
|
pattern = replace_unnamed_groups(pattern)
|
||||||
|
|
||||||
# handle non-named groups
|
|
||||||
pattern = non_named_group_matcher.sub("<var>", pattern)
|
|
||||||
|
|
||||||
# clean up any outstanding regex-y characters.
|
# clean up any outstanding regex-y characters.
|
||||||
pattern = pattern.replace('^', '').replace('$', '').replace('?', '').replace('//', '/').replace('\\', '')
|
pattern = pattern.replace('^', '').replace('$', '')
|
||||||
if not pattern.startswith('/'):
|
if not pattern.startswith('/'):
|
||||||
pattern = '/' + pattern
|
pattern = '/' + pattern
|
||||||
return pattern
|
return pattern
|
||||||
|
|
|
@ -335,6 +335,11 @@ class AdminDocViewFunctionsTests(SimpleTestCase):
|
||||||
(r'^a', '/a'),
|
(r'^a', '/a'),
|
||||||
(r'^(?P<a>\w+)/b/(?P<c>\w+)/$', '/<a>/b/<c>/'),
|
(r'^(?P<a>\w+)/b/(?P<c>\w+)/$', '/<a>/b/<c>/'),
|
||||||
(r'^(?P<a>\w+)/b/(?P<c>\w+)$', '/<a>/b/<c>'),
|
(r'^(?P<a>\w+)/b/(?P<c>\w+)$', '/<a>/b/<c>'),
|
||||||
|
(r'^(?P<a>\w+)/b/(\w+)$', '/<a>/b/<var>'),
|
||||||
|
(r'^(?P<a>\w+)/b/((x|y)\w+)$', '/<a>/b/<var>'),
|
||||||
|
(r'^(?P<a>(x|y))/b/(?P<c>\w+)$', '/<a>/b/<c>'),
|
||||||
|
(r'^(?P<a>(x|y))/b/(?P<c>\w+)ab', '/<a>/b/<c>ab'),
|
||||||
|
(r'^(?P<a>(x|y)(\(|\)))/b/(?P<c>\w+)ab', '/<a>/b/<c>ab'),
|
||||||
)
|
)
|
||||||
for pattern, output in tests:
|
for pattern, output in tests:
|
||||||
self.assertEqual(simplify_regex(pattern), output)
|
self.assertEqual(simplify_regex(pattern), output)
|
||||||
|
|
Loading…
Reference in New Issue