From f0ef0c49e9284f262fbc63e8a497699ca4a248fe Mon Sep 17 00:00:00 2001
From: Ketan Bhatt
Date: Mon, 30 May 2016 20:44:00 +0530
Subject: [PATCH] Fixed #26621 -- Corrected simplify_regex()'s handling of named
 capture groups.

---
 django/contrib/admindocs/utils.py | 91 +++++++++++++++++++++++++++++++
 django/contrib/admindocs/views.py | 18 ++----
 tests/admin_docs/test_views.py    |  5 ++
 3 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/django/contrib/admindocs/utils.py b/django/contrib/admindocs/utils.py
index 1d6e700fc90..b6a23c88494 100644
--- a/django/contrib/admindocs/utils.py
+++ b/django/contrib/admindocs/utils.py
@@ -144,3 +144,94 @@ if docutils_is_available:
 
     for name, urlbase in ROLES.items():
         create_reference_role(name, urlbase)
+
+# Match the beginning of a named or unnamed group.
+named_group_matcher = re.compile(r'\(\?P(<\w+>)')
+unnamed_group_matcher = re.compile(r'\(')
+
+
+def replace_named_groups(pattern):
+    r"""
+    Find named groups in `pattern` and replace them with the group name. E.g.,
+    1. ^(?P<a>\w+)/b/(\w+)$ ==> ^<a>/b/(\w+)$
+    2. ^(?P<a>\w+)/b/(?P<c>\w+)/$ ==> ^<a>/b/<c>/$
+    """
+    named_group_indices = [
+        (m.start(0), m.end(0), m.group(1))
+        for m in named_group_matcher.finditer(pattern)
+    ]
+    # Tuples of (named capture group pattern, group name).
+    group_pattern_and_name = []
+    # Loop over the groups and their start and end indices.
+    for start, end, group_name in named_group_indices:
+        # Handle nested parentheses, e.g. '^(?P<a>(x|y))/b'.
+        unmatched_open_brackets, prev_char = 1, None
+        for idx, val in enumerate(list(pattern[end:])):
+            # If brackets are balanced, the end of the string for the current
+            # named capture group pattern has been reached.
+            if unmatched_open_brackets == 0:
+                group_pattern_and_name.append((pattern[start:end + idx], group_name))
+                break
+
+            # Check for unescaped `(` and `)`. They mark the start and end of a
+            # nested group.
+            if val == '(' and prev_char != '\\':
+                unmatched_open_brackets += 1
+            elif val == ')' and prev_char != '\\':
+                unmatched_open_brackets -= 1
+            prev_char = val
+
+    # Replace the string for named capture groups with their group names.
+    for group_pattern, group_name in group_pattern_and_name:
+        pattern = pattern.replace(group_pattern, group_name)
+    return pattern
+
+
+def replace_unnamed_groups(pattern):
+    r"""
+    Find unnamed groups in `pattern` and replace them with '<var>'. E.g.,
+    1. ^(?P<a>\w+)/b/(\w+)$ ==> ^(?P<a>\w+)/b/<var>$
+    2. ^(?P<a>\w+)/b/((x|y)\w+)$ ==> ^(?P<a>\w+)/b/<var>$
+    """
+    unnamed_group_indices = [m.start(0) for m in unnamed_group_matcher.finditer(pattern)]
+    # Indices of the start of unnamed capture groups.
+    group_indices = []
+    # Loop over the start indices of the groups.
+    for start in unnamed_group_indices:
+        # Handle nested parentheses, e.g. '^b/((x|y)\w+)$'.
+        unmatched_open_brackets, prev_char = 1, None
+        for idx, val in enumerate(list(pattern[start + 1:])):
+            if unmatched_open_brackets == 0:
+                group_indices.append((start, start + 1 + idx))
+                break
+
+            # Check for unescaped `(` and `)`. They mark the start and end of
+            # a nested group.
+            if val == '(' and prev_char != '\\':
+                unmatched_open_brackets += 1
+            elif val == ')' and prev_char != '\\':
+                unmatched_open_brackets -= 1
+            prev_char = val
+
+    # Remove unnamed group matches inside other unnamed capture groups.
+    group_start_end_indices = []
+    prev_end = None
+    for start, end in group_indices:
+        if prev_end and start > prev_end or not prev_end:
+            group_start_end_indices.append((start, end))
+        prev_end = end
+
+    if group_start_end_indices:
+        # Replace unnamed groups with <var>. Handle the fact that replacing the
+        # string between indices will change string length and thus indices
+        # will point to the wrong substring if not corrected.
+        final_pattern, prev_end = [], None
+        for start, end in group_start_end_indices:
+            if prev_end:
+                final_pattern.append(pattern[prev_end:start])
+            final_pattern.append(pattern[:start] + '<var>')
+            prev_end = end
+        final_pattern.append(pattern[prev_end:])
+        return ''.join(final_pattern)
+    else:
+        return pattern
diff --git a/django/contrib/admindocs/views.py b/django/contrib/admindocs/views.py
index 7e894b51fc2..3488e9fb3aa 100644
--- a/django/contrib/admindocs/views.py
+++ b/django/contrib/admindocs/views.py
@@ -1,6 +1,5 @@
 import inspect
 import os
-import re
 from importlib import import_module
 
 from django.apps import apps
@@ -8,6 +7,9 @@ from django.conf import settings
 from django.contrib import admin
 from django.contrib.admin.views.decorators import staff_member_required
 from django.contrib.admindocs import utils
+from django.contrib.admindocs.utils import (
+    replace_named_groups, replace_unnamed_groups,
+)
 from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist
 from django.db import models
 from django.http import Http404
@@ -426,24 +428,16 @@ def extract_views_from_urlpatterns(urlpatterns, base='', namespace=None):
     return views
 
 
-named_group_matcher = re.compile(r'\(\?P(<\w+>).+?\)')
-non_named_group_matcher = re.compile(r'\(.*?\)')
-
-
 def simplify_regex(pattern):
     r"""
     Clean up urlpattern regexes into something more readable by humans. For
     example, turn "^(?P<sport_slug>\w+)/athletes/(?P<athlete_slug>\w+)/$"
     into "/<sport_slug>/athletes/<athlete_slug>/".
     """
-    # handle named groups first
-    pattern = named_group_matcher.sub(lambda m: m.group(1), pattern)
-
-    # handle non-named groups
-    pattern = non_named_group_matcher.sub("<var>", pattern)
-
+    pattern = replace_named_groups(pattern)
+    pattern = replace_unnamed_groups(pattern)
     # clean up any outstanding regex-y characters.
-    pattern = pattern.replace('^', '').replace('$', '').replace('?', '').replace('//', '/').replace('\\', '')
+    pattern = pattern.replace('^', '').replace('$', '')
     if not pattern.startswith('/'):
         pattern = '/' + pattern
     return pattern
diff --git a/tests/admin_docs/test_views.py b/tests/admin_docs/test_views.py
index a5134d76277..b48147fc852 100644
--- a/tests/admin_docs/test_views.py
+++ b/tests/admin_docs/test_views.py
@@ -335,6 +335,11 @@ class AdminDocViewFunctionsTests(SimpleTestCase):
             (r'^a', '/a'),
             (r'^(?P<a>\w+)/b/(?P<c>\w+)/$', '/<a>/b/<c>/'),
             (r'^(?P<a>\w+)/b/(?P<c>\w+)$', '/<a>/b/<c>'),
+            (r'^(?P<a>\w+)/b/(\w+)$', '/<a>/b/<var>'),
+            (r'^(?P<a>\w+)/b/((x|y)\w+)$', '/<a>/b/<var>'),
+            (r'^(?P<a>(x|y))/b/(?P<c>\w+)$', '/<a>/b/<c>'),
+            (r'^(?P<a>(x|y))/b/(?P<c>\w+)ab', '/<a>/b/<c>ab'),
+            (r'^(?P<a>(x|y)(\(|\)))/b/(?P<c>\w+)ab', '/<a>/b/<c>ab'),
         )
         for pattern, output in tests:
             self.assertEqual(simplify_regex(pattern), output)