From e3d0b4d5501c6d0bc39f035e4345e5bdfde12e41 Mon Sep 17 00:00:00 2001 From: Hasan Ramezani Date: Sat, 26 Oct 2019 16:42:32 +0200 Subject: [PATCH] Fixed #30899 -- Lazily compiled import time regular expressions. --- django/contrib/admin/utils.py | 4 ++-- django/contrib/admindocs/utils.py | 5 +++-- django/contrib/gis/db/models/lookups.py | 5 ++--- django/contrib/gis/gdal/libgdal.py | 4 ++-- django/contrib/gis/geometry.py | 20 +++++++++++-------- django/core/handlers/wsgi.py | 4 ++-- .../core/management/commands/makemessages.py | 3 ++- django/core/management/commands/runserver.py | 3 ++- django/core/signing.py | 4 ++-- django/db/backends/mysql/base.py | 5 ++--- django/db/backends/oracle/operations.py | 4 ++-- django/db/backends/sqlite3/base.py | 3 ++- django/db/backends/sqlite3/introspection.py | 3 ++- django/db/models/sql/constants.py | 4 ++-- django/forms/fields.py | 3 ++- django/forms/widgets.py | 4 ++-- django/http/request.py | 4 ++-- django/http/response.py | 3 ++- django/middleware/gzip.py | 5 ++--- django/template/base.py | 7 ++++--- django/test/client.py | 6 +++--- django/test/html.py | 5 +++-- django/urls/resolvers.py | 4 ++-- django/utils/cache.py | 4 ++-- django/utils/dateformat.py | 6 +++--- django/utils/dateparse.py | 14 ++++++------- django/utils/datetime_safe.py | 5 +++-- django/utils/html.py | 12 +++++++---- django/utils/http.py | 11 +++++----- django/utils/text.py | 15 +++++++------- django/utils/translation/__init__.py | 4 ++-- django/utils/translation/template.py | 16 +++++++-------- django/utils/translation/trans_real.py | 7 ++++--- django/views/debug.py | 3 ++- 34 files changed, 114 insertions(+), 95 deletions(-) diff --git a/django/contrib/admin/utils.py b/django/contrib/admin/utils.py index 14ae00a7de9..d3c62f6b0ec 100644 --- a/django/contrib/admin/utils.py +++ b/django/contrib/admin/utils.py @@ -1,6 +1,5 @@ import datetime import decimal -import re from collections import defaultdict from django.core.exceptions import FieldDoesNotExist @@ -11,12 +10,13 @@ from django.forms.utils import pretty_name from django.urls import NoReverseMatch, reverse from django.utils import formats, timezone from django.utils.html import format_html +from django.utils.regex_helper import _lazy_re_compile from django.utils.text import capfirst from django.utils.translation import ngettext, override as translation_override QUOTE_MAP = {i: '_%02X' % i for i in b'":/_#?;@&=+$,"[]<>%\n\\'} UNQUOTE_MAP = {v: chr(k) for k, v in QUOTE_MAP.items()} -UNQUOTE_RE = re.compile('_(?:%s)' % '|'.join([x[1:] for x in UNQUOTE_MAP])) +UNQUOTE_RE = _lazy_re_compile('_(?:%s)' % '|'.join([x[1:] for x in UNQUOTE_MAP])) class FieldIsAForeignKeyColumnName(Exception): diff --git a/django/contrib/admindocs/utils.py b/django/contrib/admindocs/utils.py index a3155e5c8cc..4c0e7e2a56e 100644 --- a/django/contrib/admindocs/utils.py +++ b/django/contrib/admindocs/utils.py @@ -5,6 +5,7 @@ from email.errors import HeaderParseError from email.parser import HeaderParser from django.urls import reverse +from django.utils.regex_helper import _lazy_re_compile from django.utils.safestring import mark_safe try: @@ -146,8 +147,8 @@ if docutils_is_available: create_reference_role(name, urlbase) # Match the beginning of a named or unnamed group. -named_group_matcher = re.compile(r'\(\?P(<\w+>)') -unnamed_group_matcher = re.compile(r'\(') +named_group_matcher = _lazy_re_compile(r'\(\?P(<\w+>)') +unnamed_group_matcher = _lazy_re_compile(r'\(') def replace_named_groups(pattern): diff --git a/django/contrib/gis/db/models/lookups.py b/django/contrib/gis/db/models/lookups.py index f0f5e14b435..f2af05d9c14 100644 --- a/django/contrib/gis/db/models/lookups.py +++ b/django/contrib/gis/db/models/lookups.py @@ -1,11 +1,10 @@ -import re - from django.contrib.gis.db.models.fields import BaseSpatialField from django.contrib.gis.measure import Distance from django.db import NotSupportedError from django.db.models.expressions import Expression from django.db.models.lookups import Lookup, Transform from django.db.models.sql.query import Query +from django.utils.regex_helper import _lazy_re_compile class RasterBandTransform(Transform): @@ -253,7 +252,7 @@ class OverlapsLookup(GISLookup): class RelateLookup(GISLookup): lookup_name = 'relate' sql_template = '%(func)s(%(lhs)s, %(rhs)s, %%s)' - pattern_regex = re.compile(r'^[012TF\*]{9}$') + pattern_regex = _lazy_re_compile(r'^[012TF\*]{9}$') def process_rhs(self, compiler, connection): # Check the pattern argument diff --git a/django/contrib/gis/gdal/libgdal.py b/django/contrib/gis/gdal/libgdal.py index 12350ad6747..5bdb362db7e 100644 --- a/django/contrib/gis/gdal/libgdal.py +++ b/django/contrib/gis/gdal/libgdal.py @@ -1,11 +1,11 @@ import logging import os -import re from ctypes import CDLL, CFUNCTYPE, c_char_p, c_int from ctypes.util import find_library from django.contrib.gis.gdal.error import GDALException from django.core.exceptions import ImproperlyConfigured +from django.utils.regex_helper import _lazy_re_compile logger = logging.getLogger('django.contrib.gis') @@ -83,7 +83,7 @@ def gdal_full_version(): return _version_info('') -version_regex = re.compile(r'^(?P\d+)\.(?P\d+)(\.(?P\d+))?') +version_regex = _lazy_re_compile(r'^(?P\d+)\.(?P\d+)(\.(?P\d+))?') def gdal_version_info(): diff --git a/django/contrib/gis/geometry.py b/django/contrib/gis/geometry.py index 6727c7d9420..815d8257d71 100644 --- a/django/contrib/gis/geometry.py +++ b/django/contrib/gis/geometry.py @@ -1,13 +1,17 @@ import re +from django.utils.regex_helper import _lazy_re_compile + # Regular expression for recognizing HEXEWKB and WKT. A prophylactic measure # to prevent potentially malicious input from reaching the underlying C # library. Not a substitute for good Web security programming practices. -hex_regex = re.compile(r'^[0-9A-F]+$', re.I) -wkt_regex = re.compile(r'^(SRID=(?P\-?\d+);)?' - r'(?P' - r'(?PPOINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|' - r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)' - r'[ACEGIMLONPSRUTYZ\d,\.\-\+\(\) ]+)$', - re.I) -json_regex = re.compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL) +hex_regex = _lazy_re_compile(r'^[0-9A-F]+$', re.I) +wkt_regex = _lazy_re_compile( + r'^(SRID=(?P\-?\d+);)?' + r'(?P' + r'(?PPOINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|' + r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)' + r'[ACEGIMLONPSRUTYZ\d,\.\-\+\(\) ]+)$', + re.I +) +json_regex = _lazy_re_compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL) diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index cb740e5c505..f8e7a57dc8a 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -1,4 +1,3 @@ -import re from io import BytesIO from django.conf import settings @@ -8,8 +7,9 @@ from django.http import HttpRequest, QueryDict, parse_cookie from django.urls import set_script_prefix from django.utils.encoding import repercent_broken_unicode from django.utils.functional import cached_property +from django.utils.regex_helper import _lazy_re_compile -_slashes_re = re.compile(br'/+') +_slashes_re = _lazy_re_compile(br'/+') class LimitedStream: diff --git a/django/core/management/commands/makemessages.py b/django/core/management/commands/makemessages.py index 69d7c41a2b7..5cbecb0bbbe 100644 --- a/django/core/management/commands/makemessages.py +++ b/django/core/management/commands/makemessages.py @@ -16,10 +16,11 @@ from django.core.management.utils import ( from django.utils.encoding import DEFAULT_LOCALE_ENCODING from django.utils.functional import cached_property from django.utils.jslex import prepare_js_for_gettext +from django.utils.regex_helper import _lazy_re_compile from django.utils.text import get_text_list from django.utils.translation import templatize -plural_forms_re = re.compile(r'^(?P"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL) +plural_forms_re = _lazy_re_compile(r'^(?P"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL) STATUS_OK = 0 NO_LOCALE_DIR = object() diff --git a/django/core/management/commands/runserver.py b/django/core/management/commands/runserver.py index c195ff1398d..c2a8c325f13 100644 --- a/django/core/management/commands/runserver.py +++ b/django/core/management/commands/runserver.py @@ -11,8 +11,9 @@ from django.core.servers.basehttp import ( WSGIServer, get_internal_wsgi_application, run, ) from django.utils import autoreload +from django.utils.regex_helper import _lazy_re_compile -naiveip_re = re.compile(r"""^(?: +naiveip_re = _lazy_re_compile(r"""^(?: (?P (?P\d{1,3}(?:\.\d{1,3}){3}) | # IPv4 address (?P\[[a-fA-F0-9:]+\]) | # IPv6 address diff --git a/django/core/signing.py b/django/core/signing.py index 1e928385963..575fef28351 100644 --- a/django/core/signing.py +++ b/django/core/signing.py @@ -36,7 +36,6 @@ These functions make use of all of them. import base64 import datetime import json -import re import time import zlib @@ -45,8 +44,9 @@ from django.utils import baseconv from django.utils.crypto import constant_time_compare, salted_hmac from django.utils.encoding import force_bytes from django.utils.module_loading import import_string +from django.utils.regex_helper import _lazy_re_compile -_SEP_UNSAFE = re.compile(r'^[A-z0-9-_=]*$') +_SEP_UNSAFE = _lazy_re_compile(r'^[A-z0-9-_=]*$') class BadSignature(Exception): diff --git a/django/db/backends/mysql/base.py b/django/db/backends/mysql/base.py index d60ff7937d0..74c98a091c8 100644 --- a/django/db/backends/mysql/base.py +++ b/django/db/backends/mysql/base.py @@ -3,14 +3,13 @@ MySQL database backend for Django. Requires mysqlclient: https://pypi.org/project/mysqlclient/ """ -import re - from django.core.exceptions import ImproperlyConfigured from django.db import utils from django.db.backends import utils as backend_utils from django.db.backends.base.base import BaseDatabaseWrapper from django.utils.asyncio import async_unsafe from django.utils.functional import cached_property +from django.utils.regex_helper import _lazy_re_compile try: import MySQLdb as Database @@ -47,7 +46,7 @@ django_conversions = { # This should match the numerical portion of the version numbers (we can treat # versions like 5.0.24 and 5.0.24a as the same). -server_version_re = re.compile(r'(\d{1,2})\.(\d{1,2})\.(\d{1,2})') +server_version_re = _lazy_re_compile(r'(\d{1,2})\.(\d{1,2})\.(\d{1,2})') class CursorWrapper: diff --git a/django/db/backends/oracle/operations.py b/django/db/backends/oracle/operations.py index 4a6c71e1498..59f55dccd58 100644 --- a/django/db/backends/oracle/operations.py +++ b/django/db/backends/oracle/operations.py @@ -1,5 +1,4 @@ import datetime -import re import uuid from functools import lru_cache @@ -12,6 +11,7 @@ from django.db.utils import DatabaseError from django.utils import timezone from django.utils.encoding import force_bytes, force_str from django.utils.functional import cached_property +from django.utils.regex_helper import _lazy_re_compile from .base import Database from .utils import BulkInsertMapper, InsertVar, Oracle_datetime @@ -102,7 +102,7 @@ END; # if the time zone name is passed in parameter. Use interpolation instead. # https://groups.google.com/forum/#!msg/django-developers/zwQju7hbG78/9l934yelwfsJ # This regexp matches all time zone names from the zoneinfo database. - _tzname_re = re.compile(r'^[\w/:+-]+$') + _tzname_re = _lazy_re_compile(r'^[\w/:+-]+$') def _prepare_tzname_delta(self, tzname): if '+' in tzname: diff --git a/django/db/backends/sqlite3/base.py b/django/db/backends/sqlite3/base.py index f959f921f17..45a22f5a364 100644 --- a/django/db/backends/sqlite3/base.py +++ b/django/db/backends/sqlite3/base.py @@ -23,6 +23,7 @@ from django.utils import timezone from django.utils.asyncio import async_unsafe from django.utils.dateparse import parse_datetime, parse_time from django.utils.duration import duration_microseconds +from django.utils.regex_helper import _lazy_re_compile from .client import DatabaseClient # isort:skip from .creation import DatabaseCreation # isort:skip @@ -380,7 +381,7 @@ class DatabaseWrapper(BaseDatabaseWrapper): return self.creation.is_in_memory_db(self.settings_dict['NAME']) -FORMAT_QMARK_REGEX = re.compile(r'(?[^\s;]+)', re.I) +_charset_from_content_type_re = _lazy_re_compile(r';\s*charset=(?P[^\s;]+)', re.I) class BadHeaderError(ValueError): diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py index 0c3905c9596..14346c5b121 100644 --- a/django/middleware/gzip.py +++ b/django/middleware/gzip.py @@ -1,10 +1,9 @@ -import re - from django.utils.cache import patch_vary_headers from django.utils.deprecation import MiddlewareMixin +from django.utils.regex_helper import _lazy_re_compile from django.utils.text import compress_sequence, compress_string -re_accepts_gzip = re.compile(r'\bgzip\b') +re_accepts_gzip = _lazy_re_compile(r'\bgzip\b') class GZipMiddleware(MiddlewareMixin): diff --git a/django/template/base.py b/django/template/base.py index 7ee23f7ebf9..cba047e4deb 100644 --- a/django/template/base.py +++ b/django/template/base.py @@ -58,6 +58,7 @@ from inspect import getcallargs, getfullargspec, unwrap from django.template.context import BaseContext from django.utils.formats import localize from django.utils.html import conditional_escape, escape +from django.utils.regex_helper import _lazy_re_compile from django.utils.safestring import SafeData, mark_safe from django.utils.text import ( get_text_list, smart_split, unescape_string_literal, @@ -87,7 +88,7 @@ UNKNOWN_SOURCE = '' # match a variable or block tag and capture the entire tag, including start/end # delimiters -tag_re = (re.compile('(%s.*?%s|%s.*?%s|%s.*?%s)' % +tag_re = (_lazy_re_compile('(%s.*?%s|%s.*?%s|%s.*?%s)' % (re.escape(BLOCK_TAG_START), re.escape(BLOCK_TAG_END), re.escape(VARIABLE_TAG_START), re.escape(VARIABLE_TAG_END), re.escape(COMMENT_TAG_START), re.escape(COMMENT_TAG_END)))) @@ -603,7 +604,7 @@ filter_raw_string = r""" 'arg_sep': re.escape(FILTER_ARGUMENT_SEPARATOR), } -filter_re = re.compile(filter_raw_string, re.VERBOSE) +filter_re = _lazy_re_compile(filter_raw_string, re.VERBOSE) class FilterExpression: @@ -993,7 +994,7 @@ class VariableNode(Node): # Regex for token keyword arguments -kwarg_re = re.compile(r"(?:(\w+)=)?(.+)") +kwarg_re = _lazy_re_compile(r"(?:(\w+)=)?(.+)") def token_kwargs(bits, parser, support_legacy=False): diff --git a/django/test/client.py b/django/test/client.py index 98ede36499c..0a683f50262 100644 --- a/django/test/client.py +++ b/django/test/client.py @@ -1,7 +1,6 @@ import json import mimetypes import os -import re import sys from copy import copy from functools import partial @@ -26,15 +25,16 @@ from django.utils.encoding import force_bytes from django.utils.functional import SimpleLazyObject from django.utils.http import urlencode from django.utils.itercompat import is_iterable +from django.utils.regex_helper import _lazy_re_compile __all__ = ('Client', 'RedirectCycleError', 'RequestFactory', 'encode_file', 'encode_multipart') BOUNDARY = 'BoUnDaRyStRiNg' MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY -CONTENT_TYPE_RE = re.compile(r'.*; charset=([\w\d-]+);?') +CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w\d-]+);?') # Structured suffix spec: https://tools.ietf.org/html/rfc6838#section-4.2.8 -JSON_CONTENT_TYPE_RE = re.compile(r'^application\/(.+\+)?json') +JSON_CONTENT_TYPE_RE = _lazy_re_compile(r'^application\/(.+\+)?json') class RedirectCycleError(Exception): diff --git a/django/test/html.py b/django/test/html.py index 511c08bb264..36b44b04664 100644 --- a/django/test/html.py +++ b/django/test/html.py @@ -1,12 +1,13 @@ """Compare two HTML documents.""" -import re from html.parser import HTMLParser +from django.utils.regex_helper import _lazy_re_compile + # ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 # SPACE. # https://infra.spec.whatwg.org/#ascii-whitespace -ASCII_WHITESPACE = re.compile(r'[\t\n\f\r ]+') +ASCII_WHITESPACE = _lazy_re_compile(r'[\t\n\f\r ]+') def normalize_whitespace(string): diff --git a/django/urls/resolvers.py b/django/urls/resolvers.py index 2ff8b2c775a..2d4610aac41 100644 --- a/django/urls/resolvers.py +++ b/django/urls/resolvers.py @@ -21,7 +21,7 @@ from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist from django.utils.datastructures import MultiValueDict from django.utils.functional import cached_property from django.utils.http import RFC3986_SUBDELIMS, escape_leading_slashes -from django.utils.regex_helper import normalize +from django.utils.regex_helper import _lazy_re_compile, normalize from django.utils.translation import get_language from .converters import get_converter @@ -195,7 +195,7 @@ class RegexPattern(CheckURLMixin): return str(self._regex) -_PATH_PARAMETER_COMPONENT_RE = re.compile( +_PATH_PARAMETER_COMPONENT_RE = _lazy_re_compile( r'<(?:(?P[^>:]+):)?(?P\w+)>' ) diff --git a/django/utils/cache.py b/django/utils/cache.py index 14e8256b949..df9c4c755aa 100644 --- a/django/utils/cache.py +++ b/django/utils/cache.py @@ -17,7 +17,6 @@ An example: i18n middleware would need to distinguish caches by the "Accept-language" header. """ import hashlib -import re import time from collections import defaultdict @@ -29,10 +28,11 @@ from django.utils.http import ( http_date, parse_etags, parse_http_date_safe, quote_etag, ) from django.utils.log import log_response +from django.utils.regex_helper import _lazy_re_compile from django.utils.timezone import get_current_timezone_name from django.utils.translation import get_language -cc_delim_re = re.compile(r'\s*,\s*') +cc_delim_re = _lazy_re_compile(r'\s*,\s*') def patch_cache_control(response, **kwargs): diff --git a/django/utils/dateformat.py b/django/utils/dateformat.py index d3f586aacf0..836b40a70a2 100644 --- a/django/utils/dateformat.py +++ b/django/utils/dateformat.py @@ -12,17 +12,17 @@ Usage: """ import calendar import datetime -import re import time from django.utils.dates import ( MONTHS, MONTHS_3, MONTHS_ALT, MONTHS_AP, WEEKDAYS, WEEKDAYS_ABBR, ) +from django.utils.regex_helper import _lazy_re_compile from django.utils.timezone import get_default_timezone, is_aware, is_naive from django.utils.translation import gettext as _ -re_formatchars = re.compile(r'(?\d{4})-(?P\d{1,2})-(?P\d{1,2})$' ) -time_re = re.compile( +time_re = _lazy_re_compile( r'(?P\d{1,2}):(?P\d{1,2})' r'(?::(?P\d{1,2})(?:\.(?P\d{1,6})\d{0,6})?)?' ) -datetime_re = re.compile( +datetime_re = _lazy_re_compile( r'(?P\d{4})-(?P\d{1,2})-(?P\d{1,2})' r'[T ](?P\d{1,2}):(?P\d{1,2})' r'(?::(?P\d{1,2})(?:\.(?P\d{1,6})\d{0,6})?)?' r'(?PZ|[+-]\d{2}(?::?\d{2})?)?$' ) -standard_duration_re = re.compile( +standard_duration_re = _lazy_re_compile( r'^' r'(?:(?P-?\d+) (days?, )?)?' r'(?P-?)' @@ -39,7 +39,7 @@ standard_duration_re = re.compile( # Support the sections of ISO 8601 date representation that are accepted by # timedelta -iso8601_duration_re = re.compile( +iso8601_duration_re = _lazy_re_compile( r'^(?P[-+]?)' r'P' r'(?:(?P\d+(.\d+)?)D)?' @@ -54,7 +54,7 @@ iso8601_duration_re = re.compile( # Support PostgreSQL's day-time interval format, e.g. "3 days 04:05:06". The # year-month and mixed intervals cannot be converted to a timedelta and thus # aren't accepted. -postgres_interval_re = re.compile( +postgres_interval_re = _lazy_re_compile( r'^' r'(?:(?P-?\d+) (days? ?))?' r'(?:(?P[-+])?' diff --git a/django/utils/datetime_safe.py b/django/utils/datetime_safe.py index 7eaa5c21ce7..ade2dca6101 100644 --- a/django/utils/datetime_safe.py +++ b/django/utils/datetime_safe.py @@ -7,12 +7,13 @@ # >>> datetime_safe.date(10, 8, 2).strftime("%Y/%m/%d was a %A") # '0010/08/02 was a Monday' -import re import time as ttime from datetime import ( date as real_date, datetime as real_datetime, time as real_time, ) +from django.utils.regex_helper import _lazy_re_compile + class date(real_date): def strftime(self, fmt): @@ -54,7 +55,7 @@ def new_datetime(d): # This library does not support strftime's "%s" or "%y" format strings. # Allowed if there's an even number of "%"s because they are escaped. -_illegal_formatting = re.compile(r"((^|[^%])(%%)*%[sy])") +_illegal_formatting = _lazy_re_compile(r"((^|[^%])(%%)*%[sy])") def _findall(text, substr): diff --git a/django/utils/html.py b/django/utils/html.py index 94aa0ff35e9..2b8f2a8c896 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -11,6 +11,7 @@ from urllib.parse import ( from django.utils.encoding import punycode from django.utils.functional import Promise, keep_lazy, keep_lazy_text from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS +from django.utils.regex_helper import _lazy_re_compile from django.utils.safestring import SafeData, SafeString, mark_safe from django.utils.text import normalize_newlines @@ -21,10 +22,13 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('[', ']')] # List of possible strings used for bullets in bulleted lists. DOTS = ['·', '*', '\u2022', '•', '•', '•'] -unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)') -word_split_re = re.compile(r'''([\s<>"']+)''') -simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) -simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', re.IGNORECASE) +unencoded_ampersands_re = _lazy_re_compile(r'&(?!(\w+|#\d+);)') +word_split_re = _lazy_re_compile(r'''([\s<>"']+)''') +simple_url_re = _lazy_re_compile(r'^https?://\[?\w', re.IGNORECASE) +simple_url_2_re = _lazy_re_compile( + r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', + re.IGNORECASE +) @keep_lazy(str, SafeString) diff --git a/django/utils/http.py b/django/utils/http.py index ff2f08ac1e8..709ce60e1f3 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -16,9 +16,10 @@ from django.core.exceptions import TooManyFieldsSent from django.utils.datastructures import MultiValueDict from django.utils.deprecation import RemovedInDjango40Warning from django.utils.functional import keep_lazy_text +from django.utils.regex_helper import _lazy_re_compile # based on RFC 7232, Appendix C -ETAG_MATCH = re.compile(r''' +ETAG_MATCH = _lazy_re_compile(r''' \A( # start of string and capture group (?:W/)? # optional weak indicator " # opening quote @@ -34,14 +35,14 @@ __M = r'(?P\w{3})' __Y = r'(?P\d{4})' __Y2 = r'(?P\d{2})' __T = r'(?P\d{2}):(?P\d{2}):(?P\d{2})' -RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) -RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) -ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) +RFC1123_DATE = _lazy_re_compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) +RFC850_DATE = _lazy_re_compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) +ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) RFC3986_GENDELIMS = ":/?#[]@" RFC3986_SUBDELIMS = "!$&'()*+,;=" -FIELDS_MATCH = re.compile('[&;]') +FIELDS_MATCH = _lazy_re_compile('[&;]') @keep_lazy_text diff --git a/django/utils/text.py b/django/utils/text.py index 03e2d05177c..5e1409116ed 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -7,6 +7,7 @@ from io import BytesIO from django.utils.deprecation import RemovedInDjango40Warning from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy +from django.utils.regex_helper import _lazy_re_compile from django.utils.translation import gettext as _, gettext_lazy, pgettext @@ -17,11 +18,11 @@ def capfirst(x): # Set up regular expressions -re_words = re.compile(r'<[^>]+?>|([^<>\s]+)', re.S) -re_chars = re.compile(r'<[^>]+?>|(.)', re.S) -re_tag = re.compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S) -re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines -re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))') +re_words = _lazy_re_compile(r'<[^>]+?>|([^<>\s]+)', re.S) +re_chars = _lazy_re_compile(r'<[^>]+?>|(.)', re.S) +re_tag = _lazy_re_compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S) +re_newlines = _lazy_re_compile(r'\r\n|\r') # Used in normalize_newlines +re_camel_case = _lazy_re_compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))') @keep_lazy_text @@ -306,7 +307,7 @@ def compress_sequence(sequence): # Expression to match some_token and some_token="with spaces" (and similarly # for single-quoted strings). -smart_split_re = re.compile(r""" +smart_split_re = _lazy_re_compile(r""" ((?: [^\s'"]* (?: @@ -355,7 +356,7 @@ def _replace_entity(match): return match.group(0) -_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") +_entity_re = _lazy_re_compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") @keep_lazy_text diff --git a/django/utils/translation/__init__.py b/django/utils/translation/__init__.py index e48c7d245de..728286c78e1 100644 --- a/django/utils/translation/__init__.py +++ b/django/utils/translation/__init__.py @@ -1,7 +1,6 @@ """ Internationalization support. """ -import re import warnings from contextlib import ContextDecorator from decimal import ROUND_UP, Decimal @@ -9,6 +8,7 @@ from decimal import ROUND_UP, Decimal from django.utils.autoreload import autoreload_started, file_changed from django.utils.deprecation import RemovedInDjango40Warning from django.utils.functional import lazy +from django.utils.regex_helper import _lazy_re_compile __all__ = [ 'activate', 'deactivate', 'override', 'deactivate_all', @@ -328,7 +328,7 @@ def get_language_info(lang_code): return info -trim_whitespace_re = re.compile(r'\s*\n\s*') +trim_whitespace_re = _lazy_re_compile(r'\s*\n\s*') def trim_whitespace(s): diff --git a/django/utils/translation/template.py b/django/utils/translation/template.py index aa849b09376..979ae1ade6f 100644 --- a/django/utils/translation/template.py +++ b/django/utils/translation/template.py @@ -1,12 +1,12 @@ -import re import warnings from io import StringIO from django.template.base import TRANSLATOR_COMMENT_MARK, Lexer, TokenType +from django.utils.regex_helper import _lazy_re_compile from . import TranslatorCommentWarning, trim_whitespace -dot_re = re.compile(r'\S') +dot_re = _lazy_re_compile(r'\S') def blankout(src, char): @@ -17,8 +17,8 @@ def blankout(src, char): return dot_re.sub(char, src) -context_re = re.compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""") -inline_re = re.compile( +context_re = _lazy_re_compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""") +inline_re = _lazy_re_compile( # Match the trans 'some text' part r"""^\s*trans\s+((?:"[^"]*?")|(?:'[^']*?'))""" # Match and ignore optional filters @@ -26,10 +26,10 @@ inline_re = re.compile( # Match the optional context part r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*""" ) -block_re = re.compile(r"""^\s*blocktrans(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)""") -endblock_re = re.compile(r"""^\s*endblocktrans$""") -plural_re = re.compile(r"""^\s*plural$""") -constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?'))\)""") +block_re = _lazy_re_compile(r"""^\s*blocktrans(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)""") +endblock_re = _lazy_re_compile(r"""^\s*endblocktrans$""") +plural_re = _lazy_re_compile(r"""^\s*plural$""") +constant_re = _lazy_re_compile(r"""_\(((?:".*?")|(?:'.*?'))\)""") def templatize(src, origin=None): diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py index e089597ccb4..d8526753605 100644 --- a/django/utils/translation/trans_real.py +++ b/django/utils/translation/trans_real.py @@ -14,6 +14,7 @@ from django.conf.locale import LANG_INFO from django.core.exceptions import AppRegistryNotReady from django.core.signals import setting_changed from django.dispatch import receiver +from django.utils.regex_helper import _lazy_re_compile from django.utils.safestring import SafeData, mark_safe from . import to_language, to_locale @@ -31,18 +32,18 @@ CONTEXT_SEPARATOR = "\x04" # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9 # and RFC 3066, section 2.1 -accept_language_re = re.compile(r''' +accept_language_re = _lazy_re_compile(r''' ([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*" (?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8" (?:\s*,\s*|$) # Multiple accepts per header. ''', re.VERBOSE) -language_code_re = re.compile( +language_code_re = _lazy_re_compile( r'^[a-z]{1,8}(?:-[a-z0-9]{1,8})*(?:@[a-z0-9]{1,20})?$', re.IGNORECASE ) -language_code_prefix_re = re.compile(r'^/(\w+([@-]\w+)?)(/|$)') +language_code_prefix_re = _lazy_re_compile(r'^/(\w+([@-]\w+)?)(/|$)') @receiver(setting_changed) diff --git a/django/views/debug.py b/django/views/debug.py index 86da47ee205..98b29c2cc5d 100644 --- a/django/views/debug.py +++ b/django/views/debug.py @@ -13,6 +13,7 @@ from django.utils import timezone from django.utils.datastructures import MultiValueDict from django.utils.encoding import force_str from django.utils.module_loading import import_string +from django.utils.regex_helper import _lazy_re_compile from django.utils.version import get_docs_version # Minimal Django templates engine to render the error templates @@ -24,7 +25,7 @@ DEBUG_ENGINE = Engine( libraries={'i18n': 'django.templatetags.i18n'}, ) -HIDDEN_SETTINGS = re.compile('API|TOKEN|KEY|SECRET|PASS|SIGNATURE', flags=re.IGNORECASE) +HIDDEN_SETTINGS = _lazy_re_compile('API|TOKEN|KEY|SECRET|PASS|SIGNATURE', flags=re.IGNORECASE) CLEANSED_SUBSTITUTE = '********************'