Fixed #28628 -- Changed \d to [0-9] in regexes where appropriate.

This commit is contained in:
Ad Timmering 2021-11-26 10:44:54 +09:00 committed by Mariusz Felisiak
parent fe76944269
commit bdf3e156b4
10 changed files with 35 additions and 17 deletions

View File

@@ -7,11 +7,11 @@ from django.utils.regex_helper import _lazy_re_compile
# library. Not a substitute for good web security programming practices.
hex_regex = _lazy_re_compile(r'^[0-9A-F]+$', re.I)
wkt_regex = _lazy_re_compile(
r'^(SRID=(?P<srid>\-?\d+);)?'
r'^(SRID=(?P<srid>\-?[0-9]+);)?'
r'(?P<wkt>'
r'(?P<type>POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|'
r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)'
r'[ACEGIMLONPSRUTYZ\d,\.\-\+\(\) ]+)$',
r'[ACEGIMLONPSRUTYZ0-9,\.\-\+\(\) ]+)$',
re.I
)
json_regex = _lazy_re_compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL)

View File

@@ -64,7 +64,10 @@ class URLValidator(RegexValidator):
ul = '\u00a1-\uffff' # Unicode letters range (must not be a raw string).
# IP patterns
ipv4_re = r'(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)(?:\.(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)){3}'
ipv4_re = (
r'(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)'
r'(?:\.(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)){3}'
)
ipv6_re = r'\[[0-9a-f:.]+\]' # (simple regex, validated later)
# Host patterns
@@ -85,7 +88,7 @@ class URLValidator(RegexValidator):
r'^(?:[a-z0-9.+-]*)://' # scheme is validated separately
r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication
r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')'
r'(?::\d{1,5})?' # port
r'(?::[0-9]{1,5})?' # port
r'(?:[/?#][^\s]*)?' # resource path
r'\Z', re.IGNORECASE)
message = _('Enter a valid URL.')
@@ -128,7 +131,7 @@ class URLValidator(RegexValidator):
raise
else:
# Now verify IPv6 in the netloc part
host_match = re.search(r'^\[(.+)\](?::\d{1,5})?$', splitted_url.netloc)
host_match = re.search(r'^\[(.+)\](?::[0-9]{1,5})?$', splitted_url.netloc)
if host_match:
potential_ip = host_match[1]
try:

View File

@@ -23,7 +23,7 @@ from django.utils.regex_helper import _lazy_re_compile
from .multipartparser import parse_header
RAISE_ERROR = object()
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:[0-9]+)?$")
class UnreadablePostError(OSError):

View File

@@ -38,7 +38,7 @@ __all__ = (
BOUNDARY = 'BoUnDaRyStRiNg'
MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY
CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w\d-]+);?')
CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w-]+);?')
# Structured suffix spec: https://tools.ietf.org/html/rfc6838#section-4.2.8
JSON_CONTENT_TYPE_RE = _lazy_re_compile(r'^application\/(.+\+)?json')

View File

@@ -23,12 +23,12 @@ ETAG_MATCH = _lazy_re_compile(r'''
''', re.X)
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
__D = r'(?P<day>\d{2})'
__D2 = r'(?P<day>[ \d]\d)'
__D = r'(?P<day>[0-9]{2})'
__D2 = r'(?P<day>[ 0-9][0-9])'
__M = r'(?P<mon>\w{3})'
__Y = r'(?P<year>\d{4})'
__Y2 = r'(?P<year>\d{2})'
__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})'
__Y = r'(?P<year>[0-9]{4})'
__Y2 = r'(?P<year>[0-9]{2})'
__T = r'(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})'
RFC1123_DATE = _lazy_re_compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
RFC850_DATE = _lazy_re_compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))

View File

@@ -33,9 +33,9 @@ CONTEXT_SEPARATOR = "\x04"
# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9
# and RFC 3066, section 2.1
accept_language_re = _lazy_re_compile(r'''
([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*"
(?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8"
(?:\s*,\s*|$) # Multiple accepts per header.
([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*"
(?:\s*;\s*q=(0(?:\.[0-9]{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8"
(?:\s*,\s*|$) # Multiple accepts per header.
''', re.VERBOSE)
language_code_re = _lazy_re_compile(

View File

@@ -246,8 +246,8 @@ following URL patterns which optionally take a page argument::
from django.urls import re_path
urlpatterns = [
re_path(r'^blog/(page-(\d+)/)?$', blog_articles), # bad
re_path(r'^comments/(?:page-(?P<page_number>\d+)/)?$', comments), # good
re_path(r'^blog/(page-([0-9]+)/)?$', blog_articles), # bad
re_path(r'^comments/(?:page-(?P<page_number>[0-9]+)/)?$', comments), # good
]
Both patterns use nested arguments and will resolve: for example,

View File

@@ -31,6 +31,13 @@ class GEOSTest(SimpleTestCase, TestDataMixin):
if geom.hasz:
self.assertEqual(g.ewkt, geom.wkt)
def test_wkt_invalid(self):
msg = 'String input unrecognized as WKT EWKT, and HEXEWKB.'
with self.assertRaisesMessage(ValueError, msg):
fromstr('POINT(٠٠١ ٠)')
with self.assertRaisesMessage(ValueError, msg):
fromstr('SRID=٧٥٨٣;POINT(100 0)')
def test_hex(self):
"Testing HEX output."
for g in self.geometries.hex_wkt:

View File

@@ -1406,6 +1406,7 @@ class MiscTests(SimpleTestCase):
('12-345', []),
('', []),
('en;q=1e0', []),
('en-au;q=.', []),
]
for value, expected in tests:
with self.subTest(value=value):

View File

@@ -328,6 +328,13 @@ class HttpDateProcessingTests(unittest.TestCase):
datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc),
)
def test_parsing_asctime_nonascii_digits(self):
"""Non-ASCII unicode decimals raise an error."""
with self.assertRaises(ValueError):
parse_http_date('Sun Nov  6 08:49:37 １９９４')
with self.assertRaises(ValueError):
parse_http_date('Sun Nov １２ 08:49:37 1994')
def test_parsing_year_less_than_70(self):
parsed = parse_http_date('Sun Nov  6 08:49:37 0037')
self.assertEqual(