Fixed #28628 -- Changed \d to [0-9] in regexes where appropriate.

This commit is contained in:
Ad Timmering 2021-11-26 10:44:54 +09:00 committed by Mariusz Felisiak
parent fe76944269
commit bdf3e156b4
10 changed files with 35 additions and 17 deletions

View File

@ -7,11 +7,11 @@ from django.utils.regex_helper import _lazy_re_compile
# library. Not a substitute for good web security programming practices. # library. Not a substitute for good web security programming practices.
hex_regex = _lazy_re_compile(r'^[0-9A-F]+$', re.I) hex_regex = _lazy_re_compile(r'^[0-9A-F]+$', re.I)
wkt_regex = _lazy_re_compile( wkt_regex = _lazy_re_compile(
r'^(SRID=(?P<srid>\-?\d+);)?' r'^(SRID=(?P<srid>\-?[0-9]+);)?'
r'(?P<wkt>' r'(?P<wkt>'
r'(?P<type>POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|' r'(?P<type>POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|'
r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)' r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)'
r'[ACEGIMLONPSRUTYZ\d,\.\-\+\(\) ]+)$', r'[ACEGIMLONPSRUTYZ0-9,\.\-\+\(\) ]+)$',
re.I re.I
) )
json_regex = _lazy_re_compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL) json_regex = _lazy_re_compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL)

View File

@ -64,7 +64,10 @@ class URLValidator(RegexValidator):
ul = '\u00a1-\uffff' # Unicode letters range (must not be a raw string). ul = '\u00a1-\uffff' # Unicode letters range (must not be a raw string).
# IP patterns # IP patterns
ipv4_re = r'(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)(?:\.(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)){3}' ipv4_re = (
r'(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)'
r'(?:\.(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)){3}'
)
ipv6_re = r'\[[0-9a-f:.]+\]' # (simple regex, validated later) ipv6_re = r'\[[0-9a-f:.]+\]' # (simple regex, validated later)
# Host patterns # Host patterns
@ -85,7 +88,7 @@ class URLValidator(RegexValidator):
r'^(?:[a-z0-9.+-]*)://' # scheme is validated separately r'^(?:[a-z0-9.+-]*)://' # scheme is validated separately
r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication
r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')' r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')'
r'(?::\d{1,5})?' # port r'(?::[0-9]{1,5})?' # port
r'(?:[/?#][^\s]*)?' # resource path r'(?:[/?#][^\s]*)?' # resource path
r'\Z', re.IGNORECASE) r'\Z', re.IGNORECASE)
message = _('Enter a valid URL.') message = _('Enter a valid URL.')
@ -128,7 +131,7 @@ class URLValidator(RegexValidator):
raise raise
else: else:
# Now verify IPv6 in the netloc part # Now verify IPv6 in the netloc part
host_match = re.search(r'^\[(.+)\](?::\d{1,5})?$', splitted_url.netloc) host_match = re.search(r'^\[(.+)\](?::[0-9]{1,5})?$', splitted_url.netloc)
if host_match: if host_match:
potential_ip = host_match[1] potential_ip = host_match[1]
try: try:

View File

@ -23,7 +23,7 @@ from django.utils.regex_helper import _lazy_re_compile
from .multipartparser import parse_header from .multipartparser import parse_header
RAISE_ERROR = object() RAISE_ERROR = object()
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$") host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:[0-9]+)?$")
class UnreadablePostError(OSError): class UnreadablePostError(OSError):

View File

@ -38,7 +38,7 @@ __all__ = (
BOUNDARY = 'BoUnDaRyStRiNg' BOUNDARY = 'BoUnDaRyStRiNg'
MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY
CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w\d-]+);?') CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w-]+);?')
# Structured suffix spec: https://tools.ietf.org/html/rfc6838#section-4.2.8 # Structured suffix spec: https://tools.ietf.org/html/rfc6838#section-4.2.8
JSON_CONTENT_TYPE_RE = _lazy_re_compile(r'^application\/(.+\+)?json') JSON_CONTENT_TYPE_RE = _lazy_re_compile(r'^application\/(.+\+)?json')

View File

@ -23,12 +23,12 @@ ETAG_MATCH = _lazy_re_compile(r'''
''', re.X) ''', re.X)
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split() MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
__D = r'(?P<day>\d{2})' __D = r'(?P<day>[0-9]{2})'
__D2 = r'(?P<day>[ \d]\d)' __D2 = r'(?P<day>[ 0-9][0-9])'
__M = r'(?P<mon>\w{3})' __M = r'(?P<mon>\w{3})'
__Y = r'(?P<year>\d{4})' __Y = r'(?P<year>[0-9]{4})'
__Y2 = r'(?P<year>\d{2})' __Y2 = r'(?P<year>[0-9]{2})'
__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})' __T = r'(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})'
RFC1123_DATE = _lazy_re_compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) RFC1123_DATE = _lazy_re_compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
RFC850_DATE = _lazy_re_compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) RFC850_DATE = _lazy_re_compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))

View File

@ -33,9 +33,9 @@ CONTEXT_SEPARATOR = "\x04"
# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9 # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9
# and RFC 3066, section 2.1 # and RFC 3066, section 2.1
accept_language_re = _lazy_re_compile(r''' accept_language_re = _lazy_re_compile(r'''
([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*" ([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*"
(?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8" (?:\s*;\s*q=(0(?:\.[0-9]{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8"
(?:\s*,\s*|$) # Multiple accepts per header. (?:\s*,\s*|$) # Multiple accepts per header.
''', re.VERBOSE) ''', re.VERBOSE)
language_code_re = _lazy_re_compile( language_code_re = _lazy_re_compile(

View File

@ -246,8 +246,8 @@ following URL patterns which optionally take a page argument::
from django.urls import re_path from django.urls import re_path
urlpatterns = [ urlpatterns = [
re_path(r'^blog/(page-(\d+)/)?$', blog_articles), # bad re_path(r'^blog/(page-([0-9]+)/)?$', blog_articles), # bad
re_path(r'^comments/(?:page-(?P<page_number>\d+)/)?$', comments), # good re_path(r'^comments/(?:page-(?P<page_number>[0-9]+)/)?$', comments), # good
] ]
Both patterns use nested arguments and will resolve: for example, Both patterns use nested arguments and will resolve: for example,

View File

@ -31,6 +31,13 @@ class GEOSTest(SimpleTestCase, TestDataMixin):
if geom.hasz: if geom.hasz:
self.assertEqual(g.ewkt, geom.wkt) self.assertEqual(g.ewkt, geom.wkt)
def test_wkt_invalid(self):
    """WKT/EWKT strings written with non-ASCII decimal digits are rejected.

    Each input below uses Arabic-Indic digits, once in the coordinates and
    once in the SRID prefix; ``fromstr`` must raise ``ValueError`` with the
    "unrecognized" message for both.
    """
    expected_message = 'String input unrecognized as WKT EWKT, and HEXEWKB.'
    rejected_inputs = (
        'POINT(٠٠١ ٠)',            # non-ASCII digits in the coordinates
        'SRID=٧٥٨٣;POINT(100 0)',  # non-ASCII digits in the SRID prefix
    )
    for candidate in rejected_inputs:
        with self.assertRaisesMessage(ValueError, expected_message):
            fromstr(candidate)
def test_hex(self): def test_hex(self):
"Testing HEX output." "Testing HEX output."
for g in self.geometries.hex_wkt: for g in self.geometries.hex_wkt:

View File

@ -1406,6 +1406,7 @@ class MiscTests(SimpleTestCase):
('12-345', []), ('12-345', []),
('', []), ('', []),
('en;q=1e0', []), ('en;q=1e0', []),
('en-au;q=.', []),
] ]
for value, expected in tests: for value, expected in tests:
with self.subTest(value=value): with self.subTest(value=value):

View File

@ -328,6 +328,13 @@ class HttpDateProcessingTests(unittest.TestCase):
datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc), datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc),
) )
def test_parsing_asctime_nonascii_digits(self):
    """Non-ASCII unicode decimals raise an error.

    Both inputs are otherwise well-formed asctime dates; the only thing
    wrong with them is that one numeric field is spelled with fullwidth
    digits (U+FF10-U+FF19) instead of ASCII 0-9. ``parse_http_date`` must
    reject them with ``ValueError``.
    """
    # The digits are written as \u escapes so they survive tools that strip
    # or normalise non-ASCII text; a literal that silently loses them would
    # still raise ValueError (missing field) and mask a regression.
    fullwidth_1994 = '\uff11\uff19\uff19\uff14'
    fullwidth_12 = '\uff11\uff12'

    # asctime pads a one-digit day with a space, hence the two spaces
    # before "6": everything except the year is valid here.
    with self.assertRaises(ValueError):
        parse_http_date('Sun Nov  6 08:49:37 ' + fullwidth_1994)
    # Here everything except the day-of-month is valid.
    with self.assertRaises(ValueError):
        parse_http_date('Sun Nov ' + fullwidth_12 + ' 08:49:37 1994')
def test_parsing_year_less_than_70(self): def test_parsing_year_less_than_70(self):
parsed = parse_http_date('Sun Nov 6 08:49:37 0037') parsed = parse_http_date('Sun Nov 6 08:49:37 0037')
self.assertEqual( self.assertEqual(