Refs #21231 -- Backport urllib.parse.parse_qsl() from Python 3.8.
This commit is contained in:
parent
8d59075184
commit
fd209f62f1
|
@ -4,12 +4,12 @@ import copy
|
|||
import warnings
|
||||
from io import BytesIO
|
||||
from itertools import chain
|
||||
from urllib.parse import quote, urlencode, urljoin, urlsplit
|
||||
from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit
|
||||
|
||||
from django.conf import settings
|
||||
from django.core import signing
|
||||
from django.core.exceptions import (
|
||||
DisallowedHost, ImproperlyConfigured, RequestDataTooBig,
|
||||
DisallowedHost, ImproperlyConfigured, RequestDataTooBig, TooManyFieldsSent,
|
||||
)
|
||||
from django.core.files import uploadhandler
|
||||
from django.http.multipartparser import MultiPartParser, MultiPartParserError
|
||||
|
@ -19,11 +19,20 @@ from django.utils.datastructures import (
|
|||
from django.utils.deprecation import RemovedInDjango40Warning
|
||||
from django.utils.encoding import escape_uri_path, iri_to_uri
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils.http import is_same_domain, limited_parse_qsl
|
||||
from django.utils.http import is_same_domain
|
||||
from django.utils.inspect import func_supports_parameter
|
||||
from django.utils.regex_helper import _lazy_re_compile
|
||||
|
||||
from .multipartparser import parse_header
|
||||
|
||||
# TODO: Remove when dropping support for PY37. inspect.signature() is used to
|
||||
# detect whether the max_num_fields argument is available as this security fix
|
||||
# was backported to Python 3.6.8 and 3.7.2, and may also have been applied by
|
||||
# downstream package maintainers to other versions in their repositories.
|
||||
if func_supports_parameter(parse_qsl, 'max_num_fields'):
|
||||
from django.utils.http import parse_qsl
|
||||
|
||||
|
||||
RAISE_ERROR = object()
|
||||
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
|
||||
|
||||
|
@ -446,8 +455,8 @@ class QueryDict(MultiValueDict):
|
|||
query_string = query_string or ''
|
||||
parse_qsl_kwargs = {
|
||||
'keep_blank_values': True,
|
||||
'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
|
||||
'encoding': self.encoding,
|
||||
'max_num_fields': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
|
||||
}
|
||||
if isinstance(query_string, bytes):
|
||||
# query_string normally contains URL-encoded data, a subset of ASCII.
|
||||
|
@ -456,8 +465,18 @@ class QueryDict(MultiValueDict):
|
|||
except UnicodeDecodeError:
|
||||
# ... but some user agents are misbehaving :-(
|
||||
query_string = query_string.decode('iso-8859-1')
|
||||
for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs):
|
||||
try:
|
||||
for key, value in parse_qsl(query_string, **parse_qsl_kwargs):
|
||||
self.appendlist(key, value)
|
||||
except ValueError as e:
|
||||
# ValueError can also be raised if the strict_parsing argument to
|
||||
# parse_qsl() is True. As that is not used by Django, assume that
|
||||
# the exception was raised by exceeding the value of max_num_fields
|
||||
# instead of fragile checks of exception message strings.
|
||||
raise TooManyFieldsSent(
|
||||
'The number of GET/POST parameters exceeded '
|
||||
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
|
||||
) from e
|
||||
self._mutable = mutable
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -12,7 +12,6 @@ from urllib.parse import (
|
|||
urlencode as original_urlencode, uses_params,
|
||||
)
|
||||
|
||||
from django.core.exceptions import TooManyFieldsSent
|
||||
from django.utils.datastructures import MultiValueDict
|
||||
from django.utils.deprecation import RemovedInDjango40Warning
|
||||
from django.utils.functional import keep_lazy_text
|
||||
|
@ -42,8 +41,6 @@ ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
|
|||
RFC3986_GENDELIMS = ":/?#[]@"
|
||||
RFC3986_SUBDELIMS = "!$&'()*+,;="
|
||||
|
||||
FIELDS_MATCH = _lazy_re_compile('[&;]')
|
||||
|
||||
|
||||
@keep_lazy_text
|
||||
def urlquote(url, safe='/'):
|
||||
|
@ -415,13 +412,20 @@ def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
|||
(not scheme or scheme in valid_schemes))
|
||||
|
||||
|
||||
def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
|
||||
errors='replace', fields_limit=None):
|
||||
# TODO: Remove when dropping support for PY37.
|
||||
def parse_qsl(
|
||||
qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8',
|
||||
errors='replace', max_num_fields=None,
|
||||
):
|
||||
"""
|
||||
Return a list of key/value tuples parsed from query string.
|
||||
|
||||
Copied from urlparse with an additional "fields_limit" argument.
|
||||
Copyright (C) 2013 Python Software Foundation (see LICENSE.python).
|
||||
Backport of urllib.parse.parse_qsl() from Python 3.8.
|
||||
Copyright (C) 2020 Python Software Foundation (see LICENSE.python).
|
||||
|
||||
----
|
||||
|
||||
Parse a query given as a string argument.
|
||||
|
||||
Arguments:
|
||||
|
||||
|
@ -433,37 +437,49 @@ def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
|
|||
strings. The default false value indicates that blank values
|
||||
are to be ignored and treated as if they were not included.
|
||||
|
||||
strict_parsing: flag indicating what to do with parsing errors. If false
|
||||
(the default), errors are silently ignored. If true, errors raise a
|
||||
ValueError exception.
|
||||
|
||||
encoding and errors: specify how to decode percent-encoded sequences
|
||||
into Unicode characters, as accepted by the bytes.decode() method.
|
||||
|
||||
fields_limit: maximum number of fields parsed or an exception
|
||||
is raised. None means no limit and is the default.
|
||||
max_num_fields: int. If set, then throws a ValueError if there are more
|
||||
than n fields read by parse_qsl().
|
||||
|
||||
Returns a list, as G-d intended.
|
||||
"""
|
||||
if fields_limit:
|
||||
pairs = FIELDS_MATCH.split(qs, fields_limit)
|
||||
if len(pairs) > fields_limit:
|
||||
raise TooManyFieldsSent(
|
||||
'The number of GET/POST parameters exceeded '
|
||||
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
|
||||
)
|
||||
else:
|
||||
pairs = FIELDS_MATCH.split(qs)
|
||||
qs, _coerce_result = _coerce_args(qs)
|
||||
|
||||
# If max_num_fields is defined then check that the number of fields is less
|
||||
# than max_num_fields. This prevents a memory exhaustion DOS attack via
|
||||
# post bodies with many fields.
|
||||
if max_num_fields is not None:
|
||||
num_fields = 1 + qs.count('&') + qs.count(';')
|
||||
if max_num_fields < num_fields:
|
||||
raise ValueError('Max number of fields exceeded')
|
||||
|
||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||
r = []
|
||||
for name_value in pairs:
|
||||
if not name_value:
|
||||
if not name_value and not strict_parsing:
|
||||
continue
|
||||
nv = name_value.split('=', 1)
|
||||
if len(nv) != 2:
|
||||
# Handle case of a control-name with no equal sign
|
||||
if strict_parsing:
|
||||
raise ValueError("bad query field: %r" % (name_value,))
|
||||
# Handle case of a control-name with no equal sign.
|
||||
if keep_blank_values:
|
||||
nv.append('')
|
||||
else:
|
||||
continue
|
||||
if nv[1] or keep_blank_values:
|
||||
if len(nv[1]) or keep_blank_values:
|
||||
name = nv[0].replace('+', ' ')
|
||||
name = unquote(name, encoding=encoding, errors=errors)
|
||||
name = _coerce_result(name)
|
||||
value = nv[1].replace('+', ' ')
|
||||
value = unquote(value, encoding=encoding, errors=errors)
|
||||
value = _coerce_result(value)
|
||||
r.append((name, value))
|
||||
return r
|
||||
|
||||
|
|
|
@ -481,6 +481,9 @@ Miscellaneous
|
|||
* Keyword arguments to :func:`~django.test.utils.setup_databases` are now
|
||||
keyword-only.
|
||||
|
||||
* The undocumented ``django.utils.http.limited_parse_qsl()`` function is
|
||||
removed. Please use :func:`urllib.parse.parse_qsl` instead.
|
||||
|
||||
.. _deprecated-features-3.2:
|
||||
|
||||
Features deprecated in 3.2
|
||||
|
|
|
@ -8,9 +8,10 @@ from django.utils.datastructures import MultiValueDict
|
|||
from django.utils.deprecation import RemovedInDjango40Warning
|
||||
from django.utils.http import (
|
||||
base36_to_int, escape_leading_slashes, http_date, int_to_base36,
|
||||
is_safe_url, is_same_domain, parse_etags, parse_http_date, quote_etag,
|
||||
url_has_allowed_host_and_scheme, urlencode, urlquote, urlquote_plus,
|
||||
urlsafe_base64_decode, urlsafe_base64_encode, urlunquote, urlunquote_plus,
|
||||
is_safe_url, is_same_domain, parse_etags, parse_http_date, parse_qsl,
|
||||
quote_etag, url_has_allowed_host_and_scheme, urlencode, urlquote,
|
||||
urlquote_plus, urlsafe_base64_decode, urlsafe_base64_encode, urlunquote,
|
||||
urlunquote_plus,
|
||||
)
|
||||
|
||||
|
||||
|
@ -359,3 +360,68 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
|
|||
for url, expected in tests:
|
||||
with self.subTest(url=url):
|
||||
self.assertEqual(escape_leading_slashes(url), expected)
|
||||
|
||||
|
||||
# TODO: Remove when dropping support for PY37. Backport of unit tests for
|
||||
# urllib.parse.parse_qsl() from Python 3.8. Copyright (C) 2020 Python Software
|
||||
# Foundation (see LICENSE.python).
|
||||
class ParseQSLBackportTests(unittest.TestCase):
|
||||
def test_parse_qsl(self):
|
||||
tests = [
|
||||
('', []),
|
||||
('&', []),
|
||||
('&&', []),
|
||||
('=', [('', '')]),
|
||||
('=a', [('', 'a')]),
|
||||
('a', [('a', '')]),
|
||||
('a=', [('a', '')]),
|
||||
('&a=b', [('a', 'b')]),
|
||||
('a=a+b&b=b+c', [('a', 'a b'), ('b', 'b c')]),
|
||||
('a=1&a=2', [('a', '1'), ('a', '2')]),
|
||||
(b'', []),
|
||||
(b'&', []),
|
||||
(b'&&', []),
|
||||
(b'=', [(b'', b'')]),
|
||||
(b'=a', [(b'', b'a')]),
|
||||
(b'a', [(b'a', b'')]),
|
||||
(b'a=', [(b'a', b'')]),
|
||||
(b'&a=b', [(b'a', b'b')]),
|
||||
(b'a=a+b&b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
|
||||
(b'a=1&a=2', [(b'a', b'1'), (b'a', b'2')]),
|
||||
(';', []),
|
||||
(';;', []),
|
||||
(';a=b', [('a', 'b')]),
|
||||
('a=a+b;b=b+c', [('a', 'a b'), ('b', 'b c')]),
|
||||
('a=1;a=2', [('a', '1'), ('a', '2')]),
|
||||
(b';', []),
|
||||
(b';;', []),
|
||||
(b';a=b', [(b'a', b'b')]),
|
||||
(b'a=a+b;b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
|
||||
(b'a=1;a=2', [(b'a', b'1'), (b'a', b'2')]),
|
||||
]
|
||||
for original, expected in tests:
|
||||
with self.subTest(original):
|
||||
result = parse_qsl(original, keep_blank_values=True)
|
||||
self.assertEqual(result, expected, 'Error parsing %r' % original)
|
||||
expect_without_blanks = [v for v in expected if len(v[1])]
|
||||
result = parse_qsl(original, keep_blank_values=False)
|
||||
self.assertEqual(result, expect_without_blanks, 'Error parsing %r' % original)
|
||||
|
||||
def test_parse_qsl_encoding(self):
|
||||
result = parse_qsl('key=\u0141%E9', encoding='latin-1')
|
||||
self.assertEqual(result, [('key', '\u0141\xE9')])
|
||||
result = parse_qsl('key=\u0141%C3%A9', encoding='utf-8')
|
||||
self.assertEqual(result, [('key', '\u0141\xE9')])
|
||||
result = parse_qsl('key=\u0141%C3%A9', encoding='ascii')
|
||||
self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
|
||||
result = parse_qsl('key=\u0141%E9-', encoding='ascii')
|
||||
self.assertEqual(result, [('key', '\u0141\ufffd-')])
|
||||
result = parse_qsl('key=\u0141%E9-', encoding='ascii', errors='ignore')
|
||||
self.assertEqual(result, [('key', '\u0141-')])
|
||||
|
||||
def test_parse_qsl_max_num_fields(self):
|
||||
with self.assertRaises(ValueError):
|
||||
parse_qsl('&'.join(['a=a'] * 11), max_num_fields=10)
|
||||
with self.assertRaises(ValueError):
|
||||
parse_qsl(';'.join(['a=a'] * 11), max_num_fields=10)
|
||||
parse_qsl('&'.join(['a=a'] * 10), max_num_fields=10)
|
||||
|
|
Loading…
Reference in New Issue