Refs #21231 -- Backport urllib.parse.parse_qsl() from Python 3.8.
This commit is contained in:
parent
8d59075184
commit
fd209f62f1
|
@ -4,12 +4,12 @@ import copy
|
||||||
import warnings
|
import warnings
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from urllib.parse import quote, urlencode, urljoin, urlsplit
|
from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core import signing
|
from django.core import signing
|
||||||
from django.core.exceptions import (
|
from django.core.exceptions import (
|
||||||
DisallowedHost, ImproperlyConfigured, RequestDataTooBig,
|
DisallowedHost, ImproperlyConfigured, RequestDataTooBig, TooManyFieldsSent,
|
||||||
)
|
)
|
||||||
from django.core.files import uploadhandler
|
from django.core.files import uploadhandler
|
||||||
from django.http.multipartparser import MultiPartParser, MultiPartParserError
|
from django.http.multipartparser import MultiPartParser, MultiPartParserError
|
||||||
|
@ -19,11 +19,20 @@ from django.utils.datastructures import (
|
||||||
from django.utils.deprecation import RemovedInDjango40Warning
|
from django.utils.deprecation import RemovedInDjango40Warning
|
||||||
from django.utils.encoding import escape_uri_path, iri_to_uri
|
from django.utils.encoding import escape_uri_path, iri_to_uri
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
from django.utils.http import is_same_domain, limited_parse_qsl
|
from django.utils.http import is_same_domain
|
||||||
|
from django.utils.inspect import func_supports_parameter
|
||||||
from django.utils.regex_helper import _lazy_re_compile
|
from django.utils.regex_helper import _lazy_re_compile
|
||||||
|
|
||||||
from .multipartparser import parse_header
|
from .multipartparser import parse_header
|
||||||
|
|
||||||
|
# TODO: Remove when dropping support for PY37. inspect.signature() is used to
|
||||||
|
# detect whether the max_num_fields argument is available as this security fix
|
||||||
|
# was backported to Python 3.6.8 and 3.7.2, and may also have been applied by
|
||||||
|
# downstream package maintainers to other versions in their repositories.
|
||||||
|
if not func_supports_parameter(parse_qsl, 'max_num_fields'):
    # The interpreter's urllib.parse.parse_qsl() does not accept the
    # max_num_fields argument, so shadow it with Django's backport, which
    # does. When the stdlib version already supports the argument, it is
    # used as imported above.
    from django.utils.http import parse_qsl
|
||||||
|
|
||||||
|
|
||||||
RAISE_ERROR = object()
|
RAISE_ERROR = object()
|
||||||
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
|
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
|
||||||
|
|
||||||
|
@ -446,8 +455,8 @@ class QueryDict(MultiValueDict):
|
||||||
query_string = query_string or ''
|
query_string = query_string or ''
|
||||||
parse_qsl_kwargs = {
|
parse_qsl_kwargs = {
|
||||||
'keep_blank_values': True,
|
'keep_blank_values': True,
|
||||||
'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
|
|
||||||
'encoding': self.encoding,
|
'encoding': self.encoding,
|
||||||
|
'max_num_fields': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
|
||||||
}
|
}
|
||||||
if isinstance(query_string, bytes):
|
if isinstance(query_string, bytes):
|
||||||
# query_string normally contains URL-encoded data, a subset of ASCII.
|
# query_string normally contains URL-encoded data, a subset of ASCII.
|
||||||
|
@ -456,8 +465,18 @@ class QueryDict(MultiValueDict):
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
# ... but some user agents are misbehaving :-(
|
# ... but some user agents are misbehaving :-(
|
||||||
query_string = query_string.decode('iso-8859-1')
|
query_string = query_string.decode('iso-8859-1')
|
||||||
for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs):
|
try:
|
||||||
|
for key, value in parse_qsl(query_string, **parse_qsl_kwargs):
|
||||||
self.appendlist(key, value)
|
self.appendlist(key, value)
|
||||||
|
except ValueError as e:
|
||||||
|
# ValueError can also be raised if the strict_parsing argument to
|
||||||
|
# parse_qsl() is True. As that is not used by Django, assume that
|
||||||
|
# the exception was raised by exceeding the value of max_num_fields
|
||||||
|
# rather than relying on fragile checks of exception message strings.
|
||||||
|
raise TooManyFieldsSent(
|
||||||
|
'The number of GET/POST parameters exceeded '
|
||||||
|
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
|
||||||
|
) from e
|
||||||
self._mutable = mutable
|
self._mutable = mutable
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -12,7 +12,6 @@ from urllib.parse import (
|
||||||
urlencode as original_urlencode, uses_params,
|
urlencode as original_urlencode, uses_params,
|
||||||
)
|
)
|
||||||
|
|
||||||
from django.core.exceptions import TooManyFieldsSent
|
|
||||||
from django.utils.datastructures import MultiValueDict
|
from django.utils.datastructures import MultiValueDict
|
||||||
from django.utils.deprecation import RemovedInDjango40Warning
|
from django.utils.deprecation import RemovedInDjango40Warning
|
||||||
from django.utils.functional import keep_lazy_text
|
from django.utils.functional import keep_lazy_text
|
||||||
|
@ -42,8 +41,6 @@ ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
|
||||||
RFC3986_GENDELIMS = ":/?#[]@"
|
RFC3986_GENDELIMS = ":/?#[]@"
|
||||||
RFC3986_SUBDELIMS = "!$&'()*+,;="
|
RFC3986_SUBDELIMS = "!$&'()*+,;="
|
||||||
|
|
||||||
FIELDS_MATCH = _lazy_re_compile('[&;]')
|
|
||||||
|
|
||||||
|
|
||||||
@keep_lazy_text
|
@keep_lazy_text
|
||||||
def urlquote(url, safe='/'):
|
def urlquote(url, safe='/'):
|
||||||
|
@ -415,13 +412,20 @@ def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
||||||
(not scheme or scheme in valid_schemes))
|
(not scheme or scheme in valid_schemes))
|
||||||
|
|
||||||
|
|
||||||
def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
|
# TODO: Remove when dropping support for PY37.
|
||||||
errors='replace', fields_limit=None):
|
def parse_qsl(
|
||||||
|
qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8',
|
||||||
|
errors='replace', max_num_fields=None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Return a list of key/value tuples parsed from query string.
|
Return a list of key/value tuples parsed from query string.
|
||||||
|
|
||||||
Copied from urlparse with an additional "fields_limit" argument.
|
Backport of urllib.parse.parse_qsl() from Python 3.8.
|
||||||
Copyright (C) 2013 Python Software Foundation (see LICENSE.python).
|
Copyright (C) 2020 Python Software Foundation (see LICENSE.python).
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
Parse a query given as a string argument.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
|
||||||
|
@ -433,37 +437,49 @@ def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
|
||||||
strings. The default false value indicates that blank values
|
strings. The default false value indicates that blank values
|
||||||
are to be ignored and treated as if they were not included.
|
are to be ignored and treated as if they were not included.
|
||||||
|
|
||||||
|
strict_parsing: flag indicating what to do with parsing errors. If false
|
||||||
|
(the default), errors are silently ignored. If true, errors raise a
|
||||||
|
ValueError exception.
|
||||||
|
|
||||||
encoding and errors: specify how to decode percent-encoded sequences
|
encoding and errors: specify how to decode percent-encoded sequences
|
||||||
into Unicode characters, as accepted by the bytes.decode() method.
|
into Unicode characters, as accepted by the bytes.decode() method.
|
||||||
|
|
||||||
fields_limit: maximum number of fields parsed or an exception
|
max_num_fields: int. If set, then throws a ValueError if there are more
|
||||||
is raised. None means no limit and is the default.
|
than max_num_fields fields read by parse_qsl().
|
||||||
|
|
||||||
|
Returns a list, as G-d intended.
|
||||||
"""
|
"""
|
||||||
if fields_limit:
|
qs, _coerce_result = _coerce_args(qs)
|
||||||
pairs = FIELDS_MATCH.split(qs, fields_limit)
|
|
||||||
if len(pairs) > fields_limit:
|
# If max_num_fields is defined then check that the number of fields is less
|
||||||
raise TooManyFieldsSent(
|
# than or equal to max_num_fields. This prevents a memory exhaustion DoS attack via
|
||||||
'The number of GET/POST parameters exceeded '
|
# post bodies with many fields.
|
||||||
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
|
if max_num_fields is not None:
|
||||||
)
|
num_fields = 1 + qs.count('&') + qs.count(';')
|
||||||
else:
|
if max_num_fields < num_fields:
|
||||||
pairs = FIELDS_MATCH.split(qs)
|
raise ValueError('Max number of fields exceeded')
|
||||||
|
|
||||||
|
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
r = []
|
r = []
|
||||||
for name_value in pairs:
|
for name_value in pairs:
|
||||||
if not name_value:
|
if not name_value and not strict_parsing:
|
||||||
continue
|
continue
|
||||||
nv = name_value.split('=', 1)
|
nv = name_value.split('=', 1)
|
||||||
if len(nv) != 2:
|
if len(nv) != 2:
|
||||||
# Handle case of a control-name with no equal sign
|
if strict_parsing:
|
||||||
|
raise ValueError("bad query field: %r" % (name_value,))
|
||||||
|
# Handle case of a control-name with no equal sign.
|
||||||
if keep_blank_values:
|
if keep_blank_values:
|
||||||
nv.append('')
|
nv.append('')
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
if nv[1] or keep_blank_values:
|
if len(nv[1]) or keep_blank_values:
|
||||||
name = nv[0].replace('+', ' ')
|
name = nv[0].replace('+', ' ')
|
||||||
name = unquote(name, encoding=encoding, errors=errors)
|
name = unquote(name, encoding=encoding, errors=errors)
|
||||||
|
name = _coerce_result(name)
|
||||||
value = nv[1].replace('+', ' ')
|
value = nv[1].replace('+', ' ')
|
||||||
value = unquote(value, encoding=encoding, errors=errors)
|
value = unquote(value, encoding=encoding, errors=errors)
|
||||||
|
value = _coerce_result(value)
|
||||||
r.append((name, value))
|
r.append((name, value))
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
|
@ -481,6 +481,9 @@ Miscellaneous
|
||||||
* Keyword arguments to :func:`~django.test.utils.setup_databases` are now
|
* Keyword arguments to :func:`~django.test.utils.setup_databases` are now
|
||||||
keyword-only.
|
keyword-only.
|
||||||
|
|
||||||
|
* The undocumented ``django.utils.http.limited_parse_qsl()`` function is
|
||||||
|
removed. Please use :func:`urllib.parse.parse_qsl` instead.
|
||||||
|
|
||||||
.. _deprecated-features-3.2:
|
.. _deprecated-features-3.2:
|
||||||
|
|
||||||
Features deprecated in 3.2
|
Features deprecated in 3.2
|
||||||
|
|
|
@ -8,9 +8,10 @@ from django.utils.datastructures import MultiValueDict
|
||||||
from django.utils.deprecation import RemovedInDjango40Warning
|
from django.utils.deprecation import RemovedInDjango40Warning
|
||||||
from django.utils.http import (
|
from django.utils.http import (
|
||||||
base36_to_int, escape_leading_slashes, http_date, int_to_base36,
|
base36_to_int, escape_leading_slashes, http_date, int_to_base36,
|
||||||
is_safe_url, is_same_domain, parse_etags, parse_http_date, quote_etag,
|
is_safe_url, is_same_domain, parse_etags, parse_http_date, parse_qsl,
|
||||||
url_has_allowed_host_and_scheme, urlencode, urlquote, urlquote_plus,
|
quote_etag, url_has_allowed_host_and_scheme, urlencode, urlquote,
|
||||||
urlsafe_base64_decode, urlsafe_base64_encode, urlunquote, urlunquote_plus,
|
urlquote_plus, urlsafe_base64_decode, urlsafe_base64_encode, urlunquote,
|
||||||
|
urlunquote_plus,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -359,3 +360,68 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
|
||||||
for url, expected in tests:
|
for url, expected in tests:
|
||||||
with self.subTest(url=url):
|
with self.subTest(url=url):
|
||||||
self.assertEqual(escape_leading_slashes(url), expected)
|
self.assertEqual(escape_leading_slashes(url), expected)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Remove when dropping support for PY37. Backport of unit tests for
|
||||||
|
# urllib.parse.parse_qsl() from Python 3.8. Copyright (C) 2020 Python Software
|
||||||
|
# Foundation (see LICENSE.python).
|
||||||
|
class ParseQSLBackportTests(unittest.TestCase):
|
||||||
|
def test_parse_qsl(self):
|
||||||
|
tests = [
|
||||||
|
('', []),
|
||||||
|
('&', []),
|
||||||
|
('&&', []),
|
||||||
|
('=', [('', '')]),
|
||||||
|
('=a', [('', 'a')]),
|
||||||
|
('a', [('a', '')]),
|
||||||
|
('a=', [('a', '')]),
|
||||||
|
('&a=b', [('a', 'b')]),
|
||||||
|
('a=a+b&b=b+c', [('a', 'a b'), ('b', 'b c')]),
|
||||||
|
('a=1&a=2', [('a', '1'), ('a', '2')]),
|
||||||
|
(b'', []),
|
||||||
|
(b'&', []),
|
||||||
|
(b'&&', []),
|
||||||
|
(b'=', [(b'', b'')]),
|
||||||
|
(b'=a', [(b'', b'a')]),
|
||||||
|
(b'a', [(b'a', b'')]),
|
||||||
|
(b'a=', [(b'a', b'')]),
|
||||||
|
(b'&a=b', [(b'a', b'b')]),
|
||||||
|
(b'a=a+b&b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
|
||||||
|
(b'a=1&a=2', [(b'a', b'1'), (b'a', b'2')]),
|
||||||
|
(';', []),
|
||||||
|
(';;', []),
|
||||||
|
(';a=b', [('a', 'b')]),
|
||||||
|
('a=a+b;b=b+c', [('a', 'a b'), ('b', 'b c')]),
|
||||||
|
('a=1;a=2', [('a', '1'), ('a', '2')]),
|
||||||
|
(b';', []),
|
||||||
|
(b';;', []),
|
||||||
|
(b';a=b', [(b'a', b'b')]),
|
||||||
|
(b'a=a+b;b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
|
||||||
|
(b'a=1;a=2', [(b'a', b'1'), (b'a', b'2')]),
|
||||||
|
]
|
||||||
|
for original, expected in tests:
|
||||||
|
with self.subTest(original):
|
||||||
|
result = parse_qsl(original, keep_blank_values=True)
|
||||||
|
self.assertEqual(result, expected, 'Error parsing %r' % original)
|
||||||
|
expect_without_blanks = [v for v in expected if len(v[1])]
|
||||||
|
result = parse_qsl(original, keep_blank_values=False)
|
||||||
|
self.assertEqual(result, expect_without_blanks, 'Error parsing %r' % original)
|
||||||
|
|
||||||
|
def test_parse_qsl_encoding(self):
|
||||||
|
result = parse_qsl('key=\u0141%E9', encoding='latin-1')
|
||||||
|
self.assertEqual(result, [('key', '\u0141\xE9')])
|
||||||
|
result = parse_qsl('key=\u0141%C3%A9', encoding='utf-8')
|
||||||
|
self.assertEqual(result, [('key', '\u0141\xE9')])
|
||||||
|
result = parse_qsl('key=\u0141%C3%A9', encoding='ascii')
|
||||||
|
self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
|
||||||
|
result = parse_qsl('key=\u0141%E9-', encoding='ascii')
|
||||||
|
self.assertEqual(result, [('key', '\u0141\ufffd-')])
|
||||||
|
result = parse_qsl('key=\u0141%E9-', encoding='ascii', errors='ignore')
|
||||||
|
self.assertEqual(result, [('key', '\u0141-')])
|
||||||
|
|
||||||
|
def test_parse_qsl_max_num_fields(self):
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
parse_qsl('&'.join(['a=a'] * 11), max_num_fields=10)
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
parse_qsl(';'.join(['a=a'] * 11), max_num_fields=10)
|
||||||
|
parse_qsl('&'.join(['a=a'] * 10), max_num_fields=10)
|
||||||
|
|
Loading…
Reference in New Issue