Refs #21231 -- Backport urllib.parse.parse_qsl() from Python 3.8.

This commit is contained in:
Nick Pope 2019-11-04 12:06:49 +00:00 committed by Mariusz Felisiak
parent 8d59075184
commit fd209f62f1
4 changed files with 134 additions and 30 deletions

View File

@ -4,12 +4,12 @@ import copy
import warnings
from io import BytesIO
from itertools import chain
from urllib.parse import quote, urlencode, urljoin, urlsplit
from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit
from django.conf import settings
from django.core import signing
from django.core.exceptions import (
DisallowedHost, ImproperlyConfigured, RequestDataTooBig,
DisallowedHost, ImproperlyConfigured, RequestDataTooBig, TooManyFieldsSent,
)
from django.core.files import uploadhandler
from django.http.multipartparser import MultiPartParser, MultiPartParserError
@ -19,11 +19,20 @@ from django.utils.datastructures import (
from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.encoding import escape_uri_path, iri_to_uri
from django.utils.functional import cached_property
from django.utils.http import is_same_domain, limited_parse_qsl
from django.utils.http import is_same_domain
from django.utils.inspect import func_supports_parameter
from django.utils.regex_helper import _lazy_re_compile
from .multipartparser import parse_header
# TODO: Remove when dropping support for PY37. inspect.signature() is used to
# detect whether the max_num_fields argument is available as this security fix
# was backported to Python 3.6.8 and 3.7.2, and may also have been applied by
# downstream package maintainers to other versions in their repositories.
if func_supports_parameter(parse_qsl, 'max_num_fields'):
from django.utils.http import parse_qsl
RAISE_ERROR = object()
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
@ -446,8 +455,8 @@ class QueryDict(MultiValueDict):
query_string = query_string or ''
parse_qsl_kwargs = {
'keep_blank_values': True,
'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
'encoding': self.encoding,
'max_num_fields': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
}
if isinstance(query_string, bytes):
# query_string normally contains URL-encoded data, a subset of ASCII.
@ -456,8 +465,18 @@ class QueryDict(MultiValueDict):
except UnicodeDecodeError:
# ... but some user agents are misbehaving :-(
query_string = query_string.decode('iso-8859-1')
for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs):
self.appendlist(key, value)
try:
for key, value in parse_qsl(query_string, **parse_qsl_kwargs):
self.appendlist(key, value)
except ValueError as e:
# ValueError can also be raised if the strict_parsing argument to
# parse_qsl() is True. As that is not used by Django, assume that
# the exception was raised by exceeding the value of max_num_fields
# instead of fragile checks of exception message strings.
raise TooManyFieldsSent(
'The number of GET/POST parameters exceeded '
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
) from e
self._mutable = mutable
@classmethod

View File

@ -12,7 +12,6 @@ from urllib.parse import (
urlencode as original_urlencode, uses_params,
)
from django.core.exceptions import TooManyFieldsSent
from django.utils.datastructures import MultiValueDict
from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.functional import keep_lazy_text
@ -42,8 +41,6 @@ ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="
FIELDS_MATCH = _lazy_re_compile('[&;]')
@keep_lazy_text
def urlquote(url, safe='/'):
@ -415,13 +412,20 @@ def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
(not scheme or scheme in valid_schemes))
def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
errors='replace', fields_limit=None):
# TODO: Remove when dropping support for PY37.
def parse_qsl(
qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8',
errors='replace', max_num_fields=None,
):
"""
Return a list of key/value tuples parsed from query string.
Copied from urlparse with an additional "fields_limit" argument.
Copyright (C) 2013 Python Software Foundation (see LICENSE.python).
Backport of urllib.parse.parse_qsl() from Python 3.8.
Copyright (C) 2020 Python Software Foundation (see LICENSE.python).
----
Parse a query given as a string argument.
Arguments:
@ -433,37 +437,49 @@ def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
strings. The default false value indicates that blank values
are to be ignored and treated as if they were not included.
strict_parsing: flag indicating what to do with parsing errors. If false
(the default), errors are silently ignored. If true, errors raise a
ValueError exception.
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
fields_limit: maximum number of fields parsed or an exception
is raised. None means no limit and is the default.
max_num_fields: int. If set, then throws a ValueError if there are more
than n fields read by parse_qsl().
Returns a list, as G-d intended.
"""
if fields_limit:
pairs = FIELDS_MATCH.split(qs, fields_limit)
if len(pairs) > fields_limit:
raise TooManyFieldsSent(
'The number of GET/POST parameters exceeded '
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
)
else:
pairs = FIELDS_MATCH.split(qs)
qs, _coerce_result = _coerce_args(qs)
# If max_num_fields is defined then check that the number of fields is less
# than max_num_fields. This prevents a memory exhaustion DOS attack via
# post bodies with many fields.
if max_num_fields is not None:
num_fields = 1 + qs.count('&') + qs.count(';')
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
for name_value in pairs:
if not name_value:
if not name_value and not strict_parsing:
continue
nv = name_value.split('=', 1)
if len(nv) != 2:
# Handle case of a control-name with no equal sign
if strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
# Handle case of a control-name with no equal sign.
if keep_blank_values:
nv.append('')
else:
continue
if nv[1] or keep_blank_values:
if len(nv[1]) or keep_blank_values:
name = nv[0].replace('+', ' ')
name = unquote(name, encoding=encoding, errors=errors)
name = _coerce_result(name)
value = nv[1].replace('+', ' ')
value = unquote(value, encoding=encoding, errors=errors)
value = _coerce_result(value)
r.append((name, value))
return r

View File

@ -481,6 +481,9 @@ Miscellaneous
* Keyword arguments to :func:`~django.test.utils.setup_databases` are now
keyword-only.
* The undocumented ``django.utils.http.limited_parse_qsl()`` function is
removed. Please use :func:`urllib.parse.parse_qsl` instead.
.. _deprecated-features-3.2:
Features deprecated in 3.2

View File

@ -8,9 +8,10 @@ from django.utils.datastructures import MultiValueDict
from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.http import (
base36_to_int, escape_leading_slashes, http_date, int_to_base36,
is_safe_url, is_same_domain, parse_etags, parse_http_date, quote_etag,
url_has_allowed_host_and_scheme, urlencode, urlquote, urlquote_plus,
urlsafe_base64_decode, urlsafe_base64_encode, urlunquote, urlunquote_plus,
is_safe_url, is_same_domain, parse_etags, parse_http_date, parse_qsl,
quote_etag, url_has_allowed_host_and_scheme, urlencode, urlquote,
urlquote_plus, urlsafe_base64_decode, urlsafe_base64_encode, urlunquote,
urlunquote_plus,
)
@ -359,3 +360,68 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
for url, expected in tests:
with self.subTest(url=url):
self.assertEqual(escape_leading_slashes(url), expected)
# TODO: Remove when dropping support for PY37. Backport of unit tests for
# urllib.parse.parse_qsl() from Python 3.8. Copyright (C) 2020 Python Software
# Foundation (see LICENSE.python).
class ParseQSLBackportTests(unittest.TestCase):
def test_parse_qsl(self):
tests = [
('', []),
('&', []),
('&&', []),
('=', [('', '')]),
('=a', [('', 'a')]),
('a', [('a', '')]),
('a=', [('a', '')]),
('&a=b', [('a', 'b')]),
('a=a+b&b=b+c', [('a', 'a b'), ('b', 'b c')]),
('a=1&a=2', [('a', '1'), ('a', '2')]),
(b'', []),
(b'&', []),
(b'&&', []),
(b'=', [(b'', b'')]),
(b'=a', [(b'', b'a')]),
(b'a', [(b'a', b'')]),
(b'a=', [(b'a', b'')]),
(b'&a=b', [(b'a', b'b')]),
(b'a=a+b&b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
(b'a=1&a=2', [(b'a', b'1'), (b'a', b'2')]),
(';', []),
(';;', []),
(';a=b', [('a', 'b')]),
('a=a+b;b=b+c', [('a', 'a b'), ('b', 'b c')]),
('a=1;a=2', [('a', '1'), ('a', '2')]),
(b';', []),
(b';;', []),
(b';a=b', [(b'a', b'b')]),
(b'a=a+b;b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
(b'a=1;a=2', [(b'a', b'1'), (b'a', b'2')]),
]
for original, expected in tests:
with self.subTest(original):
result = parse_qsl(original, keep_blank_values=True)
self.assertEqual(result, expected, 'Error parsing %r' % original)
expect_without_blanks = [v for v in expected if len(v[1])]
result = parse_qsl(original, keep_blank_values=False)
self.assertEqual(result, expect_without_blanks, 'Error parsing %r' % original)
def test_parse_qsl_encoding(self):
result = parse_qsl('key=\u0141%E9', encoding='latin-1')
self.assertEqual(result, [('key', '\u0141\xE9')])
result = parse_qsl('key=\u0141%C3%A9', encoding='utf-8')
self.assertEqual(result, [('key', '\u0141\xE9')])
result = parse_qsl('key=\u0141%C3%A9', encoding='ascii')
self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
result = parse_qsl('key=\u0141%E9-', encoding='ascii')
self.assertEqual(result, [('key', '\u0141\ufffd-')])
result = parse_qsl('key=\u0141%E9-', encoding='ascii', errors='ignore')
self.assertEqual(result, [('key', '\u0141-')])
def test_parse_qsl_max_num_fields(self):
with self.assertRaises(ValueError):
parse_qsl('&'.join(['a=a'] * 11), max_num_fields=10)
with self.assertRaises(ValueError):
parse_qsl(';'.join(['a=a'] * 11), max_num_fields=10)
parse_qsl('&'.join(['a=a'] * 10), max_num_fields=10)