Refs #21231 -- Backport urllib.parse.parse_qsl() from Python 3.8.

This commit is contained in:
Nick Pope 2019-11-04 12:06:49 +00:00 committed by Mariusz Felisiak
parent 8d59075184
commit fd209f62f1
4 changed files with 134 additions and 30 deletions

View File

@ -4,12 +4,12 @@ import copy
import warnings import warnings
from io import BytesIO from io import BytesIO
from itertools import chain from itertools import chain
from urllib.parse import quote, urlencode, urljoin, urlsplit from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit
from django.conf import settings from django.conf import settings
from django.core import signing from django.core import signing
from django.core.exceptions import ( from django.core.exceptions import (
DisallowedHost, ImproperlyConfigured, RequestDataTooBig, DisallowedHost, ImproperlyConfigured, RequestDataTooBig, TooManyFieldsSent,
) )
from django.core.files import uploadhandler from django.core.files import uploadhandler
from django.http.multipartparser import MultiPartParser, MultiPartParserError from django.http.multipartparser import MultiPartParser, MultiPartParserError
@ -19,11 +19,20 @@ from django.utils.datastructures import (
from django.utils.deprecation import RemovedInDjango40Warning from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.encoding import escape_uri_path, iri_to_uri from django.utils.encoding import escape_uri_path, iri_to_uri
from django.utils.functional import cached_property from django.utils.functional import cached_property
from django.utils.http import is_same_domain, limited_parse_qsl from django.utils.http import is_same_domain
from django.utils.inspect import func_supports_parameter
from django.utils.regex_helper import _lazy_re_compile from django.utils.regex_helper import _lazy_re_compile
from .multipartparser import parse_header from .multipartparser import parse_header
# TODO: Remove when dropping support for PY37. inspect.signature() is used to
# detect whether the max_num_fields argument is available as this security fix
# was backported to Python 3.6.8 and 3.7.2, and may also have been applied by
# downstream package maintainers to other versions in their repositories.
if func_supports_parameter(parse_qsl, 'max_num_fields'):
from django.utils.http import parse_qsl
RAISE_ERROR = object() RAISE_ERROR = object()
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$") host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")
@ -446,8 +455,8 @@ class QueryDict(MultiValueDict):
query_string = query_string or '' query_string = query_string or ''
parse_qsl_kwargs = { parse_qsl_kwargs = {
'keep_blank_values': True, 'keep_blank_values': True,
'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
'encoding': self.encoding, 'encoding': self.encoding,
'max_num_fields': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS,
} }
if isinstance(query_string, bytes): if isinstance(query_string, bytes):
# query_string normally contains URL-encoded data, a subset of ASCII. # query_string normally contains URL-encoded data, a subset of ASCII.
@ -456,8 +465,18 @@ class QueryDict(MultiValueDict):
except UnicodeDecodeError: except UnicodeDecodeError:
# ... but some user agents are misbehaving :-( # ... but some user agents are misbehaving :-(
query_string = query_string.decode('iso-8859-1') query_string = query_string.decode('iso-8859-1')
for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs): try:
for key, value in parse_qsl(query_string, **parse_qsl_kwargs):
self.appendlist(key, value) self.appendlist(key, value)
except ValueError as e:
# ValueError can also be raised if the strict_parsing argument to
# parse_qsl() is True. As that is not used by Django, assume that
# the exception was raised by exceeding the value of max_num_fields
# instead of fragile checks of exception message strings.
raise TooManyFieldsSent(
'The number of GET/POST parameters exceeded '
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.'
) from e
self._mutable = mutable self._mutable = mutable
@classmethod @classmethod

View File

@ -12,7 +12,6 @@ from urllib.parse import (
urlencode as original_urlencode, uses_params, urlencode as original_urlencode, uses_params,
) )
from django.core.exceptions import TooManyFieldsSent
from django.utils.datastructures import MultiValueDict from django.utils.datastructures import MultiValueDict
from django.utils.deprecation import RemovedInDjango40Warning from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.functional import keep_lazy_text from django.utils.functional import keep_lazy_text
@ -42,8 +41,6 @@ ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
RFC3986_GENDELIMS = ":/?#[]@" RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;=" RFC3986_SUBDELIMS = "!$&'()*+,;="
FIELDS_MATCH = _lazy_re_compile('[&;]')
@keep_lazy_text @keep_lazy_text
def urlquote(url, safe='/'): def urlquote(url, safe='/'):
@ -415,13 +412,20 @@ def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
(not scheme or scheme in valid_schemes)) (not scheme or scheme in valid_schemes))
def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8', # TODO: Remove when dropping support for PY37.
errors='replace', fields_limit=None): def parse_qsl(
qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8',
errors='replace', max_num_fields=None,
):
""" """
Return a list of key/value tuples parsed from query string. Return a list of key/value tuples parsed from query string.
Copied from urlparse with an additional "fields_limit" argument. Backport of urllib.parse.parse_qsl() from Python 3.8.
Copyright (C) 2013 Python Software Foundation (see LICENSE.python). Copyright (C) 2020 Python Software Foundation (see LICENSE.python).
----
Parse a query given as a string argument.
Arguments: Arguments:
@ -433,37 +437,49 @@ def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8',
strings. The default false value indicates that blank values strings. The default false value indicates that blank values
are to be ignored and treated as if they were not included. are to be ignored and treated as if they were not included.
strict_parsing: flag indicating what to do with parsing errors. If false
(the default), errors are silently ignored. If true, errors raise a
ValueError exception.
encoding and errors: specify how to decode percent-encoded sequences encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method. into Unicode characters, as accepted by the bytes.decode() method.
fields_limit: maximum number of fields parsed or an exception max_num_fields: int. If set, then throws a ValueError if there are more
is raised. None means no limit and is the default. than n fields read by parse_qsl().
Returns a list, as G-d intended.
""" """
if fields_limit: qs, _coerce_result = _coerce_args(qs)
pairs = FIELDS_MATCH.split(qs, fields_limit)
if len(pairs) > fields_limit: # If max_num_fields is defined then check that the number of fields is less
raise TooManyFieldsSent( # than max_num_fields. This prevents a memory exhaustion DOS attack via
'The number of GET/POST parameters exceeded ' # post bodies with many fields.
'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.' if max_num_fields is not None:
) num_fields = 1 + qs.count('&') + qs.count(';')
else: if max_num_fields < num_fields:
pairs = FIELDS_MATCH.split(qs) raise ValueError('Max number of fields exceeded')
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = [] r = []
for name_value in pairs: for name_value in pairs:
if not name_value: if not name_value and not strict_parsing:
continue continue
nv = name_value.split('=', 1) nv = name_value.split('=', 1)
if len(nv) != 2: if len(nv) != 2:
# Handle case of a control-name with no equal sign if strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
# Handle case of a control-name with no equal sign.
if keep_blank_values: if keep_blank_values:
nv.append('') nv.append('')
else: else:
continue continue
if nv[1] or keep_blank_values: if len(nv[1]) or keep_blank_values:
name = nv[0].replace('+', ' ') name = nv[0].replace('+', ' ')
name = unquote(name, encoding=encoding, errors=errors) name = unquote(name, encoding=encoding, errors=errors)
name = _coerce_result(name)
value = nv[1].replace('+', ' ') value = nv[1].replace('+', ' ')
value = unquote(value, encoding=encoding, errors=errors) value = unquote(value, encoding=encoding, errors=errors)
value = _coerce_result(value)
r.append((name, value)) r.append((name, value))
return r return r

View File

@ -481,6 +481,9 @@ Miscellaneous
* Keyword arguments to :func:`~django.test.utils.setup_databases` are now * Keyword arguments to :func:`~django.test.utils.setup_databases` are now
keyword-only. keyword-only.
* The undocumented ``django.utils.http.limited_parse_qsl()`` function is
removed. Please use :func:`urllib.parse.parse_qsl` instead.
.. _deprecated-features-3.2: .. _deprecated-features-3.2:
Features deprecated in 3.2 Features deprecated in 3.2

View File

@ -8,9 +8,10 @@ from django.utils.datastructures import MultiValueDict
from django.utils.deprecation import RemovedInDjango40Warning from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.http import ( from django.utils.http import (
base36_to_int, escape_leading_slashes, http_date, int_to_base36, base36_to_int, escape_leading_slashes, http_date, int_to_base36,
is_safe_url, is_same_domain, parse_etags, parse_http_date, quote_etag, is_safe_url, is_same_domain, parse_etags, parse_http_date, parse_qsl,
url_has_allowed_host_and_scheme, urlencode, urlquote, urlquote_plus, quote_etag, url_has_allowed_host_and_scheme, urlencode, urlquote,
urlsafe_base64_decode, urlsafe_base64_encode, urlunquote, urlunquote_plus, urlquote_plus, urlsafe_base64_decode, urlsafe_base64_encode, urlunquote,
urlunquote_plus,
) )
@ -359,3 +360,68 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
for url, expected in tests: for url, expected in tests:
with self.subTest(url=url): with self.subTest(url=url):
self.assertEqual(escape_leading_slashes(url), expected) self.assertEqual(escape_leading_slashes(url), expected)
# TODO: Remove when dropping support for PY37. Backport of unit tests for
# urllib.parse.parse_qsl() from Python 3.8. Copyright (C) 2020 Python Software
# Foundation (see LICENSE.python).
class ParseQSLBackportTests(unittest.TestCase):
def test_parse_qsl(self):
tests = [
('', []),
('&', []),
('&&', []),
('=', [('', '')]),
('=a', [('', 'a')]),
('a', [('a', '')]),
('a=', [('a', '')]),
('&a=b', [('a', 'b')]),
('a=a+b&b=b+c', [('a', 'a b'), ('b', 'b c')]),
('a=1&a=2', [('a', '1'), ('a', '2')]),
(b'', []),
(b'&', []),
(b'&&', []),
(b'=', [(b'', b'')]),
(b'=a', [(b'', b'a')]),
(b'a', [(b'a', b'')]),
(b'a=', [(b'a', b'')]),
(b'&a=b', [(b'a', b'b')]),
(b'a=a+b&b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
(b'a=1&a=2', [(b'a', b'1'), (b'a', b'2')]),
(';', []),
(';;', []),
(';a=b', [('a', 'b')]),
('a=a+b;b=b+c', [('a', 'a b'), ('b', 'b c')]),
('a=1;a=2', [('a', '1'), ('a', '2')]),
(b';', []),
(b';;', []),
(b';a=b', [(b'a', b'b')]),
(b'a=a+b;b=b+c', [(b'a', b'a b'), (b'b', b'b c')]),
(b'a=1;a=2', [(b'a', b'1'), (b'a', b'2')]),
]
for original, expected in tests:
with self.subTest(original):
result = parse_qsl(original, keep_blank_values=True)
self.assertEqual(result, expected, 'Error parsing %r' % original)
expect_without_blanks = [v for v in expected if len(v[1])]
result = parse_qsl(original, keep_blank_values=False)
self.assertEqual(result, expect_without_blanks, 'Error parsing %r' % original)
def test_parse_qsl_encoding(self):
result = parse_qsl('key=\u0141%E9', encoding='latin-1')
self.assertEqual(result, [('key', '\u0141\xE9')])
result = parse_qsl('key=\u0141%C3%A9', encoding='utf-8')
self.assertEqual(result, [('key', '\u0141\xE9')])
result = parse_qsl('key=\u0141%C3%A9', encoding='ascii')
self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
result = parse_qsl('key=\u0141%E9-', encoding='ascii')
self.assertEqual(result, [('key', '\u0141\ufffd-')])
result = parse_qsl('key=\u0141%E9-', encoding='ascii', errors='ignore')
self.assertEqual(result, [('key', '\u0141-')])
def test_parse_qsl_max_num_fields(self):
with self.assertRaises(ValueError):
parse_qsl('&'.join(['a=a'] * 11), max_num_fields=10)
with self.assertRaises(ValueError):
parse_qsl(';'.join(['a=a'] * 11), max_num_fields=10)
parse_qsl('&'.join(['a=a'] * 10), max_num_fields=10)