From fd209f62f1d83233cc634443cfac5ee4328d98b8 Mon Sep 17 00:00:00 2001 From: Nick Pope Date: Mon, 4 Nov 2019 12:06:49 +0000 Subject: [PATCH] Refs #21231 -- Backport urllib.parse.parse_qsl() from Python 3.8. --- django/http/request.py | 31 ++++++++++++--- django/utils/http.py | 58 +++++++++++++++++---------- docs/releases/3.2.txt | 3 ++ tests/utils_tests/test_http.py | 72 ++++++++++++++++++++++++++++++++-- 4 files changed, 134 insertions(+), 30 deletions(-) diff --git a/django/http/request.py b/django/http/request.py index 083bf2d80f..addcf05329 100644 --- a/django/http/request.py +++ b/django/http/request.py @@ -4,12 +4,12 @@ import copy import warnings from io import BytesIO from itertools import chain -from urllib.parse import quote, urlencode, urljoin, urlsplit +from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit from django.conf import settings from django.core import signing from django.core.exceptions import ( - DisallowedHost, ImproperlyConfigured, RequestDataTooBig, + DisallowedHost, ImproperlyConfigured, RequestDataTooBig, TooManyFieldsSent, ) from django.core.files import uploadhandler from django.http.multipartparser import MultiPartParser, MultiPartParserError @@ -19,11 +19,20 @@ from django.utils.datastructures import ( from django.utils.deprecation import RemovedInDjango40Warning from django.utils.encoding import escape_uri_path, iri_to_uri from django.utils.functional import cached_property -from django.utils.http import is_same_domain, limited_parse_qsl +from django.utils.http import is_same_domain +from django.utils.inspect import func_supports_parameter from django.utils.regex_helper import _lazy_re_compile from .multipartparser import parse_header +# TODO: Remove when dropping support for PY37. inspect.signature() is used to +# detect whether the max_num_fields argument is available as this security fix +# was backported to Python 3.6.8 and 3.7.2, and may also have been applied by +# downstream package maintainers to other versions in their repositories. +if func_supports_parameter(parse_qsl, 'max_num_fields'): + from django.utils.http import parse_qsl + + RAISE_ERROR = object() host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$") @@ -446,8 +455,8 @@ class QueryDict(MultiValueDict): query_string = query_string or '' parse_qsl_kwargs = { 'keep_blank_values': True, - 'fields_limit': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS, 'encoding': self.encoding, + 'max_num_fields': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS, } if isinstance(query_string, bytes): # query_string normally contains URL-encoded data, a subset of ASCII. @@ -456,8 +465,18 @@ class QueryDict(MultiValueDict): except UnicodeDecodeError: # ... but some user agents are misbehaving :-( query_string = query_string.decode('iso-8859-1') - for key, value in limited_parse_qsl(query_string, **parse_qsl_kwargs): - self.appendlist(key, value) + try: + for key, value in parse_qsl(query_string, **parse_qsl_kwargs): + self.appendlist(key, value) + except ValueError as e: + # ValueError can also be raised if the strict_parsing argument to + # parse_qsl() is True. As that is not used by Django, assume that + # the exception was raised by exceeding the value of max_num_fields + # instead of fragile checks of exception message strings. + raise TooManyFieldsSent( + 'The number of GET/POST parameters exceeded ' + 'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.' + ) from e self._mutable = mutable @classmethod diff --git a/django/utils/http.py b/django/utils/http.py index c1005458e7..810d7970ba 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -12,7 +12,6 @@ from urllib.parse import ( urlencode as original_urlencode, uses_params, ) -from django.core.exceptions import TooManyFieldsSent from django.utils.datastructures import MultiValueDict from django.utils.deprecation import RemovedInDjango40Warning from django.utils.functional import keep_lazy_text @@ -42,8 +41,6 @@ ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) RFC3986_GENDELIMS = ":/?#[]@" RFC3986_SUBDELIMS = "!$&'()*+,;=" -FIELDS_MATCH = _lazy_re_compile('[&;]') - @keep_lazy_text def urlquote(url, safe='/'): @@ -415,13 +412,20 @@ def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): (not scheme or scheme in valid_schemes)) -def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8', - errors='replace', fields_limit=None): +# TODO: Remove when dropping support for PY37. +def parse_qsl( + qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', + errors='replace', max_num_fields=None, +): """ Return a list of key/value tuples parsed from query string. - Copied from urlparse with an additional "fields_limit" argument. - Copyright (C) 2013 Python Software Foundation (see LICENSE.python). + Backport of urllib.parse.parse_qsl() from Python 3.8. + Copyright (C) 2020 Python Software Foundation (see LICENSE.python). + + ---- + + Parse a query given as a string argument. Arguments: @@ -433,37 +437,49 @@ def limited_parse_qsl(qs, keep_blank_values=False, encoding='utf-8', strings. The default false value indicates that blank values are to be ignored and treated as if they were not included. + strict_parsing: flag indicating what to do with parsing errors. If false + (the default), errors are silently ignored. If true, errors raise a + ValueError exception. + encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. - fields_limit: maximum number of fields parsed or an exception - is raised. None means no limit and is the default. + max_num_fields: int. If set, then throws a ValueError if there are more + than n fields read by parse_qsl(). + + Returns a list, as G-d intended. """ - if fields_limit: - pairs = FIELDS_MATCH.split(qs, fields_limit) - if len(pairs) > fields_limit: - raise TooManyFieldsSent( - 'The number of GET/POST parameters exceeded ' - 'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.' - ) - else: - pairs = FIELDS_MATCH.split(qs) + qs, _coerce_result = _coerce_args(qs) + + # If max_num_fields is defined then check that the number of fields is less + # than max_num_fields. This prevents a memory exhaustion DOS attack via + # post bodies with many fields. + if max_num_fields is not None: + num_fields = 1 + qs.count('&') + qs.count(';') + if max_num_fields < num_fields: + raise ValueError('Max number of fields exceeded') + + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] for name_value in pairs: - if not name_value: + if not name_value and not strict_parsing: continue nv = name_value.split('=', 1) if len(nv) != 2: - # Handle case of a control-name with no equal sign + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign. if keep_blank_values: nv.append('') else: continue - if nv[1] or keep_blank_values: + if len(nv[1]) or keep_blank_values: name = nv[0].replace('+', ' ') name = unquote(name, encoding=encoding, errors=errors) + name = _coerce_result(name) value = nv[1].replace('+', ' ') value = unquote(value, encoding=encoding, errors=errors) + value = _coerce_result(value) r.append((name, value)) return r diff --git a/docs/releases/3.2.txt b/docs/releases/3.2.txt index 9017c33138..cc7ea744a2 100644 --- a/docs/releases/3.2.txt +++ b/docs/releases/3.2.txt @@ -481,6 +481,9 @@ Miscellaneous * Keyword arguments to :func:`~django.test.utils.setup_databases` are now keyword-only. +* The undocumented ``django.utils.http.limited_parse_qsl()`` function is + removed. Please use :func:`urllib.parse.parse_qsl` instead. + .. _deprecated-features-3.2: Features deprecated in 3.2 diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index aa9f194a8a..1966386e77 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -8,9 +8,10 @@ from django.utils.datastructures import MultiValueDict from django.utils.deprecation import RemovedInDjango40Warning from django.utils.http import ( base36_to_int, escape_leading_slashes, http_date, int_to_base36, - is_safe_url, is_same_domain, parse_etags, parse_http_date, quote_etag, - url_has_allowed_host_and_scheme, urlencode, urlquote, urlquote_plus, - urlsafe_base64_decode, urlsafe_base64_encode, urlunquote, urlunquote_plus, + is_safe_url, is_same_domain, parse_etags, parse_http_date, parse_qsl, + quote_etag, url_has_allowed_host_and_scheme, urlencode, urlquote, + urlquote_plus, urlsafe_base64_decode, urlsafe_base64_encode, urlunquote, + urlunquote_plus, ) @@ -359,3 +360,68 @@ class EscapeLeadingSlashesTests(unittest.TestCase): for url, expected in tests: with self.subTest(url=url): self.assertEqual(escape_leading_slashes(url), expected) + + +# TODO: Remove when dropping support for PY37. Backport of unit tests for +# urllib.parse.parse_qsl() from Python 3.8. Copyright (C) 2020 Python Software +# Foundation (see LICENSE.python). +class ParseQSLBackportTests(unittest.TestCase): + def test_parse_qsl(self): + tests = [ + ('', []), + ('&', []), + ('&&', []), + ('=', [('', '')]), + ('=a', [('', 'a')]), + ('a', [('a', '')]), + ('a=', [('a', '')]), + ('&a=b', [('a', 'b')]), + ('a=a+b&b=b+c', [('a', 'a b'), ('b', 'b c')]), + ('a=1&a=2', [('a', '1'), ('a', '2')]), + (b'', []), + (b'&', []), + (b'&&', []), + (b'=', [(b'', b'')]), + (b'=a', [(b'', b'a')]), + (b'a', [(b'a', b'')]), + (b'a=', [(b'a', b'')]), + (b'&a=b', [(b'a', b'b')]), + (b'a=a+b&b=b+c', [(b'a', b'a b'), (b'b', b'b c')]), + (b'a=1&a=2', [(b'a', b'1'), (b'a', b'2')]), + (';', []), + (';;', []), + (';a=b', [('a', 'b')]), + ('a=a+b;b=b+c', [('a', 'a b'), ('b', 'b c')]), + ('a=1;a=2', [('a', '1'), ('a', '2')]), + (b';', []), + (b';;', []), + (b';a=b', [(b'a', b'b')]), + (b'a=a+b;b=b+c', [(b'a', b'a b'), (b'b', b'b c')]), + (b'a=1;a=2', [(b'a', b'1'), (b'a', b'2')]), + ] + for original, expected in tests: + with self.subTest(original): + result = parse_qsl(original, keep_blank_values=True) + self.assertEqual(result, expected, 'Error parsing %r' % original) + expect_without_blanks = [v for v in expected if len(v[1])] + result = parse_qsl(original, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks, 'Error parsing %r' % original) + + def test_parse_qsl_encoding(self): + result = parse_qsl('key=\u0141%E9', encoding='latin-1') + self.assertEqual(result, [('key', '\u0141\xE9')]) + result = parse_qsl('key=\u0141%C3%A9', encoding='utf-8') + self.assertEqual(result, [('key', '\u0141\xE9')]) + result = parse_qsl('key=\u0141%C3%A9', encoding='ascii') + self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')]) + result = parse_qsl('key=\u0141%E9-', encoding='ascii') + self.assertEqual(result, [('key', '\u0141\ufffd-')]) + result = parse_qsl('key=\u0141%E9-', encoding='ascii', errors='ignore') + self.assertEqual(result, [('key', '\u0141-')]) + + def test_parse_qsl_max_num_fields(self): + with self.assertRaises(ValueError): + parse_qsl('&'.join(['a=a'] * 11), max_num_fields=10) + with self.assertRaises(ValueError): + parse_qsl(';'.join(['a=a'] * 11), max_num_fields=10) + parse_qsl('&'.join(['a=a'] * 10), max_num_fields=10)