From d4d5427571b4bf3a21c902276c2a00215c2a37cc Mon Sep 17 00:00:00 2001 From: Mehrdad Date: Fri, 24 Jun 2022 14:46:34 -0400 Subject: [PATCH] Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams. This also removes unused parse_header() and _parse_header_params() helpers in django.http.multipartparser. --- django/http/multipartparser.py | 51 ++-------------------------------- django/utils/http.py | 13 ++++++++- tests/file_uploads/tests.py | 45 ------------------------------ tests/utils_tests/test_http.py | 38 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 95 deletions(-) diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py index 73ef074744..b3e0925a42 100644 --- a/django/http/multipartparser.py +++ b/django/http/multipartparser.py @@ -8,7 +8,6 @@ import base64 import binascii import collections import html -from urllib.parse import unquote from django.conf import settings from django.core.exceptions import ( @@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size): # This terminology ("main value" and "dictionary of # parameters") is from the Python docs. try: - main_value_pair, params = parse_header(line) + main_value_pair, params = parse_header_parameters(line.decode()) name, value = main_value_pair.split(":", 1) + params = {k: v.encode() for k, v in params.items()} except ValueError: # Invalid header. continue @@ -703,50 +703,3 @@ class Parser: for sub_stream in boundarystream: # Iterate over each part yield parse_boundary_stream(sub_stream, 1024) - - -def parse_header(line): - """ - Parse the header into a key-value. - - Input (line): bytes, output: str for key/name, bytes for values which - will be decoded later. - """ - plist = _parse_header_params(b";" + line) - key = plist.pop(0).lower().decode("ascii") - pdict = {} - for p in plist: - i = p.find(b"=") - if i >= 0: - has_encoding = False - name = p[:i].strip().lower().decode("ascii") - if name.endswith("*"): - # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext") - # https://tools.ietf.org/html/rfc2231#section-4 - name = name[:-1] - if p.count(b"'") == 2: - has_encoding = True - value = p[i + 1 :].strip() - if len(value) >= 2 and value[:1] == value[-1:] == b'"': - value = value[1:-1] - value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"') - if has_encoding: - encoding, lang, value = value.split(b"'") - value = unquote(value.decode(), encoding=encoding.decode()) - pdict[name] = value - return key, pdict - - -def _parse_header_params(s): - plist = [] - while s[:1] == b";": - s = s[1:] - end = s.find(b";") - while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: - end = s.find(b";", end + 1) - if end < 0: - end = len(s) - f = s[:end] - plist.append(f.strip()) - s = s[end:] - return plist diff --git a/django/utils/http.py b/django/utils/http.py index 6e2091bf52..51fdc4b149 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -11,6 +11,7 @@ from urllib.parse import ( _splitnetloc, _splitparams, scheme_chars, + unquote, ) from urllib.parse import urlencode as original_urlencode from urllib.parse import uses_params @@ -387,15 +388,25 @@ def parse_header_parameters(line): Return the main content-type and a dictionary of options. """ parts = _parseparam(";" + line) - key = parts.__next__() + key = parts.__next__().lower() pdict = {} for p in parts: i = p.find("=") if i >= 0: + has_encoding = False name = p[:i].strip().lower() + if name.endswith("*"): + # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext") + # https://tools.ietf.org/html/rfc2231#section-4 + name = name[:-1] + if p.count("'") == 2: + has_encoding = True value = p[i + 1 :].strip() if len(value) >= 2 and value[0] == value[-1] == '"': value = value[1:-1] value = value.replace("\\\\", "\\").replace('\\"', '"') + if has_encoding: + encoding, lang, value = value.split("'") + value = unquote(value, encoding=encoding) pdict[name] = value return key, pdict diff --git a/tests/file_uploads/tests.py b/tests/file_uploads/tests.py index 44c54d908e..c6d76aa4c9 100644 --- a/tests/file_uploads/tests.py +++ b/tests/file_uploads/tests.py @@ -17,7 +17,6 @@ from django.http.multipartparser import ( MultiPartParser, MultiPartParserError, Parser, - parse_header, ) from django.test import SimpleTestCase, TestCase, client, override_settings @@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase): for file_name in CANDIDATE_INVALID_FILE_NAMES: with self.subTest(file_name=file_name): self.assertIsNone(parser.sanitize_file_name(file_name)) - - def test_rfc2231_parsing(self): - test_data = ( - ( - b"Content-Type: application/x-stuff; " - b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A", - "This is ***fun***", - ), - ( - b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html", - "foo-ä.html", - ), - ( - b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html", - "foo-ä.html", - ), - ) - for raw_line, expected_title in test_data: - parsed = parse_header(raw_line) - self.assertEqual(parsed[1]["title"], expected_title) - - def test_rfc2231_wrong_title(self): - """ - Test wrongly formatted RFC 2231 headers (missing double single quotes). - Parsing should not crash (#24209). - """ - test_data = ( - ( - b"Content-Type: application/x-stuff; " - b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A", - b"'This%20is%20%2A%2A%2Afun%2A%2A%2A", - ), - (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"), - (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"), - ) - for raw_line, expected_title in test_data: - parsed = parse_header(raw_line) - self.assertEqual(parsed[1]["title"], expected_title) - - def test_parse_header_with_double_quotes_and_semicolon(self): - self.assertEqual( - parse_header(b'form-data; name="files"; filename="fo\\"o;bar"'), - ("form-data", {"name": b"files", "filename": b'fo"o;bar'}), - ) diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index 2ba617dfc9..b2754b4ddb 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase): for header, expected in tests: with self.subTest(header=header): self.assertEqual(parse_header_parameters(header), expected) + + def test_rfc2231_parsing(self): + test_data = ( + ( + "Content-Type: application/x-stuff; " + "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A", + "This is ***fun***", + ), + ( + "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html", + "foo-ä.html", + ), + ( + "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html", + "foo-ä.html", + ), + ) + for raw_line, expected_title in test_data: + parsed = parse_header_parameters(raw_line) + self.assertEqual(parsed[1]["title"], expected_title) + + def test_rfc2231_wrong_title(self): + """ + Test wrongly formatted RFC 2231 headers (missing double single quotes). + Parsing should not crash (#24209). + """ + test_data = ( + ( + "Content-Type: application/x-stuff; " + "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A", + "'This%20is%20%2A%2A%2Afun%2A%2A%2A", + ), + ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"), + ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"), + ) + for raw_line, expected_title in test_data: + parsed = parse_header_parameters(raw_line) + self.assertEqual(parsed[1]["title"], expected_title)