Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.

This also removes unused parse_header() and _parse_header_params() helpers in django.http.multipartparser.
2022-06-24 14:46:34 -04:00 · 2022-06-24 14:46:34 -04:00 · d4d5427571
parent bff5c114be
commit d4d5427571
4 changed files with 52 additions and 95 deletions
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@ -8,7 +8,6 @@ import base64
 import binascii
 import collections
 import html
-from urllib.parse import unquote

 from django.conf import settings
 from django.core.exceptions import (
@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
        # This terminology ("main value" and "dictionary of
        # parameters") is from the Python docs.
        try:
-            main_value_pair, params = parse_header(line)
+            main_value_pair, params = parse_header_parameters(line.decode())
            name, value = main_value_pair.split(":", 1)
+            params = {k: v.encode() for k, v in params.items()}
        except ValueError:  # Invalid header.
            continue

@ -703,50 +703,3 @@ class Parser:
        for sub_stream in boundarystream:
            # Iterate over each part
            yield parse_boundary_stream(sub_stream, 1024)
-
-
-def parse_header(line):
-    """
-    Parse the header into a key-value.
-
-    Input (line): bytes, output: str for key/name, bytes for values which
-    will be decoded later.
-    """
-    plist = _parse_header_params(b";" + line)
-    key = plist.pop(0).lower().decode("ascii")
-    pdict = {}
-    for p in plist:
-        i = p.find(b"=")
-        if i >= 0:
-            has_encoding = False
-            name = p[:i].strip().lower().decode("ascii")
-            if name.endswith("*"):
-                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
-                # https://tools.ietf.org/html/rfc2231#section-4
-                name = name[:-1]
-                if p.count(b"'") == 2:
-                    has_encoding = True
-            value = p[i + 1 :].strip()
-            if len(value) >= 2 and value[:1] == value[-1:] == b'"':
-                value = value[1:-1]
-                value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
-            if has_encoding:
-                encoding, lang, value = value.split(b"'")
-                value = unquote(value.decode(), encoding=encoding.decode())
-            pdict[name] = value
-    return key, pdict
-
-
-def _parse_header_params(s):
-    plist = []
-    while s[:1] == b";":
-        s = s[1:]
-        end = s.find(b";")
-        while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
-            end = s.find(b";", end + 1)
-        if end < 0:
-            end = len(s)
-        f = s[:end]
-        plist.append(f.strip())
-        s = s[end:]
-    return plist
--- a/django/utils/http.py
+++ b/django/utils/http.py
@ -11,6 +11,7 @@ from urllib.parse import (
    _splitnetloc,
    _splitparams,
    scheme_chars,
+    unquote,
 )
 from urllib.parse import urlencode as original_urlencode
 from urllib.parse import uses_params
@ -387,15 +388,25 @@ def parse_header_parameters(line):
    Return the main content-type and a dictionary of options.
    """
    parts = _parseparam(";" + line)
-    key = parts.__next__()
+    key = parts.__next__().lower()
    pdict = {}
    for p in parts:
        i = p.find("=")
        if i >= 0:
+            has_encoding = False
            name = p[:i].strip().lower()
+            if name.endswith("*"):
+                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
+                # https://tools.ietf.org/html/rfc2231#section-4
+                name = name[:-1]
+                if p.count("'") == 2:
+                    has_encoding = True
            value = p[i + 1 :].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
                value = value.replace("\\\\", "\\").replace('\\"', '"')
+            if has_encoding:
+                encoding, lang, value = value.split("'")
+                value = unquote(value, encoding=encoding)
            pdict[name] = value
    return key, pdict
--- a/tests/file_uploads/tests.py
+++ b/tests/file_uploads/tests.py
@ -17,7 +17,6 @@ from django.http.multipartparser import (
    MultiPartParser,
    MultiPartParserError,
    Parser,
-    parse_header,
 )
 from django.test import SimpleTestCase, TestCase, client, override_settings

@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
        for file_name in CANDIDATE_INVALID_FILE_NAMES:
            with self.subTest(file_name=file_name):
                self.assertIsNone(parser.sanitize_file_name(file_name))
-
-    def test_rfc2231_parsing(self):
-        test_data = (
-            (
-                b"Content-Type: application/x-stuff; "
-                b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
-                "This is ***fun***",
-            ),
-            (
-                b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
-                "foo-ä.html",
-            ),
-            (
-                b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
-                "foo-ä.html",
-            ),
-        )
-        for raw_line, expected_title in test_data:
-            parsed = parse_header(raw_line)
-            self.assertEqual(parsed[1]["title"], expected_title)
-
-    def test_rfc2231_wrong_title(self):
-        """
-        Test wrongly formatted RFC 2231 headers (missing double single quotes).
-        Parsing should not crash (#24209).
-        """
-        test_data = (
-            (
-                b"Content-Type: application/x-stuff; "
-                b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
-                b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
-            ),
-            (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
-            (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
-        )
-        for raw_line, expected_title in test_data:
-            parsed = parse_header(raw_line)
-            self.assertEqual(parsed[1]["title"], expected_title)
-
-    def test_parse_header_with_double_quotes_and_semicolon(self):
-        self.assertEqual(
-            parse_header(b'form-data; name="files"; filename="fo\\"o;bar"'),
-            ("form-data", {"name": b"files", "filename": b'fo"o;bar'}),
-        )
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
        for header, expected in tests:
            with self.subTest(header=header):
                self.assertEqual(parse_header_parameters(header), expected)
+
+    def test_rfc2231_parsing(self):
+        test_data = (
+            (
+                "Content-Type: application/x-stuff; "
+                "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+                "This is ***fun***",
+            ),
+            (
+                "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
+                "foo-ä.html",
+            ),
+            (
+                "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
+                "foo-ä.html",
+            ),
+        )
+        for raw_line, expected_title in test_data:
+            parsed = parse_header_parameters(raw_line)
+            self.assertEqual(parsed[1]["title"], expected_title)
+
+    def test_rfc2231_wrong_title(self):
+        """
+        Test wrongly formatted RFC 2231 headers (missing double single quotes).
+        Parsing should not crash (#24209).
+        """
+        test_data = (
+            (
+                "Content-Type: application/x-stuff; "
+                "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
+                "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+            ),
+            ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
+            ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
+        )
+        for raw_line, expected_title in test_data:
+            parsed = parse_header_parameters(raw_line)
+            self.assertEqual(parsed[1]["title"], expected_title)