Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.

This also removes unused parse_header() and _parse_header_params() helpers in django.http.multipartparser.
2022-06-24 14:46:34 -04:00 · 2022-06-24 14:46:34 -04:00 · d4d5427571
parent bff5c114be
commit d4d5427571
4 changed files with 52 additions and 95 deletions
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@ -8,7 +8,6 @@ import base64
 import binascii
 import collections
 import html
 from urllib.parse import unquote
 from django.conf import settings
 from django.core.exceptions import (
@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
        # This terminology ("main value" and "dictionary of
        # parameters") is from the Python docs.
        try:
-            main_value_pair, params = parse_header(line)
+            main_value_pair, params = parse_header_parameters(line.decode())
            name, value = main_value_pair.split(":", 1)
            params = {k: v.encode() for k, v in params.items()}
        except ValueError:  # Invalid header.
            continue
@ -703,50 +703,3 @@ class Parser:
        for sub_stream in boundarystream:
            # Iterate over each part
            yield parse_boundary_stream(sub_stream, 1024)
 def parse_header(line):
    """
    Split a raw header line into its main value and a dict of parameters.

    ``line`` is bytes. The main value is returned lowercased as an
    ASCII-decoded str. Parameter names are lowercased, ASCII-decoded strs.
    Parameter values are left as bytes, except for RFC 2231 extended
    parameters (``name*=charset'lang'value`` with exactly two single quotes
    in the segment), whose values are percent-decoded with the declared
    charset and therefore returned as str.
    """
    segments = _parse_header_params(b";" + line)
    main_value = segments.pop(0).lower().decode("ascii")
    params = {}
    for segment in segments:
        raw_name, separator, raw_value = segment.partition(b"=")
        if not separator:
            # Segments without "=" carry no parameter and are ignored.
            continue
        param_name = raw_name.strip().lower().decode("ascii")
        is_extended = False
        if param_name.endswith("*"):
            # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
            # https://tools.ietf.org/html/rfc2231#section-4
            param_name = param_name[:-1]
            is_extended = segment.count(b"'") == 2
        param_value = raw_value.strip()
        is_quoted = (
            len(param_value) >= 2
            and param_value.startswith(b'"')
            and param_value.endswith(b'"')
        )
        if is_quoted:
            # Drop the surrounding quotes, then undo backslash escaping.
            unwrapped = param_value[1:-1]
            param_value = unwrapped.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
        if is_extended:
            charset, _lang, encoded = param_value.split(b"'")
            param_value = unquote(encoded.decode(), encoding=charset.decode())
        params[param_name] = param_value
    return main_value, params
 def _parse_header_params(s):
    """
    Split ``s`` (bytes) into its ";"-separated segments, stripped of
    surrounding whitespace.

    Scanning only proceeds while the remaining input starts with ";", so the
    caller is expected to prefix the header with one. A ";" is not treated as
    a separator while the count of unescaped double quotes before it is odd,
    i.e. while it sits inside a quoted string.
    """
    pieces = []
    remaining = s
    while remaining[:1] == b";":
        remaining = remaining[1:]
        cut = remaining.find(b";")
        while cut > 0:
            quotes = remaining.count(b'"', 0, cut)
            escaped_quotes = remaining.count(b'\\"', 0, cut)
            if (quotes - escaped_quotes) % 2 == 0:
                # Balanced quotes: this ";" really ends the segment.
                break
            cut = remaining.find(b";", cut + 1)
        if cut < 0:
            # No further separator: the segment runs to the end of input.
            cut = len(remaining)
        pieces.append(remaining[:cut].strip())
        remaining = remaining[cut:]
    return pieces
--- a/django/utils/http.py
+++ b/django/utils/http.py
@ -11,6 +11,7 @@ from urllib.parse import (
    _splitnetloc,
    _splitparams,
    scheme_chars,
    unquote,
 )
 from urllib.parse import urlencode as original_urlencode
 from urllib.parse import uses_params
@ -387,15 +388,25 @@ def parse_header_parameters(line):
    Return the main content-type and a dictionary of options.
    """
    parts = _parseparam(";" + line)
-    key = parts.__next__()
+    key = parts.__next__().lower()
    pdict = {}
    for p in parts:
        i = p.find("=")
        if i >= 0:
            has_encoding = False
            name = p[:i].strip().lower()
            if name.endswith("*"):
                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
                # https://tools.ietf.org/html/rfc2231#section-4
                name = name[:-1]
                if p.count("'") == 2:
                    has_encoding = True
            value = p[i + 1 :].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
                value = value.replace("\\\\", "\\").replace('\\"', '"')
            if has_encoding:
                encoding, lang, value = value.split("'")
                value = unquote(value, encoding=encoding)
            pdict[name] = value
    return key, pdict
--- a/tests/file_uploads/tests.py
+++ b/tests/file_uploads/tests.py
@ -17,7 +17,6 @@ from django.http.multipartparser import (
    MultiPartParser,
    MultiPartParserError,
    Parser,
    parse_header,
 )
 from django.test import SimpleTestCase, TestCase, client, override_settings
@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
        for file_name in CANDIDATE_INVALID_FILE_NAMES:
            with self.subTest(file_name=file_name):
                self.assertIsNone(parser.sanitize_file_name(file_name))
    def test_rfc2231_parsing(self):
        """
        RFC 2231 extended parameters (``title*=charset'lang'value``) are
        percent-decoded using the declared charset.
        """
        test_data = (
            (
                b"Content-Type: application/x-stuff; "
                b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
                "This is ***fun***",
            ),
            (
                b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
                "foo-ä.html",
            ),
            (
                b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
                "foo-ä.html",
            ),
        )
        for raw_line, expected_title in test_data:
            # subTest reports every failing header instead of stopping at the
            # first one.
            with self.subTest(raw_line=raw_line):
                parsed = parse_header(raw_line)
                self.assertEqual(parsed[1]["title"], expected_title)
    def test_rfc2231_wrong_title(self):
        """
        Wrongly formatted RFC 2231 headers (fewer than the two single quotes
        that delimit charset and language) are returned undecoded.
        Parsing should not crash (#24209).
        """
        test_data = (
            (
                b"Content-Type: application/x-stuff; "
                b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
                b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            ),
            (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
            (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
        )
        for raw_line, expected_title in test_data:
            # subTest reports every failing header instead of stopping at the
            # first one.
            with self.subTest(raw_line=raw_line):
                parsed = parse_header(raw_line)
                self.assertEqual(parsed[1]["title"], expected_title)
    def test_parse_header_with_double_quotes_and_semicolon(self):
        # An escaped double quote followed by a semicolon inside a quoted
        # value must stay part of that value.
        header = b'form-data; name="files"; filename="fo\\"o;bar"'
        expected = ("form-data", {"name": b"files", "filename": b'fo"o;bar'})
        self.assertEqual(parse_header(header), expected)
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
        for header, expected in tests:
            with self.subTest(header=header):
                self.assertEqual(parse_header_parameters(header), expected)
    def test_rfc2231_parsing(self):
        """
        RFC 2231 extended parameters (``title*=charset'lang'value``) are
        percent-decoded using the declared charset.
        """
        test_data = (
            (
                "Content-Type: application/x-stuff; "
                "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
                "This is ***fun***",
            ),
            (
                "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
                "foo-ä.html",
            ),
            (
                "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
                "foo-ä.html",
            ),
        )
        for raw_line, expected_title in test_data:
            # subTest reports every failing header instead of stopping at the
            # first one.
            with self.subTest(raw_line=raw_line):
                parsed = parse_header_parameters(raw_line)
                self.assertEqual(parsed[1]["title"], expected_title)
    def test_rfc2231_wrong_title(self):
        """
        Wrongly formatted RFC 2231 headers (fewer than the two single quotes
        that delimit charset and language) are returned undecoded.
        Parsing should not crash (#24209).
        """
        test_data = (
            (
                "Content-Type: application/x-stuff; "
                "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
                "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            ),
            ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
            ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
        )
        for raw_line, expected_title in test_data:
            # subTest reports every failing header instead of stopping at the
            # first one.
            with self.subTest(raw_line=raw_line):
                parsed = parse_header_parameters(raw_line)
                self.assertEqual(parsed[1]["title"], expected_title)