Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.

This also removes unused parse_header() and _parse_header_params()
helpers in django.http.multipartparser.
This commit is contained in:
Mehrdad 2022-06-24 14:46:34 -04:00 committed by Mariusz Felisiak
parent bff5c114be
commit d4d5427571
4 changed files with 52 additions and 95 deletions

View File

@ -8,7 +8,6 @@ import base64
import binascii import binascii
import collections import collections
import html import html
from urllib.parse import unquote
from django.conf import settings from django.conf import settings
from django.core.exceptions import ( from django.core.exceptions import (
@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
# This terminology ("main value" and "dictionary of # This terminology ("main value" and "dictionary of
# parameters") is from the Python docs. # parameters") is from the Python docs.
try: try:
main_value_pair, params = parse_header(line) main_value_pair, params = parse_header_parameters(line.decode())
name, value = main_value_pair.split(":", 1) name, value = main_value_pair.split(":", 1)
params = {k: v.encode() for k, v in params.items()}
except ValueError: # Invalid header. except ValueError: # Invalid header.
continue continue
@ -703,50 +703,3 @@ class Parser:
for sub_stream in boundarystream: for sub_stream in boundarystream:
# Iterate over each part # Iterate over each part
yield parse_boundary_stream(sub_stream, 1024) yield parse_boundary_stream(sub_stream, 1024)
def parse_header(line):
    """
    Parse a raw header line into its main value and a dict of parameters.

    Input (line): bytes, output: str for key/name, bytes for values which
    will be decoded later.

    The main value is lower-cased and decoded as ASCII. Parameter names are
    stripped, lower-cased and decoded as ASCII. A double-quoted parameter
    value has its surrounding quotes removed and its backslash escapes for
    a backslash and a double quote undone.

    A parameter whose name ends in "*" and which contains exactly two single
    quotes is treated as an RFC 2231 extended value
    (charset'language'percent-encoded-text): the text is percent-decoded with
    the declared charset and stored as str under the name without the "*".
    If the declared charset is not known to Python, the value is left as the
    raw bytes instead of letting LookupError escape to callers that only
    guard against ValueError.
    """
    plist = _parse_header_params(b";" + line)
    key = plist.pop(0).lower().decode("ascii")
    pdict = {}
    for p in plist:
        i = p.find(b"=")
        if i < 0:
            # Segments without "=" carry no name/value pair.
            continue
        has_encoding = False
        name = p[:i].strip().lower().decode("ascii")
        if name.endswith("*"):
            # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
            # https://tools.ietf.org/html/rfc2231#section-4
            name = name[:-1]
            if p.count(b"'") == 2:
                has_encoding = True
        value = p[i + 1 :].strip()
        if len(value) >= 2 and value[:1] == value[-1:] == b'"':
            value = value[1:-1]
            value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
        if has_encoding:
            encoding, lang, encoded = value.split(b"'")
            try:
                value = unquote(encoded.decode(), encoding=encoding.decode())
            except LookupError:
                # Unknown charset label: keep the raw, undecoded value, as is
                # done for extended values that lack the two single quotes.
                pass
        pdict[name] = value
    return key, pdict


def _parse_header_params(s):
    """
    Split the bytes *s* into a list of stripped, semicolon-separated segments.

    Segments are only consumed while *s* starts with b";", so callers prepend
    one. A semicolon is not treated as a separator while the number of double
    quotes seen so far in the segment, not counting backslash-escaped ones,
    is odd (i.e. while inside a quoted string).
    """
    plist = []
    while s[:1] == b";":
        s = s[1:]
        end = s.find(b";")
        # Skip semicolons that sit inside an unterminated quoted string.
        while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
            end = s.find(b";", end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        plist.append(f.strip())
        s = s[end:]
    return plist

View File

@ -11,6 +11,7 @@ from urllib.parse import (
_splitnetloc, _splitnetloc,
_splitparams, _splitparams,
scheme_chars, scheme_chars,
unquote,
) )
from urllib.parse import urlencode as original_urlencode from urllib.parse import urlencode as original_urlencode
from urllib.parse import uses_params from urllib.parse import uses_params
@ -387,15 +388,25 @@ def parse_header_parameters(line):
Return the main content-type and a dictionary of options. Return the main content-type and a dictionary of options.
""" """
parts = _parseparam(";" + line) parts = _parseparam(";" + line)
key = parts.__next__() key = parts.__next__().lower()
pdict = {} pdict = {}
for p in parts: for p in parts:
i = p.find("=") i = p.find("=")
if i >= 0: if i >= 0:
has_encoding = False
name = p[:i].strip().lower() name = p[:i].strip().lower()
if name.endswith("*"):
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
# https://tools.ietf.org/html/rfc2231#section-4
name = name[:-1]
if p.count("'") == 2:
has_encoding = True
value = p[i + 1 :].strip() value = p[i + 1 :].strip()
if len(value) >= 2 and value[0] == value[-1] == '"': if len(value) >= 2 and value[0] == value[-1] == '"':
value = value[1:-1] value = value[1:-1]
value = value.replace("\\\\", "\\").replace('\\"', '"') value = value.replace("\\\\", "\\").replace('\\"', '"')
if has_encoding:
encoding, lang, value = value.split("'")
value = unquote(value, encoding=encoding)
pdict[name] = value pdict[name] = value
return key, pdict return key, pdict

View File

@ -17,7 +17,6 @@ from django.http.multipartparser import (
MultiPartParser, MultiPartParser,
MultiPartParserError, MultiPartParserError,
Parser, Parser,
parse_header,
) )
from django.test import SimpleTestCase, TestCase, client, override_settings from django.test import SimpleTestCase, TestCase, client, override_settings
@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
for file_name in CANDIDATE_INVALID_FILE_NAMES: for file_name in CANDIDATE_INVALID_FILE_NAMES:
with self.subTest(file_name=file_name): with self.subTest(file_name=file_name):
self.assertIsNone(parser.sanitize_file_name(file_name)) self.assertIsNone(parser.sanitize_file_name(file_name))
def test_rfc2231_parsing(self):
    # Each case is an RFC 2231 extended parameter
    # (title*=charset'language'percent-encoded-text) paired with the title
    # expected after percent-decoding with the declared charset.
    test_data = (
        (
            b"Content-Type: application/x-stuff; "
            b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "This is ***fun***",
        ),
        (
            b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
            "foo-ä.html",
        ),
        (
            b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
            "foo-ä.html",
        ),
    )
    for raw_line, expected_title in test_data:
        # subTest reports every failing header instead of stopping at the
        # first one, and labels the failure with the offending input.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)
def test_rfc2231_wrong_title(self):
    """
    Test wrongly formatted RFC 2231 headers (missing double single quotes).
    Parsing should not crash (#24209).
    """
    # The expected titles are bytes: values that are not decoded as RFC 2231
    # extended values are returned undecoded.
    test_data = (
        (
            b"Content-Type: application/x-stuff; "
            b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
            b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
        ),
        (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
        (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
    )
    for raw_line, expected_title in test_data:
        # subTest reports every failing header instead of stopping at the
        # first one, and labels the failure with the offending input.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)
def test_parse_header_with_double_quotes_and_semicolon(self):
    # A semicolon and a backslash-escaped double quote inside a quoted value
    # must stay part of that value rather than splitting the parameter.
    header = b'form-data; name="files"; filename="fo\\"o;bar"'
    expected = ("form-data", {"name": b"files", "filename": b'fo"o;bar'})
    self.assertEqual(parse_header(header), expected)

View File

@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
for header, expected in tests: for header, expected in tests:
with self.subTest(header=header): with self.subTest(header=header):
self.assertEqual(parse_header_parameters(header), expected) self.assertEqual(parse_header_parameters(header), expected)
def test_rfc2231_parsing(self):
    # Each case is an RFC 2231 extended parameter
    # (title*=charset'language'percent-encoded-text) paired with the title
    # expected after percent-decoding with the declared charset.
    test_data = (
        (
            "Content-Type: application/x-stuff; "
            "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "This is ***fun***",
        ),
        (
            "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
            "foo-ä.html",
        ),
        (
            "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
            "foo-ä.html",
        ),
    )
    for raw_line, expected_title in test_data:
        # subTest reports every failing header instead of stopping at the
        # first one, and labels the failure with the offending input.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header_parameters(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)
def test_rfc2231_wrong_title(self):
    """
    Test wrongly formatted RFC 2231 headers (missing double single quotes).
    Parsing should not crash (#24209).
    """
    # The expected titles are the raw parameter text: without the two single
    # quotes the value is not percent-decoded.
    test_data = (
        (
            "Content-Type: application/x-stuff; "
            "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
        ),
        ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
        ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
    )
    for raw_line, expected_title in test_data:
        # subTest reports every failing header instead of stopping at the
        # first one, and labels the failure with the offending input.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header_parameters(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)