Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.
This also removes unused parse_header() and _parse_header_params() helpers in django.http.multipartparser.
This commit is contained in:
parent
bff5c114be
commit
d4d5427571
|
@ -8,7 +8,6 @@ import base64
|
|||
import binascii
|
||||
import collections
|
||||
import html
|
||||
from urllib.parse import unquote
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import (
|
||||
|
@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
|
|||
# This terminology ("main value" and "dictionary of
|
||||
# parameters") is from the Python docs.
|
||||
try:
|
||||
main_value_pair, params = parse_header(line)
|
||||
main_value_pair, params = parse_header_parameters(line.decode())
|
||||
name, value = main_value_pair.split(":", 1)
|
||||
params = {k: v.encode() for k, v in params.items()}
|
||||
except ValueError: # Invalid header.
|
||||
continue
|
||||
|
||||
|
@ -703,50 +703,3 @@ class Parser:
|
|||
for sub_stream in boundarystream:
|
||||
# Iterate over each part
|
||||
yield parse_boundary_stream(sub_stream, 1024)
|
||||
|
||||
|
||||
def parse_header(line):
    """
    Parse a raw header line into its main value and a dict of parameters.

    ``line`` is bytes. The main value (everything before the first ``;``) is
    returned lower-cased as str, and parameter names are lower-cased str.
    Parameter values are normally left as bytes so the caller can decode them
    later. The exception is an RFC 2231 extended value
    (``name*=charset'language'percent-encoded-text``), which is
    percent-decoded with the declared charset and returned as str.

    Malformed extended values are kept verbatim as bytes rather than raising:
    a value that doesn't contain exactly two single quotes, or one that names
    a charset Python doesn't know. An empty charset, which RFC 2231 permits,
    is decoded as UTF-8.

    Raise UnicodeDecodeError (a ValueError subclass) if the main value or a
    parameter name isn't ASCII, or if an extended value isn't valid UTF-8
    before percent-decoding.
    """
    plist = _parse_header_params(b";" + line)
    key = plist.pop(0).lower().decode("ascii")
    pdict = {}
    for p in plist:
        i = p.find(b"=")
        if i < 0:
            # A bare token without "=" carries no value; ignore it.
            continue
        name = p[:i].strip().lower().decode("ascii")
        value = p[i + 1 :].strip()
        if len(value) >= 2 and value[:1] == value[-1:] == b'"':
            # Drop the surrounding quotes and undo backslash escaping.
            value = value[1:-1]
            value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
        if name.endswith("*"):
            # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
            # https://tools.ietf.org/html/rfc2231#section-4
            name = name[:-1]
            # Count the quotes in the value only: a stray quote in the
            # parameter name must not influence the detection.
            if value.count(b"'") == 2:
                charset, _lang, encoded = value.split(b"'")
                try:
                    value = unquote(
                        encoded.decode(),
                        encoding=charset.decode() or "utf-8",
                    )
                except LookupError:
                    # Unknown charset label: keep the raw bytes, as is done
                    # for other malformed extended values.
                    pass
        pdict[name] = value
    return key, pdict


def _parse_header_params(s):
    """
    Split ``s`` (bytes beginning with ``;``) into its ``;``-separated items.

    Semicolons inside a double-quoted string don't separate items. Return the
    items as a list of bytes with surrounding whitespace stripped.
    """
    plist = []
    while s[:1] == b";":
        s = s[1:]
        end = s.find(b";")
        # An odd number of unescaped double quotes before ``end`` means this
        # semicolon sits inside a quoted string, so look for the next one.
        while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
            end = s.find(b";", end + 1)
        if end < 0:
            end = len(s)
        plist.append(s[:end].strip())
        s = s[end:]
    return plist
|
||||
|
|
|
@ -11,6 +11,7 @@ from urllib.parse import (
|
|||
_splitnetloc,
|
||||
_splitparams,
|
||||
scheme_chars,
|
||||
unquote,
|
||||
)
|
||||
from urllib.parse import urlencode as original_urlencode
|
||||
from urllib.parse import uses_params
|
||||
|
@ -387,15 +388,25 @@ def parse_header_parameters(line):
|
|||
Return the main content-type and a dictionary of options.
|
||||
"""
|
||||
parts = _parseparam(";" + line)
|
||||
key = parts.__next__()
|
||||
key = parts.__next__().lower()
|
||||
pdict = {}
|
||||
for p in parts:
|
||||
i = p.find("=")
|
||||
if i >= 0:
|
||||
has_encoding = False
|
||||
name = p[:i].strip().lower()
|
||||
if name.endswith("*"):
|
||||
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
|
||||
# https://tools.ietf.org/html/rfc2231#section-4
|
||||
name = name[:-1]
|
||||
if p.count("'") == 2:
|
||||
has_encoding = True
|
||||
value = p[i + 1 :].strip()
|
||||
if len(value) >= 2 and value[0] == value[-1] == '"':
|
||||
value = value[1:-1]
|
||||
value = value.replace("\\\\", "\\").replace('\\"', '"')
|
||||
if has_encoding:
|
||||
encoding, lang, value = value.split("'")
|
||||
value = unquote(value, encoding=encoding)
|
||||
pdict[name] = value
|
||||
return key, pdict
|
||||
|
|
|
@ -17,7 +17,6 @@ from django.http.multipartparser import (
|
|||
MultiPartParser,
|
||||
MultiPartParserError,
|
||||
Parser,
|
||||
parse_header,
|
||||
)
|
||||
from django.test import SimpleTestCase, TestCase, client, override_settings
|
||||
|
||||
|
@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
|
|||
for file_name in CANDIDATE_INVALID_FILE_NAMES:
|
||||
with self.subTest(file_name=file_name):
|
||||
self.assertIsNone(parser.sanitize_file_name(file_name))
|
||||
|
||||
def test_rfc2231_parsing(self):
    """RFC 2231 extended values are decoded with their declared charset."""
    test_data = (
        (
            b"Content-Type: application/x-stuff; "
            b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "This is ***fun***",
        ),
        (
            b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
            "foo-ä.html",
        ),
        (
            b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
            "foo-ä.html",
        ),
    )
    for raw_line, expected_title in test_data:
        # One subtest per header so a failure doesn't hide the later cases.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)
|
||||
|
||||
def test_rfc2231_wrong_title(self):
    """
    Test wrongly formatted RFC 2231 headers (missing double single quotes).
    Parsing should not crash (#24209).
    """
    test_data = (
        (
            b"Content-Type: application/x-stuff; "
            b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
            b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
        ),
        (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
        (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
    )
    for raw_line, expected_title in test_data:
        # One subtest per header so a failure doesn't hide the later cases.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header(raw_line)
            # The malformed value is expected back unchanged, still as bytes.
            self.assertEqual(parsed[1]["title"], expected_title)
|
||||
|
||||
def test_parse_header_with_double_quotes_and_semicolon(self):
    """An escaped quote and a semicolon inside a quoted value are preserved."""
    raw_header = b'form-data; name="files"; filename="fo\\"o;bar"'
    expected = ("form-data", {"name": b"files", "filename": b'fo"o;bar'})
    self.assertEqual(parse_header(raw_header), expected)
|
||||
|
|
|
@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
|||
for header, expected in tests:
|
||||
with self.subTest(header=header):
|
||||
self.assertEqual(parse_header_parameters(header), expected)
|
||||
|
||||
def test_rfc2231_parsing(self):
    """RFC 2231 extended values are decoded with their declared charset."""
    test_data = (
        (
            "Content-Type: application/x-stuff; "
            "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "This is ***fun***",
        ),
        (
            "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
            "foo-ä.html",
        ),
        (
            "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
            "foo-ä.html",
        ),
    )
    for raw_line, expected_title in test_data:
        # One subtest per header, matching the other tests in this class.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header_parameters(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)
|
||||
|
||||
def test_rfc2231_wrong_title(self):
    """
    Test wrongly formatted RFC 2231 headers (missing double single quotes).
    Parsing should not crash (#24209).
    """
    test_data = (
        (
            "Content-Type: application/x-stuff; "
            "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
        ),
        ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
        ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
    )
    for raw_line, expected_title in test_data:
        # One subtest per header, matching the other tests in this class.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header_parameters(raw_line)
            # The malformed value is expected back unchanged.
            self.assertEqual(parsed[1]["title"], expected_title)
|
||||
|
|
Loading…
Reference in New Issue