Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.
This also removes the now-unused parse_header() and _parse_header_params() helpers in django.http.multipartparser.
This commit is contained in:
parent
bff5c114be
commit
d4d5427571
|
@@ -8,7 +8,6 @@ import base64
|
||||||
import binascii
|
import binascii
|
||||||
import collections
|
import collections
|
||||||
import html
|
import html
|
||||||
from urllib.parse import unquote
|
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.exceptions import (
|
from django.core.exceptions import (
|
||||||
|
@@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
|
||||||
# This terminology ("main value" and "dictionary of
|
# This terminology ("main value" and "dictionary of
|
||||||
# parameters") is from the Python docs.
|
# parameters") is from the Python docs.
|
||||||
try:
|
try:
|
||||||
main_value_pair, params = parse_header(line)
|
main_value_pair, params = parse_header_parameters(line.decode())
|
||||||
name, value = main_value_pair.split(":", 1)
|
name, value = main_value_pair.split(":", 1)
|
||||||
|
params = {k: v.encode() for k, v in params.items()}
|
||||||
except ValueError: # Invalid header.
|
except ValueError: # Invalid header.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@@ -703,50 +703,3 @@ class Parser:
|
||||||
for sub_stream in boundarystream:
|
for sub_stream in boundarystream:
|
||||||
# Iterate over each part
|
# Iterate over each part
|
||||||
yield parse_boundary_stream(sub_stream, 1024)
|
yield parse_boundary_stream(sub_stream, 1024)
|
||||||
|
|
||||||
|
|
||||||
def parse_header(line):
|
|
||||||
"""
|
|
||||||
Parse the header into a key-value.
|
|
||||||
|
|
||||||
Input (line): bytes, output: str for key/name, bytes for values which
|
|
||||||
will be decoded later.
|
|
||||||
"""
|
|
||||||
plist = _parse_header_params(b";" + line)
|
|
||||||
key = plist.pop(0).lower().decode("ascii")
|
|
||||||
pdict = {}
|
|
||||||
for p in plist:
|
|
||||||
i = p.find(b"=")
|
|
||||||
if i >= 0:
|
|
||||||
has_encoding = False
|
|
||||||
name = p[:i].strip().lower().decode("ascii")
|
|
||||||
if name.endswith("*"):
|
|
||||||
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
|
|
||||||
# https://tools.ietf.org/html/rfc2231#section-4
|
|
||||||
name = name[:-1]
|
|
||||||
if p.count(b"'") == 2:
|
|
||||||
has_encoding = True
|
|
||||||
value = p[i + 1 :].strip()
|
|
||||||
if len(value) >= 2 and value[:1] == value[-1:] == b'"':
|
|
||||||
value = value[1:-1]
|
|
||||||
value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
|
|
||||||
if has_encoding:
|
|
||||||
encoding, lang, value = value.split(b"'")
|
|
||||||
value = unquote(value.decode(), encoding=encoding.decode())
|
|
||||||
pdict[name] = value
|
|
||||||
return key, pdict
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_header_params(s):
|
|
||||||
plist = []
|
|
||||||
while s[:1] == b";":
|
|
||||||
s = s[1:]
|
|
||||||
end = s.find(b";")
|
|
||||||
while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
|
|
||||||
end = s.find(b";", end + 1)
|
|
||||||
if end < 0:
|
|
||||||
end = len(s)
|
|
||||||
f = s[:end]
|
|
||||||
plist.append(f.strip())
|
|
||||||
s = s[end:]
|
|
||||||
return plist
|
|
||||||
|
|
|
@@ -11,6 +11,7 @@ from urllib.parse import (
|
||||||
_splitnetloc,
|
_splitnetloc,
|
||||||
_splitparams,
|
_splitparams,
|
||||||
scheme_chars,
|
scheme_chars,
|
||||||
|
unquote,
|
||||||
)
|
)
|
||||||
from urllib.parse import urlencode as original_urlencode
|
from urllib.parse import urlencode as original_urlencode
|
||||||
from urllib.parse import uses_params
|
from urllib.parse import uses_params
|
||||||
|
@@ -387,15 +388,25 @@ def parse_header_parameters(line):
|
||||||
Return the main content-type and a dictionary of options.
|
Return the main content-type and a dictionary of options.
|
||||||
"""
|
"""
|
||||||
parts = _parseparam(";" + line)
|
parts = _parseparam(";" + line)
|
||||||
key = parts.__next__()
|
key = parts.__next__().lower()
|
||||||
pdict = {}
|
pdict = {}
|
||||||
for p in parts:
|
for p in parts:
|
||||||
i = p.find("=")
|
i = p.find("=")
|
||||||
if i >= 0:
|
if i >= 0:
|
||||||
|
has_encoding = False
|
||||||
name = p[:i].strip().lower()
|
name = p[:i].strip().lower()
|
||||||
|
if name.endswith("*"):
|
||||||
|
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
|
||||||
|
# https://tools.ietf.org/html/rfc2231#section-4
|
||||||
|
name = name[:-1]
|
||||||
|
if p.count("'") == 2:
|
||||||
|
has_encoding = True
|
||||||
value = p[i + 1 :].strip()
|
value = p[i + 1 :].strip()
|
||||||
if len(value) >= 2 and value[0] == value[-1] == '"':
|
if len(value) >= 2 and value[0] == value[-1] == '"':
|
||||||
value = value[1:-1]
|
value = value[1:-1]
|
||||||
value = value.replace("\\\\", "\\").replace('\\"', '"')
|
value = value.replace("\\\\", "\\").replace('\\"', '"')
|
||||||
|
if has_encoding:
|
||||||
|
encoding, lang, value = value.split("'")
|
||||||
|
value = unquote(value, encoding=encoding)
|
||||||
pdict[name] = value
|
pdict[name] = value
|
||||||
return key, pdict
|
return key, pdict
|
||||||
|
|
|
@@ -17,7 +17,6 @@ from django.http.multipartparser import (
|
||||||
MultiPartParser,
|
MultiPartParser,
|
||||||
MultiPartParserError,
|
MultiPartParserError,
|
||||||
Parser,
|
Parser,
|
||||||
parse_header,
|
|
||||||
)
|
)
|
||||||
from django.test import SimpleTestCase, TestCase, client, override_settings
|
from django.test import SimpleTestCase, TestCase, client, override_settings
|
||||||
|
|
||||||
|
@@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
|
||||||
for file_name in CANDIDATE_INVALID_FILE_NAMES:
|
for file_name in CANDIDATE_INVALID_FILE_NAMES:
|
||||||
with self.subTest(file_name=file_name):
|
with self.subTest(file_name=file_name):
|
||||||
self.assertIsNone(parser.sanitize_file_name(file_name))
|
self.assertIsNone(parser.sanitize_file_name(file_name))
|
||||||
|
|
||||||
def test_rfc2231_parsing(self):
|
|
||||||
test_data = (
|
|
||||||
(
|
|
||||||
b"Content-Type: application/x-stuff; "
|
|
||||||
b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
|
||||||
"This is ***fun***",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
|
|
||||||
"foo-ä.html",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
|
|
||||||
"foo-ä.html",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
for raw_line, expected_title in test_data:
|
|
||||||
parsed = parse_header(raw_line)
|
|
||||||
self.assertEqual(parsed[1]["title"], expected_title)
|
|
||||||
|
|
||||||
def test_rfc2231_wrong_title(self):
|
|
||||||
"""
|
|
||||||
Test wrongly formatted RFC 2231 headers (missing double single quotes).
|
|
||||||
Parsing should not crash (#24209).
|
|
||||||
"""
|
|
||||||
test_data = (
|
|
||||||
(
|
|
||||||
b"Content-Type: application/x-stuff; "
|
|
||||||
b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
|
||||||
b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
|
||||||
),
|
|
||||||
(b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
|
|
||||||
(b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
|
|
||||||
)
|
|
||||||
for raw_line, expected_title in test_data:
|
|
||||||
parsed = parse_header(raw_line)
|
|
||||||
self.assertEqual(parsed[1]["title"], expected_title)
|
|
||||||
|
|
||||||
def test_parse_header_with_double_quotes_and_semicolon(self):
|
|
||||||
self.assertEqual(
|
|
||||||
parse_header(b'form-data; name="files"; filename="fo\\"o;bar"'),
|
|
||||||
("form-data", {"name": b"files", "filename": b'fo"o;bar'}),
|
|
||||||
)
|
|
||||||
|
|
|
@@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
||||||
for header, expected in tests:
|
for header, expected in tests:
|
||||||
with self.subTest(header=header):
|
with self.subTest(header=header):
|
||||||
self.assertEqual(parse_header_parameters(header), expected)
|
self.assertEqual(parse_header_parameters(header), expected)
|
||||||
|
|
||||||
|
def test_rfc2231_parsing(self):
|
||||||
|
test_data = (
|
||||||
|
(
|
||||||
|
"Content-Type: application/x-stuff; "
|
||||||
|
"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
||||||
|
"This is ***fun***",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
|
||||||
|
"foo-ä.html",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
|
||||||
|
"foo-ä.html",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
for raw_line, expected_title in test_data:
|
||||||
|
parsed = parse_header_parameters(raw_line)
|
||||||
|
self.assertEqual(parsed[1]["title"], expected_title)
|
||||||
|
|
||||||
|
def test_rfc2231_wrong_title(self):
|
||||||
|
"""
|
||||||
|
Test wrongly formatted RFC 2231 headers (missing double single quotes).
|
||||||
|
Parsing should not crash (#24209).
|
||||||
|
"""
|
||||||
|
test_data = (
|
||||||
|
(
|
||||||
|
"Content-Type: application/x-stuff; "
|
||||||
|
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
||||||
|
"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
||||||
|
),
|
||||||
|
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
|
||||||
|
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
|
||||||
|
)
|
||||||
|
for raw_line, expected_title in test_data:
|
||||||
|
parsed = parse_header_parameters(raw_line)
|
||||||
|
self.assertEqual(parsed[1]["title"], expected_title)
|
||||||
|
|
Loading…
Reference in New Issue