From b42e5ca058178d67027bf66d37d00ade635b4c26 Mon Sep 17 00:00:00 2001
From: Claude Paroz
Date: Sat, 12 Jul 2014 14:08:50 +0200
Subject: [PATCH] Fixed #22971 -- Properly parsed RFC 2388 encoded headers

Thanks homm for the report, Cea Stapleton for patch improvements
and Ian Cordasco, Christian Schmitt and Tim Graham for the review.
---
 django/http/multipartparser.py | 13 +++++++
 tests/file_uploads/tests.py    | 66 +++++++++++++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py
index 1bcace94cd..fd9f3a5b32 100644
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@@ -16,6 +16,7 @@ from django.core.exceptions import SuspiciousMultipartForm
 from django.utils.datastructures import MultiValueDict
 from django.utils.encoding import force_text
 from django.utils import six
+from django.utils.six.moves.urllib.parse import unquote
 from django.utils.text import unescape_entities
 from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers
 
@@ -631,8 +632,20 @@ def parse_header(line):
     for p in plist:
         i = p.find(b'=')
         if i >= 0:
+            has_encoding = False
             name = p[:i].strip().lower().decode('ascii')
+            if name.endswith('*'):
+                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
+                # http://tools.ietf.org/html/rfc2231#section-4
+                name = name[:-1]
+                has_encoding = True
             value = p[i + 1:].strip()
+            if has_encoding:
+                encoding, lang, value = value.split(b"'")
+                if six.PY3:
+                    value = unquote(value.decode(), encoding=encoding.decode())
+                else:
+                    value = unquote(value).decode(encoding)
             if len(value) >= 2 and value[:1] == value[-1:] == b'"':
                 value = value[1:-1]
                 value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
diff --git a/tests/file_uploads/tests.py b/tests/file_uploads/tests.py
index 9ba7d097f4..39f4caf6cd 100644
--- a/tests/file_uploads/tests.py
+++ b/tests/file_uploads/tests.py
@@ -12,10 +12,11 @@ import unittest
 
 from django.core.files import temp as tempfile
 from django.core.files.uploadedfile import SimpleUploadedFile
-from django.http.multipartparser import MultiPartParser
+from django.http.multipartparser import MultiPartParser, parse_header
 from django.test import TestCase, client
 from django.test import override_settings
 from django.utils.encoding import force_bytes
+from django.utils.http import urlquote
 from django.utils.six import StringIO
 
 from . import uploadhandler
@@ -120,6 +121,56 @@ class FileUploadTests(TestCase):
 
         self.assertEqual(response.status_code, 200)
 
+    def test_unicode_file_name_rfc2231(self):
+        """
+        Test receiving file upload when filename is encoded with RFC2231
+        (#22971).
+        """
+        payload = client.FakePayload()
+        payload.write('\r\n'.join([
+            '--' + client.BOUNDARY,
+            'Content-Disposition: form-data; name="file_unicode"; filename*=UTF-8\'\'%s' % urlquote(UNICODE_FILENAME),
+            'Content-Type: application/octet-stream',
+            '',
+            'You got pwnd.\r\n',
+            '\r\n--' + client.BOUNDARY + '--\r\n'
+        ]))
+
+        r = {
+            'CONTENT_LENGTH': len(payload),
+            'CONTENT_TYPE': client.MULTIPART_CONTENT,
+            'PATH_INFO': "/unicode_name/",
+            'REQUEST_METHOD': 'POST',
+            'wsgi.input': payload,
+        }
+        response = self.client.request(**r)
+        self.assertEqual(response.status_code, 200)
+
+    def test_unicode_name_rfc2231(self):
+        """
+        Test receiving file upload when filename is encoded with RFC2231
+        (#22971).
+        """
+        payload = client.FakePayload()
+        payload.write('\r\n'.join([
+            '--' + client.BOUNDARY,
+            'Content-Disposition: form-data; name*=UTF-8\'\'file_unicode; filename*=UTF-8\'\'%s' % urlquote(UNICODE_FILENAME),
+            'Content-Type: application/octet-stream',
+            '',
+            'You got pwnd.\r\n',
+            '\r\n--' + client.BOUNDARY + '--\r\n'
+        ]))
+
+        r = {
+            'CONTENT_LENGTH': len(payload),
+            'CONTENT_TYPE': client.MULTIPART_CONTENT,
+            'PATH_INFO': "/unicode_name/",
+            'REQUEST_METHOD': 'POST',
+            'wsgi.input': payload,
+        }
+        response = self.client.request(**r)
+        self.assertEqual(response.status_code, 200)
+
     def test_dangerous_file_names(self):
         """Uploaded file names should be sanitized before ever reaching the view."""
         # This test simulates possible directory traversal attacks by a
@@ -483,3 +534,16 @@ class MultiParserTests(unittest.TestCase):
             'CONTENT_TYPE': 'multipart/form-data; boundary=_foo',
             'CONTENT_LENGTH': '1'
         }, StringIO('x'), [], 'utf-8')
+
+    def test_rfc2231_parsing(self):
+        test_data = (
+            (b"Content-Type: application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+             "This is ***fun***"),
+            (b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
+             "foo-ä.html"),
+            (b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
+             "foo-ä.html"),
+        )
+        for raw_line, expected_title in test_data:
+            parsed = parse_header(raw_line)
+            self.assertEqual(parsed[1]['title'], expected_title)