Fixed #5076 -- Properly decode POSTs with non-utf-8 payload encoding

Thanks daniel at blogg.se for the report and Aymeric Augustin for
his assistance on the patch.
This commit is contained in:
Claude Paroz 2012-10-29 20:33:00 +01:00
parent 9741912a9a
commit 6de6988f99
2 changed files with 39 additions and 0 deletions

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import codecs
import logging import logging
import sys import sys
from io import BytesIO from io import BytesIO
@ -144,6 +145,14 @@ class WSGIRequest(http.HttpRequest):
self.META['PATH_INFO'] = path_info self.META['PATH_INFO'] = path_info
self.META['SCRIPT_NAME'] = script_name self.META['SCRIPT_NAME'] = script_name
self.method = environ['REQUEST_METHOD'].upper() self.method = environ['REQUEST_METHOD'].upper()
_, content_params = self._parse_content_type(self.META.get('CONTENT_TYPE', ''))
if 'charset' in content_params:
try:
codecs.lookup(content_params['charset'])
except LookupError:
pass
else:
self.encoding = content_params['charset']
self._post_parse_error = False self._post_parse_error = False
try: try:
content_length = int(self.environ.get('CONTENT_LENGTH')) content_length = int(self.environ.get('CONTENT_LENGTH'))
@ -155,6 +164,21 @@ class WSGIRequest(http.HttpRequest):
def _is_secure(self): def _is_secure(self):
return 'wsgi.url_scheme' in self.environ and self.environ['wsgi.url_scheme'] == 'https' return 'wsgi.url_scheme' in self.environ and self.environ['wsgi.url_scheme'] == 'https'
def _parse_content_type(self, ctype):
"""
Media Types parsing according to RFC 2616, section 3.7.
Returns the data type and parameters. For example:
Input: "text/plain; charset=iso-8859-1"
Output: ('text/plain', {'charset': 'iso-8859-1'})
"""
content_type, _, params = ctype.partition(';')
content_params = {}
for parameter in params.split(';'):
k, _, v = parameter.strip().partition('=')
content_params[k] = v
return content_type, content_params
def _get_request(self): def _get_request(self):
if not hasattr(self, '_request'): if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET) self._request = datastructures.MergeDict(self.POST, self.GET)

View File

@ -1,3 +1,4 @@
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
import time import time
@ -352,6 +353,20 @@ class RequestsTests(unittest.TestCase):
self.assertRaises(Exception, lambda: request.body) self.assertRaises(Exception, lambda: request.body)
self.assertEqual(request.POST, {}) self.assertEqual(request.POST, {})
def test_alternate_charset_POST(self):
    """
    A POST whose payload is latin-1 encoded and whose Content-Type
    declares charset=iso-8859-1 is parsed into the expected text.
    """
    from django.utils.http import urllib_parse

    latin1_value = 'España'.encode('latin-1')
    body = FakePayload(urllib_parse.urlencode({'key': latin1_value}))
    environ = {
        'REQUEST_METHOD': 'POST',
        'CONTENT_LENGTH': len(body),
        'CONTENT_TYPE': 'application/x-www-form-urlencoded; charset=iso-8859-1',
        'wsgi.input': body,
    }
    request = WSGIRequest(environ)
    self.assertEqual(request.POST, {'key': ['España']})
def test_body_after_POST_multipart(self): def test_body_after_POST_multipart(self):
""" """
Reading body after parsing multipart is not allowed Reading body after parsing multipart is not allowed