def _parse_content_type(self, ctype):
    """
    Parse a media type header value (RFC 2616, section 3.7).

    Returns a ``(content_type, content_params)`` tuple, where
    ``content_type`` is the text before the first ``;`` with surrounding
    whitespace removed, and ``content_params`` is a dict of the parameters
    that follow it. For example:
    Input: "text/plain; charset=iso-8859-1"
    Output: ('text/plain', {'charset': 'iso-8859-1'})

    Parameter names are lower-cased, because attribute names are
    case-insensitive; parameter values keep their case. A value given as a
    quoted-string (``charset="utf-8"``) is returned without the quotes.
    Empty segments (no parameters at all, a trailing ``;``, or ``;;``)
    produce no entry. A parameter without ``=`` maps to the empty string.
    When a name is repeated, the last occurrence wins.

    Limitation: the header is split on every ``;``, so a semicolon inside
    a quoted-string value is not supported.
    """
    content_type, _, params = ctype.partition(';')
    content_params = {}
    for parameter in params.split(';'):
        name, _, value = parameter.partition('=')
        name = name.strip().lower()
        if not name:
            # ''.split(';') yields [''], and "a/b;" or "a/b;;x=1" contain
            # empty segments; none of these is a real parameter.
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            # quoted-string: drop the delimiters and undo backslash escapes.
            value = value[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        content_params[name] = value
    return content_type.strip(), content_params