[1.5.x] Fixed #5076 -- Properly decode POSTs with non-utf-8 payload encoding

Thanks daniel at blogg.se for the report and Aymeric Augustin for his assistance on the patch. Backport of 6de6988f9 from master.
2012-10-29 20:33:00 +01:00 · 2012-10-29 20:33:00 +01:00 · 3f3076edbf
parent e8269a6729
commit 3f3076edbf
2 changed files with 39 additions and 0 deletions
--- a/django/core/handlers/wsgi.py
+++ b/django/core/handlers/wsgi.py
@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 import codecs
 import logging
 import sys
 from io import BytesIO
@ -144,6 +145,14 @@ class WSGIRequest(http.HttpRequest):
        self.META['PATH_INFO'] = path_info
        self.META['SCRIPT_NAME'] = script_name
        self.method = environ['REQUEST_METHOD'].upper()
        _, content_params = self._parse_content_type(self.META.get('CONTENT_TYPE', ''))
        if 'charset' in content_params:
            try:
                codecs.lookup(content_params['charset'])
            except LookupError:
                pass
            else:
                self.encoding = content_params['charset']
        self._post_parse_error = False
        try:
            content_length = int(self.environ.get('CONTENT_LENGTH'))
@ -155,6 +164,21 @@ class WSGIRequest(http.HttpRequest):
    def _is_secure(self):
        """
        Return True when the WSGI environ reports the 'https' URL scheme.

        A missing 'wsgi.url_scheme' key is treated as not secure.
        """
        # .get() yields None for an absent key, which never equals 'https'.
        return self.environ.get('wsgi.url_scheme') == 'https'
    def _parse_content_type(self, ctype):
        """
        Media Types parsing according to RFC 2616, section 3.7.

        Returns the data type and parameters. For example:
        Input: "text/plain; charset=iso-8859-1"
        Output: ('text/plain', {'charset': 'iso-8859-1'})

        Parameter names are lower-cased, because RFC 2616 declares them
        case-insensitive. A value written as a quoted-string is returned
        without its surrounding double quotes. Empty parameters (no
        parameters at all, a trailing ';' or a doubled ';;') are ignored
        rather than being reported under an empty key.

        Note: the header is split on every ';', so a quoted value that
        itself contains a ';' is not supported.
        """
        content_type, _, params = ctype.partition(';')
        content_params = {}
        for parameter in params.split(';'):
            name, _, value = parameter.strip().partition('=')
            name = name.strip().lower()
            if not name:
                # ''.split(';') yields [''], so a header without parameters
                # would otherwise produce a spurious {'': ''} entry.
                continue
            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                # Unwrap a quoted-string, e.g. charset="iso-8859-1", so the
                # value can be used directly (for instance by codecs.lookup).
                value = value[1:-1]
            content_params[name] = value
        return content_type.strip(), content_params
    def _get_request(self):
        if not hasattr(self, '_request'):
            self._request = datastructures.MergeDict(self.POST, self.GET)
--- a/tests/regressiontests/requests/tests.py
+++ b/tests/regressiontests/requests/tests.py
@ -1,3 +1,4 @@
 # -*- encoding: utf-8 -*-
 from __future__ import unicode_literals
 import time
@ -352,6 +353,20 @@ class RequestsTests(unittest.TestCase):
        self.assertRaises(Exception, lambda: request.body)
        self.assertEqual(request.POST, {})
    def test_alternate_charset_POST(self):
        """
        A form-encoded POST whose Content-Type declares a non-utf-8 charset
        must be decoded using that charset.
        """
        from django.utils.http import urllib_parse
        # Build the request body from latin-1 bytes so that decoding it as
        # utf-8 would not give back the original text.
        latin1_value = 'España'.encode('latin-1')
        body = urllib_parse.urlencode({'key': latin1_value})
        payload = FakePayload(body)
        environ = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_LENGTH': len(payload),
            'CONTENT_TYPE': 'application/x-www-form-urlencoded; charset=iso-8859-1',
            'wsgi.input': payload,
        }
        request = WSGIRequest(environ)
        expected = {'key': ['España']}
        self.assertEqual(request.POST, expected)
    def test_body_after_POST_multipart(self):
        """
        Reading body after parsing multipart is not allowed