From 364583b8947dc7073445e3a8449bf021a80ca593 Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Fri, 10 Sep 2010 22:56:56 +0000 Subject: [PATCH] Fixed #14235 - UnicodeDecodeError in CSRF middleware Thanks to jbg for the report. This changeset essentially backs out [13698] in favour of a method that sanitizes the token rather than escaping it. git-svn-id: http://code.djangoproject.com/svn/django/trunk@13732 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/middleware/csrf.py | 23 ++++++++++++++++----- django/template/defaulttags.py | 3 +-- tests/regressiontests/csrf_tests/tests.py | 25 ++++++++++++++++------- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/django/middleware/csrf.py b/django/middleware/csrf.py index ca2ec8aa38..5d3a871adb 100644 --- a/django/middleware/csrf.py +++ b/django/middleware/csrf.py @@ -13,7 +13,6 @@ from django.conf import settings from django.core.urlresolvers import get_callable from django.utils.cache import patch_vary_headers from django.utils.hashcompat import md5_constructor -from django.utils.html import escape from django.utils.safestring import mark_safe _POST_FORM_RE = \ @@ -53,8 +52,8 @@ def _make_legacy_session_token(session_id): def get_token(request): """ - Returns the the CSRF token required for a POST form. No assumptions should - be made about what characters might be in the CSRF token. + Returns the the CSRF token required for a POST form. The token is an + alphanumeric value. A side effect of calling this function is to make the the csrf_protect decorator and the CsrfViewMiddleware add a CSRF cookie and a 'Vary: Cookie' @@ -65,6 +64,17 @@ def get_token(request): return request.META.get("CSRF_COOKIE", None) +def _sanitize_token(token): + # Allow only alphanum, and ensure we return a 'str' for the sake of the post + # processing middleware. + token = re.sub('[^a-zA-Z0-9]', '', str(token.decode('ascii', 'ignore'))) + if token == "": + # In case the cookie has been truncated to nothing at some point. + return _get_new_csrf_key() + else: + return token + + class CsrfViewMiddleware(object): """ Middleware that requires a present and correct csrfmiddlewaretoken @@ -90,7 +100,10 @@ class CsrfViewMiddleware(object): # request, so it's available to the view. We'll store it in a cookie when # we reach the response. try: - request.META["CSRF_COOKIE"] = request.COOKIES[settings.CSRF_COOKIE_NAME] + # In case of cookies from untrusted sources, we strip anything + # dangerous at this point, so that the cookie + token will have the + # same, sanitized value. + request.META["CSRF_COOKIE"] = _sanitize_token(request.COOKIES[settings.CSRF_COOKIE_NAME]) cookie_is_new = False except KeyError: # No cookie, so create one. This will be sent with the next @@ -249,7 +262,7 @@ class CsrfResponseMiddleware(object): """Returns the matched
tag plus the added element""" return mark_safe(match.group() + "
" + \ "
") # Modify any POST forms diff --git a/django/template/defaulttags.py b/django/template/defaulttags.py index 0914b1c3b1..1b07413530 100644 --- a/django/template/defaulttags.py +++ b/django/template/defaulttags.py @@ -9,7 +9,6 @@ from django.template import TemplateSyntaxError, VariableDoesNotExist, BLOCK_TAG from django.template import get_library, Library, InvalidTemplateLibrary from django.template.smartif import IfParser, Literal from django.conf import settings -from django.utils.html import escape from django.utils.encoding import smart_str, smart_unicode from django.utils.safestring import mark_safe @@ -43,7 +42,7 @@ class CsrfTokenNode(Node): if csrf_token == 'NOTPROVIDED': return mark_safe(u"") else: - return mark_safe(u"
" % escape(csrf_token)) + return mark_safe(u"
" % csrf_token) else: # It's very probable that the token is missing because of # misconfiguration, so we raise a warning diff --git a/tests/regressiontests/csrf_tests/tests.py b/tests/regressiontests/csrf_tests/tests.py index fadecf565e..8fbc7669a5 100644 --- a/tests/regressiontests/csrf_tests/tests.py +++ b/tests/regressiontests/csrf_tests/tests.py @@ -6,15 +6,14 @@ from django.middleware.csrf import CsrfMiddleware, CsrfViewMiddleware from django.views.decorators.csrf import csrf_exempt, csrf_view_exempt from django.core.context_processors import csrf from django.contrib.sessions.middleware import SessionMiddleware -from django.utils.html import escape from django.utils.importlib import import_module from django.conf import settings from django.template import RequestContext, Template # Response/views used for CsrfResponseMiddleware and CsrfViewMiddleware tests def post_form_response(): - resp = HttpResponse(content=""" -
+ resp = HttpResponse(content=u""" +

\u00a1Unicode!
""", mimetype="text/html") return resp @@ -58,8 +57,9 @@ class TestingHttpRequest(HttpRequest): class CsrfMiddlewareTest(TestCase): # The csrf token is potentially from an untrusted source, so could have - # characters that need escaping - _csrf_id = "<1>" + # characters that need dealing with. + _csrf_id_cookie = "<1>\xc2\xa1" + _csrf_id = "1" # This is a valid session token for this ID and secret key. This was generated using # the old code that we're to be backwards-compatible with. Don't use the CSRF code @@ -74,7 +74,7 @@ class CsrfMiddlewareTest(TestCase): def _get_GET_csrf_cookie_request(self): req = TestingHttpRequest() - req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id + req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id_cookie return req def _get_POST_csrf_cookie_request(self): @@ -104,7 +104,7 @@ class CsrfMiddlewareTest(TestCase): return req def _check_token_present(self, response, csrf_id=None): - self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % escape(csrf_id or self._csrf_id)) + self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % (csrf_id or self._csrf_id)) # Check the post processing and outgoing cookie def test_process_response_no_csrf_cookie(self): @@ -290,6 +290,17 @@ class CsrfMiddlewareTest(TestCase): resp = token_view(req) self.assertEquals(u"", resp.content) + def test_token_node_empty_csrf_cookie(self): + """ + Check that we get a new token if the csrf_cookie is the empty string + """ + req = self._get_GET_no_csrf_cookie_request() + req.COOKIES[settings.CSRF_COOKIE_NAME] = "" + CsrfViewMiddleware().process_view(req, token_view, (), {}) + resp = token_view(req) + + self.assertNotEqual(u"", resp.content) + def test_token_node_with_csrf_cookie(self): """ Check that CsrfTokenNode works when a CSRF cookie is set