diff --git a/django/middleware/csrf.py b/django/middleware/csrf.py index ba9f63ec3d..276b31e10f 100644 --- a/django/middleware/csrf.py +++ b/django/middleware/csrf.py @@ -8,6 +8,7 @@ from __future__ import unicode_literals import logging import re +import string from django.conf import settings from django.urls import get_callable @@ -16,8 +17,10 @@ from django.utils.crypto import constant_time_compare, get_random_string from django.utils.deprecation import MiddlewareMixin from django.utils.encoding import force_text from django.utils.http import is_same_domain +from django.utils.six.moves import zip from django.utils.six.moves.urllib.parse import urlparse + logger = logging.getLogger('django.request') REASON_NO_REFERER = "Referer checking failed - no Referer." @@ -27,7 +30,9 @@ REASON_BAD_TOKEN = "CSRF token missing or incorrect." REASON_MALFORMED_REFERER = "Referer checking failed - Referer is malformed." REASON_INSECURE_REFERER = "Referer checking failed - Referer is insecure while host is secure." -CSRF_KEY_LENGTH = 32 +CSRF_SECRET_LENGTH = 32 +CSRF_TOKEN_LENGTH = 2 * CSRF_SECRET_LENGTH +CSRF_ALLOWED_CHARS = string.ascii_letters + string.digits def _get_failure_view(): @@ -37,8 +42,38 @@ def _get_failure_view(): return get_callable(settings.CSRF_FAILURE_VIEW) -def _get_new_csrf_key(): - return get_random_string(CSRF_KEY_LENGTH) +def _get_new_csrf_string(): + return get_random_string(CSRF_SECRET_LENGTH, allowed_chars=CSRF_ALLOWED_CHARS) + + +def _salt_cipher_secret(secret): + """ + Given a secret (assumed to be a string of CSRF_ALLOWED_CHARS), generate a + token by adding a salt and using it to encrypt the secret. + """ + salt = _get_new_csrf_string() + chars = CSRF_ALLOWED_CHARS + pairs = zip((chars.index(x) for x in secret), (chars.index(x) for x in salt)) + cipher = ''.join(chars[(x + y) % len(chars)] for x, y in pairs) + return salt + cipher + + +def _unsalt_cipher_token(token): + """ + Given a token (assumed to be a string of CSRF_ALLOWED_CHARS, of length + CSRF_TOKEN_LENGTH, and that its first half is a salt), use it to decrypt + the second half to produce the original secret. + """ + salt = token[:CSRF_SECRET_LENGTH] + token = token[CSRF_SECRET_LENGTH:] + chars = CSRF_ALLOWED_CHARS + pairs = zip((chars.index(x) for x in token), (chars.index(x) for x in salt)) + secret = ''.join(chars[x - y] for x, y in pairs) # Note negative values are ok + return secret + + +def _get_new_csrf_token(): + return _salt_cipher_secret(_get_new_csrf_string()) def get_token(request): @@ -52,9 +87,12 @@ def get_token(request): function lazily, as is done by the csrf context processor. """ if "CSRF_COOKIE" not in request.META: - request.META["CSRF_COOKIE"] = _get_new_csrf_key() + csrf_secret = _get_new_csrf_string() + request.META["CSRF_COOKIE"] = _salt_cipher_secret(csrf_secret) + else: + csrf_secret = _unsalt_cipher_token(request.META["CSRF_COOKIE"]) request.META["CSRF_COOKIE_USED"] = True - return request.META["CSRF_COOKIE"] + return _salt_cipher_secret(csrf_secret) def rotate_token(request): @@ -64,19 +102,35 @@ def rotate_token(request): """ request.META.update({ "CSRF_COOKIE_USED": True, - "CSRF_COOKIE": _get_new_csrf_key(), + "CSRF_COOKIE": _get_new_csrf_token(), }) + request.csrf_cookie_needs_reset = True def _sanitize_token(token): - # Allow only alphanum - if len(token) > CSRF_KEY_LENGTH: - return _get_new_csrf_key() - token = re.sub('[^a-zA-Z0-9]+', '', force_text(token)) - if token == "": - # In case the cookie has been truncated to nothing at some point. - return _get_new_csrf_key() - return token + # Allow only ASCII alphanumerics + if re.search('[^a-zA-Z0-9]', force_text(token)): + return _get_new_csrf_token() + elif len(token) == CSRF_TOKEN_LENGTH: + return token + elif len(token) == CSRF_SECRET_LENGTH: + # Older Django versions set cookies to values of CSRF_SECRET_LENGTH + # alphanumeric characters. For backwards compatibility, accept + # such values as unsalted secrets. + # It's easier to salt here and be consistent later, rather than add + # different code paths in the checks, although that might be a tad more + # efficient. + return _salt_cipher_secret(token) + return _get_new_csrf_token() + + +def _compare_salted_tokens(request_csrf_token, csrf_token): + # Assume both arguments are sanitized -- that is, strings of + # length CSRF_TOKEN_LENGTH, all CSRF_ALLOWED_CHARS. + return constant_time_compare( + _unsalt_cipher_token(request_csrf_token), + _unsalt_cipher_token(csrf_token), + ) class CsrfViewMiddleware(MiddlewareMixin): @@ -112,12 +166,17 @@ class CsrfViewMiddleware(MiddlewareMixin): return None try: - csrf_token = _sanitize_token( - request.COOKIES[settings.CSRF_COOKIE_NAME]) - # Use same token next time - request.META['CSRF_COOKIE'] = csrf_token + cookie_token = request.COOKIES[settings.CSRF_COOKIE_NAME] except KeyError: csrf_token = None + else: + csrf_token = _sanitize_token(cookie_token) + if csrf_token != cookie_token: + # Cookie token needed to be replaced; + # the cookie needs to be reset. + request.csrf_cookie_needs_reset = True + # Use same token next time. + request.META['CSRF_COOKIE'] = csrf_token # Wait until request.META["CSRF_COOKIE"] has been manipulated before # bailing out, so that get_token still works @@ -142,7 +201,7 @@ class CsrfViewMiddleware(MiddlewareMixin): # # The attacker will need to provide a CSRF cookie and token, but # that's no problem for a MITM and the session-independent - # nonce we're using. So the MITM can circumvent the CSRF + # secret we're using. So the MITM can circumvent the CSRF # protection. This is true for any HTTP connection, but anyone # using HTTPS expects better! For this reason, for # https://example.com/ we need additional protection that treats @@ -213,14 +272,16 @@ class CsrfViewMiddleware(MiddlewareMixin): # and possible for PUT/DELETE. request_csrf_token = request.META.get(settings.CSRF_HEADER_NAME, '') - if not constant_time_compare(request_csrf_token, csrf_token): + request_csrf_token = _sanitize_token(request_csrf_token) + if not _compare_salted_tokens(request_csrf_token, csrf_token): return self._reject(request, REASON_BAD_TOKEN) return self._accept(request) def process_response(self, request, response): - if getattr(response, 'csrf_processing_done', False): - return response + if not getattr(request, 'csrf_cookie_needs_reset', False): + if getattr(response, 'csrf_cookie_set', False): + return response if not request.META.get("CSRF_COOKIE_USED", False): return response @@ -237,5 +298,5 @@ class CsrfViewMiddleware(MiddlewareMixin): ) # Content varies with the CSRF cookie, so set the Vary header. patch_vary_headers(response, ('Cookie',)) - response.csrf_processing_done = True + response.csrf_cookie_set = True return response diff --git a/docs/ref/csrf.txt b/docs/ref/csrf.txt index 277fd85720..fcb3bb4e33 100644 --- a/docs/ref/csrf.txt +++ b/docs/ref/csrf.txt @@ -218,20 +218,25 @@ How it works The CSRF protection is based on the following things: -1. A CSRF cookie that is set to a random value (a session independent nonce, as - it is called), which other sites will not have access to. +1. A CSRF cookie that is based on a random secret value, which other sites + will not have access to. - This cookie is set by ``CsrfViewMiddleware``. It is meant to be permanent, - but since there is no way to set a cookie that never expires, it is sent with - every response that has called ``django.middleware.csrf.get_token()`` - (the function used internally to retrieve the CSRF token). + This cookie is set by ``CsrfViewMiddleware``. It is sent with every + response that has called ``django.middleware.csrf.get_token()`` (the + function used internally to retrieve the CSRF token), if it wasn't already + set on the request. - For security reasons, the value of the CSRF cookie is changed each time a + In order to protect against `BREACH`_ attacks, the token is not simply the + secret; a random salt is prepended to the secret and used to scramble it. + + For security reasons, the value of the secret is changed each time a user logs in. 2. A hidden form field with the name 'csrfmiddlewaretoken' present in all - outgoing POST forms. The value of this field is the value of the CSRF - cookie. + outgoing POST forms. The value of this field is, again, the value of the + secret, with a salt which is both added to it and used to scramble it. The + salt is regenerated on every call to ``get_token()`` so that the form field + value is changed in every such response. This part is done by the template tag. @@ -239,6 +244,11 @@ The CSRF protection is based on the following things: TRACE, a CSRF cookie must be present, and the 'csrfmiddlewaretoken' field must be present and correct. If it isn't, the user will get a 403 error. + When validating the 'csrfmiddlewaretoken' field value, only the secret, + not the full token, is compared with the secret in the cookie value. + This allows the use of ever-changing tokens. While each request may use its + own token, the secret remains common to all. + This check is done by ``CsrfViewMiddleware``. 4. In addition, for HTTPS requests, strict referer checking is done by @@ -247,7 +257,7 @@ The CSRF protection is based on the following things: application since that request won't come from your own exact domain. This also addresses a man-in-the-middle attack that's possible under HTTPS - when using a session independent nonce, due to the fact that HTTP + when using a session independent secret, due to the fact that HTTP ``Set-Cookie`` headers are (unfortunately) accepted by clients even when they are talking to a site under HTTPS. (Referer checking is not done for HTTP requests because the presence of the ``Referer`` header isn't reliable @@ -283,6 +293,13 @@ vulnerability allows and much worse). Checking against the :setting:`CSRF_COOKIE_DOMAIN` setting was added. +.. versionchanged:: 1.10 + + Added salting to the token and started changing it with each request + to protect against `BREACH`_ attacks. + +.. _BREACH: http://breachattack.com/ + Caching ======= @@ -499,15 +516,6 @@ No, this is by design. Not linking CSRF protection to a session allows using the protection on sites such as a `pastebin` that allow submissions from anonymous users which don't have a session. -Why not use a new token for each request? ------------------------------------------ - -Generating a new token for each request is problematic from a UI perspective -because it invalidates all previous forms. Most users would be very unhappy to -find that opening a new tab on your site has invalidated the form they had -just spent time filling out in another tab or that a form they accessed via -the back button could not be filled out. - Why might a user encounter a CSRF validation failure after logging in? ---------------------------------------------------------------------- diff --git a/docs/ref/middleware.txt b/docs/ref/middleware.txt index 1e582d5724..961e4a272f 100644 --- a/docs/ref/middleware.txt +++ b/docs/ref/middleware.txt @@ -118,13 +118,12 @@ GZip middleware .. warning:: Security researchers recently revealed that when compression techniques - (including ``GZipMiddleware``) are used on a website, the site becomes - exposed to a number of possible attacks. These approaches can be used to - compromise, among other things, Django's CSRF protection. Before using - ``GZipMiddleware`` on your site, you should consider very carefully whether - you are subject to these attacks. If you're in *any* doubt about whether - you're affected, you should avoid using ``GZipMiddleware``. For more - details, see the `the BREACH paper (PDF)`_ and `breachattack.com`_. + (including ``GZipMiddleware``) are used on a website, the site may become + exposed to a number of possible attacks. Before using ``GZipMiddleware`` on + your site, you should consider very carefully whether you are subject to + these attacks. If you're in *any* doubt about whether you're affected, you + should avoid using ``GZipMiddleware``. For more details, see the `the BREACH + paper (PDF)`_ and `breachattack.com`_. .. _the BREACH paper (PDF): http://breachattack.com/resources/BREACH%20-%20SSL,%20gone%20in%2030%20seconds.pdf .. _breachattack.com: http://breachattack.com @@ -147,6 +146,12 @@ It will NOT compress content if any of the following are true: You can apply GZip compression to individual views using the :func:`~django.views.decorators.gzip.gzip_page()` decorator. +.. versionchanged:: 1.10 + + In older versions, Django's CSRF protection mechanism was vulnerable to + BREACH attacks when compression was used. This is no longer the case, but + you should still take care not to compromise your own secrets this way. + Conditional GET middleware -------------------------- diff --git a/docs/releases/1.10.txt b/docs/releases/1.10.txt index 14b23def72..405d7d5715 100644 --- a/docs/releases/1.10.txt +++ b/docs/releases/1.10.txt @@ -256,6 +256,12 @@ CSRF accepts an optional ``template_name`` parameter, defaulting to ``'403_csrf.html'``, to control the template used to render the page. +* To protect against `BREACH`_ attacks, the CSRF protection mechanism now + changes the form token value on every request (while keeping an invariant + secret which can be used to validate the different tokens). + +.. _BREACH: http://breachattack.com/ + Database backends ~~~~~~~~~~~~~~~~~ @@ -795,6 +801,12 @@ Miscellaneous * ``utils.version.get_version()`` returns :pep:`440` compliant release candidate versions (e.g. '1.10rc1' instead of '1.10c1'). +* CSRF token values are now required to be strings of 64 alphanumerics; values + of 32 alphanumerics, as set by older versions of Django by default, are + automatically replaced by strings of 64 characters. Other values are + considered invalid. This should only affect developers or users who replace + these tokens. + * The ``LOGOUT_URL`` setting is removed as Django hasn't made use of it since pre-1.0. If you use it in your project, you can add it to your project's settings. The default value was ``'/accounts/logout/'``. diff --git a/docs/topics/security.txt b/docs/topics/security.txt index eb1172e7e8..ff33e8be6d 100644 --- a/docs/topics/security.txt +++ b/docs/topics/security.txt @@ -65,10 +65,10 @@ this if you know what you are doing. There are other :ref:`limitations ` if your site has subdomains that are outside of your control. -:ref:`CSRF protection works ` by checking for a nonce in each +:ref:`CSRF protection works ` by checking for a secret in each POST request. This ensures that a malicious user cannot simply "replay" a form POST to your website and have another logged in user unwittingly submit that -form. The malicious user would have to know the nonce, which is user specific +form. The malicious user would have to know the secret, which is user specific (using a cookie). When deployed with :ref:`HTTPS `, diff --git a/tests/csrf_tests/test_context_processor.py b/tests/csrf_tests/test_context_processor.py index 270b3e4771..5db0116db0 100644 --- a/tests/csrf_tests/test_context_processor.py +++ b/tests/csrf_tests/test_context_processor.py @@ -1,6 +1,5 @@ -import json - from django.http import HttpRequest +from django.middleware.csrf import _compare_salted_tokens as equivalent_tokens from django.template.context_processors import csrf from django.test import SimpleTestCase from django.utils.encoding import force_text @@ -10,6 +9,7 @@ class TestContextProcessor(SimpleTestCase): def test_force_text_on_token(self): request = HttpRequest() - request.META['CSRF_COOKIE'] = 'test-token' + test_token = '1bcdefghij2bcdefghij3bcdefghij4bcdefghij5bcdefghij6bcdefghijABCD' + request.META['CSRF_COOKIE'] = test_token token = csrf(request).get('csrf_token') - self.assertEqual(json.dumps(force_text(token)), '"test-token"') + self.assertTrue(equivalent_tokens(force_text(token), test_token)) diff --git a/tests/csrf_tests/tests.py b/tests/csrf_tests/tests.py index 54570dca42..5ed7c9dc3d 100644 --- a/tests/csrf_tests/tests.py +++ b/tests/csrf_tests/tests.py @@ -2,15 +2,20 @@ from __future__ import unicode_literals import logging +import re +import warnings from django.conf import settings from django.http import HttpRequest, HttpResponse from django.middleware.csrf import ( - CSRF_KEY_LENGTH, CsrfViewMiddleware, get_token, + CSRF_TOKEN_LENGTH, CsrfViewMiddleware, + _compare_salted_tokens as equivalent_tokens, get_token, ) from django.template import RequestContext, Template from django.template.context_processors import csrf from django.test import SimpleTestCase, override_settings +from django.utils.encoding import force_bytes +from django.utils.six import text_type from django.views.decorators.csrf import ( csrf_exempt, ensure_csrf_cookie, requires_csrf_token, ) @@ -57,10 +62,7 @@ class TestingHttpRequest(HttpRequest): class CsrfViewMiddlewareTest(SimpleTestCase): - # The csrf token is potentially from an untrusted source, so could have - # characters that need dealing with. - _csrf_id_cookie = b"<1>\xc2\xa1" - _csrf_id = "1" + _csrf_id = _csrf_id_cookie = '1bcdefghij2bcdefghij3bcdefghij4bcdefghij5bcdefghij6bcdefghijABCD' def _get_GET_no_csrf_cookie_request(self): return TestingHttpRequest() @@ -85,8 +87,24 @@ class CsrfViewMiddlewareTest(SimpleTestCase): req.POST['csrfmiddlewaretoken'] = self._csrf_id return req + def _get_POST_bare_secret_csrf_cookie_request(self): + req = self._get_POST_no_csrf_cookie_request() + req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id_cookie[:32] + return req + + def _get_POST_bare_secret_csrf_cookie_request_with_token(self): + req = self._get_POST_bare_secret_csrf_cookie_request() + req.POST['csrfmiddlewaretoken'] = self._csrf_id_cookie[:32] + return req + def _check_token_present(self, response, csrf_id=None): - self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % (csrf_id or self._csrf_id)) + text = text_type(response.content, response.charset) + match = re.search("name='csrfmiddlewaretoken' value='(.*?)'", text) + csrf_token = csrf_id or self._csrf_id + self.assertTrue( + match and equivalent_tokens(csrf_token, match.group(1)), + "Could not find csrfmiddlewaretoken to match %s" % csrf_token + ) def test_process_view_token_too_long(self): """ @@ -94,12 +112,45 @@ class CsrfViewMiddlewareTest(SimpleTestCase): created. """ req = self._get_GET_no_csrf_cookie_request() - req.COOKIES[settings.CSRF_COOKIE_NAME] = 'x' * 10000000 + req.COOKIES[settings.CSRF_COOKIE_NAME] = 'x' * 100000 CsrfViewMiddleware().process_view(req, token_view, (), {}) resp = token_view(req) resp2 = CsrfViewMiddleware().process_response(req, resp) csrf_cookie = resp2.cookies.get(settings.CSRF_COOKIE_NAME, False) - self.assertEqual(len(csrf_cookie.value), CSRF_KEY_LENGTH) + self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH) + + def test_process_view_token_invalid_chars(self): + """ + If the token contains non-alphanumeric characters, it is ignored and a + new token is created. + """ + token = ('!@#' + self._csrf_id)[:CSRF_TOKEN_LENGTH] + req = self._get_GET_no_csrf_cookie_request() + req.COOKIES[settings.CSRF_COOKIE_NAME] = token + CsrfViewMiddleware().process_view(req, token_view, (), {}) + resp = token_view(req) + resp2 = CsrfViewMiddleware().process_response(req, resp) + csrf_cookie = resp2.cookies.get(settings.CSRF_COOKIE_NAME, False) + self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH) + self.assertNotEqual(csrf_cookie.value, token) + + def test_process_view_token_invalid_bytes(self): + """ + If the token contains improperly encoded unicode, it is ignored and a + new token is created. + """ + token = (b"<1>\xc2\xa1" + force_bytes(self._csrf_id, 'ascii'))[:CSRF_TOKEN_LENGTH] + req = self._get_GET_no_csrf_cookie_request() + req.COOKIES[settings.CSRF_COOKIE_NAME] = token + # We expect a UnicodeWarning here, because we used broken utf-8 on purpose + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UnicodeWarning) + CsrfViewMiddleware().process_view(req, token_view, (), {}) + resp = token_view(req) + resp2 = CsrfViewMiddleware().process_response(req, resp) + csrf_cookie = resp2.cookies.get(settings.CSRF_COOKIE_NAME, False) + self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH) + self.assertNotEqual(csrf_cookie.value, token) def test_process_response_get_token_used(self): """ @@ -295,6 +346,37 @@ class CsrfViewMiddlewareTest(SimpleTestCase): csrf_cookie = resp2.cookies[settings.CSRF_COOKIE_NAME] self._check_token_present(resp, csrf_id=csrf_cookie.value) + def test_cookie_not_reset_on_accepted_request(self): + """ + The csrf token used in posts is changed on every request (although + stays equivalent). The csrf cookie should not change on accepted + requests. If it appears in the response, it should keep its value. + """ + req = self._get_POST_request_with_token() + CsrfViewMiddleware().process_view(req, token_view, (), {}) + resp = token_view(req) + resp = CsrfViewMiddleware().process_response(req, resp) + csrf_cookie = resp.cookies.get(settings.CSRF_COOKIE_NAME, None) + if csrf_cookie: + self.assertEqual( + csrf_cookie.value, self._csrf_id_cookie, + "CSRF cookie was changed on an accepted request" + ) + + def test_bare_secret_accepted_and_replaced(self): + """ + The csrf token is reset from a bare secret. + """ + req = self._get_POST_bare_secret_csrf_cookie_request_with_token() + req2 = CsrfViewMiddleware().process_view(req, token_view, (), {}) + self.assertIsNone(req2) + resp = token_view(req) + resp = CsrfViewMiddleware().process_response(req, resp) + self.assertIn(settings.CSRF_COOKIE_NAME, resp.cookies, "Cookie was not reset from bare secret") + csrf_cookie = resp.cookies[settings.CSRF_COOKIE_NAME] + self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH) + self._check_token_present(resp, csrf_id=csrf_cookie.value) + @override_settings(DEBUG=True) def test_https_bad_referer(self): """ @@ -592,7 +674,7 @@ class CsrfViewMiddlewareTest(SimpleTestCase): POST = property(_get_post, _set_post) - token = 'ABC' + token = ('ABC' + self._csrf_id)[:CSRF_TOKEN_LENGTH] req = CsrfPostRequest(token, raise_error=False) resp = CsrfViewMiddleware().process_view(req, post_form_view, (), {}) diff --git a/tests/template_backends/test_dummy.py b/tests/template_backends/test_dummy.py index e6e60e71a2..83b42c7eb4 100644 --- a/tests/template_backends/test_dummy.py +++ b/tests/template_backends/test_dummy.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals +import re + from django.forms import CharField, Form, Media from django.http import HttpRequest -from django.middleware.csrf import CsrfViewMiddleware, get_token +from django.middleware.csrf import ( + CsrfViewMiddleware, _compare_salted_tokens as equivalent_tokens, get_token, +) from django.template import TemplateDoesNotExist, TemplateSyntaxError from django.template.backends.dummy import TemplateStrings from django.test import SimpleTestCase @@ -81,11 +85,10 @@ class TemplateStringsTests(SimpleTestCase): template = self.engine.get_template('template_backends/csrf.html') content = template.render(request=request) - expected = ( - ''.format(get_token(request))) - - self.assertHTMLEqual(content, expected) + expected = '' + match = re.match(expected, content) or re.match(expected.replace('"', "'"), content) + self.assertTrue(match, "hidden csrftoken field not found in output") + self.assertTrue(equivalent_tokens(match.group(1), get_token(request))) def test_no_directory_traversal(self): with self.assertRaises(TemplateDoesNotExist):