Fixed #20869 -- made CSRF tokens change every request by salt-encrypting them

Note that the cookie is not changed every request, just the token retrieved
by the `get_token()` method (used also by the `{% csrf_token %}` tag).

While at it, made token validation strict: Where, before, any length was
accepted and non-ASCII chars were ignored, we now treat anything other than
`[A-Za-z0-9]{64}` as invalid (except for 32-char tokens, which, for
backwards-compatibility, are accepted and replaced by 64-char ones).

Thanks Trac user patrys for reporting, github user adambrenecki
for initial patch, Tim Graham for help, and Curtis Maloney,
Collin Anderson, Florian Apolloner, Markus Holtermann & Jon Dufresne
for reviews.
This commit is contained in:
Shai Berger 2015-11-07 18:35:45 +02:00
parent 6d9c5d46e6
commit 5112e65ef2
8 changed files with 241 additions and 70 deletions

View File

@ -8,6 +8,7 @@ from __future__ import unicode_literals
import logging
import re
import string
from django.conf import settings
from django.urls import get_callable
@ -16,8 +17,10 @@ from django.utils.crypto import constant_time_compare, get_random_string
from django.utils.deprecation import MiddlewareMixin
from django.utils.encoding import force_text
from django.utils.http import is_same_domain
from django.utils.six.moves import zip
from django.utils.six.moves.urllib.parse import urlparse
logger = logging.getLogger('django.request')
REASON_NO_REFERER = "Referer checking failed - no Referer."
@ -27,7 +30,9 @@ REASON_BAD_TOKEN = "CSRF token missing or incorrect."
REASON_MALFORMED_REFERER = "Referer checking failed - Referer is malformed."
REASON_INSECURE_REFERER = "Referer checking failed - Referer is insecure while host is secure."
CSRF_KEY_LENGTH = 32
CSRF_SECRET_LENGTH = 32
CSRF_TOKEN_LENGTH = 2 * CSRF_SECRET_LENGTH
CSRF_ALLOWED_CHARS = string.ascii_letters + string.digits
def _get_failure_view():
@ -37,8 +42,38 @@ def _get_failure_view():
return get_callable(settings.CSRF_FAILURE_VIEW)
def _get_new_csrf_key():
return get_random_string(CSRF_KEY_LENGTH)
def _get_new_csrf_string():
return get_random_string(CSRF_SECRET_LENGTH, allowed_chars=CSRF_ALLOWED_CHARS)
def _salt_cipher_secret(secret):
"""
Given a secret (assumed to be a string of CSRF_ALLOWED_CHARS), generate a
token by adding a salt and using it to encrypt the secret.
"""
salt = _get_new_csrf_string()
chars = CSRF_ALLOWED_CHARS
pairs = zip((chars.index(x) for x in secret), (chars.index(x) for x in salt))
cipher = ''.join(chars[(x + y) % len(chars)] for x, y in pairs)
return salt + cipher
def _unsalt_cipher_token(token):
"""
Given a token (assumed to be a string of CSRF_ALLOWED_CHARS, of length
CSRF_TOKEN_LENGTH, and that its first half is a salt), use it to decrypt
the second half to produce the original secret.
"""
salt = token[:CSRF_SECRET_LENGTH]
token = token[CSRF_SECRET_LENGTH:]
chars = CSRF_ALLOWED_CHARS
pairs = zip((chars.index(x) for x in token), (chars.index(x) for x in salt))
secret = ''.join(chars[x - y] for x, y in pairs) # Note negative values are ok
return secret
def _get_new_csrf_token():
return _salt_cipher_secret(_get_new_csrf_string())
def get_token(request):
@ -52,9 +87,12 @@ def get_token(request):
function lazily, as is done by the csrf context processor.
"""
if "CSRF_COOKIE" not in request.META:
request.META["CSRF_COOKIE"] = _get_new_csrf_key()
csrf_secret = _get_new_csrf_string()
request.META["CSRF_COOKIE"] = _salt_cipher_secret(csrf_secret)
else:
csrf_secret = _unsalt_cipher_token(request.META["CSRF_COOKIE"])
request.META["CSRF_COOKIE_USED"] = True
return request.META["CSRF_COOKIE"]
return _salt_cipher_secret(csrf_secret)
def rotate_token(request):
@ -64,19 +102,35 @@ def rotate_token(request):
"""
request.META.update({
"CSRF_COOKIE_USED": True,
"CSRF_COOKIE": _get_new_csrf_key(),
"CSRF_COOKIE": _get_new_csrf_token(),
})
request.csrf_cookie_needs_reset = True
def _sanitize_token(token):
# Allow only alphanum
if len(token) > CSRF_KEY_LENGTH:
return _get_new_csrf_key()
token = re.sub('[^a-zA-Z0-9]+', '', force_text(token))
if token == "":
# In case the cookie has been truncated to nothing at some point.
return _get_new_csrf_key()
return token
# Allow only ASCII alphanumerics
if re.search('[^a-zA-Z0-9]', force_text(token)):
return _get_new_csrf_token()
elif len(token) == CSRF_TOKEN_LENGTH:
return token
elif len(token) == CSRF_SECRET_LENGTH:
# Older Django versions set cookies to values of CSRF_SECRET_LENGTH
# alphanumeric characters. For backwards compatibility, accept
# such values as unsalted secrets.
# It's easier to salt here and be consistent later, rather than add
# different code paths in the checks, although that might be a tad more
# efficient.
return _salt_cipher_secret(token)
return _get_new_csrf_token()
def _compare_salted_tokens(request_csrf_token, csrf_token):
# Assume both arguments are sanitized -- that is, strings of
# length CSRF_TOKEN_LENGTH, all CSRF_ALLOWED_CHARS.
return constant_time_compare(
_unsalt_cipher_token(request_csrf_token),
_unsalt_cipher_token(csrf_token),
)
class CsrfViewMiddleware(MiddlewareMixin):
@ -112,12 +166,17 @@ class CsrfViewMiddleware(MiddlewareMixin):
return None
try:
csrf_token = _sanitize_token(
request.COOKIES[settings.CSRF_COOKIE_NAME])
# Use same token next time
request.META['CSRF_COOKIE'] = csrf_token
cookie_token = request.COOKIES[settings.CSRF_COOKIE_NAME]
except KeyError:
csrf_token = None
else:
csrf_token = _sanitize_token(cookie_token)
if csrf_token != cookie_token:
# Cookie token needed to be replaced;
# the cookie needs to be reset.
request.csrf_cookie_needs_reset = True
# Use same token next time.
request.META['CSRF_COOKIE'] = csrf_token
# Wait until request.META["CSRF_COOKIE"] has been manipulated before
# bailing out, so that get_token still works
@ -142,7 +201,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
#
# The attacker will need to provide a CSRF cookie and token, but
# that's no problem for a MITM and the session-independent
# nonce we're using. So the MITM can circumvent the CSRF
# secret we're using. So the MITM can circumvent the CSRF
# protection. This is true for any HTTP connection, but anyone
# using HTTPS expects better! For this reason, for
# https://example.com/ we need additional protection that treats
@ -213,14 +272,16 @@ class CsrfViewMiddleware(MiddlewareMixin):
# and possible for PUT/DELETE.
request_csrf_token = request.META.get(settings.CSRF_HEADER_NAME, '')
if not constant_time_compare(request_csrf_token, csrf_token):
request_csrf_token = _sanitize_token(request_csrf_token)
if not _compare_salted_tokens(request_csrf_token, csrf_token):
return self._reject(request, REASON_BAD_TOKEN)
return self._accept(request)
def process_response(self, request, response):
if getattr(response, 'csrf_processing_done', False):
return response
if not getattr(request, 'csrf_cookie_needs_reset', False):
if getattr(response, 'csrf_cookie_set', False):
return response
if not request.META.get("CSRF_COOKIE_USED", False):
return response
@ -237,5 +298,5 @@ class CsrfViewMiddleware(MiddlewareMixin):
)
# Content varies with the CSRF cookie, so set the Vary header.
patch_vary_headers(response, ('Cookie',))
response.csrf_processing_done = True
response.csrf_cookie_set = True
return response

View File

@ -218,20 +218,25 @@ How it works
The CSRF protection is based on the following things:
1. A CSRF cookie that is set to a random value (a session independent nonce, as
it is called), which other sites will not have access to.
1. A CSRF cookie that is based on a random secret value, which other sites
will not have access to.
This cookie is set by ``CsrfViewMiddleware``. It is meant to be permanent,
but since there is no way to set a cookie that never expires, it is sent with
every response that has called ``django.middleware.csrf.get_token()``
(the function used internally to retrieve the CSRF token).
This cookie is set by ``CsrfViewMiddleware``. It is sent with every
response that has called ``django.middleware.csrf.get_token()`` (the
function used internally to retrieve the CSRF token), if it wasn't already
set on the request.
For security reasons, the value of the CSRF cookie is changed each time a
In order to protect against `BREACH`_ attacks, the token is not simply the
secret; a random salt is prepended to the secret and used to scramble it.
For security reasons, the value of the secret is changed each time a
user logs in.
2. A hidden form field with the name 'csrfmiddlewaretoken' present in all
outgoing POST forms. The value of this field is the value of the CSRF
cookie.
outgoing POST forms. The value of this field is, again, the value of the
secret, with a salt which is both added to it and used to scramble it. The
salt is regenerated on every call to ``get_token()`` so that the form field
value is changed in every such response.
This part is done by the template tag.
@ -239,6 +244,11 @@ The CSRF protection is based on the following things:
TRACE, a CSRF cookie must be present, and the 'csrfmiddlewaretoken' field
must be present and correct. If it isn't, the user will get a 403 error.
When validating the 'csrfmiddlewaretoken' field value, only the secret,
not the full token, is compared with the secret in the cookie value.
This allows the use of ever-changing tokens. While each request may use its
own token, the secret remains common to all.
This check is done by ``CsrfViewMiddleware``.
4. In addition, for HTTPS requests, strict referer checking is done by
@ -247,7 +257,7 @@ The CSRF protection is based on the following things:
application since that request won't come from your own exact domain.
This also addresses a man-in-the-middle attack that's possible under HTTPS
when using a session independent nonce, due to the fact that HTTP
when using a session independent secret, due to the fact that HTTP
``Set-Cookie`` headers are (unfortunately) accepted by clients even when
they are talking to a site under HTTPS. (Referer checking is not done for
HTTP requests because the presence of the ``Referer`` header isn't reliable
@ -283,6 +293,13 @@ vulnerability allows and much worse).
Checking against the :setting:`CSRF_COOKIE_DOMAIN` setting was added.
.. versionchanged:: 1.10
Added salting to the token and started changing it with each request
to protect against `BREACH`_ attacks.
.. _BREACH: http://breachattack.com/
Caching
=======
@ -499,15 +516,6 @@ No, this is by design. Not linking CSRF protection to a session allows using
the protection on sites such as a `pastebin` that allow submissions from
anonymous users which don't have a session.
Why not use a new token for each request?
-----------------------------------------
Generating a new token for each request is problematic from a UI perspective
because it invalidates all previous forms. Most users would be very unhappy to
find that opening a new tab on your site has invalidated the form they had
just spent time filling out in another tab or that a form they accessed via
the back button could not be filled out.
Why might a user encounter a CSRF validation failure after logging in?
----------------------------------------------------------------------

View File

@ -118,13 +118,12 @@ GZip middleware
.. warning::
Security researchers recently revealed that when compression techniques
(including ``GZipMiddleware``) are used on a website, the site becomes
exposed to a number of possible attacks. These approaches can be used to
compromise, among other things, Django's CSRF protection. Before using
``GZipMiddleware`` on your site, you should consider very carefully whether
you are subject to these attacks. If you're in *any* doubt about whether
you're affected, you should avoid using ``GZipMiddleware``. For more
details, see the `the BREACH paper (PDF)`_ and `breachattack.com`_.
(including ``GZipMiddleware``) are used on a website, the site may become
exposed to a number of possible attacks. Before using ``GZipMiddleware`` on
your site, you should consider very carefully whether you are subject to
these attacks. If you're in *any* doubt about whether you're affected, you
should avoid using ``GZipMiddleware``. For more details, see the `the BREACH
paper (PDF)`_ and `breachattack.com`_.
.. _the BREACH paper (PDF): http://breachattack.com/resources/BREACH%20-%20SSL,%20gone%20in%2030%20seconds.pdf
.. _breachattack.com: http://breachattack.com
@ -147,6 +146,12 @@ It will NOT compress content if any of the following are true:
You can apply GZip compression to individual views using the
:func:`~django.views.decorators.gzip.gzip_page()` decorator.
.. versionchanged:: 1.10
In older versions, Django's CSRF protection mechanism was vulnerable to
BREACH attacks when compression was used. This is no longer the case, but
you should still take care not to compromise your own secrets this way.
Conditional GET middleware
--------------------------

View File

@ -256,6 +256,12 @@ CSRF
accepts an optional ``template_name`` parameter, defaulting to
``'403_csrf.html'``, to control the template used to render the page.
* To protect against `BREACH`_ attacks, the CSRF protection mechanism now
changes the form token value on every request (while keeping an invariant
secret which can be used to validate the different tokens).
.. _BREACH: http://breachattack.com/
Database backends
~~~~~~~~~~~~~~~~~
@ -795,6 +801,12 @@ Miscellaneous
* ``utils.version.get_version()`` returns :pep:`440` compliant release
candidate versions (e.g. '1.10rc1' instead of '1.10c1').
* CSRF token values are now required to be strings of 64 alphanumerics; values
of 32 alphanumerics, as set by older versions of Django by default, are
automatically replaced by strings of 64 characters. Other values are
considered invalid. This should only affect developers or users who replace
these tokens.
* The ``LOGOUT_URL`` setting is removed as Django hasn't made use of it
since pre-1.0. If you use it in your project, you can add it to your
project's settings. The default value was ``'/accounts/logout/'``.

View File

@ -65,10 +65,10 @@ this if you know what you are doing. There are other :ref:`limitations
<csrf-limitations>` if your site has subdomains that are outside of your
control.
:ref:`CSRF protection works <how-csrf-works>` by checking for a nonce in each
:ref:`CSRF protection works <how-csrf-works>` by checking for a secret in each
POST request. This ensures that a malicious user cannot simply "replay" a form
POST to your website and have another logged in user unwittingly submit that
form. The malicious user would have to know the nonce, which is user specific
form. The malicious user would have to know the secret, which is user specific
(using a cookie).
When deployed with :ref:`HTTPS <security-recommendation-ssl>`,

View File

@ -1,6 +1,5 @@
import json
from django.http import HttpRequest
from django.middleware.csrf import _compare_salted_tokens as equivalent_tokens
from django.template.context_processors import csrf
from django.test import SimpleTestCase
from django.utils.encoding import force_text
@ -10,6 +9,7 @@ class TestContextProcessor(SimpleTestCase):
def test_force_text_on_token(self):
request = HttpRequest()
request.META['CSRF_COOKIE'] = 'test-token'
test_token = '1bcdefghij2bcdefghij3bcdefghij4bcdefghij5bcdefghij6bcdefghijABCD'
request.META['CSRF_COOKIE'] = test_token
token = csrf(request).get('csrf_token')
self.assertEqual(json.dumps(force_text(token)), '"test-token"')
self.assertTrue(equivalent_tokens(force_text(token), test_token))

View File

@ -2,15 +2,20 @@
from __future__ import unicode_literals
import logging
import re
import warnings
from django.conf import settings
from django.http import HttpRequest, HttpResponse
from django.middleware.csrf import (
CSRF_KEY_LENGTH, CsrfViewMiddleware, get_token,
CSRF_TOKEN_LENGTH, CsrfViewMiddleware,
_compare_salted_tokens as equivalent_tokens, get_token,
)
from django.template import RequestContext, Template
from django.template.context_processors import csrf
from django.test import SimpleTestCase, override_settings
from django.utils.encoding import force_bytes
from django.utils.six import text_type
from django.views.decorators.csrf import (
csrf_exempt, ensure_csrf_cookie, requires_csrf_token,
)
@ -57,10 +62,7 @@ class TestingHttpRequest(HttpRequest):
class CsrfViewMiddlewareTest(SimpleTestCase):
# The csrf token is potentially from an untrusted source, so could have
# characters that need dealing with.
_csrf_id_cookie = b"<1>\xc2\xa1"
_csrf_id = "1"
_csrf_id = _csrf_id_cookie = '1bcdefghij2bcdefghij3bcdefghij4bcdefghij5bcdefghij6bcdefghijABCD'
def _get_GET_no_csrf_cookie_request(self):
return TestingHttpRequest()
@ -85,8 +87,24 @@ class CsrfViewMiddlewareTest(SimpleTestCase):
req.POST['csrfmiddlewaretoken'] = self._csrf_id
return req
def _get_POST_bare_secret_csrf_cookie_request(self):
req = self._get_POST_no_csrf_cookie_request()
req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id_cookie[:32]
return req
def _get_POST_bare_secret_csrf_cookie_request_with_token(self):
req = self._get_POST_bare_secret_csrf_cookie_request()
req.POST['csrfmiddlewaretoken'] = self._csrf_id_cookie[:32]
return req
def _check_token_present(self, response, csrf_id=None):
self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % (csrf_id or self._csrf_id))
text = text_type(response.content, response.charset)
match = re.search("name='csrfmiddlewaretoken' value='(.*?)'", text)
csrf_token = csrf_id or self._csrf_id
self.assertTrue(
match and equivalent_tokens(csrf_token, match.group(1)),
"Could not find csrfmiddlewaretoken to match %s" % csrf_token
)
def test_process_view_token_too_long(self):
"""
@ -94,12 +112,45 @@ class CsrfViewMiddlewareTest(SimpleTestCase):
created.
"""
req = self._get_GET_no_csrf_cookie_request()
req.COOKIES[settings.CSRF_COOKIE_NAME] = 'x' * 10000000
req.COOKIES[settings.CSRF_COOKIE_NAME] = 'x' * 100000
CsrfViewMiddleware().process_view(req, token_view, (), {})
resp = token_view(req)
resp2 = CsrfViewMiddleware().process_response(req, resp)
csrf_cookie = resp2.cookies.get(settings.CSRF_COOKIE_NAME, False)
self.assertEqual(len(csrf_cookie.value), CSRF_KEY_LENGTH)
self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH)
def test_process_view_token_invalid_chars(self):
"""
If the token contains non-alphanumeric characters, it is ignored and a
new token is created.
"""
token = ('!@#' + self._csrf_id)[:CSRF_TOKEN_LENGTH]
req = self._get_GET_no_csrf_cookie_request()
req.COOKIES[settings.CSRF_COOKIE_NAME] = token
CsrfViewMiddleware().process_view(req, token_view, (), {})
resp = token_view(req)
resp2 = CsrfViewMiddleware().process_response(req, resp)
csrf_cookie = resp2.cookies.get(settings.CSRF_COOKIE_NAME, False)
self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH)
self.assertNotEqual(csrf_cookie.value, token)
def test_process_view_token_invalid_bytes(self):
"""
If the token contains improperly encoded unicode, it is ignored and a
new token is created.
"""
token = (b"<1>\xc2\xa1" + force_bytes(self._csrf_id, 'ascii'))[:CSRF_TOKEN_LENGTH]
req = self._get_GET_no_csrf_cookie_request()
req.COOKIES[settings.CSRF_COOKIE_NAME] = token
# We expect a UnicodeWarning here, because we used broken utf-8 on purpose
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UnicodeWarning)
CsrfViewMiddleware().process_view(req, token_view, (), {})
resp = token_view(req)
resp2 = CsrfViewMiddleware().process_response(req, resp)
csrf_cookie = resp2.cookies.get(settings.CSRF_COOKIE_NAME, False)
self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH)
self.assertNotEqual(csrf_cookie.value, token)
def test_process_response_get_token_used(self):
"""
@ -295,6 +346,37 @@ class CsrfViewMiddlewareTest(SimpleTestCase):
csrf_cookie = resp2.cookies[settings.CSRF_COOKIE_NAME]
self._check_token_present(resp, csrf_id=csrf_cookie.value)
def test_cookie_not_reset_on_accepted_request(self):
"""
The csrf token used in posts is changed on every request (although
stays equivalent). The csrf cookie should not change on accepted
requests. If it appears in the response, it should keep its value.
"""
req = self._get_POST_request_with_token()
CsrfViewMiddleware().process_view(req, token_view, (), {})
resp = token_view(req)
resp = CsrfViewMiddleware().process_response(req, resp)
csrf_cookie = resp.cookies.get(settings.CSRF_COOKIE_NAME, None)
if csrf_cookie:
self.assertEqual(
csrf_cookie.value, self._csrf_id_cookie,
"CSRF cookie was changed on an accepted request"
)
def test_bare_secret_accepted_and_replaced(self):
"""
The csrf token is reset from a bare secret.
"""
req = self._get_POST_bare_secret_csrf_cookie_request_with_token()
req2 = CsrfViewMiddleware().process_view(req, token_view, (), {})
self.assertIsNone(req2)
resp = token_view(req)
resp = CsrfViewMiddleware().process_response(req, resp)
self.assertIn(settings.CSRF_COOKIE_NAME, resp.cookies, "Cookie was not reset from bare secret")
csrf_cookie = resp.cookies[settings.CSRF_COOKIE_NAME]
self.assertEqual(len(csrf_cookie.value), CSRF_TOKEN_LENGTH)
self._check_token_present(resp, csrf_id=csrf_cookie.value)
@override_settings(DEBUG=True)
def test_https_bad_referer(self):
"""
@ -592,7 +674,7 @@ class CsrfViewMiddlewareTest(SimpleTestCase):
POST = property(_get_post, _set_post)
token = 'ABC'
token = ('ABC' + self._csrf_id)[:CSRF_TOKEN_LENGTH]
req = CsrfPostRequest(token, raise_error=False)
resp = CsrfViewMiddleware().process_view(req, post_form_view, (), {})

View File

@ -2,9 +2,13 @@
from __future__ import unicode_literals
import re
from django.forms import CharField, Form, Media
from django.http import HttpRequest
from django.middleware.csrf import CsrfViewMiddleware, get_token
from django.middleware.csrf import (
CsrfViewMiddleware, _compare_salted_tokens as equivalent_tokens, get_token,
)
from django.template import TemplateDoesNotExist, TemplateSyntaxError
from django.template.backends.dummy import TemplateStrings
from django.test import SimpleTestCase
@ -81,11 +85,10 @@ class TemplateStringsTests(SimpleTestCase):
template = self.engine.get_template('template_backends/csrf.html')
content = template.render(request=request)
expected = (
'<input type="hidden" name="csrfmiddlewaretoken" '
'value="{}" />'.format(get_token(request)))
self.assertHTMLEqual(content, expected)
expected = '<input type="hidden" name="csrfmiddlewaretoken" value="([^"]+)" />'
match = re.match(expected, content) or re.match(expected.replace('"', "'"), content)
self.assertTrue(match, "hidden csrftoken field not found in output")
self.assertTrue(equivalent_tokens(match.group(1), get_token(request)))
def test_no_directory_traversal(self):
with self.assertRaises(TemplateDoesNotExist):