From 90e05aaeac612a4251640564aa65f103ac635e12 Mon Sep 17 00:00:00 2001
From: Paul McMillan
Date: Fri, 23 Dec 2011 03:53:56 +0000
Subject: [PATCH] Renovated password hashing, including the forgotten files in r17253.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17254 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/contrib/auth/hashers.py        | 362 ++++++++++++++++++++++++++
 django/contrib/auth/tests/hashers.py  | 128 +++++++++
 django/contrib/auth/utils.py          |  63 -----
 tests/regressiontests/utils/crypto.py | 133 ++++++++++
 4 files changed, 623 insertions(+), 63 deletions(-)
 create mode 100644 django/contrib/auth/hashers.py
 create mode 100644 django/contrib/auth/tests/hashers.py
 delete mode 100644 django/contrib/auth/utils.py
 create mode 100644 tests/regressiontests/utils/crypto.py

diff --git a/django/contrib/auth/hashers.py b/django/contrib/auth/hashers.py
new file mode 100644
index 0000000000..0b3b6c3174
--- /dev/null
+++ b/django/contrib/auth/hashers.py
@@ -0,0 +1,362 @@
+import hashlib
+
+from django.conf import settings
+from django.utils import importlib
+from django.utils.datastructures import SortedDict
+from django.utils.encoding import smart_str
+from django.core.exceptions import ImproperlyConfigured
+from django.utils.crypto import (
+    pbkdf2, constant_time_compare, get_random_string)
+
+
+UNUSABLE_PASSWORD = '!'  # This will never be a valid encoded hash
+HASHERS = None  # lazily loaded from PASSWORD_HASHERS
+PREFERRED_HASHER = None  # defaults to first item in PASSWORD_HASHERS
+
+
+def is_password_usable(encoded):
+    return (encoded is not None and encoded != UNUSABLE_PASSWORD)
+
+
+def check_password(password, encoded, setter=None, preferred='default'):
+    """
+    Returns a boolean of whether the raw password matches the three
+    part encoded digest.
+
+    If setter is specified, it'll be called when you need to
+    regenerate the password.
+    """
+    if not password or not is_password_usable(encoded):
+        return False
+
+    preferred = get_hasher(preferred)
+    raw_password = password
+    password = smart_str(password)
+    encoded = smart_str(encoded)
+
+    if len(encoded) == 32 and '$' not in encoded:
+        hasher = get_hasher('md5')
+    else:
+        algorithm = encoded.split('$', 1)[0]
+        hasher = get_hasher(algorithm)
+
+    must_update = hasher.algorithm != preferred.algorithm
+    is_correct = hasher.verify(password, encoded)
+    if setter and is_correct and must_update:
+        setter(raw_password)
+    return is_correct
+
+
+def make_password(password, salt=None, hasher='default'):
+    """
+    Turn a plain-text password into a hash for database storage
+
+    Same as encode() but generates a new random salt. If
+    password is None or blank then UNUSABLE_PASSWORD will be
+    returned which disallows logins.
+    """
+    if not password:
+        return UNUSABLE_PASSWORD
+
+    hasher = get_hasher(hasher)
+    password = smart_str(password)
+
+    if not salt:
+        salt = hasher.salt()
+    salt = smart_str(salt)
+
+    return hasher.encode(password, salt)
+
+
+def load_hashers():
+    global HASHERS
+    global PREFERRED_HASHER
+    hashers = []
+    for backend in settings.PASSWORD_HASHERS:
+        try:
+            mod_path, cls_name = backend.rsplit('.', 1)
+            mod = importlib.import_module(mod_path)
+            hasher_cls = getattr(mod, cls_name)
+        except (AttributeError, ImportError, ValueError):
+            raise ImproperlyConfigured("hasher not found: %s" % backend)
+        hasher = hasher_cls()
+        if not getattr(hasher, 'algorithm', None):
+            raise ImproperlyConfigured("hasher doesn't specify an "
+                                       "algorithm name: %s" % backend)
+        hashers.append(hasher)
+    HASHERS = dict([(hasher.algorithm, hasher) for hasher in hashers])
+    PREFERRED_HASHER = hashers[0]
+
+
+def get_hasher(algorithm='default'):
+    """
+    Returns an instance of a loaded password hasher.
+
+    If algorithm is 'default', the default hasher will be returned.
+    This function will also lazy import hashers specified in your
+    settings file if needed.
+    """
+    if hasattr(algorithm, 'algorithm'):
+        return algorithm
+
+    elif algorithm == 'default':
+        if PREFERRED_HASHER is None:
+            load_hashers()
+        return PREFERRED_HASHER
+    else:
+        if HASHERS is None:
+            load_hashers()
+        if algorithm not in HASHERS:
+            raise ValueError("Unknown password hashing algorithm '%s'. "
+                             "Did you specify it in the PASSWORD_HASHERS "
+                             "setting?" % algorithm)
+        return HASHERS[algorithm]
+
+
+def mask_hash(hash, show=6, char="*"):
+    """
+    Returns the given hash, with only the first ``show`` number shown. The
+    rest are masked with ``char`` for security reasons.
+    """
+    masked = hash[:show]
+    masked += char * len(hash[show:])
+    return masked
+
+
+class BasePasswordHasher(object):
+    """
+    Abstract base class for password hashers
+
+    When creating your own hasher, you need to override algorithm,
+    verify(), encode() and safe_summary().
+
+    PasswordHasher objects are immutable.
+    """
+    algorithm = None
+    library = None
+
+    def _load_library(self):
+        if self.library is not None:
+            if isinstance(self.library, (tuple, list)):
+                name, mod_path = self.library
+            else:
+                name = mod_path = self.library
+            try:
+                module = importlib.import_module(mod_path)
+            except ImportError:
+                raise ValueError("Couldn't load %s password algorithm "
+                                 "library" % name)
+            return module
+        raise ValueError("Hasher '%s' doesn't specify a library attribute" %
+                         self.__class__)
+
+    def salt(self):
+        """
+        Generates a cryptographically secure nonce salt in ascii
+        """
+        return get_random_string()
+
+    def verify(self, password, encoded):
+        """
+        Checks if the given password is correct
+        """
+        raise NotImplementedError()
+
+    def encode(self, password, salt):
+        """
+        Creates an encoded database value
+
+        The result is normally formatted as "algorithm$salt$hash" and
+        must be fewer than 128 characters.
+        """
+        raise NotImplementedError()
+
+    def safe_summary(self, encoded):
+        """
+        Returns a summary of safe values
+
+        The result is a dictionary and will be used where the password field
+        must be displayed to construct a safe representation of the password.
+        """
+        raise NotImplementedError()
+
+
+class PBKDF2PasswordHasher(BasePasswordHasher):
+    """
+    Secure password hashing using the PBKDF2 algorithm (recommended)
+
+    Configured to use PBKDF2 + HMAC + SHA256 with 10000 iterations.
+    The result is a 32 byte binary string. Iterations may be changed
+    safely but you must rename the algorithm if you change SHA256.
+    """
+    algorithm = "pbkdf2_sha256"
+    iterations = 10000
+    digest = hashlib.sha256
+
+    def encode(self, password, salt, iterations=None):
+        assert password
+        assert salt and '$' not in salt
+        if not iterations:
+            iterations = self.iterations
+        hash = pbkdf2(password, salt, iterations, digest=self.digest)
+        hash = hash.encode('base64').strip()
+        return "%s$%d$%s$%s" % (self.algorithm, iterations, salt, hash)
+
+    def verify(self, password, encoded):
+        algorithm, iterations, salt, hash = encoded.split('$', 3)
+        assert algorithm == self.algorithm
+        encoded_2 = self.encode(password, salt, int(iterations))
+        return constant_time_compare(encoded, encoded_2)
+
+    def safe_summary(self, encoded):
+        algorithm, iterations, salt, hash = encoded.split('$', 3)
+        assert algorithm == self.algorithm
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('iterations', iterations),
+            ('salt', mask_hash(salt)),
+            ('hash', mask_hash(hash)),
+        ])
+
+
+class PBKDF2SHA1PasswordHasher(PBKDF2PasswordHasher):
+    """
+    Alternate PBKDF2 hasher which uses SHA1, the default PRF
+    recommended by PKCS #5. This is compatible with other
+    implementations of PBKDF2, such as openssl's
+    PKCS5_PBKDF2_HMAC_SHA1().
+    """
+    algorithm = "pbkdf2_sha1"
+    digest = hashlib.sha1
+
+
+class BCryptPasswordHasher(BasePasswordHasher):
+    """
+    Secure password hashing using the bcrypt algorithm (recommended)
+
+    This is considered by many to be the most secure algorithm but you
+    must first install the py-bcrypt library. Please be warned that
+    this library depends on native C code and might cause portability
+    issues.
+    """
+    algorithm = "bcrypt"
+    library = ("py-bcrypt", "bcrypt")
+    rounds = 12
+
+    def salt(self):
+        bcrypt = self._load_library()
+        return bcrypt.gensalt(self.rounds)
+
+    def encode(self, password, salt):
+        bcrypt = self._load_library()
+        data = bcrypt.hashpw(password, salt)
+        return "%s$%s" % (self.algorithm, data)
+
+    def verify(self, password, encoded):
+        algorithm, data = encoded.split('$', 1)
+        assert algorithm == self.algorithm
+        bcrypt = self._load_library()
+        return constant_time_compare(data, bcrypt.hashpw(password, data))
+
+    def safe_summary(self, encoded):
+        algorithm, empty, algostr, work_factor, data = encoded.split('$', 4)
+        assert algorithm == self.algorithm
+        salt, checksum = data[:22], data[22:]
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('work factor', work_factor),
+            ('salt', mask_hash(salt)),
+            ('checksum', mask_hash(checksum)),
+        ])
+
+
+class SHA1PasswordHasher(BasePasswordHasher):
+    """
+    The SHA1 password hashing algorithm (not recommended)
+    """
+    algorithm = "sha1"
+
+    def encode(self, password, salt):
+        assert password
+        assert salt and '$' not in salt
+        hash = hashlib.sha1(salt + password).hexdigest()
+        return "%s$%s$%s" % (self.algorithm, salt, hash)
+
+    def verify(self, password, encoded):
+        algorithm, salt, hash = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        encoded_2 = self.encode(password, salt)
+        return constant_time_compare(encoded, encoded_2)
+
+    def safe_summary(self, encoded):
+        algorithm, salt, hash = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('salt', mask_hash(salt, show=2)),
+            ('hash', mask_hash(hash)),
+        ])
+
+
+class MD5PasswordHasher(BasePasswordHasher):
+    """
+    I am an incredibly insecure algorithm you should *never* use;
+    stores unsalted MD5 hashes without the algorithm prefix.
+
+    This class is implemented because Django used to store passwords
+    this way. Some older Django installs still have these values
+    lingering around so we need to handle and upgrade them properly.
+    """
+    algorithm = "md5"
+
+    def salt(self):
+        return ''
+
+    def encode(self, password, salt):
+        return hashlib.md5(password).hexdigest()
+
+    def verify(self, password, encoded):
+        encoded_2 = self.encode(password, '')
+        return constant_time_compare(encoded, encoded_2)
+
+    def safe_summary(self, encoded):
+        return SortedDict([
+            ('algorithm', self.algorithm),
+            ('hash', mask_hash(encoded, show=3)),
+        ])
+
+
+class CryptPasswordHasher(BasePasswordHasher):
+    """
+    Password hashing using UNIX crypt (not recommended)
+
+    The crypt module is not supported on all platforms.
+    """
+    algorithm = "crypt"
+    library = "crypt"
+
+    def salt(self):
+        return get_random_string(2)
+
+    def encode(self, password, salt):
+        crypt = self._load_library()
+        assert len(salt) == 2
+        data = crypt.crypt(password, salt)
+        # we don't need to store the salt, but Django used to do this
+        return "%s$%s$%s" % (self.algorithm, '', data)
+
+    def verify(self, password, encoded):
+        crypt = self._load_library()
+        algorithm, salt, data = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        return constant_time_compare(data, crypt.crypt(password, data))
+
+    def safe_summary(self, encoded):
+        algorithm, salt, data = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('salt', salt),
+            ('hash', mask_hash(data, show=3)),
+        ])
+
diff --git a/django/contrib/auth/tests/hashers.py b/django/contrib/auth/tests/hashers.py
new file mode 100644
index 0000000000..4c66cafe34
--- /dev/null
+++ b/django/contrib/auth/tests/hashers.py
@@ -0,0 +1,128 @@
+from django.conf.global_settings import PASSWORD_HASHERS as default_hashers
+from django.contrib.auth.hashers import (is_password_usable,
+    check_password, make_password, PBKDF2PasswordHasher, load_hashers,
+    PBKDF2SHA1PasswordHasher, get_hasher, UNUSABLE_PASSWORD)
+from django.utils import unittest
+from django.utils.unittest import skipUnless
+from django.test.utils import override_settings
+
+
+try:
+    import crypt
+except ImportError:
+    crypt = None
+
+try:
+    import bcrypt
+except ImportError:
+    bcrypt = None
+
+
+class TestUtilsHashPass(unittest.TestCase):
+    def setUp(self):
+        load_hashers()
+
+    def test_simple(self):
+        encoded = make_password('letmein')
+        self.assertTrue(encoded.startswith('pbkdf2_sha256$'))
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_pkbdf2(self):
+        encoded = make_password('letmein', 'seasalt', 'pbkdf2_sha256')
+        self.assertEqual(encoded,
+'pbkdf2_sha256$10000$seasalt$FQCNpiZpTb0zub+HBsH6TOwyRxJ19FwvjbweatNmK/Y=')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_sha1(self):
+        encoded = make_password('letmein', 'seasalt', 'sha1')
+        self.assertEqual(encoded,
+'sha1$seasalt$fec3530984afba6bade3347b7140d1a7da7da8c7')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_md5(self):
+        encoded = make_password('letmein', 'seasalt', 'md5')
+        self.assertEqual(encoded, '0d107d09f5bbe40cade3de5c71e9e9b7')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    @skipUnless(crypt, "no crypt module to generate password.")
+    def test_crypt(self):
+        encoded = make_password('letmein', 'ab', 'crypt')
+        self.assertEqual(encoded, 'crypt$$abN/qM.L/H8EQ')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    @skipUnless(bcrypt, "py-bcrypt not installed")
+    def test_bcrypt(self):
+        encoded = make_password('letmein', hasher='bcrypt')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(encoded.startswith('bcrypt$'))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_unusable(self):
+        encoded = make_password(None)
+        self.assertFalse(is_password_usable(encoded))
+        self.assertFalse(check_password(None, encoded))
+        self.assertFalse(check_password(UNUSABLE_PASSWORD, encoded))
+        self.assertFalse(check_password('', encoded))
+        self.assertFalse(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_bad_algorithm(self):
+        def doit():
+            make_password('letmein', hasher='lolcat')
+        self.assertRaises(ValueError, doit)
+
+    def test_low_level_pkbdf2(self):
+        hasher = PBKDF2PasswordHasher()
+        encoded = hasher.encode('letmein', 'seasalt')
+        self.assertEqual(encoded,
+'pbkdf2_sha256$10000$seasalt$FQCNpiZpTb0zub+HBsH6TOwyRxJ19FwvjbweatNmK/Y=')
+        self.assertTrue(hasher.verify('letmein', encoded))
+
+    def test_low_level_pbkdf2_sha1(self):
+        hasher = PBKDF2SHA1PasswordHasher()
+        encoded = hasher.encode('letmein', 'seasalt')
+        self.assertEqual(encoded,
+'pbkdf2_sha1$10000$seasalt$91JiNKgwADC8j2j86Ije/cc4vfQ=')
+        self.assertTrue(hasher.verify('letmein', encoded))
+
+    def test_upgrade(self):
+        self.assertEqual('pbkdf2_sha256', get_hasher('default').algorithm)
+        for algo in ('sha1', 'md5'):
+            encoded = make_password('letmein', hasher=algo)
+            state = {'upgraded': False}
+            def setter(password):
+                state['upgraded'] = True
+            self.assertTrue(check_password('letmein', encoded, setter))
+            self.assertTrue(state['upgraded'])
+
+    def test_no_upgrade(self):
+        encoded = make_password('letmein')
+        state = {'upgraded': False}
+        def setter(password):
+            state['upgraded'] = True
+        self.assertFalse(check_password('WRONG', encoded, setter))
+        self.assertFalse(state['upgraded'])
+
+    def test_no_upgrade_on_incorrect_pass(self):
+        self.assertEqual('pbkdf2_sha256', get_hasher('default').algorithm)
+        for algo in ('sha1', 'md5'):
+            encoded = make_password('letmein', hasher=algo)
+            state = {'upgraded': False}
+            def setter(password):
+                state['upgraded'] = True
+            self.assertFalse(check_password('WRONG', encoded, setter))
+            self.assertFalse(state['upgraded'])
+
+
+TestUtilsHashPass = override_settings(PASSWORD_HASHERS=default_hashers)(TestUtilsHashPass)
diff --git a/django/contrib/auth/utils.py b/django/contrib/auth/utils.py
deleted file mode 100644
index 520c25e3c8..0000000000
--- a/django/contrib/auth/utils.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import hashlib
-from django.utils.encoding import smart_str
-from django.utils.crypto import constant_time_compare
-
-UNUSABLE_PASSWORD = '!' # This will never be a valid hash
-
-def get_hexdigest(algorithm, salt, raw_password):
-    """
-    Returns a string of the hexdigest of the given plaintext password and salt
-    using the given algorithm ('md5', 'sha1' or 'crypt').
-    """
-    raw_password, salt = smart_str(raw_password), smart_str(salt)
-    if algorithm == 'crypt':
-        try:
-            import crypt
-        except ImportError:
-            raise ValueError('"crypt" password algorithm not supported in this environment')
-        return crypt.crypt(raw_password, salt)
-
-    if algorithm == 'md5':
-        return hashlib.md5(salt + raw_password).hexdigest()
-    elif algorithm == 'sha1':
-        return hashlib.sha1(salt + raw_password).hexdigest()
-    raise ValueError("Got unknown password algorithm type in password.")
-
-def get_random_string(length=12, allowed_chars='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'):
-    """
-    Returns a random string of length characters from the set of a-z, A-Z, 0-9
-    for use as a salt.
-
-    The default length of 12 with the a-z, A-Z, 0-9 character set returns
-    a 71-bit salt. log_2((26+26+10)^12) =~ 71 bits
-    """
-    import random
-    try:
-        random = random.SystemRandom()
-    except NotImplementedError:
-        pass
-    return ''.join([random.choice(allowed_chars) for i in range(length)])
-
-def check_password(raw_password, enc_password):
-    """
-    Returns a boolean of whether the raw_password was correct. Handles
-    hashing formats behind the scenes.
-    """
-    parts = enc_password.split('$')
-    if len(parts) != 3:
-        return False
-    algo, salt, hsh = parts
-    return constant_time_compare(hsh, get_hexdigest(algo, salt, raw_password))
-
-def is_password_usable(encoded_password):
-    return encoded_password is not None and encoded_password != UNUSABLE_PASSWORD
-
-def make_password(algo, raw_password):
-    """
-    Produce a new password string in this format: algorithm$salt$hash
-    """
-    if raw_password is None:
-        return UNUSABLE_PASSWORD
-    salt = get_random_string()
-    hsh = get_hexdigest(algo, salt, raw_password)
-    return '%s$%s$%s' % (algo, salt, hsh)
diff --git a/tests/regressiontests/utils/crypto.py b/tests/regressiontests/utils/crypto.py
new file mode 100644
index 0000000000..f025ffa790
--- /dev/null
+++ b/tests/regressiontests/utils/crypto.py
@@ -0,0 +1,133 @@
+
+import math
+import timeit
+import hashlib
+
+from django.utils import unittest
+from django.utils.crypto import pbkdf2
+
+
+class TestUtilsCryptoPBKDF2(unittest.TestCase):
+
+    # http://tools.ietf.org/html/draft-josefsson-pbkdf2-test-vectors-06
+    rfc_vectors = [
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1,
+                "dklen": 20,
+                "digest": hashlib.sha1,
+            },
+            "result": "0c60c80f961f0e71f3a9b524af6012062fe037a6",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 2,
+                "dklen": 20,
+                "digest": hashlib.sha1,
+            },
+            "result": "ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 4096,
+                "dklen": 20,
+                "digest": hashlib.sha1,
+            },
+            "result": "4b007901b765489abead49d926f721d065a429c1",
+        },
+        # # this takes way too long :(
+        # {
+        #     "args": {
+        #         "password": "password",
+        #         "salt": "salt",
+        #         "iterations": 16777216,
+        #         "dklen": 20,
+        #         "digest": hashlib.sha1,
+        #     },
+        #     "result": "eefe3d61cd4da4e4e9945b3d6ba2158c2634e984",
+        # },
+        {
+            "args": {
+                "password": "passwordPASSWORDpassword",
+                "salt": "saltSALTsaltSALTsaltSALTsaltSALTsalt",
+                "iterations": 4096,
+                "dklen": 25,
+                "digest": hashlib.sha1,
+            },
+            "result": "3d2eec4fe41c849b80c8d83662c0e44a8b291a964cf2f07038",
+        },
+        {
+            "args": {
+                "password": "pass\0word",
+                "salt": "sa\0lt",
+                "iterations": 4096,
+                "dklen": 16,
+                "digest": hashlib.sha1,
+            },
+            "result": "56fa6aa75548099dcc37d7f03425e0c3",
+        },
+    ]
+
+    regression_vectors = [
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1,
+                "dklen": 20,
+                "digest": hashlib.sha256,
+            },
+            "result": "120fb6cffcf8b32c43e7225256c4f837a86548c9",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1,
+                "dklen": 20,
+                "digest": hashlib.sha512,
+            },
+            "result": "867f70cf1ade02cff3752599a3a53dc4af34c7a6",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1000,
+                "dklen": 0,
+                "digest": hashlib.sha512,
+            },
+            "result": ("afe6c5530785b6cc6b1c6453384731bd5ee432ee"
+                       "549fd42fb6695779ad8a1c5bf59de69c48f774ef"
+                       "c4007d5298f9033c0241d5ab69305e7b64eceeb8d"
+                       "834cfec"),
+        },
+    ]
+
+    def test_public_vectors(self):
+        for vector in self.rfc_vectors:
+            result = pbkdf2(**vector['args'])
+            self.assertEqual(result.encode('hex'), vector['result'])
+
+    def test_regression_vectors(self):
+        for vector in self.regression_vectors:
+            result = pbkdf2(**vector['args'])
+            self.assertEqual(result.encode('hex'), vector['result'])
+
+    def test_performance_scalability(self):
+        """
+        Theory: If you run with 100 iterations, it should take 100
+        times as long as running with 1 iteration.
+        """
+        n1, n2 = 100, 10000
+        elapsed = lambda f: timeit.timeit(f, number=1)
+        t1 = elapsed(lambda: pbkdf2("password", "salt", iterations=n1))
+        t2 = elapsed(lambda: pbkdf2("password", "salt", iterations=n2))
+        measured_scale_exponent = math.log(t2 / t1, n2 / n1)
+        self.assertLess(measured_scale_exponent, 1.1)