diff --git a/django/core/files/storage.py b/django/core/files/storage.py index 80123781f6..b13bb35cba 100644 --- a/django/core/files/storage.py +++ b/django/core/files/storage.py @@ -1,12 +1,12 @@ import os import errno -import itertools from datetime import datetime from django.conf import settings from django.core.exceptions import SuspiciousFileOperation from django.core.files import locks, File from django.core.files.move import file_move_safe +from django.utils.crypto import get_random_string from django.utils.encoding import force_text, filepath_to_uri from django.utils.functional import LazyObject from django.utils.module_loading import import_by_path @@ -67,13 +67,12 @@ class Storage(object): """ dir_name, file_name = os.path.split(name) file_root, file_ext = os.path.splitext(file_name) - # If the filename already exists, add an underscore and a number (before - # the file extension, if one exists) to the filename until the generated - # filename doesn't exist. - count = itertools.count(1) + # If the filename already exists, add an underscore and a random 7 + # character alphanumeric string (before the file extension, if one + # exists) to the filename until the generated filename doesn't exist. while self.exists(name): # file_ext includes the dot. - name = os.path.join(dir_name, "%s_%s%s" % (file_root, next(count), file_ext)) + name = os.path.join(dir_name, "%s_%s%s" % (file_root, get_random_string(7), file_ext)) return name diff --git a/docs/howto/custom-file-storage.txt b/docs/howto/custom-file-storage.txt index 090cc089cb..ef234be6d6 100644 --- a/docs/howto/custom-file-storage.txt +++ b/docs/howto/custom-file-storage.txt @@ -83,5 +83,14 @@ the provided filename into account. The ``name`` argument passed to this method will have already cleaned to a filename valid for the storage system, according to the ``get_valid_name()`` method described above. -The code provided on ``Storage`` simply appends ``"_1"``, ``"_2"``, etc. to the -filename until it finds one that's available in the destination directory. +.. versionchanged:: 1.6.6 + + If a file with ``name`` already exists, an underscore plus a random 7 + character alphanumeric string is appended to the filename before the + extension. + + Previously, an underscore followed by a number (e.g. ``"_1"``, ``"_2"``, + etc.) was appended to the filename until an avaible name in the destination + directory was found. A malicious user could exploit this deterministic + algorithm to create a denial-of-service attack. This change was also made + in Django 1.5.9 and 1.4.14. diff --git a/docs/ref/files/storage.txt b/docs/ref/files/storage.txt index 1216e85140..a8567fdcb3 100644 --- a/docs/ref/files/storage.txt +++ b/docs/ref/files/storage.txt @@ -81,6 +81,17 @@ The Storage Class available for new content to be written to on the target storage system. + .. versionchanged:: 1.6.6 + + If a file with ``name`` already exists, an underscore plus a random 7 + character alphanumeric string is appended to the filename before the + extension. + + Previously, an underscore followed by a number (e.g. ``"_1"``, ``"_2"``, + etc.) was appended to the filename until an avaible name in the + destination directory was found. A malicious user could exploit this + deterministic algorithm to create a denial-of-service attack. This + change was also made in Django 1.5.9 and 1.4.14. .. method:: get_valid_name(name) diff --git a/docs/releases/1.4.14.txt b/docs/releases/1.4.14.txt index 28390c96a4..6c140ee6dc 100644 --- a/docs/releases/1.4.14.txt +++ b/docs/releases/1.4.14.txt @@ -18,3 +18,23 @@ To remedy this, URL reversing now ensures that no URL starts with two slashes (//), replacing the second slash with its URL encoded counterpart (%2F). This approach ensures that semantics stay the same, while making the URL relative to the domain and not to the scheme. + +File upload denial-of-service +============================= + +Before this release, Django's file upload handing in its default configuration +may degrade to producing a huge number of ``os.stat()`` system calls when a +duplicate filename is uploaded. Since ``stat()`` may invoke IO, this may produce +a huge data-dependent slowdown that slowly worsens over time. The net result is +that given enough time, a user with the ability to upload files can cause poor +performance in the upload handler, eventually causing it to become very slow +simply by uploading 0-byte files. At this point, even a slow network connection +and few HTTP requests would be all that is necessary to make a site unavailable. + +We've remedied the issue by changing the algorithm for generating file names +if a file with the uploaded name already exists. +:meth:`Storage.get_available_name() +` now appends an +underscore plus a random 7 character alphanumeric string (e.g. ``"_x3a1gho"``), +rather than iterating through an underscore followed by a number (e.g. ``"_1"``, +``"_2"``, etc.). diff --git a/docs/releases/1.5.9.txt b/docs/releases/1.5.9.txt index 12b5b5f806..232cbb0767 100644 --- a/docs/releases/1.5.9.txt +++ b/docs/releases/1.5.9.txt @@ -18,3 +18,23 @@ To remedy this, URL reversing now ensures that no URL starts with two slashes (//), replacing the second slash with its URL encoded counterpart (%2F). This approach ensures that semantics stay the same, while making the URL relative to the domain and not to the scheme. + +File upload denial-of-service +============================= + +Before this release, Django's file upload handing in its default configuration +may degrade to producing a huge number of ``os.stat()`` system calls when a +duplicate filename is uploaded. Since ``stat()`` may invoke IO, this may produce +a huge data-dependent slowdown that slowly worsens over time. The net result is +that given enough time, a user with the ability to upload files can cause poor +performance in the upload handler, eventually causing it to become very slow +simply by uploading 0-byte files. At this point, even a slow network connection +and few HTTP requests would be all that is necessary to make a site unavailable. + +We've remedied the issue by changing the algorithm for generating file names +if a file with the uploaded name already exists. +:meth:`Storage.get_available_name() +` now appends an +underscore plus a random 7 character alphanumeric string (e.g. ``"_x3a1gho"``), +rather than iterating through an underscore followed by a number (e.g. ``"_1"``, +``"_2"``, etc.). diff --git a/docs/releases/1.6.6.txt b/docs/releases/1.6.6.txt index 43b3adf630..c2ebdb9efb 100644 --- a/docs/releases/1.6.6.txt +++ b/docs/releases/1.6.6.txt @@ -19,6 +19,26 @@ To remedy this, URL reversing now ensures that no URL starts with two slashes approach ensures that semantics stay the same, while making the URL relative to the domain and not to the scheme. +File upload denial-of-service +============================= + +Before this release, Django's file upload handing in its default configuration +may degrade to producing a huge number of ``os.stat()`` system calls when a +duplicate filename is uploaded. Since ``stat()`` may invoke IO, this may produce +a huge data-dependent slowdown that slowly worsens over time. The net result is +that given enough time, a user with the ability to upload files can cause poor +performance in the upload handler, eventually causing it to become very slow +simply by uploading 0-byte files. At this point, even a slow network connection +and few HTTP requests would be all that is necessary to make a site unavailable. + +We've remedied the issue by changing the algorithm for generating file names +if a file with the uploaded name already exists. +:meth:`Storage.get_available_name() +` now appends an +underscore plus a random 7 character alphanumeric string (e.g. ``"_x3a1gho"``), +rather than iterating through an underscore followed by a number (e.g. ``"_1"``, +``"_2"``, etc.). + Bugfixes ======== diff --git a/tests/file_storage/tests.py b/tests/file_storage/tests.py index f92eae3f53..f9f2d6ad13 100644 --- a/tests/file_storage/tests.py +++ b/tests/file_storage/tests.py @@ -35,6 +35,9 @@ except ImproperlyConfigured: Image = None +FILE_SUFFIX_REGEX = '[A-Za-z0-9]{7}' + + class GetStorageClassTests(SimpleTestCase): def test_get_filesystem_storage(self): @@ -430,10 +433,9 @@ class FileSaveRaceConditionTest(unittest.TestCase): self.thread.start() name = self.save_file('conflict') self.thread.join() - self.assertTrue(self.storage.exists('conflict')) - self.assertTrue(self.storage.exists('conflict_1')) - self.storage.delete('conflict') - self.storage.delete('conflict_1') + files = sorted(os.listdir(self.storage_dir)) + self.assertEqual(files[0], 'conflict') + six.assertRegex(self, files[1], 'conflict_%s' % FILE_SUFFIX_REGEX) @unittest.skipIf(sys.platform.startswith('win'), "Windows only partially supports umasks and chmod.") class FileStoragePermissions(unittest.TestCase): @@ -477,9 +479,10 @@ class FileStoragePathParsing(unittest.TestCase): self.storage.save('dotted.path/test', ContentFile("1")) self.storage.save('dotted.path/test', ContentFile("2")) + files = sorted(os.listdir(os.path.join(self.storage_dir, 'dotted.path'))) self.assertFalse(os.path.exists(os.path.join(self.storage_dir, 'dotted_.path'))) - self.assertTrue(os.path.exists(os.path.join(self.storage_dir, 'dotted.path/test'))) - self.assertTrue(os.path.exists(os.path.join(self.storage_dir, 'dotted.path/test_1'))) + self.assertEqual(files[0], 'test') + six.assertRegex(self, files[1], 'test_%s' % FILE_SUFFIX_REGEX) def test_first_character_dot(self): """ @@ -489,8 +492,10 @@ class FileStoragePathParsing(unittest.TestCase): self.storage.save('dotted.path/.test', ContentFile("1")) self.storage.save('dotted.path/.test', ContentFile("2")) - self.assertTrue(os.path.exists(os.path.join(self.storage_dir, 'dotted.path/.test'))) - self.assertTrue(os.path.exists(os.path.join(self.storage_dir, 'dotted.path/.test_1'))) + files = sorted(os.listdir(os.path.join(self.storage_dir, 'dotted.path'))) + self.assertFalse(os.path.exists(os.path.join(self.storage_dir, 'dotted_.path'))) + self.assertEqual(files[0], '.test') + six.assertRegex(self, files[1], '.test_%s' % FILE_SUFFIX_REGEX) class DimensionClosingBug(unittest.TestCase): """ diff --git a/tests/files/tests.py b/tests/files/tests.py index 2766ed1630..abc1a2c678 100644 --- a/tests/files/tests.py +++ b/tests/files/tests.py @@ -13,12 +13,15 @@ from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.temp import NamedTemporaryFile from django.test import TestCase -from django.utils import unittest +from django.utils import six, unittest from django.utils.six import StringIO from .models import Storage, temp_storage, temp_storage_location +FILE_SUFFIX_REGEX = '[A-Za-z0-9]{7}' + + class FileStorageTests(TestCase): def tearDown(self): shutil.rmtree(temp_storage_location) @@ -64,27 +67,28 @@ class FileStorageTests(TestCase): # Save another file with the same name. obj2 = Storage() obj2.normal.save("django_test.txt", ContentFile("more content")) - self.assertEqual(obj2.normal.name, "tests/django_test_1.txt") + obj2_name = obj2.normal.name + six.assertRegex(self, obj2_name, "tests/django_test_%s.txt" % FILE_SUFFIX_REGEX) self.assertEqual(obj2.normal.size, 12) # Push the objects into the cache to make sure they pickle properly cache.set("obj1", obj1) cache.set("obj2", obj2) - self.assertEqual(cache.get("obj2").normal.name, "tests/django_test_1.txt") + six.assertRegex(self, cache.get("obj2").normal.name, "tests/django_test_%s.txt" % FILE_SUFFIX_REGEX) # Deleting an object does not delete the file it uses. obj2.delete() obj2.normal.save("django_test.txt", ContentFile("more content")) - self.assertEqual(obj2.normal.name, "tests/django_test_2.txt") + self.assertNotEqual(obj2_name, obj2.normal.name) + six.assertRegex(self, obj2.normal.name, "tests/django_test_%s.txt" % FILE_SUFFIX_REGEX) # Multiple files with the same name get _N appended to them. - objs = [Storage() for i in range(3)] + objs = [Storage() for i in range(2)] for o in objs: o.normal.save("multiple_files.txt", ContentFile("Same Content")) - self.assertEqual( - [o.normal.name for o in objs], - ["tests/multiple_files.txt", "tests/multiple_files_1.txt", "tests/multiple_files_2.txt"] - ) + names = [o.normal.name for o in objs] + self.assertEqual(names[0], "tests/multiple_files.txt") + six.assertRegex(self, names[1], "tests/multiple_files_%s.txt" % FILE_SUFFIX_REGEX) for o in objs: o.delete()