diff --git a/django/utils/text.py b/django/utils/text.py index 147fab4038..8bc256e804 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -116,13 +116,13 @@ def get_valid_filename(s): """ Returns the given string converted to a string that can be used for a clean filename. Specifically, leading and trailing spaces are removed; other - spaces are converted to underscores; and all non-filename-safe characters - are removed. + spaces are converted to underscores; and anything that is not a unicode + alphanumeric, dash, underscore, or dot, is removed. >>> get_valid_filename("john's portrait in 2004.jpg") u'johns_portrait_in_2004.jpg' """ s = force_unicode(s).strip().replace(' ', '_') - return re.sub(r'[^-A-Za-z0-9_.]', '', s) + return re.sub(r'(?u)[^-\w.]', '', s) get_valid_filename = allow_lazy(get_valid_filename, unicode) def get_text_list(list_, last_word=ugettext_lazy(u'or')): diff --git a/tests/regressiontests/file_uploads/tests.py b/tests/regressiontests/file_uploads/tests.py index 21f8ad4de2..ceea027f3c 100644 --- a/tests/regressiontests/file_uploads/tests.py +++ b/tests/regressiontests/file_uploads/tests.py @@ -1,3 +1,4 @@ +#! -*- coding: utf-8 -*- import os import errno import shutil @@ -12,6 +13,8 @@ from django.utils.hashcompat import sha_constructor from models import FileModel, temp_storage, UPLOAD_TO import uploadhandler +UNICODE_FILENAME = u'test-0123456789_中文_Orléans.jpg' + class FileUploadTests(TestCase): def test_simple_upload(self): post_data = { @@ -32,16 +35,10 @@ class FileUploadTests(TestCase): file2.write('a' * (10 * 2 ** 20)) file2.seek(0) - # This file contains chinese symbols for a name. - file3 = open(os.path.join(tdir, u'test_中文_Orl\u00e9ans.jpg'.encode('utf-8')), 'w+b') - file3.write('b' * (2 ** 10)) - file3.seek(0) - post_data = { 'name': 'Ringo', 'file_field1': open(file1.name), 'file_field2': open(file2.name), - 'file_unicode': file3, } for key in post_data.keys(): @@ -53,8 +50,24 @@ class FileUploadTests(TestCase): response = self.client.post('/file_uploads/verify/', post_data) + self.assertEqual(response.status_code, 200) + + def test_unicode_file_name(self): + tdir = tempfile.gettempdir() + + # This file contains chinese symbols and an accented char in the name. + file1 = open(os.path.join(tdir, UNICODE_FILENAME.encode('utf-8')), 'w+b') + file1.write('b' * (2 ** 10)) + file1.seek(0) + + post_data = { + 'file_unicode': file1, + } + + response = self.client.post('/file_uploads/unicode_name/', post_data) + try: - os.unlink(file3.name) + os.unlink(file1.name) except: pass diff --git a/tests/regressiontests/file_uploads/urls.py b/tests/regressiontests/file_uploads/urls.py index 607e1d1034..413080eb4f 100644 --- a/tests/regressiontests/file_uploads/urls.py +++ b/tests/regressiontests/file_uploads/urls.py @@ -4,6 +4,7 @@ import views urlpatterns = patterns('', (r'^upload/$', views.file_upload_view), (r'^verify/$', views.file_upload_view_verify), + (r'^unicode_name/$', views.file_upload_unicode_name), (r'^echo/$', views.file_upload_echo), (r'^quota/$', views.file_upload_quota), (r'^quota/broken/$', views.file_upload_quota_broken), diff --git a/tests/regressiontests/file_uploads/views.py b/tests/regressiontests/file_uploads/views.py index a989069a30..a2053a936a 100644 --- a/tests/regressiontests/file_uploads/views.py +++ b/tests/regressiontests/file_uploads/views.py @@ -5,6 +5,7 @@ from django.utils import simplejson from models import FileModel from uploadhandler import QuotaUploadHandler, ErroringUploadHandler from django.utils.hashcompat import sha_constructor +from tests import UNICODE_FILENAME def file_upload_view(request): """ @@ -29,10 +30,6 @@ def file_upload_view_verify(request): form_data = request.POST.copy() form_data.update(request.FILES) - # Check to see if unicode names worked out. - if not request.FILES['file_unicode'].name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'): - return HttpResponseServerError() - for key, value in form_data.items(): if key.endswith('_hash'): continue @@ -53,6 +50,32 @@ def file_upload_view_verify(request): return HttpResponse('') +def file_upload_unicode_name(request): + + # Check to see if unicode name came through properly. + if not request.FILES['file_unicode'].name.endswith(UNICODE_FILENAME): + return HttpResponseServerError() + + response = None + + # Check to make sure the exotic characters are preserved even + # through file save. + uni_named_file = request.FILES['file_unicode'] + obj = FileModel.objects.create(testfile=uni_named_file) + if not obj.testfile.name.endswith(uni_named_file.name): + response = HttpResponseServerError() + + # Cleanup the object with its exotic file name immediately. + # (shutil.rmtree used elsewhere in the tests to clean up the + # upload directory has been seen to choke on unicode + # filenames on Windows.) + obj.delete() + + if response: + return response + else: + return HttpResponse('') + def file_upload_echo(request): """ Simple view to echo back info about uploaded files for tests.