From 668bc4f7bef79c7177bfa5b7dabd1403e0d6fa30 Mon Sep 17 00:00:00 2001 From: Karen Tracey Date: Sat, 4 Apr 2009 17:34:58 +0000 Subject: [PATCH] Fixed #10254: Changed the regex in get_valid_filename to allow unicode alphanumerics (thanks gulliver). Also updated the file_uploads test for this case to check the name after saving the uploaded file. As it was, the test ensured that files with unicode characters in their names could be uploaded, but it wasn't actually ensuring that the unicode characters were preserved through save. git-svn-id: http://code.djangoproject.com/svn/django/trunk@10388 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/text.py | 6 ++-- tests/regressiontests/file_uploads/tests.py | 27 +++++++++++++----- tests/regressiontests/file_uploads/urls.py | 1 + tests/regressiontests/file_uploads/views.py | 31 ++++++++++++++++++--- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/django/utils/text.py b/django/utils/text.py index 147fab4038..8bc256e804 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -116,13 +116,13 @@ def get_valid_filename(s): """ Returns the given string converted to a string that can be used for a clean filename. Specifically, leading and trailing spaces are removed; other - spaces are converted to underscores; and all non-filename-safe characters - are removed. + spaces are converted to underscores; and anything that is not a unicode + alphanumeric, dash, underscore, or dot, is removed. 
>>> get_valid_filename("john's portrait in 2004.jpg") u'johns_portrait_in_2004.jpg' """ s = force_unicode(s).strip().replace(' ', '_') - return re.sub(r'[^-A-Za-z0-9_.]', '', s) + return re.sub(r'(?u)[^-\w.]', '', s) get_valid_filename = allow_lazy(get_valid_filename, unicode) def get_text_list(list_, last_word=ugettext_lazy(u'or')): diff --git a/tests/regressiontests/file_uploads/tests.py b/tests/regressiontests/file_uploads/tests.py index 21f8ad4de2..ceea027f3c 100644 --- a/tests/regressiontests/file_uploads/tests.py +++ b/tests/regressiontests/file_uploads/tests.py @@ -1,3 +1,4 @@ +#! -*- coding: utf-8 -*- import os import errno import shutil @@ -12,6 +13,8 @@ from django.utils.hashcompat import sha_constructor from models import FileModel, temp_storage, UPLOAD_TO import uploadhandler +UNICODE_FILENAME = u'test-0123456789_中文_Orléans.jpg' + class FileUploadTests(TestCase): def test_simple_upload(self): post_data = { @@ -32,16 +35,10 @@ class FileUploadTests(TestCase): file2.write('a' * (10 * 2 ** 20)) file2.seek(0) - # This file contains chinese symbols for a name. - file3 = open(os.path.join(tdir, u'test_中文_Orl\u00e9ans.jpg'.encode('utf-8')), 'w+b') - file3.write('b' * (2 ** 10)) - file3.seek(0) - post_data = { 'name': 'Ringo', 'file_field1': open(file1.name), 'file_field2': open(file2.name), - 'file_unicode': file3, } for key in post_data.keys(): @@ -53,8 +50,24 @@ class FileUploadTests(TestCase): response = self.client.post('/file_uploads/verify/', post_data) + self.assertEqual(response.status_code, 200) + + def test_unicode_file_name(self): + tdir = tempfile.gettempdir() + + # This file contains chinese symbols and an accented char in the name. 
+ file1 = open(os.path.join(tdir, UNICODE_FILENAME.encode('utf-8')), 'w+b') + file1.write('b' * (2 ** 10)) + file1.seek(0) + + post_data = { + 'file_unicode': file1, + } + + response = self.client.post('/file_uploads/unicode_name/', post_data) + try: - os.unlink(file3.name) + os.unlink(file1.name) except: pass diff --git a/tests/regressiontests/file_uploads/urls.py b/tests/regressiontests/file_uploads/urls.py index 607e1d1034..413080eb4f 100644 --- a/tests/regressiontests/file_uploads/urls.py +++ b/tests/regressiontests/file_uploads/urls.py @@ -4,6 +4,7 @@ import views urlpatterns = patterns('', (r'^upload/$', views.file_upload_view), (r'^verify/$', views.file_upload_view_verify), + (r'^unicode_name/$', views.file_upload_unicode_name), (r'^echo/$', views.file_upload_echo), (r'^quota/$', views.file_upload_quota), (r'^quota/broken/$', views.file_upload_quota_broken), diff --git a/tests/regressiontests/file_uploads/views.py b/tests/regressiontests/file_uploads/views.py index a989069a30..a2053a936a 100644 --- a/tests/regressiontests/file_uploads/views.py +++ b/tests/regressiontests/file_uploads/views.py @@ -5,6 +5,7 @@ from django.utils import simplejson from models import FileModel from uploadhandler import QuotaUploadHandler, ErroringUploadHandler from django.utils.hashcompat import sha_constructor +from tests import UNICODE_FILENAME def file_upload_view(request): """ @@ -29,10 +30,6 @@ def file_upload_view_verify(request): form_data = request.POST.copy() form_data.update(request.FILES) - # Check to see if unicode names worked out. - if not request.FILES['file_unicode'].name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'): - return HttpResponseServerError() - for key, value in form_data.items(): if key.endswith('_hash'): continue @@ -53,6 +50,32 @@ def file_upload_view_verify(request): return HttpResponse('') +def file_upload_unicode_name(request): + + # Check to see if unicode name came through properly. 
+ if not request.FILES['file_unicode'].name.endswith(UNICODE_FILENAME): + return HttpResponseServerError() + + response = None + + # Check to make sure the exotic characters are preserved even + # through file save. + uni_named_file = request.FILES['file_unicode'] + obj = FileModel.objects.create(testfile=uni_named_file) + if not obj.testfile.name.endswith(uni_named_file.name): + response = HttpResponseServerError() + + # Cleanup the object with its exotic file name immediately. + # (shutil.rmtree used elsewhere in the tests to clean up the + # upload directory has been seen to choke on unicode + # filenames on Windows.) + obj.delete() + + if response: + return response + else: + return HttpResponse('') + def file_upload_echo(request): """ Simple view to echo back info about uploaded files for tests.