Fixed #7614: the quickening has come, and there now is only one UploadedFile. On top of that, UploadedFile's interface has been improved:

* The API now more closely matches a proper file API. This unfortunately means a few backwards-incompatible renamings; see BackwardsIncompatibleChanges. This refs #7593.
  * While we were at it, renamed chunk() to chunks() to clarify that it's an iterator.
  * Temporary uploaded files now property use the tempfile library behind the scenes which should ensure better cleanup of tempfiles (refs #7593 again).

Thanks to Mike Axiak for the bulk of this patch.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@7859 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2008-07-07 23:16:00 +00:00
parent 9dabd1f8ff
commit a28b75b0ba
12 changed files with 165 additions and 153 deletions

View File

@ -3,12 +3,40 @@ Classes representing uploaded files.
"""
import os
import tempfile
import warnings
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile')
from django.conf import settings
__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile', 'SimpleUploadedFile')
# Because we fooled around with it a bunch, UploadedFile has a bunch
# of deprecated properties. This little shortcut helps define 'em
# without too much code duplication.
def deprecated_property(old, new, readonly=False):
def issue_warning():
warnings.warn(
message = "UploadedFile.%s is deprecated; use UploadedFile.%s instead." % (old, new),
category = DeprecationWarning,
stacklevel = 3
)
def getter(self):
issue_warning()
return getattr(self, new)
def setter(self, value):
issue_warning()
setattr(self, new, value)
if readonly:
return property(getter)
else:
return property(getter, setter)
class UploadedFile(object):
"""
@ -20,34 +48,34 @@ class UploadedFile(object):
"""
DEFAULT_CHUNK_SIZE = 64 * 2**10
def __init__(self, file_name=None, content_type=None, file_size=None, charset=None):
self.file_name = file_name
self.file_size = file_size
def __init__(self, name=None, content_type=None, size=None, charset=None):
self.name = name
self.size = size
self.content_type = content_type
self.charset = charset
def __repr__(self):
return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type)
return "<%s: %s (%s)>" % (self.__class__.__name__, self.name, self.content_type)
def _set_file_name(self, name):
def _get_name(self):
return self._name
def _set_name(self, name):
# Sanitize the file name so that it can't be dangerous.
if name is not None:
# Just use the basename of the file -- anything else is dangerous.
name = os.path.basename(name)
# File names longer than 255 characters can cause problems on older OSes.
if len(name) > 255:
name, ext = os.path.splitext(name)
name = name[:255 - len(ext)] + ext
self._file_name = name
def _get_file_name(self):
return self._file_name
file_name = property(_get_file_name, _set_file_name)
def chunk(self, chunk_size=None):
self._name = name
name = property(_get_name, _set_name)
def chunks(self, chunk_size=None):
"""
Read the file and yield chucks of ``chunk_size`` bytes (defaults to
``UploadedFile.DEFAULT_CHUNK_SIZE``).
@ -58,12 +86,18 @@ class UploadedFile(object):
if hasattr(self, 'seek'):
self.seek(0)
# Assume the pointer is at zero...
counter = self.file_size
counter = self.size
while counter > 0:
yield self.read(chunk_size)
counter -= chunk_size
# Deprecated properties
file_name = deprecated_property(old="file_name", new="name")
file_size = deprecated_property(old="file_size", new="size")
data = deprecated_property(old="data", new="read", readonly=True)
chunk = deprecated_property(old="chunk", new="chunks", readonly=True)
def multiple_chunks(self, chunk_size=None):
"""
Returns ``True`` if you can expect multiple chunks.
@ -74,9 +108,9 @@ class UploadedFile(object):
"""
if not chunk_size:
chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
return self.file_size < chunk_size
return self.size > chunk_size
# Abstract methods; subclasses *must* default read() and probably should
# Abstract methods; subclasses *must* define read() and probably should
# define open/close.
def read(self, num_bytes=None):
raise NotImplementedError()
@ -87,23 +121,49 @@ class UploadedFile(object):
def close(self):
pass
def xreadlines(self):
return self
def readlines(self):
return list(self.xreadlines())
def __iter__(self):
# Iterate over this file-like object by newlines
buffer_ = None
for chunk in self.chunks():
chunk_buffer = StringIO(chunk)
for line in chunk_buffer:
if buffer_:
line = buffer_ + line
buffer_ = None
# If this is the end of a line, yield
# otherwise, wait for the next round
if line[-1] in ('\n', '\r'):
yield line
else:
buffer_ = line
if buffer_ is not None:
yield buffer_
# Backwards-compatible support for uploaded-files-as-dictionaries.
def __getitem__(self, key):
import warnings
warnings.warn(
message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.",
category = DeprecationWarning,
stacklevel = 2
)
backwards_translate = {
'filename': 'file_name',
'filename': 'name',
'content-type': 'content_type',
}
}
if key == 'content':
return self.read()
elif key == 'filename':
return self.file_name
return self.name
elif key == 'content-type':
return self.content_type
else:
@ -113,34 +173,36 @@ class TemporaryUploadedFile(UploadedFile):
"""
A file uploaded to a temporary location (i.e. stream-to-disk).
"""
def __init__(self, file, file_name, content_type, file_size, charset):
super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset)
self.file = file
self.path = file.name
self.file.seek(0)
def __init__(self, name, content_type, size, charset):
super(TemporaryUploadedFile, self).__init__(name, content_type, size, charset)
if settings.FILE_UPLOAD_TEMP_DIR:
self._file = tempfile.NamedTemporaryFile(suffix='.upload', dir=settings.FILE_UPLOAD_TEMP_DIR)
else:
self._file = tempfile.NamedTemporaryFile(suffix='.upload')
def temporary_file_path(self):
"""
Returns the full path of this file.
"""
return self.path
def read(self, *args, **kwargs):
return self.file.read(*args, **kwargs)
def open(self):
self.seek(0)
def seek(self, *args, **kwargs):
self.file.seek(*args, **kwargs)
return self.name
# Most methods on this object get proxied to NamedTemporaryFile.
# We can't directly subclass because NamedTemporaryFile is actually a
# factory function
def read(self, *args): return self._file.read(*args)
def seek(self, offset): return self._file.seek(offset)
def write(self, s): return self._file.write(s)
def close(self): return self._file.close()
def __iter__(self): return iter(self._file)
def readlines(self, size=None): return self._file.readlines(size)
def xreadlines(self): return self._file.xreadlines()
class InMemoryUploadedFile(UploadedFile):
"""
A file uploaded into memory (i.e. stream-to-memory).
"""
def __init__(self, file, field_name, file_name, content_type, file_size, charset):
super(InMemoryUploadedFile, self).__init__(file_name, content_type, file_size, charset)
def __init__(self, file, field_name, name, content_type, size, charset):
super(InMemoryUploadedFile, self).__init__(name, content_type, size, charset)
self.file = file
self.field_name = field_name
self.file.seek(0)
@ -154,7 +216,7 @@ class InMemoryUploadedFile(UploadedFile):
def read(self, *args, **kwargs):
return self.file.read(*args, **kwargs)
def chunk(self, chunk_size=None):
def chunks(self, chunk_size=None):
self.file.seek(0)
yield self.read()
@ -168,9 +230,9 @@ class SimpleUploadedFile(InMemoryUploadedFile):
"""
def __init__(self, name, content, content_type='text/plain'):
self.file = StringIO(content or '')
self.file_name = name
self.name = name
self.field_name = None
self.file_size = len(content or '')
self.size = len(content or '')
self.content_type = content_type
self.charset = None
self.file.seek(0)

View File

@ -132,21 +132,15 @@ class TemporaryFileUploadHandler(FileUploadHandler):
Create the file object to append to as data is coming in.
"""
super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs)
self.file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR)
self.write = self.file.write
self.file = TemporaryUploadedFile(self.file_name, self.content_type, 0, self.charset)
def receive_data_chunk(self, raw_data, start):
self.write(raw_data)
self.file.write(raw_data)
def file_complete(self, file_size):
self.file.seek(0)
return TemporaryUploadedFile(
file = self.file,
file_name = self.file_name,
content_type = self.content_type,
file_size = file_size,
charset = self.charset
)
self.file.size = file_size
return self.file
class MemoryFileUploadHandler(FileUploadHandler):
"""
@ -189,37 +183,12 @@ class MemoryFileUploadHandler(FileUploadHandler):
return InMemoryUploadedFile(
file = self.file,
field_name = self.field_name,
file_name = self.file_name,
name = self.file_name,
content_type = self.content_type,
file_size = file_size,
size = file_size,
charset = self.charset
)
class TemporaryFile(object):
"""
A temporary file that tries to delete itself when garbage collected.
"""
def __init__(self, dir):
if not dir:
dir = tempfile.gettempdir()
try:
(fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
self.file = os.fdopen(fd, 'w+b')
except (OSError, IOError):
raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?")
self.name = name
def __getattr__(self, name):
a = getattr(self.__dict__['file'], name)
if type(a) != type(0):
setattr(self, name, a)
return a
def __del__(self):
try:
os.unlink(self.name)
except OSError:
pass
def load_handler(path, *args, **kwargs):
"""

View File

@ -536,7 +536,7 @@ class Model(object):
# This is a normal uploadedfile that we can stream.
fp = open(full_filename, 'wb')
locks.lock(fp, locks.LOCK_EX)
for chunk in raw_field.chunk():
for chunk in raw_field.chunks():
fp.write(chunk)
locks.unlock(fp)
fp.close()

View File

@ -766,9 +766,12 @@ class FileField(Field):
def get_db_prep_save(self, value):
"Returns field's value prepared for saving into a database."
# Need to convert UploadedFile objects provided via a form to unicode for database insertion
if value is None:
if hasattr(value, 'name'):
return value.name
elif value is None:
return None
return unicode(value)
else:
return unicode(value)
def get_manipulator_fields(self, opts, manipulator, change, name_prefix='', rel=False, follow=True):
field_list = Field.get_manipulator_fields(self, opts, manipulator, change, name_prefix, rel, follow)
@ -842,7 +845,7 @@ class FileField(Field):
# We don't need to raise a warning because Model._save_FIELD_file will
# do so for us.
try:
file_name = file.file_name
file_name = file.name
except AttributeError:
file_name = file['filename']
@ -857,9 +860,9 @@ class FileField(Field):
return os.path.normpath(f)
def save_form_data(self, instance, data):
from django.newforms.fields import UploadedFile
from django.core.files.uploadedfile import UploadedFile
if data and isinstance(data, UploadedFile):
getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False)
getattr(instance, "save_%s_file" % self.name)(data.name, data, save=False)
def formfield(self, **kwargs):
defaults = {'form_class': forms.FileField}

View File

@ -27,7 +27,7 @@ from django.utils.encoding import StrAndUnicode, smart_unicode, smart_str
from util import ErrorList, ValidationError
from widgets import TextInput, PasswordInput, HiddenInput, MultipleHiddenInput, FileInput, CheckboxInput, Select, NullBooleanSelect, SelectMultiple, DateTimeInput
from django.core.files.uploadedfile import SimpleUploadedFile as UploadedFile
__all__ = (
'Field', 'CharField', 'IntegerField',
@ -419,18 +419,6 @@ except ImportError:
# It's OK if Django settings aren't configured.
URL_VALIDATOR_USER_AGENT = 'Django (http://www.djangoproject.com/)'
class UploadedFile(StrAndUnicode):
"A wrapper for files uploaded in a FileField"
def __init__(self, filename, data):
self.filename = filename
self.data = data
def __unicode__(self):
"""
The unicode representation is the filename, so that the pre-database-insertion
logic can use UploadedFile objects
"""
return self.filename
class FileField(Field):
widget = FileInput
@ -460,23 +448,20 @@ class FileField(Field):
category = DeprecationWarning,
stacklevel = 2
)
data = UploadedFile(data['filename'], data['content'])
try:
file_name = data.file_name
file_size = data.file_size
file_name = data.name
file_size = data.size
except AttributeError:
try:
file_name = data.get('filename')
file_size = bool(data['content'])
except (AttributeError, KeyError):
raise ValidationError(self.error_messages['invalid'])
raise ValidationError(self.error_messages['invalid'])
if not file_name:
raise ValidationError(self.error_messages['invalid'])
if not file_size:
raise ValidationError(self.error_messages['empty'])
return UploadedFile(file_name, data)
return data
class ImageField(FileField):
default_error_messages = {

View File

@ -686,7 +686,7 @@ class FileUploadField(FormField):
if upload_errors:
raise validators.CriticalValidationError, upload_errors
try:
file_size = new_data.file_size
file_size = new_data.size
except AttributeError:
file_size = len(new_data['content'])
if not file_size:

View File

@ -1331,23 +1331,12 @@ given length.
* Validates that non-empty file data has been bound to the form.
* Error message keys: ``required``, ``invalid``, ``missing``, ``empty``
An ``UploadedFile`` object has two attributes:
====================== ====================================================
Attribute Description
====================== ====================================================
``filename`` The name of the file, provided by the uploading
client.
``content`` The array of bytes comprising the file content.
====================== ====================================================
The string representation of an ``UploadedFile`` is the same as the filename
attribute.
To learn more about the ``UploadedFile`` object, see the `file uploads documentation`_.
When you use a ``FileField`` in a form, you must also remember to
`bind the file data to the form`_.
.. _file uploads documentation: ../upload_handling/
.. _`bind the file data to the form`: `Binding uploaded files to a form`_
``FilePathField``

View File

@ -279,7 +279,7 @@ Default: ``''`` (Empty string)
The database backend to use. The build-in database backends are
``'postgresql_psycopg2'``, ``'postgresql'``, ``'mysql'``, ``'mysql_old'``,
``'sqlite3'``, ``'oracle'``, and ``'oracle'``.
``'sqlite3'``, and ``'oracle'``.
In the Django development version, you can use a database backend that doesn't
ship with Django by setting ``DATABASE_ENGINE`` to a fully-qualified path (i.e.

View File

@ -22,7 +22,7 @@ Consider a simple form containing a ``FileField``::
class UploadFileForm(forms.Form):
title = forms.CharField(max_length=50)
file = forms.FileField()
A view handling this form will receive the file data in ``request.FILES``, which
is a dictionary containing a key for each ``FileField`` (or ``ImageField``, or
other ``FileField`` subclass) in the form. So the data from the above form would
@ -64,32 +64,32 @@ methods to access the uploaded content:
``UploadedFile.read()``
Read the entire uploaded data from the file. Be careful with this
method: if the uploaded file is huge it can overwhelm your system if you
try to read it into memory. You'll probably want to use ``chunk()``
try to read it into memory. You'll probably want to use ``chunks()``
instead; see below.
``UploadedFile.multiple_chunks()``
Returns ``True`` if the uploaded file is big enough to require
reading in multiple chunks. By default this will be any file
larger than 2.5 megabytes, but that's configurable; see below.
``UploadedFile.chunk()``
A generator returning chunks of the file. If ``multiple_chunks()`` is
``True``, you should use this method in a loop instead of ``read()``.
In practice, it's often easiest simply to use ``chunks()`` all the time;
see the example below.
``UploadedFile.file_name``
The name of the uploaded file (e.g. ``my_file.txt``).
``UploadedFile.file_size``
The size, in bytes, of the uploaded file.
There are a few other methods and attributes available on ``UploadedFile``
objects; see `UploadedFile objects`_ for a complete reference.
Putting it all together, here's a common way you might handle an uploaded file::
def handle_uploaded_file(f):
destination = open('some/file/name.txt', 'wb')
for chunk in f.chunks():
@ -126,27 +126,27 @@ Three `settings`_ control Django's file upload behavior:
The maximum size, in bytes, for files that will be uploaded
into memory. Files larger than ``FILE_UPLOAD_MAX_MEMORY_SIZE``
will be streamed to disk.
Defaults to 2.5 megabytes.
``FILE_UPLOAD_TEMP_DIR``
The directory where uploaded files larger than ``FILE_UPLOAD_TEMP_DIR``
will be stored.
Defaults to your system's standard temporary directory (i.e. ``/tmp`` on
most Unix-like systems).
``FILE_UPLOAD_HANDLERS``
The actual handlers for uploaded files. Changing this setting
allows complete customization -- even replacement -- of
Django's upload process. See `upload handlers`_, below,
for details.
Defaults to::
("django.core.files.uploadhandler.MemoryFileUploadHandler",
"django.core.files.uploadhandler.TemporaryFileUploadHandler",)
Which means "try to upload to memory first, then fall back to temporary
files."
@ -161,35 +161,39 @@ All ``UploadedFile`` objects define the following methods/attributes:
Returns a byte string of length ``num_bytes``, or the complete file if
``num_bytes`` is ``None``.
``UploadedFile.chunk(self, chunk_size=None)``
``UploadedFile.chunks(self, chunk_size=None)``
A generator yielding small chunks from the file. If ``chunk_size`` isn't
given, chunks will be 64 kb.
given, chunks will be 64 KB.
``UploadedFile.multiple_chunks(self, chunk_size=None)``
Returns ``True`` if you can expect more than one chunk when calling
``UploadedFile.chunk(self, chunk_size)``.
``UploadedFile.chunks(self, chunk_size)``.
``UploadedFile.file_size``
The size, in bytes, of the uploaded file.
``UploadedFile.file_name``
The name of the uploaded file as provided by the user.
``UploadedFile.content_type``
The content-type header uploaded with the file (e.g. ``text/plain`` or
``application/pdf``). Like any data supplied by the user, you shouldn't
trust that the uploaded file is actually this type. You'll still need to
validate that the file contains the content that the content-type header
claims -- "trust but verify."
``UploadedFile.charset``
For ``text/*`` content-types, the character set (i.e. ``utf8``) supplied
by the browser. Again, "trust but verify" is the best policy here.
``UploadedFile.__iter__()``
Iterates over the lines in the file.
``UploadedFile.temporary_file_path()``
Only files uploaded onto disk will have this method; it returns the full
path to the temporary uploaded file.
Upload Handlers
===============

View File

@ -803,7 +803,7 @@ False
>>> f.is_valid()
True
>>> type(f.cleaned_data['file'])
<class 'django.newforms.fields.UploadedFile'>
<class 'django.core.files.uploadedfile.SimpleUploadedFile'>
>>> instance = f.save()
>>> instance.file
u'...test1.txt'
@ -814,7 +814,7 @@ u'...test1.txt'
>>> f.is_valid()
True
>>> type(f.cleaned_data['file'])
<class 'django.newforms.fields.UploadedFile'>
<class 'django.core.files.uploadedfile.SimpleUploadedFile'>
>>> instance = f.save()
>>> instance.file
u'...test1.txt'
@ -906,7 +906,7 @@ u'...test3.txt'
>>> f.is_valid()
True
>>> type(f.cleaned_data['image'])
<class 'django.newforms.fields.UploadedFile'>
<class 'django.core.files.uploadedfile.SimpleUploadedFile'>
>>> instance = f.save()
>>> instance.image
u'...test.png'
@ -918,7 +918,7 @@ u'...test.png'
>>> f.is_valid()
True
>>> type(f.cleaned_data['image'])
<class 'django.newforms.fields.UploadedFile'>
<class 'django.core.files.uploadedfile.SimpleUploadedFile'>
>>> instance = f.save()
>>> instance.image
u'...test.png'

View File

@ -15,7 +15,7 @@ def file_upload_view(request):
if isinstance(form_data.get('file_field'), UploadedFile) and isinstance(form_data['name'], unicode):
# If a file is posted, the dummy client should only post the file name,
# not the full path.
if os.path.dirname(form_data['file_field'].file_name) != '':
if os.path.dirname(form_data['file_field'].name) != '':
return HttpResponseServerError()
return HttpResponse('')
else:
@ -29,7 +29,7 @@ def file_upload_view_verify(request):
form_data.update(request.FILES)
# Check to see if unicode names worked out.
if not request.FILES['file_unicode'].file_name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'):
if not request.FILES['file_unicode'].name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'):
return HttpResponseServerError()
for key, value in form_data.items():
@ -51,7 +51,7 @@ def file_upload_echo(request):
"""
Simple view to echo back info about uploaded files for tests.
"""
r = dict([(k, f.file_name) for k, f in request.FILES.items()])
r = dict([(k, f.name) for k, f in request.FILES.items()])
return HttpResponse(simplejson.dumps(r))
def file_upload_quota(request):

View File

@ -800,10 +800,10 @@ Traceback (most recent call last):
ValidationError: [u'The submitted file is empty.']
>>> type(f.clean(SimpleUploadedFile('name', 'Some File Content')))
<class 'django.newforms.fields.UploadedFile'>
<class 'django.core.files.uploadedfile.SimpleUploadedFile'>
>>> type(f.clean(SimpleUploadedFile('name', 'Some File Content'), 'files/test4.pdf'))
<class 'django.newforms.fields.UploadedFile'>
<class 'django.core.files.uploadedfile.SimpleUploadedFile'>
# URLField ##################################################################