Fixed #2070: refactored Django's file upload capabilities.

A description of the new features can be found in the new [http://www.djangoproject.com/documentation/upload_handling/ upload handling documentation]; the executive summary is that Django will now happily handle uploads of large files without issues.

This changes the representation of uploaded files from dictionaries to bona fide objects; see BackwardsIncompatibleChanges for details.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@7814 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2008-07-01 15:10:51 +00:00
parent ef76102e89
commit d725cc9734
38 changed files with 2290 additions and 153 deletions

View File

@ -59,7 +59,7 @@ answer newbie questions, and generally made Django that much better:
Arthur <avandorp@gmail.com>
av0000@mail.ru
David Avsajanishvili <avsd05@gmail.com>
axiak@mit.edu
Mike Axiak <axiak@mit.edu>
Niran Babalola <niran@niran.org>
Morten Bagai <m@bagai.com>
Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
@ -141,7 +141,9 @@ answer newbie questions, and generally made Django that much better:
Marc Fargas <telenieko@telenieko.com>
Szilveszter Farkas <szilveszter.farkas@gmail.com>
favo@exoweb.net
fdr <drfarina@gmail.com>
Dmitri Fedortchenko <zeraien@gmail.com>
Jonathan Feignberg <jdf@pobox.com>
Liang Feng <hutuworm@gmail.com>
Bill Fenner <fenner@gmail.com>
Stefane Fermgier <sf@fermigier.com>

View File

@ -231,6 +231,21 @@ MEDIA_ROOT = ''
# Example: "http://media.lawrence.com"
MEDIA_URL = ''
# List of upload handler classes to be applied in order.
FILE_UPLOAD_HANDLERS = (
'django.core.files.uploadhandler.MemoryFileUploadHandler',
'django.core.files.uploadhandler.TemporaryFileUploadHandler',
)
# Maximum size, in bytes, of a request before it will be streamed to the
# file system instead of into memory.
FILE_UPLOAD_MAX_MEMORY_SIZE = 2621440 # i.e. 2.5 MB
# Directory in which upload streamed files will be temporarily saved. A value of
# `None` will make Django use the operating system's default temporary directory
# (i.e. "/tmp" on *nix systems).
FILE_UPLOAD_TEMP_DIR = None
# Default formatting for date objects. See all available format strings here:
# http://www.djangoproject.com/documentation/templates/#now
DATE_FORMAT = 'N j, Y'

View File

View File

@ -0,0 +1,66 @@
"""
Portable file locking utilities.
Based partially on example by Jonathan Feignberg <jdf@pobox.com> in the Python
Cookbook, licensed under the Python Software License.
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203
Example Usage::
>>> from django.core.files import locks
>>> f = open('./file', 'wb')
>>> locks.lock(f, locks.LOCK_EX)
>>> f.write('Django')
>>> f.close()
"""
__all__ = ('LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock')
# Name of the locking backend selected below ('nt' or 'posix'); it stays
# None when neither backend could be set up.
system_type = None

# Windows backend: needs the pywin32 extension modules.
try:
    import win32con
    import win32file
    import pywintypes
    LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
    LOCK_SH = 0
    LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
    __overlapped = pywintypes.OVERLAPPED()
    system_type = 'nt'
except (ImportError, AttributeError):
    pass

# POSIX backend. This is attempted even if the Windows setup succeeded, so
# it wins whenever ``fcntl`` is importable.
try:
    import fcntl
    LOCK_EX = fcntl.LOCK_EX
    LOCK_SH = fcntl.LOCK_SH
    LOCK_NB = fcntl.LOCK_NB
    system_type = 'posix'
except (ImportError, AttributeError):
    pass

if system_type == 'posix':
    def lock(file, flags):
        """Apply ``flags`` to ``file`` through ``fcntl.flock``."""
        fcntl.flock(file.fileno(), flags)

    def unlock(file):
        """Release the ``flock`` held on ``file``."""
        fcntl.flock(file.fileno(), fcntl.LOCK_UN)

elif system_type == 'nt':
    def lock(file, flags):
        """Lock ``file`` with ``flags`` through ``win32file.LockFileEx``."""
        handle = win32file._get_osfhandle(file.fileno())
        win32file.LockFileEx(handle, flags, 0, -0x10000, __overlapped)

    def unlock(file):
        """Release the lock on ``file`` through ``win32file.UnlockFileEx``."""
        handle = win32file._get_osfhandle(file.fileno())
        win32file.UnlockFileEx(handle, 0, -0x10000, __overlapped)

else:
    # File locking is not supported on this platform: the flag constants
    # are None and lock()/unlock() silently do nothing.
    LOCK_EX = LOCK_SH = LOCK_NB = None

    def lock(file, flags):
        """No-op placeholder used when no locking backend is available."""
        pass

    def unlock(file):
        """No-op placeholder used when no locking backend is available."""
        pass

59
django/core/files/move.py Normal file
View File

@ -0,0 +1,59 @@
"""
Move a file in the safest way possible::
>>> from django.core.files.move import file_move_safe
>>> file_move_safe("/tmp/old_file", "/tmp/new_file")
"""
import os
from django.core.files import locks
__all__ = ['file_move_safe']
try:
import shutil
file_move = shutil.move
except ImportError:
file_move = os.rename
def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
    """
    Moves a file from one location to another in the safest way possible.

    First, try the module-level ``file_move`` callable (``shutil.move`` when
    it could be imported, otherwise ``os.rename``). If that raises
    ``OSError``, stream the contents manually from one file to the other in
    pure Python while holding an exclusive lock on the destination, then
    remove the source file.

    ``chunk_size`` is the number of bytes read per iteration of the manual
    copy.

    If the destination file exists and ``allow_overwrite`` is ``False``, this
    function will throw an ``IOError``.
    """
    # There's no reason to move if we don't have to.
    if old_file_name == new_file_name:
        return

    # NOTE: this check and the move below are separate steps, so another
    # process may still create the destination in between.
    if not allow_overwrite and os.path.exists(new_file_name):
        raise IOError("Cannot overwrite existing file '%s'." % new_file_name)

    try:
        file_move(old_file_name, new_file_name)
        return
    except OSError:
        # This will happen with os.rename if moving to another filesystem
        pass

    # If the built-in didn't work, do it the hard way. The source is opened
    # first so that an unreadable source never truncates the destination,
    # and nested try/finally blocks guarantee the lock is released and both
    # files are closed even if the copy fails part-way.
    old_file = open(old_file_name, 'rb')
    try:
        new_file = open(new_file_name, 'wb')
        try:
            locks.lock(new_file, locks.LOCK_EX)
            try:
                current_chunk = old_file.read(chunk_size)
                while current_chunk:
                    new_file.write(current_chunk)
                    current_chunk = old_file.read(chunk_size)
            finally:
                locks.unlock(new_file)
        finally:
            new_file.close()
    finally:
        old_file.close()

    # Only reached when the copy completed without raising.
    os.remove(old_file_name)

View File

@ -0,0 +1,190 @@
"""
Classes representing uploaded files.
"""
import os
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile')
class UploadedFile(object):
    """
    An abstract uploaded file (``TemporaryUploadedFile`` and
    ``InMemoryUploadedFile`` are the built-in concrete subclasses).

    An ``UploadedFile`` object behaves somewhat like a file object and
    represents some file data that the user submitted with a form.
    """
    # Chunk size used by chunk() and multiple_chunks() when none is given.
    DEFAULT_CHUNK_SIZE = 64 * 2**10

    def __init__(self, file_name=None, content_type=None, file_size=None, charset=None):
        # Assigning file_name goes through the sanitizing property below.
        self.file_name = file_name
        self.file_size = file_size
        self.content_type = content_type
        self.charset = charset

    def __repr__(self):
        return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type)

    def _set_file_name(self, name):
        # Sanitize the file name so that it can't be dangerous.
        if name is not None:
            # Just use the basename of the file -- anything else is dangerous.
            name = os.path.basename(name)

            # File names longer than 255 characters can cause problems on older OSes.
            if len(name) > 255:
                # Trim the stem, not the extension, to fit the limit.
                name, ext = os.path.splitext(name)
                name = name[:255 - len(ext)] + ext

        self._file_name = name

    def _get_file_name(self):
        return self._file_name

    file_name = property(_get_file_name, _set_file_name)

    def chunk(self, chunk_size=None):
        """
        Read the file and yield chunks of ``chunk_size`` bytes (defaults to
        ``UploadedFile.DEFAULT_CHUNK_SIZE``).
        """
        if not chunk_size:
            chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE

        # Rewind when the concrete class supports it; otherwise assume the
        # pointer is already at zero.
        if hasattr(self, 'seek'):
            self.seek(0)

        # The number of reads is driven by the recorded ``file_size``.
        counter = self.file_size

        while counter > 0:
            yield self.read(chunk_size)
            counter -= chunk_size

    def multiple_chunks(self, chunk_size=None):
        """
        Returns ``True`` if you can expect multiple chunks, i.e. if the file
        is larger than ``chunk_size`` (defaults to
        ``UploadedFile.DEFAULT_CHUNK_SIZE``).

        NB: If a particular file representation is in memory, subclasses should
        always return ``False`` -- there's no good reason to read from memory in
        chunks.
        """
        if not chunk_size:
            chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
        # More than one chunk is needed only when the file does not fit in a
        # single chunk.
        return self.file_size > chunk_size

    # Abstract methods; subclasses *must* define read() and probably should
    # define open/close.
    def read(self, num_bytes=None):
        raise NotImplementedError()

    def open(self):
        pass

    def close(self):
        pass

    # Backwards-compatible support for uploaded-files-as-dictionaries.
    def __getitem__(self, key):
        """
        Deprecated dictionary-style access. ``'content'``, ``'filename'`` and
        ``'content-type'`` map to ``read()``, ``file_name`` and
        ``content_type``; any other key is looked up as an attribute. Always
        issues a ``DeprecationWarning``.
        """
        import warnings
        warnings.warn(
            message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.",
            category = DeprecationWarning,
            stacklevel = 2
        )

        if key == 'content':
            return self.read()
        elif key == 'filename':
            return self.file_name
        elif key == 'content-type':
            return self.content_type
        else:
            return getattr(self, key)
class TemporaryUploadedFile(UploadedFile):
    """
    An uploaded file whose data lives in a temporary file on disk
    (i.e. stream-to-disk).
    """
    def __init__(self, file, file_name, content_type, file_size, charset):
        super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset)
        # Keep the file object and remember its path (taken from its
        # ``name`` attribute).
        self.file = file
        self.path = file.name
        # Rewind so the first read starts at the beginning.
        self.file.seek(0)

    def seek(self, *args, **kwargs):
        # Delegates to the wrapped file; nothing is returned.
        self.file.seek(*args, **kwargs)

    def read(self, *args, **kwargs):
        return self.file.read(*args, **kwargs)

    def open(self):
        # "Opening" just rewinds the wrapped file.
        self.seek(0)

    def temporary_file_path(self):
        """
        Returns the path recorded for this file at construction time.
        """
        return self.path
class InMemoryUploadedFile(UploadedFile):
    """
    A file uploaded into memory (i.e. stream-to-memory).
    """
    def __init__(self, file, field_name, file_name, content_type, charset, file_size):
        # The base class expects (file_name, content_type, file_size, charset),
        # which is a different order from this constructor's own signature.
        super(InMemoryUploadedFile, self).__init__(file_name, content_type, file_size, charset)
        self.file = file
        self.field_name = field_name
        # Rewind so the first read starts at the beginning.
        self.file.seek(0)

    def seek(self, *args, **kwargs):
        self.file.seek(*args, **kwargs)

    def open(self):
        # "Opening" just rewinds the in-memory file.
        self.seek(0)

    def read(self, *args, **kwargs):
        return self.file.read(*args, **kwargs)

    def chunk(self, chunk_size=None):
        """
        Yield the whole content as a single chunk; ``chunk_size`` is ignored.
        """
        self.file.seek(0)
        yield self.read()

    def multiple_chunks(self, chunk_size=None):
        # Since it's in memory, we'll never have multiple chunks.
        return False
class SimpleUploadedFile(InMemoryUploadedFile):
    """
    A minimal uploaded file built directly from a name and its content.
    """
    def __init__(self, name, content, content_type='text/plain'):
        # The parent constructors are deliberately not called; every
        # attribute is set here. Falsy content is treated as an empty string.
        data = content or ''
        self.file = StringIO(data)
        self.file_name = name
        self.field_name = None
        self.file_size = len(data)
        self.content_type = content_type
        self.charset = None
        self.file.seek(0)

    def from_dict(cls, file_dict):
        """
        Alternate constructor: builds an instance from an old-style
        dictionary with the following keys:

           - filename
           - content-type (optional, defaults to 'text/plain')
           - content
        """
        file_name = file_dict['filename']
        content = file_dict['content']
        content_type = file_dict.get('content-type', 'text/plain')
        return cls(file_name, content, content_type)
    from_dict = classmethod(from_dict)

View File

@ -0,0 +1,235 @@
"""
Base file upload handler classes, and the built-in concrete subclasses
"""
import os
import tempfile
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile
__all__ = ['UploadFileException','StopUpload', 'SkipFile', 'FileUploadHandler',
'TemporaryFileUploadHandler', 'MemoryFileUploadHandler',
'load_handler']
class UploadFileException(Exception):
    """
    Base class for every error related to uploading files.
    """


class StopUpload(UploadFileException):
    """
    Raised when an upload must be aborted.
    """
    def __init__(self, connection_reset=False):
        """
        If ``connection_reset`` is ``True``, Django will halt the upload
        without consuming the rest of the upload. This will cause the browser
        to show a "connection reset" error.
        """
        self.connection_reset = connection_reset

    def __unicode__(self):
        if not self.connection_reset:
            return u'StopUpload: Consume request data, then halt.'
        return u'StopUpload: Halt current upload.'


class SkipFile(UploadFileException):
    """
    Raised by an upload handler that wants to skip a given file.
    """


class StopFutureHandlers(UploadFileException):
    """
    Upload handlers that have handled a file and do not want future handlers
    to run should raise this exception instead of returning None.
    """
class FileUploadHandler(object):
    """
    Base class for streaming upload handlers.
    """
    chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB.

    def __init__(self, request=None):
        self.request = request
        # Per-file state; filled in by new_file().
        self.file_name = None
        self.content_type = None
        self.content_length = None
        self.charset = None

    def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
        """
        Hook that is handed the raw input from the client. The base
        implementation does nothing and returns ``None``.

        Parameters:

            :input_data:
                An object that supports reading via .read().
            :META:
                ``request.META``.
            :content_length:
                The (integer) value of the Content-Length header from the
                client.
            :boundary:
                The boundary from the Content-Type header. Be sure to
                prepend two '--'.
            :encoding:
                Optional encoding name; defaults to ``None``.
        """
        pass

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """
        Signal that a new file has been started; the given values are
        recorded on the handler.

        Warning: As with any data from the client, you should not trust
        content_length (and sometimes won't even get it).
        """
        self.field_name = field_name
        self.file_name = file_name
        self.content_type = content_type
        self.content_length = content_length
        self.charset = charset

    def receive_data_chunk(self, raw_data, start):
        """
        Receive data from the streamed upload parser. ``start`` is the position
        in the file of the chunk.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def file_complete(self, file_size):
        """
        Signal that a file has completed. File size corresponds to the actual
        size accumulated by all the chunks.

        Must be implemented by subclasses, which should return a valid
        ``UploadedFile`` object.
        """
        raise NotImplementedError()

    def upload_complete(self):
        """
        Signal that the upload is complete. Subclasses should perform cleanup
        that is necessary for this handler.
        """
        pass
class TemporaryFileUploadHandler(FileUploadHandler):
    """
    Upload handler that streams data into a temporary file.
    """
    def __init__(self, *args, **kwargs):
        super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs)

    def new_file(self, file_name, *args, **kwargs):
        """
        Record the file's metadata and create the temporary file that
        incoming data will be appended to.
        """
        # All arguments are forwarded positionally to the base class
        # unchanged.
        super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs)
        temp_file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR)
        self.file = temp_file
        # Keep a direct reference to the write method for receive_data_chunk().
        self.write = temp_file.write

    def receive_data_chunk(self, raw_data, start):
        # Append the chunk to the temporary file; ``start`` is not used and
        # nothing is returned.
        self.write(raw_data)

    def file_complete(self, file_size):
        # Rewind, then wrap the temporary file in an uploaded-file object.
        self.file.seek(0)
        uploaded = TemporaryUploadedFile(self.file, self.file_name,
                                         self.content_type, file_size,
                                         self.charset)
        return uploaded
class MemoryFileUploadHandler(FileUploadHandler):
    """
    File upload handler to stream uploads into memory (used for small files).
    """

    def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
        """
        Use the content_length to signal whether or not this handler should be in use.
        """
        # If the post is larger than FILE_UPLOAD_MAX_MEMORY_SIZE we cannot
        # use the memory handler, so it deactivates itself.
        too_large = content_length > settings.FILE_UPLOAD_MAX_MEMORY_SIZE
        self.activated = not too_large

    def new_file(self, *args, **kwargs):
        """
        Record the file's metadata and, when activated, start an in-memory
        buffer and tell the parser not to run later handlers.
        """
        super(MemoryFileUploadHandler, self).new_file(*args, **kwargs)
        if not self.activated:
            return
        self.file = StringIO()
        raise StopFutureHandlers()

    def receive_data_chunk(self, raw_data, start):
        """
        Add the data to the StringIO file when activated; otherwise hand the
        chunk back untouched.
        """
        if not self.activated:
            return raw_data
        self.file.write(raw_data)

    def file_complete(self, file_size):
        """
        Return a file object if we're activated.
        """
        if self.activated:
            return InMemoryUploadedFile(self.file, self.field_name, self.file_name,
                                        self.content_type, self.charset, file_size)
class TemporaryFile(object):
    """
    A temporary file that tries to delete itself when garbage collected.

    The file is created with ``tempfile.mkstemp`` (suffix ``.upload``) in
    ``dir``, or in the default temporary directory when ``dir`` is empty.
    Attributes not defined on this wrapper are looked up on the underlying
    file object.
    """
    def __init__(self, dir):
        if not dir:
            dir = tempfile.gettempdir()
        try:
            (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
            self.file = os.fdopen(fd, 'w+b')
        except (OSError, IOError):
            raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?")
        self.name = name

    def __getattr__(self, name):
        # Only invoked for names missing on the wrapper. If the underlying
        # file was never created (failed __init__), report a normal
        # AttributeError instead of leaking a KeyError.
        try:
            wrapped = self.__dict__['file']
        except KeyError:
            raise AttributeError(name)
        a = getattr(wrapped, name)
        # Cache methods only. Plain values (e.g. ``closed``) can change over
        # the file's lifetime, so they must be re-read on every access.
        if callable(a):
            setattr(self, name, a)
        return a

    def __del__(self):
        # ``name`` is missing when __init__ failed; there is nothing to
        # remove in that case.
        name = self.__dict__.get('name')
        if name is None:
            return
        try:
            os.unlink(name)
        except OSError:
            # Best effort: the file may already have been moved or removed.
            pass
def load_handler(path, *args, **kwargs):
"""
Given a path to a handler, return an instance of that handler.
E.g.::
>>> load_handler('django.core.files.uploadhandler.TemporaryFileUploadHandler', request)
<TemporaryFileUploadHandler object at 0x...>
"""
i = path.rfind('.')
module, attr = path[:i], path[i+1:]
try:
mod = __import__(module, {}, {}, [attr])
except ImportError, e:
raise ImproperlyConfigured('Error importing upload handler module %s: "%s"' % (module, e))
except ValueError, e:
raise ImproperlyConfigured('Error importing upload handler module. Is FILE_UPLOAD_HANDLERS a correctly defined list or tuple?')
try:
cls = getattr(mod, attr)
except AttributeError:
raise ImproperlyConfigured('Module "%s" does not define a "%s" upload handler backend' % (module, attr))
return cls(*args, **kwargs)

View File

@ -53,7 +53,8 @@ class ModPythonRequest(http.HttpRequest):
def _load_post_and_files(self):
"Populates self._post and self._files"
if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
self._raw_post_data = ''
self._post, self._files = self.parse_file_upload(self.META, self._req)
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()

View File

@ -112,9 +112,8 @@ class WSGIRequest(http.HttpRequest):
# Populates self._post and self._files
if self.method == 'POST':
if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
self._raw_post_data = ''
self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
else:

View File

@ -19,6 +19,8 @@ from django.dispatch import dispatcher
from django.utils.datastructures import SortedDict
from django.utils.functional import curry
from django.utils.encoding import smart_str, force_unicode, smart_unicode
from django.core.files.move import file_move_safe
from django.core.files import locks
from django.conf import settings
try:
@ -469,16 +471,51 @@ class Model(object):
def _get_FIELD_size(self, field):
return os.path.getsize(self._get_FIELD_filename(field))
def _save_FIELD_file(self, field, filename, raw_contents, save=True):
def _save_FIELD_file(self, field, filename, raw_field, save=True):
directory = field.get_directory_name()
try: # Create the date-based directory if it doesn't exist.
os.makedirs(os.path.join(settings.MEDIA_ROOT, directory))
except OSError: # Directory probably already exists.
pass
#
# Check for old-style usage (files-as-dictionaries). Warn here first
# since there are multiple locations where we need to support both new
# and old usage.
#
if isinstance(raw_field, dict):
import warnings
warnings.warn(
message = "Representing uploaded files as dictionaries is"\
" deprected. Use django.core.files.SimpleUploadedFile"\
" instead.",
category = DeprecationWarning,
stacklevel = 2
)
from django.core.files.uploadedfile import SimpleUploadedFile
raw_field = SimpleUploadedFile.from_dict(raw_field)
elif isinstance(raw_field, basestring):
import warnings
warnings.warn(
message = "Representing uploaded files as strings is "\
" deprecated. Use django.core.files.SimpleUploadedFile "\
" instead.",
category = DeprecationWarning,
stacklevel = 2
)
from django.core.files.uploadedfile import SimpleUploadedFile
raw_field = SimpleUploadedFile(filename, raw_field)
if filename is None:
filename = raw_field.file_name
filename = field.get_filename(filename)
#
# If the filename already exists, keep adding an underscore to the name of
# the file until the filename doesn't exist.
#
while os.path.exists(os.path.join(settings.MEDIA_ROOT, filename)):
try:
dot_index = filename.rindex('.')
@ -486,14 +523,27 @@ class Model(object):
filename += '_'
else:
filename = filename[:dot_index] + '_' + filename[dot_index:]
#
# Save the file name on the object and write the file to disk
#
# Write the file to disk.
setattr(self, field.attname, filename)
full_filename = self._get_FIELD_filename(field)
fp = open(full_filename, 'wb')
fp.write(raw_contents)
fp.close()
if hasattr(raw_field, 'temporary_file_path'):
# This file has a file path that we can move.
raw_field.close()
file_move_safe(raw_field.temporary_file_path(), full_filename)
else:
# This is a normal uploadedfile that we can stream.
fp = open(full_filename, 'wb')
locks.lock(fp, locks.LOCK_EX)
for chunk in raw_field.chunk():
fp.write(chunk)
locks.unlock(fp)
fp.close()
# Save the width and/or height, if applicable.
if isinstance(field, ImageField) and (field.width_field or field.height_field):

View File

@ -811,7 +811,7 @@ class FileField(Field):
setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self))
setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self))
setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self))
setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save))
setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save))
dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls)
def delete_file(self, instance):
@ -834,9 +834,19 @@ class FileField(Field):
if new_data.get(upload_field_name, False):
func = getattr(new_object, 'save_%s_file' % self.name)
if rel:
func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save)
file = new_data[upload_field_name][0]
else:
func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save)
file = new_data[upload_field_name]
# Backwards-compatible support for files-as-dictionaries.
# We don't need to raise a warning because Model._save_FIELD_file will
# do so for us.
try:
file_name = file.file_name
except AttributeError:
file_name = file['filename']
func(file_name, file, save)
def get_directory_name(self):
return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
@ -849,7 +859,7 @@ class FileField(Field):
def save_form_data(self, instance, data):
from django.newforms.fields import UploadedFile
if data and isinstance(data, UploadedFile):
getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False)
getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False)
def formfield(self, **kwargs):
defaults = {'form_class': forms.FileField}

View File

@ -9,14 +9,15 @@ try:
except ImportError:
from cgi import parse_qsl
from django.utils.datastructures import MultiValueDict, FileDict
from django.utils.datastructures import MultiValueDict, ImmutableList
from django.utils.encoding import smart_str, iri_to_uri, force_unicode
from django.http.multipartparser import MultiPartParser
from django.conf import settings
from django.core.files import uploadhandler
from utils import *
RESERVED_CHARS="!*'();:@&=+$,/?%#[]"
class Http404(Exception):
pass
@ -25,6 +26,7 @@ class HttpRequest(object):
# The encoding used in GET/POST dicts. None means use default setting.
_encoding = None
_upload_handlers = []
def __init__(self):
self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
@ -102,39 +104,31 @@ class HttpRequest(object):
encoding = property(_get_encoding, _set_encoding)
def parse_file_upload(header_dict, post_data):
"""Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
import email, email.Message
from cgi import parse_header
raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()])
raw_message += '\r\n\r\n' + post_data
msg = email.message_from_string(raw_message)
POST = QueryDict('', mutable=True)
FILES = MultiValueDict()
for submessage in msg.get_payload():
if submessage and isinstance(submessage, email.Message.Message):
name_dict = parse_header(submessage['Content-Disposition'])[1]
# name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads
# or {'name': 'blah'} for POST fields
# We assume all uploaded files have a 'filename' set.
if 'filename' in name_dict:
assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported"
if not name_dict['filename'].strip():
continue
# IE submits the full path, so trim everything but the basename.
# (We can't use os.path.basename because that uses the server's
# directory separator, which may not be the same as the
# client's one.)
filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:]
FILES.appendlist(name_dict['name'], FileDict({
'filename': filename,
'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None,
'content': submessage.get_payload(),
}))
else:
POST.appendlist(name_dict['name'], submessage.get_payload())
return POST, FILES
def _initialize_handlers(self):
self._upload_handlers = [uploadhandler.load_handler(handler, self)
for handler in settings.FILE_UPLOAD_HANDLERS]
def _set_upload_handlers(self, upload_handlers):
if hasattr(self, '_files'):
raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
self._upload_handlers = upload_handlers
def _get_upload_handlers(self):
if not self._upload_handlers:
# If thre are no upload handlers defined, initialize them from settings.
self._initialize_handlers()
return self._upload_handlers
upload_handlers = property(_get_upload_handlers, _set_upload_handlers)
def parse_file_upload(self, META, post_data):
"""Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
self.upload_handlers = ImmutableList(
self.upload_handlers,
warning = "You cannot alter upload handlers after the upload has been processed."
)
parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
return parser.parse()
class QueryDict(MultiValueDict):
"""

View File

@ -0,0 +1,658 @@
"""
Multi-part parsing for file uploads.
Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to
file upload handlers for processing.
"""
import cgi
from django.conf import settings
from django.core.exceptions import SuspiciousOperation
from django.utils.datastructures import MultiValueDict
from django.utils.encoding import force_unicode
from django.utils.text import unescape_entities
from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers
__all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted')
class MultiPartParserError(Exception):
    """
    Raised by the multipart parser for invalid or undecodable input.
    """


class InputStreamExhausted(Exception):
    """
    No more reads are allowed from this device.
    """
RAW = "raw"
FILE = "file"
FIELD = "field"
class MultiPartParser(object):
"""
A rfc2388 multipart/form-data parser.
``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If
``file_upload_dir`` is defined files will be streamed to temporary files in
that directory.
"""
def __init__(self, META, input_data, upload_handlers, encoding=None):
"""
Initialize the MultiPartParser object.
:META:
The standard ``META`` dictionary in Django request objects.
:input_data:
The raw post data, as a bytestring.
:upload_handler:
An UploadHandler instance that performs operations on the uploaded
data.
:encoding:
The encoding with which to treat the incoming data.
"""
#
# Content-Type should containt multipart and the boundary information.
#
content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
if not content_type.startswith('multipart/'):
raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
# Parse the header to get the boundary to split the parts.
ctypes, opts = parse_header(content_type)
boundary = opts.get('boundary')
if not boundary or not cgi.valid_boundary(boundary):
raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
#
# Content-Length should contain the length of the body we are about
# to receive.
#
try:
content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
except (ValueError, TypeError):
# For now set it to 0; we'll try again later on down.
content_length = 0
if content_length <= 0:
# This means we shouldn't continue...raise an error.
raise MultiPartParserError("Invalid content length: %r" % content_length)
self._boundary = boundary
self._input_data = input_data
# For compatibility with low-level network APIs (with 32-bit integers),
# the chunk size should be < 2^31, but still divisible by 4.
self._chunk_size = min(2**31-4, *[x.chunk_size for x in upload_handlers if x.chunk_size])
self._meta = META
self._encoding = encoding or settings.DEFAULT_CHARSET
self._content_length = content_length
self._upload_handlers = upload_handlers
def parse(self):
"""
Parse the POST data and break it into a FILES MultiValueDict and a POST
MultiValueDict.
Returns a tuple containing the POST and FILES dictionary, respectively.
"""
# We have to import QueryDict down here to avoid a circular import.
from django.http import QueryDict
encoding = self._encoding
handlers = self._upload_handlers
limited_input_data = LimitBytes(self._input_data, self._content_length)
# See if the handler will want to take care of the parsing.
# This allows overriding everything if somebody wants it.
for handler in handlers:
result = handler.handle_raw_input(limited_input_data,
self._meta,
self._content_length,
self._boundary,
encoding)
if result is not None:
return result[0], result[1]
# Create the data structures to be used later.
self._post = QueryDict('', mutable=True)
self._files = MultiValueDict()
# Instantiate the parser and stream:
stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
# Whether or not to signal a file-completion at the beginning of the loop.
old_field_name = None
counters = [0] * len(handlers)
try:
for item_type, meta_data, field_stream in Parser(stream, self._boundary):
if old_field_name:
# We run this at the beginning of the next loop
# since we cannot be sure a file is complete until
# we hit the next boundary/part of the multipart content.
self.handle_file_complete(old_field_name, counters)
try:
disposition = meta_data['content-disposition'][1]
field_name = disposition['name'].strip()
except (KeyError, IndexError, AttributeError):
continue
transfer_encoding = meta_data.get('content-transfer-encoding')
field_name = force_unicode(field_name, encoding, errors='replace')
if item_type == FIELD:
# This is a post field, we can just set it in the post
if transfer_encoding == 'base64':
raw_data = field_stream.read()
try:
data = str(raw_data).decode('base64')
except:
data = raw_data
else:
data = field_stream.read()
self._post.appendlist(field_name,
force_unicode(data, encoding, errors='replace'))
elif item_type == FILE:
# This is a file, use the handler...
file_successful = True
file_name = disposition.get('filename')
if not file_name:
continue
file_name = force_unicode(file_name, encoding, errors='replace')
file_name = self.IE_sanitize(unescape_entities(file_name))
content_type = meta_data.get('content-type', ('',))[0].strip()
try:
charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
except:
charset = None
try:
content_length = int(meta_data.get('content-length')[0])
except (IndexError, TypeError, ValueError):
content_length = None
counters = [0] * len(handlers)
try:
for handler in handlers:
try:
handler.new_file(field_name, file_name,
content_type, content_length,
charset)
except StopFutureHandlers:
break
for chunk in field_stream:
if transfer_encoding == 'base64':
# We only special-case base64 transfer encoding
try:
chunk = str(chunk).decode('base64')
except Exception, e:
# Since this is only a chunk, any error is an unfixable error.
raise MultiPartParserError("Could not decode base64 data: %r" % e)
for i, handler in enumerate(handlers):
chunk_length = len(chunk)
chunk = handler.receive_data_chunk(chunk,
counters[i])
counters[i] += chunk_length
if chunk is None:
# If the chunk received by the handler is None, then don't continue.
break
except SkipFile, e:
file_successful = False
# Just use up the rest of this file...
exhaust(field_stream)
else:
# Handle file upload completions on next iteration.
old_field_name = field_name
else:
# If this is neither a FIELD or a FILE, just exhaust the stream.
exhaust(stream)
except StopUpload, e:
if not e.connection_reset:
exhaust(limited_input_data)
else:
# Make sure that the request data is all fed
exhaust(limited_input_data)
# Signal that the upload has completed.
for handler in handlers:
retval = handler.upload_complete()
if retval:
break
return self._post, self._files
def handle_file_complete(self, old_field_name, counters):
    """
    Tell the upload handlers that the file for ``old_field_name`` is done.

    Each handler's ``file_complete()`` is called, in order, with the byte
    count recorded for it in ``counters``. The first truthy return value is
    stored in ``self._files`` under the (unicode) field name, and the
    remaining handlers are not consulted.
    """
    for position, upload_handler in enumerate(self._upload_handlers):
        uploaded = upload_handler.file_complete(counters[position])
        if not uploaded:
            # This handler did not produce a file object; ask the next one.
            continue
        key = force_unicode(old_field_name, self._encoding, errors='replace')
        self._files.appendlist(key, uploaded)
        break
def IE_sanitize(self, filename):
    """
    Strip the directory part that Internet Explorer sends with file names.

    Everything up to and including the last backslash is dropped and the
    remainder is stripped of surrounding whitespace. Falsy values are
    returned unchanged.
    """
    if not filename:
        return filename
    last_separator = filename.rfind("\\")
    return filename[last_separator + 1:].strip()
class LazyStream(object):
    """
    The LazyStream wrapper allows one to get and "unget" bytes from a stream.

    Given a producer object (an iterator that yields bytestrings), the
    LazyStream object will support iteration, reading, and keeping a "look-back"
    variable in case you need to "unget" some bytes.
    """
    def __init__(self, producer, length=None):
        """
        Every LazyStream must have a producer when instantiated.

        A producer is an iterator whose ``next()`` method returns a string
        each time it is called. ``length``, when given, is the default number
        of bytes returned by ``read()`` when no size is passed.
        """
        self._producer = producer
        # Set by close(); once True the producer is never consulted again.
        self._empty = False
        self._leftover = ''
        self.length = length
        self._position = 0
        self._remaining = length

        # These fields are to do sanity checking to make sure we don't
        # have infinite loops getting/ungetting from the stream. The
        # purpose overall is to raise an exception if we perform lots
        # of stream get/unget gymnastics without getting
        # anywhere. Naturally this is not sound, but most probably
        # would indicate a bug if the exception is raised.

        # largest position tell us how far this lazystream has ever
        # been advanced
        self._largest_position = 0

        # "modifications since" will start at zero and increment every
        # time the position is modified but a new largest position is
        # not achieved.
        self._modifications_since = 0

    def tell(self):
        """Return the current position in the stream."""
        return self.position

    def read(self, size=None):
        """
        Return up to ``size`` bytes as a single string.

        When ``size`` is None the ``length`` given to the constructor is used;
        if that is None too, everything left in the stream is returned. Fewer
        bytes than requested are returned if the producer runs dry. Any
        surplus obtained from the producer is pushed back with unget().
        """
        def parts():
            if size is not None:
                remaining = size
            else:
                remaining = self._remaining
            # do the whole thing in one shot if no limit was provided.
            if remaining is None:
                yield ''.join(self)
                return

            # otherwise do some bookkeeping to return exactly enough
            # of the stream and stashing any extra content we get from
            # the producer
            while remaining != 0:
                assert remaining > 0, 'remaining bytes to read should never go negative'
                try:
                    chunk = self.next()
                except StopIteration:
                    # Producer exhausted: return what we have so far rather
                    # than letting StopIteration escape from the generator.
                    return
                emitting = chunk[:remaining]
                self.unget(chunk[remaining:])
                remaining -= len(emitting)
                yield emitting

        out = ''.join(parts())
        return out

    def next(self):
        """
        Used when the exact number of bytes to read is unimportant.

        This procedure just returns whatever chunk is conveniently returned
        from the iterator instead. Useful to avoid unnecessary bookkeeping if
        performance is an issue. Raises StopIteration when there is nothing
        left (including after close(), once the leftover bytes are drained).
        """
        if self._leftover:
            output = self._leftover
            self._leftover = ''
        elif self._empty:
            # close() was called; the replacement producer is a plain list
            # and has no next() method, so stop here explicitly.
            raise StopIteration()
        else:
            output = self._producer.next()
        self.position += len(output)
        return output

    def close(self):
        """
        Used to invalidate/disable this lazy stream.

        Replaces the producer with an empty list. Any leftover bytes that have
        already been read will still be reported upon read() and/or next().
        """
        self._producer = []
        self._empty = True

    def __iter__(self):
        return self

    def unget(self, bytes):
        """
        Places bytes back onto the front of the lazy stream.

        Future calls to read() will return those bytes first. The
        stream position and thus tell() will be rewound.
        """
        self.position -= len(bytes)
        self._leftover = ''.join([bytes, self._leftover])

    def _set_position(self, value):
        # Every assignment to ``position`` funnels through here so that a
        # parser that keeps getting/ungetting without making progress is
        # detected: more than 500 moves without a new high-water mark aborts.
        if value > self._largest_position:
            self._modifications_since = 0
            self._largest_position = value
        else:
            self._modifications_since += 1
            if self._modifications_since > 500:
                raise SuspiciousOperation(
                    "The multipart parser got stuck, which shouldn't happen with"
                    " normal uploaded files. Check for malicious upload activity;"
                    " if there is none, report this to the Django developers."
                )

        self._position = value

    position = property(lambda self: self._position, _set_position)
class ChunkIter(object):
"""
An iterable that will yield chunks of data. Given a file-like object as the
constructor, this object will yield chunks of read operations from that
object.
"""
def __init__(self, flo, chunk_size=64 * 1024):
self.flo = flo
self.chunk_size = chunk_size
def next(self):
try:
data = self.flo.read(self.chunk_size)
except InputStreamExhausted:
raise StopIteration()
if data:
return data
else:
raise StopIteration()
def __iter__(self):
return self
class LimitBytes(object):
    """ Wrap a file object so that at most ``length`` bytes are requested from it. """
    def __init__(self, fileobject, length):
        self._file = fileobject
        self.remaining = length

    def read(self, num_bytes=None):
        """
        Read from the wrapped file, never asking for more than what remains.

        With no argument the whole remaining budget is requested. Once the
        budget is used up, InputStreamExhausted is raised.
        """
        budget = self.remaining
        if budget <= 0:
            raise InputStreamExhausted()
        if num_bytes is None or num_bytes > budget:
            num_bytes = budget
        self.remaining = budget - num_bytes
        return self._file.read(num_bytes)
class InterBoundaryIter(object):
"""
A Producer that will iterate over boundaries.
"""
def __init__(self, stream, boundary):
self._stream = stream
self._boundary = boundary
def __iter__(self):
return self
def next(self):
try:
return LazyStream(BoundaryIter(self._stream, self._boundary))
except InputStreamExhausted:
raise StopIteration()
class BoundaryIter(object):
"""
A Producer that is sensitive to boundaries.
Will happily yield bytes until a boundary is found. Will yield the bytes
before the boundary, throw away the boundary bytes themselves, and push the
post-boundary bytes back on the stream.
The future calls to .next() after locating the boundary will raise a
StopIteration exception.
"""
def __init__(self, stream, boundary):
# ``stream`` must support read(), unget() and iteration (all are used
# below); ``boundary`` is the separator string searched for in the data.
self._stream = stream
self._boundary = boundary
self._done = False
# rollback an additional six bytes because the format is like
# this: CRLF<boundary>[--CRLF]
self._rollback = len(boundary) + 6
# Peek at one byte to find out whether the stream has any data left.
# If it does, push the byte back; if not, refuse to build the iterator.
unused_char = self._stream.read(1)
if not unused_char:
raise InputStreamExhausted()
self._stream.unget(unused_char)
# Try to use mx fast string search if available. Otherwise
# use Python find. Wrap the latter for consistency.
try:
from mx.TextTools import FS
self._fs = FS(boundary).find
except ImportError:
self._fs = lambda data: data.find(boundary)
def __iter__(self):
return self
def next(self):
# Returns the next piece of data that precedes the boundary, or raises
# StopIteration once the boundary has been seen or the stream ran dry.
if self._done:
raise StopIteration()
stream = self._stream
rollback = self._rollback
bytes_read = 0
chunks = []
# Gather chunks until more than ``rollback`` bytes are buffered (or a
# falsy chunk shows up).
for bytes in stream:
bytes_read += len(bytes)
chunks.append(bytes)
if bytes_read > rollback:
break
if not bytes:
break
# NOTE(review): indentation is lost in this view; this ``else`` is
# presumably the for-loop's else (stream exhausted without a break).
else:
self._done = True
if not chunks:
raise StopIteration()
chunk = ''.join(chunks)
boundary = self._find_boundary(chunk, len(chunk) < self._rollback)
if boundary:
# Boundary found: hand everything after it back to the stream and
# return only the data in front of it. This iterator is then finished.
end, next = boundary
stream.unget(chunk[next:])
self._done = True
return chunk[:end]
else:
# make sure we dont treat a partial boundary (and
# its separators) as data
if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6):
# There's nothing left, we should just return and mark as done.
self._done = True
return chunk
else:
# Keep the last ``rollback`` bytes for the next call, since they
# might hold the beginning of a boundary.
stream.unget(chunk[-rollback:])
return chunk[:-rollback]
def _find_boundary(self, data, eof = False):
"""
Finds a multipart boundary in data.
Should no boundary exist in the data None is returned instead. Otherwise
a tuple containing the indices of the following are returned:
* the end of current encapsulation
* the start of the next encapsulation
"""
# NOTE: ``eof`` is accepted but not used anywhere in this method.
index = self._fs(data)
if index < 0:
return None
else:
end = index
next = index + len(self._boundary)
# Only referenced by the commented-out code further down.
data_len = len(data) - 1
# backup over CRLF
if data[max(0,end-1)] == '\n':
end -= 1
if data[max(0,end-1)] == '\r':
end -= 1
# skip over --CRLF
#if data[min(data_len,next)] == '-':
# next += 1
#if data[min(data_len,next)] == '-':
# next += 1
#if data[min(data_len,next)] == '\r':
# next += 1
#if data[min(data_len,next)] == '\n':
# next += 1
return end, next
def exhaust(stream_or_iterable):
    """
    Completely exhausts an iterator or stream.

    An iterable is consumed directly. Anything else is treated as a file-like
    object and read to the end in 16 KB chunks.

    Raise a MultiPartParserError if the argument is not a stream or an iterable.
    """
    try:
        iterator = iter(stream_or_iterable)
    except TypeError:
        # Not iterable: it has to be readable instead. Check up front so the
        # caller gets the documented error rather than an AttributeError from
        # deep inside ChunkIter.
        if not hasattr(stream_or_iterable, 'read'):
            raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')
        iterator = ChunkIter(stream_or_iterable, 16384)

    for __ in iterator:
        pass
def parse_boundary_stream(stream, max_header_size):
    """
    Parses one and exactly one stream that encapsulates a boundary.

    Reads at most ``max_header_size`` bytes from ``stream`` looking for the
    header block, which is terminated by an empty line (CRLF CRLF).

    Returns a ``(type, headers, stream)`` tuple. ``type`` is FILE when a
    content-disposition header carries a filename, FIELD when it does not,
    and RAW when no usable header was found. ``headers`` maps each
    lower-cased header name to a ``(value, params)`` pair. For FIELD and FILE
    the stream is left positioned at the start of the payload; for RAW every
    byte that was read is pushed back so the stream is passed on verbatim.
    """
    # Stream at beginning of header, look for end of header
    # and parse it if found. The header must fit within one
    # chunk.
    chunk = stream.read(max_header_size)

    # 'find' returns the top of these four bytes, so we'll
    # need to munch them later to prevent them from polluting
    # the payload.
    header_end = chunk.find('\r\n\r\n')

    def _parse_header(line):
        main_value_pair, params = parse_header(line)
        try:
            name, value = main_value_pair.split(':', 1)
        except ValueError:
            # No colon on the line, so it is not a "name: value" header.
            raise ValueError("Invalid header: %r" % line)
        return name, (value, params)

    if header_end == -1:
        # we find no header, so we just mark this fact and pass on
        # the stream verbatim
        stream.unget(chunk)
        return (RAW, {}, stream)

    header = chunk[:header_end]

    # here we place any excess chunk back onto the stream, as
    # well as throwing away the CRLFCRLF bytes from above.
    stream.unget(chunk[header_end + 4:])

    TYPE = RAW
    outdict = {}

    # Lines that are not valid headers (blank lines included) are skipped.
    for line in header.split('\r\n'):
        # This terminology ("main value" and "dictionary of
        # parameters") is from the Python docs.
        try:
            name, (value, params) = _parse_header(line)
        except ValueError:
            continue

        if name == 'content-disposition':
            TYPE = FIELD
            if params.get('filename'):
                TYPE = FILE

        outdict[name] = value, params

    if TYPE == RAW:
        # The payload after the header was already pushed back above, so only
        # the header and its CRLFCRLF terminator are restored here. Pushing
        # back the whole chunk would duplicate the payload bytes.
        stream.unget(chunk[:header_end + 4])

    return (TYPE, outdict, stream)
class Parser(object):
    """
    Splits a multipart stream on its boundary and parses each part's header.

    Iterating yields whatever parse_boundary_stream() returns for each part,
    allowing at most 1024 bytes for a part's header.
    """
    def __init__(self, stream, boundary):
        self._stream = stream
        # Boundaries appear in the body prefixed with two dashes.
        self._separator = '--' + boundary

    def __iter__(self):
        parts = InterBoundaryIter(self._stream, self._separator)
        for part_stream in parts:
            yield parse_boundary_stream(part_stream, 1024)
def parse_header(line):
    """
    Split a header line into its main value and a dict of parameters.

    The main value (everything before the first unquoted ';') is returned
    lower-cased. Parameters of the form ``name=value`` are collected with
    lower-cased names; a value wrapped in double quotes is unquoted and its
    backslash escapes for ``\\`` and ``"`` are resolved. Segments without an
    '=' are ignored.
    """
    segments = _parse_header_params(';' + line)
    main_value = segments[0].lower()
    params = {}
    for segment in segments[1:]:
        if '=' not in segment:
            continue
        raw_name, raw_value = segment.split('=', 1)
        name = raw_name.strip().lower()
        value = raw_value.strip()
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        params[name] = value
    return main_value, params

def _parse_header_params(s):
    """
    Break ``s`` into its ';'-separated segments, honouring double quotes.

    ``s`` is expected to start with ';'. Each segment is returned stripped of
    surrounding whitespace.
    """
    pieces = []
    rest = s
    while rest.startswith(';'):
        rest = rest[1:]
        cut = rest.find(';')
        # A ';' preceded by an odd number of quotes sits inside a quoted
        # string, so keep looking for the next one.
        while cut > 0 and rest.count('"', 0, cut) % 2 == 1:
            cut = rest.find(';', cut + 1)
        if cut < 0:
            cut = len(rest)
        pieces.append(rest[:cut].strip())
        rest = rest[cut:]
    return pieces

View File

@ -7,6 +7,11 @@ import datetime
import os
import re
import time
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
# Python 2.3 fallbacks
try:
from decimal import Decimal, DecimalException
@ -416,9 +421,9 @@ except ImportError:
class UploadedFile(StrAndUnicode):
"A wrapper for files uploaded in a FileField"
def __init__(self, filename, content):
def __init__(self, filename, data):
self.filename = filename
self.content = content
self.data = data
def __unicode__(self):
"""
@ -444,15 +449,34 @@ class FileField(Field):
return None
elif not data and initial:
return initial
if isinstance(data, dict):
# We warn once, then support both ways below.
import warnings
warnings.warn(
message = "Representing uploaded files as dictionaries is"\
" deprecated. Use django.core.files.SimpleUploadedFile "\
" instead.",
category = DeprecationWarning,
stacklevel = 2
)
try:
f = UploadedFile(data['filename'], data['content'])
except TypeError:
file_name = data.file_name
file_size = data.file_size
except AttributeError:
try:
file_name = data.get('filename')
file_size = bool(data['content'])
except (AttributeError, KeyError):
raise ValidationError(self.error_messages['invalid'])
if not file_name:
raise ValidationError(self.error_messages['invalid'])
except KeyError:
raise ValidationError(self.error_messages['missing'])
if not f.content:
if not file_size:
raise ValidationError(self.error_messages['empty'])
return f
return UploadedFile(file_name, data)
class ImageField(FileField):
default_error_messages = {
@ -470,15 +494,31 @@ class ImageField(FileField):
elif not data and initial:
return initial
from PIL import Image
from cStringIO import StringIO
# We need to get a file object for PIL. We might have a path or we might
# have to read the data into memory.
if hasattr(data, 'temporary_file_path'):
file = data.temporary_file_path()
else:
if hasattr(data, 'read'):
file = StringIO(data.read())
else:
file = StringIO(data['content'])
try:
# load() is the only method that can spot a truncated JPEG,
# but it cannot be called sanely after verify()
trial_image = Image.open(StringIO(f.content))
trial_image = Image.open(file)
trial_image.load()
# Since we're about to use the file again we have to reset the
# file object if possible.
if hasattr(file, 'reset'):
file.reset()
# verify() is the only method that can spot a corrupt PNG,
# but it must be called immediately after the constructor
trial_image = Image.open(StringIO(f.content))
trial_image = Image.open(file)
trial_image.verify()
except Exception: # Python Imaging Library doesn't recognize it as an image
raise ValidationError(self.error_messages['invalid_image'])

View File

@ -680,18 +680,27 @@ class FileUploadField(FormField):
self.field_name, self.is_required = field_name, is_required
self.validator_list = [self.isNonEmptyFile] + validator_list
def isNonEmptyFile(self, field_data, all_data):
def isNonEmptyFile(self, new_data, all_data):
if hasattr(new_data, 'upload_errors'):
upload_errors = new_data.upload_errors()
if upload_errors:
raise validators.CriticalValidationError, upload_errors
try:
content = field_data['content']
except TypeError:
raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.")
if not content:
file_size = new_data.file_size
except AttributeError:
file_size = len(new_data['content'])
if not file_size:
raise validators.CriticalValidationError, ugettext("The submitted file is empty.")
def render(self, data):
return mark_safe(u'<input type="file" id="%s" class="v%s" name="%s" />' % \
(self.get_id(), self.__class__.__name__, self.field_name))
def prepare(self, new_data):
if hasattr(new_data, 'upload_errors'):
upload_errors = new_data.upload_errors()
new_data[self.field_name] = { '_file_upload_error': upload_errors }
def html2python(data):
if data is None:
raise EmptyValue

View File

@ -1,7 +1,10 @@
import urllib
import sys
import os
from cStringIO import StringIO
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.conf import settings
from django.contrib.auth import authenticate, login
from django.core.handlers.base import BaseHandler
@ -19,6 +22,25 @@ from django.utils.itercompat import is_iterable
BOUNDARY = 'BoUnDaRyStRiNg'
MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY
class FakePayload(object):
    """
    Read-only stand-in for the request body used by the test client.

    It wraps the content in a StringIO but only exposes read(), and refuses
    to hand out more bytes than the content holds, since data coming from the
    network can neither be seeked nor read past its content length. This
    keeps views from doing things under the test client that would not work
    in Real Life.
    """
    def __init__(self, content):
        self.__content = StringIO(content)
        self.__len = len(content)

    def read(self, num_bytes=None):
        available = self.__len
        if num_bytes is None:
            # Asking for "everything" when nothing is left must still trip
            # the assertion below, hence the fallback to 1.
            num_bytes = available or 1
        assert available >= num_bytes, "Cannot read more than the available bytes from the HTTP incoming data."
        data = self.__content.read(num_bytes)
        self.__len = available - num_bytes
        return data
class ClientHandler(BaseHandler):
"""
A HTTP Handler that can be used for testing purposes.
@ -236,7 +258,7 @@ class Client:
'CONTENT_TYPE': content_type,
'PATH_INFO': urllib.unquote(path),
'REQUEST_METHOD': 'POST',
'wsgi.input': StringIO(post_data),
'wsgi.input': FakePayload(post_data),
}
r.update(extra)

View File

@ -332,17 +332,49 @@ class DotExpandedDict(dict):
except TypeError: # Special-case if current isn't a dict.
current = {bits[-1]: v}
class FileDict(dict):
class ImmutableList(tuple):
"""
A dictionary used to hold uploaded file contents. The only special feature
here is that repr() of this object won't dump the entire contents of the
file to the output. A handy safeguard for a large file upload.
A tuple-like object that raises useful errors when it is asked to mutate.
Example::
>>> a = ImmutableList(range(5), warning="You cannot mutate this.")
>>> a[3] = '4'
Traceback (most recent call last):
...
AttributeError: You cannot mutate this.
"""
def __repr__(self):
if 'content' in self:
d = dict(self, content='<omitted>')
return dict.__repr__(d)
return dict.__repr__(self)
def __new__(cls, *args, **kwargs):
if 'warning' in kwargs:
warning = kwargs['warning']
del kwargs['warning']
else:
warning = 'ImmutableList object is immutable.'
self = tuple.__new__(cls, *args, **kwargs)
self.warning = warning
return self
def complain(self, *wargs, **kwargs):
if isinstance(self.warning, Exception):
raise self.warning
else:
raise AttributeError, self.warning
# All list mutation functions complain.
__delitem__ = complain
__delslice__ = complain
__iadd__ = complain
__imul__ = complain
__setitem__ = complain
__setslice__ = complain
append = complain
extend = complain
insert = complain
pop = complain
remove = complain
sort = complain
reverse = complain
class DictWrapper(dict):
"""

View File

@ -3,6 +3,7 @@ from django.conf import settings
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy
from django.utils.translation import ugettext_lazy
from htmlentitydefs import name2codepoint
# Capitalizes the first letter of a string.
capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
@ -222,3 +223,26 @@ def smart_split(text):
yield bit
smart_split = allow_lazy(smart_split, unicode)
def _replace_entity(match):
    """
    Return the character for the HTML entity captured by ``match``.

    ``match.group(1)`` is the entity body without the surrounding '&' and
    ';'. Numeric references ('#65', '#x41') are converted from their decimal
    or hexadecimal code point; anything else is looked up by name in
    ``name2codepoint``. If the entity cannot be resolved, the original text
    (``match.group(0)``) is returned unchanged.
    """
    text = match.group(1)
    if text[0] == u'#':
        text = text[1:]
        try:
            if text[0] in u'xX':
                c = int(text[1:], 16)
            else:
                c = int(text)
            return unichr(c)
        except (ValueError, OverflowError):
            # ValueError: not a number, or a code point out of range.
            # OverflowError: a number too large to convert at all; this
            # input comes from the client, so it must not crash the request.
            return match.group(0)
    else:
        try:
            return unichr(name2codepoint[text])
        except (ValueError, KeyError):
            return match.group(0)
# Matches an entity reference such as &amp;, &#65; or &#x41; and captures
# everything between the '&' and the ';'.
_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
# Replace every entity reference in ``text`` with the result of
# _replace_entity (unresolvable references are left as they are).
def unescape_entities(text):
return _entity_re.sub(_replace_entity, text)
# NOTE(review): presumably allow_lazy makes the function accept lazy
# (translation) strings, with unicode as the result type -- confirm against
# django.utils.functional.
unescape_entities = allow_lazy(unescape_entities, unicode)

View File

@ -805,12 +805,12 @@ ContactForm to include an ``ImageField`` called ``mugshot``, we
need to bind the file data containing the mugshot image::
# Bound form with an image field
>>> from django.core.files.uploadedfile import SimpleUploadedFile
>>> data = {'subject': 'hello',
... 'message': 'Hi there',
... 'sender': 'foo@example.com',
... 'cc_myself': True}
>>> file_data = {'mugshot': {'filename':'face.jpg'
... 'content': <file data>}}
>>> file_data = {'mugshot': SimpleUploadedFile('face.jpg', <file data>)}
>>> f = ContactFormWithMugshot(data, file_data)
In practice, you will usually specify ``request.FILES`` as the source

View File

@ -80,20 +80,37 @@ All attributes except ``session`` should be considered read-only.
strings.
``FILES``
.. admonition:: Changed in Django development version
In previous versions of Django, ``request.FILES`` contained
simple ``dict`` objects representing uploaded files. This is
no longer true -- files are represented by ``UploadedFile``
objects as described below.
These ``UploadedFile`` objects will emulate the old-style ``dict``
interface, but this is deprecated and will be removed in the next
release of Django.
A dictionary-like object containing all uploaded files. Each key in
``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
value in ``FILES`` is a standard Python dictionary with the following three
keys:
value in ``FILES`` is an ``UploadedFile`` object containing the following
attributes:
* ``filename`` -- The name of the uploaded file, as a Python string.
* ``content-type`` -- The content type of the uploaded file.
* ``content`` -- The raw content of the uploaded file.
* ``read(num_bytes=None)`` -- Read a number of bytes from the file.
* ``file_name`` -- The name of the uploaded file.
* ``file_size`` -- The size, in bytes, of the uploaded file.
* ``chunk()`` -- A generator that yields sequential chunks of data.
See `File Uploads`_ for more information.
Note that ``FILES`` will only contain data if the request method was POST
and the ``<form>`` that posted to the request had
``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank
dictionary-like object.
.. _File Uploads: ../upload_handling/
``META``
A standard Python dictionary containing all available HTTP headers.
Available headers depend on the client and server, but here are some

View File

@ -279,7 +279,7 @@ Default: ``''`` (Empty string)
The database backend to use. The built-in database backends are
``'postgresql_psycopg2'``, ``'postgresql'``, ``'mysql'``, ``'mysql_old'``,
``'sqlite3'`` and ``'oracle'``.
``'sqlite3'`` and ``'oracle'``.
In the Django development version, you can use a database backend that doesn't
ship with Django by setting ``DATABASE_ENGINE`` to a fully-qualified path (i.e.
@ -530,6 +530,43 @@ Default: ``'utf-8'``
The character encoding used to decode any files read from disk. This includes
template files and initial SQL data files.
FILE_UPLOAD_HANDLERS
--------------------
**New in Django development version**
Default::
("django.core.files.uploadhandler.MemoryFileUploadHandler",
"django.core.files.uploadhandler.TemporaryFileUploadHandler",)
A tuple of handlers to use for uploading. See `file uploads`_ for details.
.. _file uploads: ../upload_handling/
FILE_UPLOAD_MAX_MEMORY_SIZE
---------------------------
**New in Django development version**
Default: ``2621440`` (i.e. 2.5 MB).
The maximum size (in bytes) that an upload will be before it gets streamed to
the file system. See `file uploads`_ for details.
FILE_UPLOAD_TEMP_DIR
--------------------
**New in Django development version**
Default: ``None``
The directory to store data temporarily while uploading files. If ``None``,
Django will use the standard temporary directory for the operating system. For
example, this will default to '/tmp' on *nix-style operating systems.
See `file uploads`_ for details.
FIXTURE_DIRS
-------------

346
docs/upload_handling.txt Normal file
View File

@ -0,0 +1,346 @@
============
File Uploads
============
**New in Django development version**
Most Web sites wouldn't be complete without a way to upload files. When Django
handles a file upload, the file data ends up placed in ``request.FILES`` (for
more on the ``request`` object see the documentation for `request and response
objects`_). This document explains how files are stored on disk and in memory,
and how to customize the default behavior.
.. _request and response objects: ../request_response/#attributes
Basic file uploads
==================
Consider a simple form containing a ``FileField``::
from django import newforms as forms
class UploadFileForm(forms.Form):
title = forms.CharField(max_length=50)
file = forms.FileField()
A view handling this form will receive the file data in ``request.FILES``, which
is a dictionary containing a key for each ``FileField`` (or ``ImageField``, or
other ``FileField`` subclass) in the form. So the data from the above form would
be accessible as ``request.FILES['file']``.
Most of the time, you'll simply pass the file data from ``request`` into the
form as described in `binding uploaded files to a form`_. This would look
something like::
from django.http import HttpResponseRedirect
from django.shortcuts import render_to_response
# Imaginary function to handle an uploaded file.
from somewhere import handle_uploaded_file
def upload_file(request):
if request.method == 'POST':
form = UploadFileForm(request.POST, request.FILES)
if form.is_valid():
handle_uploaded_file(request.FILES['file'])
return HttpResponseRedirect('/success/url/')
else:
form = UploadFileForm()
return render_to_response('upload.html', {'form': form})
.. _binding uploaded files to a form: ../newforms/#binding-uploaded-files-to-a-form
Notice that we have to pass ``request.FILES`` into the form's constructor; this
is how file data gets bound into a form.
Handling uploaded files
-----------------------
The final piece of the puzzle is handling the actual file data from
``request.FILES``. Each entry in this dictionary is an ``UploadedFile`` object
-- a simple wrapper around an uploaded file. You'll usually use one of these
methods to access the uploaded content:
``UploadedFile.read()``
Read the entire uploaded data from the file. Be careful with this
method: if the uploaded file is huge it can overwhelm your system if you
try to read it into memory. You'll probably want to use ``chunk()``
instead; see below.
``UploadedFile.multiple_chunks()``
Returns ``True`` if the uploaded file is big enough to require
reading in multiple chunks. By default this will be any file
larger than 2.5 megabytes, but that's configurable; see below.
``UploadedFile.chunks()``
A generator returning chunks of the file. If ``multiple_chunks()`` is
``True``, you should use this method in a loop instead of ``read()``.
In practice, it's often easiest simply to use ``chunks()`` all the time;
see the example below.
``UploadedFile.file_name``
The name of the uploaded file (e.g. ``my_file.txt``).
``UploadedFile.file_size``
The size, in bytes, of the uploaded file.
There are a few other methods and attributes available on ``UploadedFile``
objects; see `UploadedFile objects`_ for a complete reference.
Putting it all together, here's a common way you might handle an uploaded file::
def handle_uploaded_file(f):
destination = open('some/file/name.txt', 'wb')
for chunk in f.chunks():
destination.write(chunk)
Looping over ``UploadedFile.chunks()`` instead of using ``read()`` ensures that
large files don't overwhelm your system's memory.
Where uploaded data is stored
-----------------------------
Before you save uploaded files, the data needs to be stored somewhere.
By default, if an uploaded file is smaller than 2.5 megabytes, Django will hold
the entire contents of the upload in memory. This means that saving the file
involves only a read from memory and a write to disk and thus is very fast.
However, if an uploaded file is too large, Django will write the uploaded file
to a temporary file stored in your system's temporary directory. On a Unix-like
platform this means you can expect Django to generate a file called something
like ``/tmp/tmpzfp6I6.upload``. If an upload is large enough, you can watch this
file grow in size as Django streams the data onto disk.
These specifics -- 2.5 megabytes; ``/tmp``; etc. -- are simply "reasonable
defaults". Read on for details on how you can customize or completely replace
upload behavior.
Changing upload handler behavior
--------------------------------
Three `settings`_ control Django's file upload behavior:
``FILE_UPLOAD_MAX_MEMORY_SIZE``
The maximum size, in bytes, for files that will be uploaded
into memory. Files larger than ``FILE_UPLOAD_MAX_MEMORY_SIZE``
will be streamed to disk.
Defaults to 2.5 megabytes.
``FILE_UPLOAD_TEMP_DIR``
The directory where uploaded files larger than ``FILE_UPLOAD_MAX_MEMORY_SIZE``
will be stored.
Defaults to your system's standard temporary directory (i.e. ``/tmp`` on
most Unix-like systems).
``FILE_UPLOAD_HANDLERS``
The actual handlers for uploaded files. Changing this setting
allows complete customization -- even replacement -- of
Django's upload process. See `upload handlers`_, below,
for details.
Defaults to::
("django.core.files.uploadhandler.MemoryFileUploadHandler",
"django.core.files.uploadhandler.TemporaryFileUploadHandler",)
Which means "try to upload to memory first, then fall back to temporary
files."
.. _settings: ../settings/
``UploadedFile`` objects
========================
All ``UploadedFile`` objects define the following methods/attributes:
``UploadedFile.read(self, num_bytes=None)``
Returns a byte string of length ``num_bytes``, or the complete file if
``num_bytes`` is ``None``.
``UploadedFile.chunk(self, chunk_size=None)``
A generator yielding small chunks from the file. If ``chunk_size`` isn't
given, chunks will be 64 kb.
``UploadedFile.multiple_chunks(self, chunk_size=None)``
Returns ``True`` if you can expect more than one chunk when calling
``UploadedFile.chunk(self, chunk_size)``.
``UploadedFile.file_size``
The size, in bytes, of the uploaded file.
``UploadedFile.file_name``
The name of the uploaded file as provided by the user.
``UploadedFile.content_type``
The content-type header uploaded with the file (e.g. ``text/plain`` or
``application/pdf``). Like any data supplied by the user, you shouldn't
trust that the uploaded file is actually this type. You'll still need to
validate that the file contains the content that the content-type header
claims -- "trust but verify."
``UploadedFile.charset``
For ``text/*`` content-types, the character set (i.e. ``utf8``) supplied
by the browser. Again, "trust but verify" is the best policy here.
``UploadedFile.temporary_file_path()``
Only files uploaded onto disk will have this method; it returns the full
path to the temporary uploaded file.
Upload Handlers
===============
When a user uploads a file, Django passes off the file data to an *upload
handler* -- a small class that handles file data as it gets uploaded. Upload
handlers are initially defined in the ``FILE_UPLOAD_HANDLERS`` setting, which
defaults to::
("django.core.files.uploadhandler.MemoryFileUploadHandler",
"django.core.files.uploadhandler.TemporaryFileUploadHandler",)
Together the ``MemoryFileUploadHandler`` and ``TemporaryFileUploadHandler``
provide Django's default file upload behavior of reading small files into memory
and large ones onto disk.
You can write custom handlers that customize how Django handles files. You
could, for example, use custom handlers to enforce user-level quotas, compress
data on the fly, render progress bars, and even send data to another storage
location directly without storing it locally.
Modifying upload handlers on the fly
------------------------------------
Sometimes particular views require different upload behavior. In these cases,
you can override upload handlers on a per-request basis by modifying
``request.upload_handlers``. By default, this list will contain the upload
handlers given by ``FILE_UPLOAD_HANDLERS``, but you can modify the list as you
would any other list.
For instance, suppose you've written a ``ProgressBarUploadHandler`` that
provides feedback on upload progress to some sort of AJAX widget. You'd add this
handler to your upload handlers like this::
request.upload_handlers.insert(0, ProgressBarUploadHandler())
You'd probably want to use ``list.insert()`` in this case (instead of
``append()``) because a progress bar handler would need to run *before* any
other handlers. Remember, the upload handlers are processed in order.
If you want to replace the upload handlers completely, you can just assign a new
list::
request.upload_handlers = [ProgressBarUploadHandler()]
.. note::
You can only modify upload handlers *before* accessing ``request.FILES`` --
it doesn't make sense to change upload handlers after upload handling has
already started. If you try to modify ``request.upload_handlers`` after
reading from ``request.FILES`` Django will throw an error.
Thus, you should always modify uploading handlers as early in your view as
possible.
Writing custom upload handlers
------------------------------
All file upload handlers should be subclasses of
``django.core.files.uploadhandler.FileUploadHandler``. You can define upload
handlers wherever you wish.
Required methods
~~~~~~~~~~~~~~~~
Custom file upload handlers **must** define the following methods:
``FileUploadHandler.receive_data_chunk(self, raw_data, start)``
Receives a "chunk" of data from the file upload.
``raw_data`` is a byte string containing the uploaded data.
``start`` is the position in the file where this ``raw_data`` chunk
begins.
The data you return will get fed into the subsequent upload handlers'
``receive_data_chunk`` methods. In this way, one handler can be a
"filter" for other handlers.
Return ``None`` from ``receive_data_chunk`` to short-circuit remaining
upload handlers from getting this chunk. This is useful if you're
storing the uploaded data yourself and don't want future handlers to
store a copy of the data.
If you raise a ``StopUpload`` or a ``SkipFile`` exception, the upload
will abort or the file will be completely skipped.
``FileUploadHandler.file_complete(self, file_size)``
Called when a file has finished uploading.
The handler should return an ``UploadedFile`` object that will be stored
in ``request.FILES``. Handlers may also return ``None`` to indicate that
the ``UploadedFile`` object should come from subsequent upload handlers.
Optional methods
~~~~~~~~~~~~~~~~
Custom upload handlers may also define any of the following optional methods or
attributes:
``FileUploadHandler.chunk_size``
Size, in bytes, of the "chunks" Django should store into memory and feed
into the handler. That is, this attribute controls the size of chunks
fed into ``FileUploadHandler.receive_data_chunk``.
For maximum performance the chunk sizes should be divisible by ``4`` and
should not exceed 2 GB (2\ :sup:`31` bytes) in size. When there are
multiple chunk sizes provided by multiple handlers, Django will use the
smallest chunk size defined by any handler.
The default is 64*2\ :sup:`10` bytes, or 64 Kb.
``FileUploadHandler.new_file(self, field_name, file_name, content_type, content_length, charset)``
Callback signaling that a new file upload is starting. This is called
before any data has been fed to any upload handlers.
``field_name`` is a string name of the file ``<input>`` field.
``file_name`` is the unicode filename that was provided by the browser.
``content_type`` is the MIME type provided by the browser -- E.g.
``'image/jpeg'``.
``content_length`` is the length of the file given by the browser.
Sometimes this won't be provided and will be ``None``.
``charset`` is the character set (i.e. ``utf8``) given by the browser.
Like ``content_length``, this sometimes won't be provided.
This method may raise a ``StopFutureHandlers`` exception to prevent
future handlers from handling this file.
``FileUploadHandler.upload_complete(self)``
Callback signaling that the entire upload (all files) has completed.
``FileUploadHandler.handle_raw_input(self, input_data, META, content_length, boundary, encoding)``
Allows the handler to completely override the parsing of the raw
HTTP input.
``input_data`` is a file-like object that supports ``read()``-ing.
``META`` is the same object as ``request.META``.
``content_length`` is the length of the data in ``input_data``. Don't
read more than ``content_length`` bytes from ``input_data``.
``boundary`` is the MIME boundary for this request.
``encoding`` is the encoding of the request.
Return ``None`` if you want upload handling to continue, or a tuple of
``(POST, FILES)`` if you want to return the new data structures suitable
for the request directly.

View File

@ -67,7 +67,13 @@ class TextFile(models.Model):
class ImageFile(models.Model):
description = models.CharField(max_length=20)
image = models.FileField(upload_to=tempfile.gettempdir())
try:
# If PIL is available, try testing PIL.
# Otherwise, it's equivalent to TextFile above.
import Image
image = models.ImageField(upload_to=tempfile.gettempdir())
except ImportError:
image = models.FileField(upload_to=tempfile.gettempdir())
def __unicode__(self):
return self.description
@ -75,6 +81,7 @@ class ImageFile(models.Model):
__test__ = {'API_TESTS': """
>>> from django import newforms as forms
>>> from django.newforms.models import ModelForm
>>> from django.core.files.uploadedfile import SimpleUploadedFile
The bare bones, absolutely nothing custom, basic case.
@ -792,7 +799,18 @@ False
# Upload a file and ensure it all works as expected.
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test1.txt', 'content': 'hello world'}})
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test1.txt', 'hello world')})
>>> f.is_valid()
True
>>> type(f.cleaned_data['file'])
<class 'django.newforms.fields.UploadedFile'>
>>> instance = f.save()
>>> instance.file
u'...test1.txt'
>>> os.unlink(instance.get_file_filename())
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test1.txt', 'hello world')})
>>> f.is_valid()
True
>>> type(f.cleaned_data['file'])
@ -814,18 +832,30 @@ u'...test1.txt'
u'...test1.txt'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_file_filename())
# Override the file by uploading a new one.
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test2.txt', 'content': 'hello world'}}, instance=instance)
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test2.txt', 'hello world')}, instance=instance)
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.file
u'...test2.txt'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_file_filename())
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test2.txt', 'hello world')})
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.file
u'...test2.txt'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_file_filename())
>>> instance.delete()
# Test the non-required FileField
@ -838,12 +868,26 @@ True
>>> instance.file
''
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test3.txt', 'content': 'hello world'}}, instance=instance)
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test3.txt', 'hello world')}, instance=instance)
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.file
u'...test3.txt'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_file_filename())
>>> instance.delete()
>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test3.txt', 'hello world')})
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.file
u'...test3.txt'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_file_filename())
>>> instance.delete()
# ImageField ###################################################################
@ -858,7 +902,19 @@ u'...test3.txt'
>>> image_data = open(os.path.join(os.path.dirname(__file__), "test.png")).read()
>>> f = ImageFileForm(data={'description': u'An image'}, files={'image': {'filename': 'test.png', 'content': image_data}})
>>> f = ImageFileForm(data={'description': u'An image'}, files={'image': SimpleUploadedFile('test.png', image_data)})
>>> f.is_valid()
True
>>> type(f.cleaned_data['image'])
<class 'django.newforms.fields.UploadedFile'>
>>> instance = f.save()
>>> instance.image
u'...test.png'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_image_filename())
>>> f = ImageFileForm(data={'description': u'An image'}, files={'image': SimpleUploadedFile('test.png', image_data)})
>>> f.is_valid()
True
>>> type(f.cleaned_data['image'])
@ -885,13 +941,26 @@ u'...test.png'
# Override the file by uploading a new one.
>>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': {'filename': 'test2.png', 'content': image_data}}, instance=instance)
>>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': SimpleUploadedFile('test2.png', image_data)}, instance=instance)
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.image
u'...test2.png'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_image_filename())
>>> instance.delete()
>>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': SimpleUploadedFile('test2.png', image_data)})
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.image
u'...test2.png'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_image_filename())
>>> instance.delete()
# Test the non-required ImageField
@ -904,7 +973,18 @@ True
>>> instance.image
''
>>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': {'filename': 'test3.png', 'content': image_data}}, instance=instance)
>>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': SimpleUploadedFile('test3.png', image_data)}, instance=instance)
>>> f.is_valid()
True
>>> instance = f.save()
>>> instance.image
u'...test3.png'
# Delete the current file since this is not done by Django.
>>> os.unlink(instance.get_image_filename())
>>> instance.delete()
>>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': SimpleUploadedFile('test3.png', image_data)})
>>> f.is_valid()
True
>>> instance = f.save()

View File

@ -9,6 +9,7 @@ import unittest
from regressiontests.bug639.models import Photo
from django.http import QueryDict
from django.utils.datastructures import MultiValueDict
from django.core.files.uploadedfile import SimpleUploadedFile
class Bug639Test(unittest.TestCase):
@ -21,11 +22,7 @@ class Bug639Test(unittest.TestCase):
# Fake a request query dict with the file
qd = QueryDict("title=Testing&image=", mutable=True)
qd["image_file"] = {
"filename" : "test.jpg",
"content-type" : "image/jpeg",
"content" : img
}
qd["image_file"] = SimpleUploadedFile('test.jpg', img, 'image/jpeg')
manip = Photo.AddManipulator()
manip.do_html2python(qd)

View File

@ -117,14 +117,25 @@ Init from sequence of tuples
>>> d['person']['2']['firstname']
['Adrian']
### FileDict ################################################################
>>> d = FileDict({'content': 'once upon a time...'})
### ImmutableList ################################################################
>>> d = ImmutableList(range(10))
>>> d.sort()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/var/lib/python-support/python2.5/django/utils/datastructures.py", line 359, in complain
raise AttributeError, self.warning
AttributeError: ImmutableList object is immutable.
>>> repr(d)
"{'content': '<omitted>'}"
>>> d = FileDict({'other-key': 'once upon a time...'})
>>> repr(d)
"{'other-key': 'once upon a time...'}"
'(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)'
>>> d = ImmutableList(range(10), warning="Object is immutable!")
>>> d[1]
1
>>> d[1] = 'test'
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/var/lib/python-support/python2.5/django/utils/datastructures.py", line 359, in complain
raise AttributeError, self.warning
AttributeError: Object is immutable!
### DictWrapper #############################################################

View File

@ -0,0 +1,2 @@
# This file unintentionally left blank.
# Oops.

View File

@ -0,0 +1,158 @@
import os
import sha
import tempfile
from django.test import TestCase, client
from django.utils import simplejson
class FileUploadTests(TestCase):
    """
    Regression tests for file upload handling, exercised through the test
    client against the views served under ``/file_uploads/``.
    """

    def test_simple_upload(self):
        """Posting one file alongside a plain field should return HTTP 200."""
        post_data = {
            'name': 'Ringo',
            'file_field': open(__file__),
        }
        response = self.client.post('/file_uploads/upload/', post_data)
        self.assertEqual(response.status_code, 200)

    def test_large_upload(self):
        """
        Post several files of different sizes together with a SHA digest of
        each one; the verify view answers 200 only if every digest matches.
        """
        tdir = tempfile.gettempdir()

        file1 = tempfile.NamedTemporaryFile(suffix=".file1", dir=tdir)
        file1.write('a' * (2 ** 21))
        file1.seek(0)

        file2 = tempfile.NamedTemporaryFile(suffix=".file2", dir=tdir)
        file2.write('a' * (10 * 2 ** 20))
        file2.seek(0)

        # This file contains chinese symbols for a name. They are written as
        # HTML numeric character references here; the verify view checks for
        # the decoded characters (u'\u4e2d\u6587') in the received file name.
        file3 = open(os.path.join(tdir, u'test_&#20013;&#25991;_Orl\u00e9ans.jpg'), 'w+b')
        file3.write('b' * (2 ** 10))
        file3.seek(0)

        post_data = {
            'name': 'Ringo',
            'file_field1': open(file1.name),
            'file_field2': open(file2.name),
            'file_unicode': file3,
        }

        # Iterate over a snapshot of the keys: the loop body adds a
        # '<key>_hash' entry for every existing key.
        for key in list(post_data.keys()):
            try:
                post_data[key + '_hash'] = sha.new(post_data[key].read()).hexdigest()
                post_data[key].seek(0)
            except AttributeError:
                # Plain string values have no read(); hash them directly.
                post_data[key + '_hash'] = sha.new(post_data[key]).hexdigest()

        try:
            response = self.client.post('/file_uploads/verify/', post_data)
        finally:
            # Best-effort cleanup of the hand-made file, even if the request
            # raised. Only filesystem errors are ignored (file3 is still open
            # here, which may prevent removal on some platforms).
            try:
                os.unlink(file3.name)
            except OSError:
                pass

        self.assertEqual(response.status_code, 200)

    def test_dangerous_file_names(self):
        """Uploaded file names should be sanitized before ever reaching the view."""
        # This test simulates possible directory traversal attacks by a
        # malicious uploader. We have to do some monkeybusiness here to
        # construct a malicious payload with an invalid file name (containing
        # os.sep or os.pardir). This is similar to what an attacker would need
        # to do when trying such an attack.
        scary_file_names = [
            "/tmp/hax0rd.txt",          # Absolute path, *nix-style.
            "C:\\Windows\\hax0rd.txt",  # Absolute path, win-style.
            "C:/Windows/hax0rd.txt",    # Absolute path, broken-style.
            "\\tmp\\hax0rd.txt",        # Absolute path, broken in a different way.
            "/tmp\\hax0rd.txt",         # Absolute path, broken by mixing.
            "subdir/hax0rd.txt",        # Descendant path, *nix-style.
            "subdir\\hax0rd.txt",       # Descendant path, win-style.
            "sub/dir\\hax0rd.txt",      # Descendant path, mixed.
            "../../hax0rd.txt",         # Relative path, *nix-style.
            "..\\..\\hax0rd.txt",       # Relative path, win-style.
            "../..\\hax0rd.txt"         # Relative path, mixed.
        ]

        # Build the multipart body by hand: one part per scary name.
        payload = []
        for i, name in enumerate(scary_file_names):
            payload.extend([
                '--' + client.BOUNDARY,
                'Content-Disposition: form-data; name="file%s"; filename="%s"' % (i, name),
                'Content-Type: application/octet-stream',
                '',
                'You got pwnd.'
            ])
        payload.extend([
            '--' + client.BOUNDARY + '--',
            '',
        ])

        payload = "\r\n".join(payload)
        r = {
            'CONTENT_LENGTH': len(payload),
            'CONTENT_TYPE':   client.MULTIPART_CONTENT,
            'PATH_INFO':      "/file_uploads/echo/",
            'REQUEST_METHOD': 'POST',
            'wsgi.input':     client.FakePayload(payload),
        }
        response = self.client.request(**r)

        # The filenames should have been sanitized by the time it got to the view.
        received = simplejson.loads(response.content)
        for i, name in enumerate(scary_file_names):
            got = received["file%s" % i]
            self.assertEqual(got, "hax0rd.txt")

    def test_filename_overflow(self):
        """File names over 256 characters (dangerous on some platforms) get fixed up."""
        name = "%s.txt" % ("f"*500)
        # Every element is a separate line of the multipart body. The comma
        # after 'Oops.' matters: without it the closing boundary is implicitly
        # concatenated onto the file content and the body is never terminated.
        payload = "\r\n".join([
            '--' + client.BOUNDARY,
            'Content-Disposition: form-data; name="file"; filename="%s"' % name,
            'Content-Type: application/octet-stream',
            '',
            'Oops.',
            '--' + client.BOUNDARY + '--',
            '',
        ])
        r = {
            'CONTENT_LENGTH': len(payload),
            'CONTENT_TYPE':   client.MULTIPART_CONTENT,
            'PATH_INFO':      "/file_uploads/echo/",
            'REQUEST_METHOD': 'POST',
            'wsgi.input':     client.FakePayload(payload),
        }
        got = simplejson.loads(self.client.request(**r).content)
        self.assert_(len(got['file']) < 256, "Got a long file name (%s characters)." % len(got['file']))

    def test_custom_upload_handler(self):
        """
        A quota-enforcing handler lets a small file through and drops a file
        that exceeds the quota.
        """
        # A small file (under the 5M quota)
        smallfile = tempfile.NamedTemporaryFile()
        smallfile.write('a' * (2 ** 21))
        # Push buffered data to disk before the file is reopened by name
        # (same approach as test_large_upload).
        smallfile.seek(0)

        # A big file (over the quota)
        bigfile = tempfile.NamedTemporaryFile()
        bigfile.write('a' * (10 * 2 ** 20))
        bigfile.seek(0)

        # Small file posting should work.
        response = self.client.post('/file_uploads/quota/', {'f': open(smallfile.name)})
        got = simplejson.loads(response.content)
        self.assert_('f' in got)

        # Large files don't go through.
        response = self.client.post("/file_uploads/quota/", {'f': open(bigfile.name)})
        got = simplejson.loads(response.content)
        self.assert_('f' not in got)

    def test_broken_custom_upload_handler(self):
        """Altering the upload handlers after FILES has been read must fail."""
        f = tempfile.NamedTemporaryFile()
        f.write('a' * (2 ** 21))
        # Push buffered data to disk before the file is reopened by name.
        f.seek(0)

        # AttributeError: You cannot alter upload handlers after the upload has been processed.
        self.assertRaises(
            AttributeError,
            self.client.post,
            '/file_uploads/quota/broken/',
            {'f': open(f.name)}
        )

View File

@ -0,0 +1,26 @@
"""
Upload handlers to test the upload API.
"""
from django.core.files.uploadhandler import FileUploadHandler, StopUpload
class QuotaUploadHandler(FileUploadHandler):
    """
    Test-only upload handler that enforces an upload quota (5MB): it keeps a
    running total of the bytes it has been fed and stops the upload, asking
    for a connection reset, once that total reaches the quota.
    """

    QUOTA = 5 * 2**20 # 5 MB

    def __init__(self, request=None):
        super(QuotaUploadHandler, self).__init__(request)
        # Number of bytes received so far by this handler instance.
        self.total_upload = 0

    def receive_data_chunk(self, raw_data, start):
        """
        Count the chunk towards the quota. Returns the chunk unchanged while
        the total is below QUOTA; otherwise raises StopUpload.
        """
        self.total_upload = self.total_upload + len(raw_data)
        if self.total_upload < self.QUOTA:
            return raw_data
        raise StopUpload(connection_reset=True)

    def file_complete(self, file_size):
        """Always returns None."""
        return None

View File

@ -0,0 +1,10 @@
from django.conf.urls.defaults import *
import views
# URL routes for the file upload regression test views. The tests request
# these paths with a ``/file_uploads/`` prefix.
urlpatterns = patterns('',
# Plain upload: checks the uploaded file merges cleanly into the POST data.
(r'^upload/$', views.file_upload_view),
# Compares a SHA digest of each submitted value with its '<key>_hash' field.
(r'^verify/$', views.file_upload_view_verify),
# Echoes the received file names back as JSON.
(r'^echo/$', views.file_upload_echo),
# Installs QuotaUploadHandler before the uploaded files are read.
(r'^quota/$', views.file_upload_quota),
# Tries to install the handler after the files were read (expected to fail).
(r'^quota/broken/$', views.file_upload_quota_broken),
)

View File

@ -0,0 +1,70 @@
import os
import sha
from django.core.files.uploadedfile import UploadedFile
from django.http import HttpResponse, HttpResponseServerError
from django.utils import simplejson
from uploadhandler import QuotaUploadHandler
def file_upload_view(request):
    """
    Merge the uploaded files into a copy of the POST data and make sure the
    result is sane: 'file_field' must be an UploadedFile, 'name' must be a
    unicode string, and the uploaded file name must not carry a directory
    part. Responds with an empty 200 on success, a server error otherwise.
    """
    merged = request.POST.copy()
    merged.update(request.FILES)

    uploaded = merged.get('file_field')
    if not isinstance(uploaded, UploadedFile):
        return HttpResponseServerError()
    if not isinstance(merged['name'], unicode):
        return HttpResponseServerError()

    # If a file is posted, the dummy client should only post the file name,
    # not the full path.
    if os.path.dirname(uploaded.file_name) != '':
        return HttpResponseServerError()
    return HttpResponse('')
def file_upload_view_verify(request):
    """
    Verify uploaded contents against the SHA digests submitted with them.

    Every field that has a companion '<field>_hash' entry is hashed (file
    contents for uploads, the raw value otherwise) and compared with that
    entry. Any mismatch, or an unexpected unicode file name, yields a server
    error; otherwise an empty 200 response is returned.
    """
    merged = request.POST.copy()
    merged.update(request.FILES)

    # Check to see if unicode names worked out.
    expected_suffix = u'test_\u4e2d\u6587_Orl\xe9ans.jpg'
    if not request.FILES['file_unicode'].file_name.endswith(expected_suffix):
        return HttpResponseServerError()

    for field, payload in merged.items():
        hash_field = field + '_hash'
        # Skip the digest fields themselves and anything without a digest.
        if field.endswith('_hash') or hash_field not in merged:
            continue

        expected_digest = merged[hash_field]
        if isinstance(payload, UploadedFile):
            payload = payload.read()
        if sha.new(payload).hexdigest() != expected_digest:
            return HttpResponseServerError()

    return HttpResponse('')
def file_upload_echo(request):
    """
    Echo back, as a JSON object, the file name received for each uploaded
    field (field name -> file name). Used by the tests to inspect names.
    """
    names = {}
    for field, uploaded in request.FILES.items():
        names[field] = uploaded.file_name
    return HttpResponse(simplejson.dumps(names))
def file_upload_quota(request):
    """
    Put a QuotaUploadHandler at the front of the request's upload handlers,
    then echo the uploaded file names.
    """
    quota_handler = QuotaUploadHandler()
    request.upload_handlers.insert(0, quota_handler)
    return file_upload_echo(request)
def file_upload_quota_broken(request):
    """
    Deliberately wrong ordering: the uploaded files are read first and the
    handler is inserted only afterwards. You can't change handlers after
    reading FILES, so this view shouldn't work.
    """
    echoed = file_upload_echo(request)
    late_handler = QuotaUploadHandler()
    request.upload_handlers.insert(0, late_handler)
    return echoed

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
tests = r"""
>>> from django.newforms import *
>>> from django.core.files.uploadedfile import SimpleUploadedFile
# CharField ###################################################################
@ -214,11 +215,11 @@ ValidationError: [u'REQUIRED']
Traceback (most recent call last):
...
ValidationError: [u'INVALID']
>>> f.clean({})
>>> f.clean(SimpleUploadedFile('name', None))
Traceback (most recent call last):
...
ValidationError: [u'MISSING']
>>> f.clean({'filename': 'name', 'content':''})
ValidationError: [u'EMPTY FILE']
>>> f.clean(SimpleUploadedFile('name', ''))
Traceback (most recent call last):
...
ValidationError: [u'EMPTY FILE']

View File

@ -2,6 +2,7 @@
tests = r"""
>>> from django.newforms import *
>>> from django.newforms.widgets import RadioFieldRenderer
>>> from django.core.files.uploadedfile import SimpleUploadedFile
>>> import datetime
>>> import time
>>> import re
@ -770,17 +771,17 @@ ValidationError: [u'This field is required.']
>>> f.clean(None, 'files/test2.pdf')
'files/test2.pdf'
>>> f.clean({})
>>> f.clean(SimpleUploadedFile('', ''))
Traceback (most recent call last):
...
ValidationError: [u'No file was submitted.']
ValidationError: [u'No file was submitted. Check the encoding type on the form.']
>>> f.clean({}, '')
>>> f.clean(SimpleUploadedFile('', ''), '')
Traceback (most recent call last):
...
ValidationError: [u'No file was submitted.']
ValidationError: [u'No file was submitted. Check the encoding type on the form.']
>>> f.clean({}, 'files/test3.pdf')
>>> f.clean(None, 'files/test3.pdf')
'files/test3.pdf'
>>> f.clean('some content that is not a file')
@ -788,20 +789,20 @@ Traceback (most recent call last):
...
ValidationError: [u'No file was submitted. Check the encoding type on the form.']
>>> f.clean({'filename': 'name', 'content': None})
>>> f.clean(SimpleUploadedFile('name', None))
Traceback (most recent call last):
...
ValidationError: [u'The submitted file is empty.']
>>> f.clean({'filename': 'name', 'content': ''})
>>> f.clean(SimpleUploadedFile('name', ''))
Traceback (most recent call last):
...
ValidationError: [u'The submitted file is empty.']
>>> type(f.clean({'filename': 'name', 'content': 'Some File Content'}))
>>> type(f.clean(SimpleUploadedFile('name', 'Some File Content')))
<class 'django.newforms.fields.UploadedFile'>
>>> type(f.clean({'filename': 'name', 'content': 'Some File Content'}, 'files/test4.pdf'))
>>> type(f.clean(SimpleUploadedFile('name', 'Some File Content'), 'files/test4.pdf'))
<class 'django.newforms.fields.UploadedFile'>
# URLField ##################################################################

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
tests = r"""
>>> from django.newforms import *
>>> from django.core.files.uploadedfile import SimpleUploadedFile
>>> import datetime
>>> import time
>>> import re
@ -1465,7 +1466,7 @@ not request.POST.
>>> print f
<tr><th>File1:</th><td><ul class="errorlist"><li>This field is required.</li></ul><input type="file" name="file1" /></td></tr>
>>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':''}}, auto_id=False)
>>> f = FileForm(data={}, files={'file1': SimpleUploadedFile('name', '')}, auto_id=False)
>>> print f
<tr><th>File1:</th><td><ul class="errorlist"><li>The submitted file is empty.</li></ul><input type="file" name="file1" /></td></tr>
@ -1473,7 +1474,7 @@ not request.POST.
>>> print f
<tr><th>File1:</th><td><ul class="errorlist"><li>No file was submitted. Check the encoding type on the form.</li></ul><input type="file" name="file1" /></td></tr>
>>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':'some content'}}, auto_id=False)
>>> f = FileForm(data={}, files={'file1': SimpleUploadedFile('name', 'some content')}, auto_id=False)
>>> print f
<tr><th>File1:</th><td><input type="file" name="file1" /></td></tr>
>>> f.is_valid()

View File

@ -6,6 +6,7 @@ from django.test import Client, TestCase
from django.core.urlresolvers import reverse
from django.core.exceptions import SuspiciousOperation
import os
import sha
class AssertContainsTests(TestCase):
def test_contains(self):
@ -240,16 +241,6 @@ class AssertFormErrorTests(TestCase):
except AssertionError, e:
self.assertEqual(str(e), "The form 'form' in context 0 does not contain the non-field error 'Some error.' (actual errors: )")
class FileUploadTests(TestCase):
def test_simple_upload(self):
fd = open(os.path.join(os.path.dirname(__file__), "views.py"))
post_data = {
'name': 'Ringo',
'file_field': fd,
}
response = self.client.post('/test_client_regress/file_upload/', post_data)
self.assertEqual(response.status_code, 200)
class LoginTests(TestCase):
fixtures = ['testdata']
@ -269,7 +260,6 @@ class LoginTests(TestCase):
# default client.
self.assertRedirects(response, "http://testserver/test_client_regress/get_view/")
class URLEscapingTests(TestCase):
def test_simple_argument_get(self):
"Get a view that has a simple string argument"

View File

@ -3,7 +3,6 @@ import views
urlpatterns = patterns('',
(r'^no_template_view/$', views.no_template_view),
(r'^file_upload/$', views.file_upload_view),
(r'^staff_only/$', views.staff_only_view),
(r'^get_view/$', views.get_view),
url(r'^arg_view/(?P<name>.+)/$', views.view_with_argument, name='arg_view'),

View File

@ -1,29 +1,11 @@
import os
from django.contrib.auth.decorators import login_required
from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError
from django.http import HttpResponse, HttpResponseRedirect
from django.core.exceptions import SuspiciousOperation
def no_template_view(request):
"A simple view that expects a GET request, and returns a rendered template"
return HttpResponse("No template used. Sample content: twice once twice. Content ends.")
def file_upload_view(request):
"""
Check that a file upload can be updated into the POST dictionary without
going pear-shaped.
"""
form_data = request.POST.copy()
form_data.update(request.FILES)
if isinstance(form_data['file_field'], dict) and isinstance(form_data['name'], unicode):
# If a file is posted, the dummy client should only post the file name,
# not the full path.
if os.path.dirname(form_data['file_field']['filename']) != '':
return HttpResponseServerError()
return HttpResponse('')
else:
return HttpResponseServerError()
def staff_only_view(request):
"A view that can only be visited by staff. Non staff members get an exception"
if request.user.is_staff:

View File

@ -5,6 +5,9 @@ urlpatterns = patterns('',
(r'^test_client/', include('modeltests.test_client.urls')),
(r'^test_client_regress/', include('regressiontests.test_client_regress.urls')),
# File upload test views
(r'^file_uploads/', include('regressiontests.file_uploads.urls')),
# Always provide the auth system login and logout views
(r'^accounts/login/$', 'django.contrib.auth.views.login', {'template_name': 'login.html'}),
(r'^accounts/logout/$', 'django.contrib.auth.views.logout'),