Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to Ivan Sagalaev for the suggestion and patch.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@14394 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Russell Keith-Magee 2010-10-29 16:39:25 +00:00
parent 3086b55b0e
commit 269e921756
5 changed files with 236 additions and 95 deletions

View File

@ -42,6 +42,8 @@ class ModPythonRequest(http.HttpRequest):
# naughty, but also pretty harmless. # naughty, but also pretty harmless.
self.path_info = u'/' self.path_info = u'/'
self._post_parse_error = False self._post_parse_error = False
self._stream = self._req
self._read_started = False
def __repr__(self): def __repr__(self):
# Since this is called as part of error handling, we need to be very # Since this is called as part of error handling, we need to be very
@ -81,26 +83,6 @@ class ModPythonRequest(http.HttpRequest):
# mod_python < 3.2.10 doesn't have req.is_https(). # mod_python < 3.2.10 doesn't have req.is_https().
return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1') return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1')
def _load_post_and_files(self):
"Populates self._post and self._files"
if self.method != 'POST':
self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
return
if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
self._raw_post_data = ''
try:
self._post, self._files = self.parse_file_upload(self.META, self._req)
except:
# See django.core.handlers.wsgi.WSGIHandler for an explanation
# of what's going on here.
self._post = http.QueryDict('')
self._files = datastructures.MultiValueDict()
self._post_parse_error = True
raise
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
def _get_request(self): def _get_request(self):
if not hasattr(self, '_request'): if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET) self._request = datastructures.MergeDict(self.POST, self.GET)
@ -162,13 +144,6 @@ class ModPythonRequest(http.HttpRequest):
self._meta[key] = value self._meta[key] = value
return self._meta return self._meta
def _get_raw_post_data(self):
try:
return self._raw_post_data
except AttributeError:
self._raw_post_data = self._req.read()
return self._raw_post_data
def _get_method(self): def _get_method(self):
return self.META['REQUEST_METHOD'].upper() return self.META['REQUEST_METHOD'].upper()
@ -178,7 +153,6 @@ class ModPythonRequest(http.HttpRequest):
FILES = property(_get_files) FILES = property(_get_files)
META = property(_get_meta) META = property(_get_meta)
REQUEST = property(_get_request) REQUEST = property(_get_request)
raw_post_data = property(_get_raw_post_data)
method = property(_get_method) method = property(_get_method)
class ModPythonHandler(BaseHandler): class ModPythonHandler(BaseHandler):

View File

@ -5,6 +5,7 @@ try:
from cStringIO import StringIO from cStringIO import StringIO
except ImportError: except ImportError:
from StringIO import StringIO from StringIO import StringIO
import socket
from django import http from django import http
from django.core import signals from django.core import signals
@ -62,20 +63,55 @@ STATUS_CODE_TEXT = {
505: 'HTTP VERSION NOT SUPPORTED', 505: 'HTTP VERSION NOT SUPPORTED',
} }
def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0): class LimitedStream(object):
""" '''
A version of shutil.copyfileobj that will not read more than 'size' bytes. LimitedStream wraps another stream in order to not allow reading from it
This makes it safe from clients sending more than CONTENT_LENGTH bytes of past a specified number of bytes.
data in the body. '''
""" def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
if not size: self.stream = stream
return self.remaining = limit
while size > 0: self.buffer = ''
buf = fsrc.read(min(length, size)) self.buf_size = buf_size
if not buf:
break def _read_limited(self, size=None):
fdst.write(buf) if size is None or size > self.remaining:
size -= len(buf) size = self.remaining
if size == 0:
return ''
result = self.stream.read(size)
self.remaining -= len(result)
return result
def read(self, size=None):
if size is None:
result = self.buffer + self._read_limited()
self.buffer = ''
elif size < len(self.buffer):
result = self.buffer[:size]
self.buffer = self.buffer[size:]
else: # size >= len(self.buffer)
result = self.buffer + self._read_limited(size - len(self.buffer))
self.buffer = ''
return result
def readline(self, size=None):
while '\n' not in self.buffer or \
(size is not None and len(self.buffer) < size):
if size:
chunk = self._read_limited(size - len(self.buffer))
else:
chunk = self._read_limited()
if not chunk:
break
self.buffer += chunk
sio = StringIO(self.buffer)
if size:
line = sio.readline(size)
else:
line = sio.readline()
self.buffer = sio.read()
return line
class WSGIRequest(http.HttpRequest): class WSGIRequest(http.HttpRequest):
def __init__(self, environ): def __init__(self, environ):
@ -98,6 +134,24 @@ class WSGIRequest(http.HttpRequest):
self.META['SCRIPT_NAME'] = script_name self.META['SCRIPT_NAME'] = script_name
self.method = environ['REQUEST_METHOD'].upper() self.method = environ['REQUEST_METHOD'].upper()
self._post_parse_error = False self._post_parse_error = False
if isinstance(self.environ['wsgi.input'], socket._fileobject):
# Under the development server, 'wsgi.input' is an instance of
# socket._fileobject, which hangs indefinitely when reading bytes past
# the available count. To prevent this, it's wrapped in a LimitedStream
# that doesn't read past Content-Length bytes.
#
# This is not done for other kinds of inputs (like flup's FastCGI
# streams) because they don't suffer from this problem and we can
# avoid using another wrapper with its own .read and .readline
# implementation.
try:
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
except (ValueError, TypeError):
content_length = 0
self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
else:
self._stream = self.environ['wsgi.input']
self._read_started = False
def __repr__(self): def __repr__(self):
# Since this is called as part of error handling, we need to be very # Since this is called as part of error handling, we need to be very
@ -133,30 +187,6 @@ class WSGIRequest(http.HttpRequest):
return 'wsgi.url_scheme' in self.environ \ return 'wsgi.url_scheme' in self.environ \
and self.environ['wsgi.url_scheme'] == 'https' and self.environ['wsgi.url_scheme'] == 'https'
def _load_post_and_files(self):
# Populates self._post and self._files
if self.method == 'POST':
if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
self._raw_post_data = ''
try:
self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
except:
# An error occurred while parsing POST data. Since the request
# handler might access self.POST when formatting the error,
# set self._post and self._files to prevent attempts to parse
# POST data again.
self._post = http.QueryDict('')
self._files = datastructures.MultiValueDict()
# Mark that an error occurred. This allows self.__repr__ to
# be explicit about it instead of simply representing an
# empty POST
self._post_parse_error = True
raise
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
else:
self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
def _get_request(self): def _get_request(self):
if not hasattr(self, '_request'): if not hasattr(self, '_request'):
self._request = datastructures.MergeDict(self.POST, self.GET) self._request = datastructures.MergeDict(self.POST, self.GET)
@ -192,32 +222,11 @@ class WSGIRequest(http.HttpRequest):
self._load_post_and_files() self._load_post_and_files()
return self._files return self._files
def _get_raw_post_data(self):
try:
return self._raw_post_data
except AttributeError:
buf = StringIO()
try:
# CONTENT_LENGTH might be absent if POST doesn't have content at all (lighttpd)
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
except (ValueError, TypeError):
# If CONTENT_LENGTH was empty string or not an integer, don't
# error out. We've also seen None passed in here (against all
# specs, but see ticket #8259), so we handle TypeError as well.
content_length = 0
if content_length > 0:
safe_copyfileobj(self.environ['wsgi.input'], buf,
size=content_length)
self._raw_post_data = buf.getvalue()
buf.close()
return self._raw_post_data
GET = property(_get_get, _set_get) GET = property(_get_get, _set_get)
POST = property(_get_post, _set_post) POST = property(_get_post, _set_post)
COOKIES = property(_get_cookies, _set_cookies) COOKIES = property(_get_cookies, _set_cookies)
FILES = property(_get_files) FILES = property(_get_files)
REQUEST = property(_get_request) REQUEST = property(_get_request)
raw_post_data = property(_get_raw_post_data)
class WSGIHandler(base.BaseHandler): class WSGIHandler(base.BaseHandler):
initLock = Lock() initLock = Lock()

View File

@ -6,6 +6,10 @@ from Cookie import BaseCookie, SimpleCookie, CookieError
from pprint import pformat from pprint import pformat
from urllib import urlencode from urllib import urlencode
from urlparse import urljoin from urlparse import urljoin
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
try: try:
# The mod_python version is more efficient, so try importing it first. # The mod_python version is more efficient, so try importing it first.
from mod_python.util import parse_qsl from mod_python.util import parse_qsl
@ -132,6 +136,73 @@ class HttpRequest(object):
parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding) parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
return parser.parse() return parser.parse()
def _get_raw_post_data(self):
if not hasattr(self, '_raw_post_data'):
if self._read_started:
raise Exception("You cannot access raw_post_data after reading from request's data stream")
self._raw_post_data = self.read()
self._stream = StringIO(self._raw_post_data)
return self._raw_post_data
raw_post_data = property(_get_raw_post_data)
def _mark_post_parse_error(self):
self._post = QueryDict('')
self._files = MultiValueDict()
self._post_parse_error = True
def _load_post_and_files(self):
# Populates self._post and self._files
if self.method != 'POST':
self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
return
if self._read_started:
self._mark_post_parse_error()
return
if self.META.get('CONTENT_TYPE', '').startswith('multipart'):
self._raw_post_data = ''
try:
self._post, self._files = self.parse_file_upload(self.META, self)
except:
# An error occurred while parsing POST data. Since the request
# handler might access self.POST when formatting the error,
# set self._post and self._files to prevent attempts to parse
# POST data again.
# Mark that an error occurred. This allows self.__repr__ to
# be explicit about it instead of simply representing an
# empty POST
self._mark_post_parse_error()
raise
else:
self._post, self._files = QueryDict(self.raw_post_data, encoding=self._encoding), MultiValueDict()
## File-like and iterator interface.
##
## Expects self._stream to be set to an appropriate source of bytes by
## a corresponding request subclass (WSGIRequest or ModPythonRequest).
## Also when request data has already been read by request.POST or
## request.raw_post_data, self._stream points to a StringIO instance
## containing that data.
def read(self, *args, **kwargs):
self._read_started = True
return self._stream.read(*args, **kwargs)
def readline(self, *args, **kwargs):
self._read_started = True
return self._stream.readline(*args, **kwargs)
def xreadlines(self):
while True:
buf = self.readline()
if not buf:
break
yield buf
__iter__ = xreadlines
def readlines(self):
return list(iter(self))
class QueryDict(MultiValueDict): class QueryDict(MultiValueDict):
""" """
A specialized MultiValueDict that takes a query string when initialized. A specialized MultiValueDict that takes a query string when initialized.
@ -198,7 +269,7 @@ class QueryDict(MultiValueDict):
for key, value in dict.items(self): for key, value in dict.items(self):
dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo)) dict.__setitem__(result, copy.deepcopy(key, memo), copy.deepcopy(value, memo))
return result return result
def setlist(self, key, list_): def setlist(self, key, list_):
self._assert_mutable() self._assert_mutable()
key = str_to_unicode(key, self.encoding) key = str_to_unicode(key, self.encoding)
@ -385,7 +456,7 @@ class HttpResponse(object):
""" """
Sets a cookie. Sets a cookie.
``expires`` can be a string in the correct format or a ``expires`` can be a string in the correct format or a
``datetime.datetime`` object in UTC. If ``expires`` is a datetime ``datetime.datetime`` object in UTC. If ``expires`` is a datetime
object then ``max_age`` will be calculated. object then ``max_age`` will be calculated.
""" """
@ -407,7 +478,7 @@ class HttpResponse(object):
# IE requires expires, so set it if hasn't been already. # IE requires expires, so set it if hasn't been already.
if not expires: if not expires:
self.cookies[key]['expires'] = cookie_date(time.time() + self.cookies[key]['expires'] = cookie_date(time.time() +
max_age) max_age)
if path is not None: if path is not None:
self.cookies[key]['path'] = path self.cookies[key]['path'] = path
if domain is not None: if domain is not None:

View File

@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only.
.. attribute:: HttpRequest.raw_post_data .. attribute:: HttpRequest.raw_post_data
The raw HTTP POST data. This is only useful for advanced processing. Use The raw HTTP POST data as a byte string. This is useful for processing
``POST`` instead. data in formats other than conventional HTML forms: binary images,
XML payloads, etc. For processing form data, use ``HttpRequest.POST``.
.. versionadded:: 1.3
You can also read from an HttpRequest using a file-like interface. See
:meth:`HttpRequest.read()`.
.. attribute:: HttpRequest.urlconf .. attribute:: HttpRequest.urlconf
@ -249,6 +255,27 @@ Methods
If you write your own XMLHttpRequest call (on the browser side), you'll If you write your own XMLHttpRequest call (on the browser side), you'll
have to set this header manually if you want ``is_ajax()`` to work. have to set this header manually if you want ``is_ajax()`` to work.
.. method:: HttpRequest.read(size=None)
.. method:: HttpRequest.readline()
.. method:: HttpRequest.readlines()
.. method:: HttpRequest.xreadlines()
.. method:: HttpRequest.__iter__()
.. versionadded:: 1.3
Methods implementing a file-like interface for reading from an
HttpRequest instance. This makes it possible to consume an incoming
request in a streaming fashion. A common use case would be to process a
big XML payload with an iterative parser without constructing a whole
XML tree in memory.
Given this standard interface, an HttpRequest instance can be
passed directly to an XML parser such as ElementTree::
import xml.etree.ElementTree as ET
for element in ET.iterparse(request):
process(element)
QueryDict objects QueryDict objects
----------------- -----------------

View File

@ -1,9 +1,10 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
import time import time
from StringIO import StringIO
import unittest import unittest
from django.http import HttpRequest, HttpResponse, parse_cookie from django.http import HttpRequest, HttpResponse, parse_cookie
from django.core.handlers.wsgi import WSGIRequest from django.core.handlers.wsgi import WSGIRequest, LimitedStream
from django.core.handlers.modpython import ModPythonRequest from django.core.handlers.modpython import ModPythonRequest
from django.utils.http import cookie_date from django.utils.http import cookie_date
@ -17,11 +18,11 @@ class RequestsTests(unittest.TestCase):
self.assertEqual(request.META.keys(), []) self.assertEqual(request.META.keys(), [])
def test_wsgirequest(self): def test_wsgirequest(self):
request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus'}) request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus', 'wsgi.input': StringIO('')})
self.assertEqual(request.GET.keys(), []) self.assertEqual(request.GET.keys(), [])
self.assertEqual(request.POST.keys(), []) self.assertEqual(request.POST.keys(), [])
self.assertEqual(request.COOKIES.keys(), []) self.assertEqual(request.COOKIES.keys(), [])
self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME'])) self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME', 'wsgi.input']))
self.assertEqual(request.META['PATH_INFO'], 'bogus') self.assertEqual(request.META['PATH_INFO'], 'bogus')
self.assertEqual(request.META['REQUEST_METHOD'], 'bogus') self.assertEqual(request.META['REQUEST_METHOD'], 'bogus')
self.assertEqual(request.META['SCRIPT_NAME'], '') self.assertEqual(request.META['SCRIPT_NAME'], '')
@ -88,3 +89,62 @@ class RequestsTests(unittest.TestCase):
max_age_cookie = response.cookies['max_age'] max_age_cookie = response.cookies['max_age']
self.assertEqual(max_age_cookie['max-age'], 10) self.assertEqual(max_age_cookie['max-age'], 10)
self.assertEqual(max_age_cookie['expires'], cookie_date(time.time()+10)) self.assertEqual(max_age_cookie['expires'], cookie_date(time.time()+10))
def test_limited_stream(self):
# Read all of a limited stream
stream = LimitedStream(StringIO('test'), 2)
self.assertEqual(stream.read(), 'te')
# Read a number of characters greater than the stream has to offer
stream = LimitedStream(StringIO('test'), 2)
self.assertEqual(stream.read(5), 'te')
# Read sequentially from a stream
stream = LimitedStream(StringIO('12345678'), 8)
self.assertEqual(stream.read(5), '12345')
self.assertEqual(stream.read(5), '678')
# Read lines from a stream
stream = LimitedStream(StringIO('1234\n5678\nabcd\nefgh\nijkl'), 24)
# Read a full line, unconditionally
self.assertEqual(stream.readline(), '1234\n')
# Read a number of characters less than a line
self.assertEqual(stream.readline(2), '56')
# Read the rest of the partial line
self.assertEqual(stream.readline(), '78\n')
# Read a full line, with a character limit greater than the line length
self.assertEqual(stream.readline(6), 'abcd\n')
# Read the next line, deliberately terminated at the line end
self.assertEqual(stream.readline(4), 'efgh')
# Read the next line... just the line end
self.assertEqual(stream.readline(), '\n')
# Read everything else.
self.assertEqual(stream.readline(), 'ijkl')
def test_stream(self):
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
self.assertEqual(request.read(), 'name=value')
def test_read_after_value(self):
"""
Reading from request is allowed after accessing request contents as
POST or raw_post_data.
"""
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
self.assertEqual(request.POST, {u'name': [u'value']})
self.assertEqual(request.raw_post_data, 'name=value')
self.assertEqual(request.read(), 'name=value')
def test_value_after_read(self):
"""
Construction of POST or raw_post_data is not allowed after reading
from request.
"""
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
self.assertEqual(request.read(2), 'na')
self.assertRaises(Exception, lambda: request.raw_post_data)
self.assertEqual(request.POST, {})
def test_read_by_lines(self):
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
self.assertEqual(list(request), ['name=value'])