Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to Ivan Sagalaev for the suggestion and patch.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@14394 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
3086b55b0e
commit
269e921756
|
@ -42,6 +42,8 @@ class ModPythonRequest(http.HttpRequest):
|
|||
# naughty, but also pretty harmless.
|
||||
self.path_info = u'/'
|
||||
self._post_parse_error = False
|
||||
self._stream = self._req
|
||||
self._read_started = False
|
||||
|
||||
def __repr__(self):
|
||||
# Since this is called as part of error handling, we need to be very
|
||||
|
@ -81,26 +83,6 @@ class ModPythonRequest(http.HttpRequest):
|
|||
# mod_python < 3.2.10 doesn't have req.is_https().
|
||||
return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1')
|
||||
|
||||
def _load_post_and_files(self):
|
||||
"Populates self._post and self._files"
|
||||
if self.method != 'POST':
|
||||
self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
|
||||
return
|
||||
|
||||
if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
|
||||
self._raw_post_data = ''
|
||||
try:
|
||||
self._post, self._files = self.parse_file_upload(self.META, self._req)
|
||||
except:
|
||||
# See django.core.handlers.wsgi.WSGIHandler for an explanation
|
||||
# of what's going on here.
|
||||
self._post = http.QueryDict('')
|
||||
self._files = datastructures.MultiValueDict()
|
||||
self._post_parse_error = True
|
||||
raise
|
||||
else:
|
||||
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
|
||||
|
||||
def _get_request(self):
|
||||
if not hasattr(self, '_request'):
|
||||
self._request = datastructures.MergeDict(self.POST, self.GET)
|
||||
|
@ -162,13 +144,6 @@ class ModPythonRequest(http.HttpRequest):
|
|||
self._meta[key] = value
|
||||
return self._meta
|
||||
|
||||
def _get_raw_post_data(self):
|
||||
try:
|
||||
return self._raw_post_data
|
||||
except AttributeError:
|
||||
self._raw_post_data = self._req.read()
|
||||
return self._raw_post_data
|
||||
|
||||
def _get_method(self):
    """Return the REQUEST_METHOD entry of self.META, upper-cased."""
    raw_method = self.META['REQUEST_METHOD']
    return raw_method.upper()
|
||||
|
||||
|
@ -178,7 +153,6 @@ class ModPythonRequest(http.HttpRequest):
|
|||
FILES = property(_get_files)
|
||||
META = property(_get_meta)
|
||||
REQUEST = property(_get_request)
|
||||
raw_post_data = property(_get_raw_post_data)
|
||||
method = property(_get_method)
|
||||
|
||||
class ModPythonHandler(BaseHandler):
|
||||
|
|
|
@ -5,6 +5,7 @@ try:
|
|||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
import socket
|
||||
|
||||
from django import http
|
||||
from django.core import signals
|
||||
|
@ -62,20 +63,55 @@ STATUS_CODE_TEXT = {
|
|||
505: 'HTTP VERSION NOT SUPPORTED',
|
||||
}
|
||||
|
||||
def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0):
    """
    A version of shutil.copyfileobj that will not read more than 'size' bytes.
    This makes it safe from clients sending more than CONTENT_LENGTH bytes of
    data in the body.

    Data is copied from fsrc to fdst in chunks of at most 'length' bytes.
    Nothing is copied when 'size' is zero or otherwise falsy.
    """
    if not size:
        return
    while size > 0:
        buf = fsrc.read(min(length, size))
        if not buf:
            # Source ran dry before 'size' bytes were seen.
            break
        fdst.write(buf)
        size -= len(buf)


class LimitedStream(object):
    '''
    LimitedStream wraps another stream in order to not allow reading from it
    past specified amount of bytes.

    Data pulled from the wrapped stream but not yet handed to the caller
    (the tail left over after readline()) is kept in self.buffer.
    '''
    def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
        self.stream = stream
        # Number of bytes that may still be requested from the wrapped stream.
        self.remaining = limit
        self.buffer = ''
        # Stored for callers; not consulted by the methods of this class.
        self.buf_size = buf_size

    def _read_limited(self, size=None):
        """
        Read up to 'size' bytes from the wrapped stream without ever asking
        for more than self.remaining. None or a negative size means
        "everything that is still allowed".
        """
        if size is None or size < 0 or size > self.remaining:
            size = self.remaining
        # Never forward a zero or negative count: a negative count would be
        # treated by the wrapped stream as "read everything", which would
        # bypass the limit.
        if size <= 0:
            return ''
        result = self.stream.read(size)
        self.remaining -= len(result)
        return result

    def read(self, size=None):
        """
        Return up to 'size' bytes, serving buffered data first. None or a
        negative size returns everything left within the limit.
        """
        if size is None or size < 0:
            result = self.buffer + self._read_limited()
            self.buffer = ''
        elif size < len(self.buffer):
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        else:  # size >= len(self.buffer)
            result = self.buffer + self._read_limited(size - len(self.buffer))
            self.buffer = ''
        return result

    def readline(self, size=None):
        """
        Return one line (including its trailing newline, if any), at most
        'size' characters long. A size of None, 0 or a negative number means
        no length cap.
        """
        if not size or size < 0:
            size = None
        # Only pull more data while a complete line is not yet buffered AND
        # the buffer is still shorter than the requested cap. Reading on after
        # either condition is met could block on the wrapped stream or request
        # a negative number of bytes.
        while '\n' not in self.buffer and \
                (size is None or len(self.buffer) < size):
            if size is None:
                chunk = self._read_limited()
            else:
                chunk = self._read_limited(size - len(self.buffer))
            if not chunk:
                break
            self.buffer += chunk
        sio = StringIO(self.buffer)
        if size is None:
            line = sio.readline()
        else:
            line = sio.readline(size)
        # Whatever follows the returned line stays buffered for the next call.
        self.buffer = sio.read()
        return line
|
||||
|
||||
class WSGIRequest(http.HttpRequest):
|
||||
def __init__(self, environ):
|
||||
|
@ -98,6 +134,24 @@ class WSGIRequest(http.HttpRequest):
|
|||
self.META['SCRIPT_NAME'] = script_name
|
||||
self.method = environ['REQUEST_METHOD'].upper()
|
||||
self._post_parse_error = False
|
||||
if isinstance(self.environ['wsgi.input'], socket._fileobject):
|
||||
# Under development server 'wsgi.input' is an instance of
|
||||
# socket._fileobject which hangs indefinitely on reading bytes past
|
||||
# available count. To prevent this it's wrapped in LimitedStream
|
||||
# that doesn't read past Content-Length bytes.
|
||||
#
|
||||
# This is not done for other kinds of inputs (like flup's FastCGI
|
||||
# streams) because they don't suffer from this problem and we can
|
||||
# avoid using another wrapper with its own .read and .readline
|
||||
# implementation.
|
||||
try:
|
||||
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
|
||||
except (ValueError, TypeError):
|
||||
content_length = 0
|
||||
self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
|
||||
else:
|
||||
self._stream = self.environ['wsgi.input']
|
||||
self._read_started = False
|
||||
|
||||
def __repr__(self):
|
||||
# Since this is called as part of error handling, we need to be very
|
||||
|
@ -133,30 +187,6 @@ class WSGIRequest(http.HttpRequest):
|
|||
return 'wsgi.url_scheme' in self.environ \
|
||||
and self.environ['wsgi.url_scheme'] == 'https'
|
||||
|
||||
def _load_post_and_files(self):
|
||||
# Populates self._post and self._files
|
||||
if self.method == 'POST':
|
||||
if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
|
||||
self._raw_post_data = ''
|
||||
try:
|
||||
self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
|
||||
except:
|
||||
# An error occured while parsing POST data. Since when
|
||||
# formatting the error the request handler might access
|
||||
# self.POST, set self._post and self._file to prevent
|
||||
# attempts to parse POST data again.
|
||||
self._post = http.QueryDict('')
|
||||
self._files = datastructures.MultiValueDict()
|
||||
# Mark that an error occured. This allows self.__repr__ to
|
||||
# be explicit about it instead of simply representing an
|
||||
# empty POST
|
||||
self._post_parse_error = True
|
||||
raise
|
||||
else:
|
||||
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
|
||||
else:
|
||||
self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict()
|
||||
|
||||
def _get_request(self):
|
||||
if not hasattr(self, '_request'):
|
||||
self._request = datastructures.MergeDict(self.POST, self.GET)
|
||||
|
@ -192,32 +222,11 @@ class WSGIRequest(http.HttpRequest):
|
|||
self._load_post_and_files()
|
||||
return self._files
|
||||
|
||||
def _get_raw_post_data(self):
|
||||
try:
|
||||
return self._raw_post_data
|
||||
except AttributeError:
|
||||
buf = StringIO()
|
||||
try:
|
||||
# CONTENT_LENGTH might be absent if POST doesn't have content at all (lighttpd)
|
||||
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
|
||||
except (ValueError, TypeError):
|
||||
# If CONTENT_LENGTH was empty string or not an integer, don't
|
||||
# error out. We've also seen None passed in here (against all
|
||||
# specs, but see ticket #8259), so we handle TypeError as well.
|
||||
content_length = 0
|
||||
if content_length > 0:
|
||||
safe_copyfileobj(self.environ['wsgi.input'], buf,
|
||||
size=content_length)
|
||||
self._raw_post_data = buf.getvalue()
|
||||
buf.close()
|
||||
return self._raw_post_data
|
||||
|
||||
GET = property(_get_get, _set_get)
|
||||
POST = property(_get_post, _set_post)
|
||||
COOKIES = property(_get_cookies, _set_cookies)
|
||||
FILES = property(_get_files)
|
||||
REQUEST = property(_get_request)
|
||||
raw_post_data = property(_get_raw_post_data)
|
||||
|
||||
class WSGIHandler(base.BaseHandler):
|
||||
initLock = Lock()
|
||||
|
|
|
@ -6,6 +6,10 @@ from Cookie import BaseCookie, SimpleCookie, CookieError
|
|||
from pprint import pformat
|
||||
from urllib import urlencode
|
||||
from urlparse import urljoin
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
try:
|
||||
# The mod_python version is more efficient, so try importing it first.
|
||||
from mod_python.util import parse_qsl
|
||||
|
@ -132,6 +136,73 @@ class HttpRequest(object):
|
|||
parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
|
||||
return parser.parse()
|
||||
|
||||
def _get_raw_post_data(self):
    """
    Return the request body, reading and caching it on first access.

    After the first read the body is stored in self._raw_post_data and
    self._stream is replaced by a StringIO over that data. Raises Exception
    if the body is not cached yet and self._read_started is already set.
    """
    try:
        return self._raw_post_data
    except AttributeError:
        pass
    if self._read_started:
        raise Exception("You cannot access raw_post_data after reading from request's data stream")
    body = self.read()
    self._raw_post_data = body
    self._stream = StringIO(body)
    return body
raw_post_data = property(_get_raw_post_data)
|
||||
|
||||
def _mark_post_parse_error(self):
    """
    Reset self._post and self._files to empty containers and set
    self._post_parse_error to True.
    """
    empty_post = QueryDict('')
    self._post = empty_post
    empty_files = MultiValueDict()
    self._files = empty_files
    self._post_parse_error = True
|
||||
|
||||
def _load_post_and_files(self):
    """
    Populate self._post and self._files.

    Non-POST requests get empty containers. If the data stream has already
    been read from, the request is marked as having a POST parse error.
    Multipart bodies go through parse_file_upload(); any other body is parsed
    from raw_post_data as a query string.
    """
    if self.method != 'POST':
        no_post = QueryDict('', encoding=self._encoding)
        no_files = MultiValueDict()
        self._post, self._files = no_post, no_files
        return
    if self._read_started:
        self._mark_post_parse_error()
        return

    is_multipart = self.META.get('CONTENT_TYPE', '').startswith('multipart')
    if not is_multipart:
        parsed_post = QueryDict(self.raw_post_data, encoding=self._encoding)
        no_files = MultiValueDict()
        self._post, self._files = parsed_post, no_files
        return

    self._raw_post_data = ''
    try:
        self._post, self._files = self.parse_file_upload(self.META, self)
    except:
        # An error occurred while parsing POST data. The request handler may
        # access self.POST while formatting the error, so record the failure:
        # that sets self._post and self._files (no second parse attempt) and
        # flags the error so self.__repr__ can be explicit about it instead
        # of simply showing an empty POST.
        self._mark_post_parse_error()
        raise
|
||||
|
||||
## File-like and iterator interface.
|
||||
##
|
||||
## Expects self._stream to be set to an appropriate source of bytes by
|
||||
## a corresponding request subclass (WSGIRequest or ModPythonRequest).
|
||||
## Also when request data has already been read by request.POST or
|
||||
## request.raw_post_data, self._stream points to a StringIO instance
|
||||
## containing that data.
|
||||
|
||||
def read(self, *args, **kwargs):
    """
    Flag the request as read-started, then delegate to self._stream.read()
    with the given arguments.
    """
    self._read_started = True
    source = self._stream
    return source.read(*args, **kwargs)
|
||||
|
||||
def readline(self, *args, **kwargs):
    """
    Flag the request as read-started, then delegate to
    self._stream.readline() with the given arguments.
    """
    self._read_started = True
    source = self._stream
    return source.readline(*args, **kwargs)
|
||||
|
||||
def xreadlines(self):
    """
    Generator yielding successive results of self.readline() until it
    returns an empty (falsy) value.
    """
    line = self.readline()
    while line:
        yield line
        line = self.readline()
__iter__ = xreadlines
|
||||
|
||||
def readlines(self):
    """Return a list of every line obtained by iterating over self."""
    return [line for line in self]
|
||||
|
||||
class QueryDict(MultiValueDict):
|
||||
"""
|
||||
A specialized MultiValueDict that takes a query string when initialized.
|
||||
|
|
|
@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only.
|
|||
|
||||
.. attribute:: HttpRequest.raw_post_data
|
||||
|
||||
The raw HTTP POST data. This is only useful for advanced processing. Use
|
||||
``POST`` instead.
|
||||
The raw HTTP POST data as a byte string. This is useful for processing
|
||||
data in formats other than conventional HTML forms: binary images,
|
||||
XML payload etc. For processing form data use ``HttpRequest.POST``.
|
||||
|
||||
.. versionadded:: 1.3
|
||||
|
||||
You can also read from an HttpRequest using a file-like interface. See
|
||||
:meth:`HttpRequest.read()`.
|
||||
|
||||
.. attribute:: HttpRequest.urlconf
|
||||
|
||||
|
@ -249,6 +255,27 @@ Methods
|
|||
If you write your own XMLHttpRequest call (on the browser side), you'll
|
||||
have to set this header manually if you want ``is_ajax()`` to work.
|
||||
|
||||
.. method:: HttpRequest.read(size=None)
|
||||
.. method:: HttpRequest.readline()
|
||||
.. method:: HttpRequest.readlines()
|
||||
.. method:: HttpRequest.xreadlines()
|
||||
.. method:: HttpRequest.__iter__()
|
||||
|
||||
.. versionadded:: 1.3
|
||||
|
||||
Methods implementing a file-like interface for reading from an
|
||||
HttpRequest instance. This makes it possible to consume an incoming
|
||||
request in a streaming fashion. A common use-case would be to process a
|
||||
big XML payload with an iterative parser without constructing a whole
|
||||
XML tree in memory.
|
||||
|
||||
Given this standard interface, an HttpRequest instance can be
|
||||
passed directly to an XML parser such as ElementTree::
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
for element in ET.iterparse(request):
|
||||
process(element)
|
||||
|
||||
|
||||
QueryDict objects
|
||||
-----------------
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
from datetime import datetime, timedelta
|
||||
import time
|
||||
from StringIO import StringIO
|
||||
import unittest
|
||||
|
||||
from django.http import HttpRequest, HttpResponse, parse_cookie
|
||||
from django.core.handlers.wsgi import WSGIRequest
|
||||
from django.core.handlers.wsgi import WSGIRequest, LimitedStream
|
||||
from django.core.handlers.modpython import ModPythonRequest
|
||||
from django.utils.http import cookie_date
|
||||
|
||||
|
@ -17,11 +18,11 @@ class RequestsTests(unittest.TestCase):
|
|||
self.assertEqual(request.META.keys(), [])
|
||||
|
||||
def test_wsgirequest(self):
|
||||
request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus'})
|
||||
request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus', 'wsgi.input': StringIO('')})
|
||||
self.assertEqual(request.GET.keys(), [])
|
||||
self.assertEqual(request.POST.keys(), [])
|
||||
self.assertEqual(request.COOKIES.keys(), [])
|
||||
self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME']))
|
||||
self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME', 'wsgi.input']))
|
||||
self.assertEqual(request.META['PATH_INFO'], 'bogus')
|
||||
self.assertEqual(request.META['REQUEST_METHOD'], 'bogus')
|
||||
self.assertEqual(request.META['SCRIPT_NAME'], '')
|
||||
|
@ -88,3 +89,62 @@ class RequestsTests(unittest.TestCase):
|
|||
max_age_cookie = response.cookies['max_age']
|
||||
self.assertEqual(max_age_cookie['max-age'], 10)
|
||||
self.assertEqual(max_age_cookie['expires'], cookie_date(time.time()+10))
|
||||
|
||||
def test_limited_stream(self):
    """
    LimitedStream.read() and readline() never return data beyond the
    configured limit and serve buffered data in order.
    """
    # Read all of a limited stream
    limited = LimitedStream(StringIO('test'), 2)
    self.assertEqual(limited.read(), 'te')

    # Read a number of characters greater than the stream has to offer
    limited = LimitedStream(StringIO('test'), 2)
    self.assertEqual(limited.read(5), 'te')

    # Read sequentially from a stream
    limited = LimitedStream(StringIO('12345678'), 8)
    for expected in ('12345', '678'):
        self.assertEqual(limited.read(5), expected)

    # Read lines from a stream; each entry is (readline arguments, expected)
    limited = LimitedStream(StringIO('1234\n5678\nabcd\nefgh\nijkl'), 24)
    readline_cases = (
        # Read a full line, unconditionally
        ((), '1234\n'),
        # Read a number of characters less than a line
        ((2,), '56'),
        # Read the rest of the partial line
        ((), '78\n'),
        # Read a full line, with a character limit greater than the line length
        ((6,), 'abcd\n'),
        # Read the next line, deliberately terminated at the line end
        ((4,), 'efgh'),
        # Read the next line... just the line end
        ((), '\n'),
        # Read everything else.
        ((), 'ijkl'),
    )
    for call_args, expected in readline_cases:
        self.assertEqual(limited.readline(*call_args), expected)
|
||||
|
||||
def test_stream(self):
    """read() on a WSGIRequest returns the data supplied as wsgi.input."""
    payload = 'name=value'
    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO(payload)}
    req = WSGIRequest(environ)
    self.assertEqual(req.read(), 'name=value')
|
||||
|
||||
def test_read_after_value(self):
    """
    Reading from request is allowed after accessing request contents as
    POST or raw_post_data.
    """
    payload = 'name=value'
    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO(payload)}
    req = WSGIRequest(environ)
    # Order matters: POST first, then raw_post_data, then the stream itself.
    self.assertEqual(req.POST, {u'name': [u'value']})
    self.assertEqual(req.raw_post_data, 'name=value')
    self.assertEqual(req.read(), 'name=value')
|
||||
|
||||
def test_value_after_read(self):
    """
    Construction of POST or raw_post_data is not allowed after reading
    from request.
    """
    payload = 'name=value'
    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO(payload)}
    req = WSGIRequest(environ)
    self.assertEqual(req.read(2), 'na')

    def access_raw_post_data():
        return req.raw_post_data

    self.assertRaises(Exception, access_raw_post_data)
    self.assertEqual(req.POST, {})
|
||||
|
||||
def test_read_by_lines(self):
    """Iterating over a WSGIRequest yields the lines of its input."""
    payload = 'name=value'
    environ = {'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO(payload)}
    req = WSGIRequest(environ)
    self.assertEqual(list(req), ['name=value'])
|
||||
|
|
Loading…
Reference in New Issue