Fixed #15785 -- Stopped HttpRequest.read() from reading beyond the end of a wsgi.input stream and removed some redundant code in the multipartparser. Thanks, tomchristie, grahamd and isagalaev.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16479 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jannis Leidel 2011-06-28 10:17:56 +00:00
parent 0278947128
commit a6cd78662e
8 changed files with 125 additions and 77 deletions

View File

@ -135,26 +135,11 @@ class WSGIRequest(http.HttpRequest):
self.META['SCRIPT_NAME'] = script_name
self.method = environ['REQUEST_METHOD'].upper()
self._post_parse_error = False
if type(socket._fileobject) is type and isinstance(self.environ['wsgi.input'], socket._fileobject):
# Under development server 'wsgi.input' is an instance of
# socket._fileobject which hangs indefinitely on reading bytes past
# available count. To prevent this it's wrapped in LimitedStream
# that doesn't read past Content-Length bytes.
#
# This is not done for other kinds of inputs (like flup's FastCGI
# streams) beacuse they don't suffer from this problem and we can
# avoid using another wrapper with its own .read and .readline
# implementation.
#
# The type check is done because for some reason, AppEngine
# implements _fileobject as a function, not a class.
try:
content_length = int(self.environ.get('CONTENT_LENGTH', 0))
content_length = int(self.environ.get('CONTENT_LENGTH'))
except (ValueError, TypeError):
content_length = 0
self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
else:
self._stream = self.environ['wsgi.input']
self._read_started = False
def get_full_path(self):

View File

@ -308,16 +308,6 @@ class HttpRequest(object):
if not hasattr(self, '_raw_post_data'):
if self._read_started:
raise Exception("You cannot access raw_post_data after reading from request's data stream")
try:
content_length = int(self.META.get('CONTENT_LENGTH', 0))
except (ValueError, TypeError):
# If CONTENT_LENGTH was empty string or not an integer, don't
# error out. We've also seen None passed in here (against all
# specs, but see ticket #8259), so we handle TypeError as well.
content_length = 0
if content_length:
self._raw_post_data = self.read(content_length)
else:
self._raw_post_data = self.read()
self._stream = StringIO(self._raw_post_data)
return self._raw_post_data

View File

@ -33,7 +33,7 @@ class MultiPartParser(object):
A rfc2388 multipart/form-data parser.
``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If
and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``.
"""
def __init__(self, META, input_data, upload_handlers, encoding=None):
"""
@ -65,14 +65,11 @@ class MultiPartParser(object):
raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
#
# Content-Length should contain the length of the body we are about
# to receive.
#
try:
content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
except (ValueError, TypeError):
# For now set it to 0; we'll try again later on down.
content_length = 0
if content_length < 0:
@ -110,12 +107,10 @@ class MultiPartParser(object):
if self._content_length == 0:
return QueryDict(MultiValueDict(), encoding=self._encoding), MultiValueDict()
limited_input_data = LimitBytes(self._input_data, self._content_length)
# See if the handler will want to take care of the parsing.
# This allows overriding everything if somebody wants it.
for handler in handlers:
result = handler.handle_raw_input(limited_input_data,
result = handler.handle_raw_input(self._input_data,
self._meta,
self._content_length,
self._boundary,
@ -128,7 +123,7 @@ class MultiPartParser(object):
self._files = MultiValueDict()
# Instantiate the parser and stream:
stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
stream = LazyStream(ChunkIter(self._input_data, self._chunk_size))
# Whether or not to signal a file-completion at the beginning of the loop.
old_field_name = None
@ -225,10 +220,10 @@ class MultiPartParser(object):
exhaust(stream)
except StopUpload, e:
if not e.connection_reset:
exhaust(limited_input_data)
exhaust(self._input_data)
else:
# Make sure that the request data is all fed
exhaust(limited_input_data)
exhaust(self._input_data)
# Signal that the upload has completed.
for handler in handlers:
@ -390,27 +385,6 @@ class ChunkIter(object):
def __iter__(self):
return self
class LimitBytes(object):
""" Limit bytes for a file object. """
def __init__(self, fileobject, length):
self._file = fileobject
self.remaining = length
def read(self, num_bytes=None):
"""
Read data from the underlying file.
If you ask for too much or there isn't anything left,
this will raise an InputStreamExhausted error.
"""
if self.remaining <= 0:
raise InputStreamExhausted()
if num_bytes is None:
num_bytes = self.remaining
else:
num_bytes = min(num_bytes, self.remaining)
self.remaining -= num_bytes
return self._file.read(num_bytes)
class InterBoundaryIter(object):
"""
A Producer that will iterate over boundaries.

View File

@ -9,7 +9,7 @@ from StringIO import StringIO
from django.core.files import temp as tempfile
from django.core.files.uploadedfile import SimpleUploadedFile
from django.http.multipartparser import MultiPartParser
from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.test import TestCase, client
from django.utils import simplejson
from django.utils import unittest
@ -176,6 +176,47 @@ class FileUploadTests(TestCase):
got = simplejson.loads(self.client.request(**r).content)
self.assertTrue(len(got['file']) < 256, "Got a long file name (%s characters)." % len(got['file']))
def test_truncated_multipart_handled_gracefully(self):
"""
If passed an incomplete multipart message, MultiPartParser does not
attempt to read beyond the end of the stream, and simply will handle
the part that can be parsed gracefully.
"""
payload = "\r\n".join([
'--' + client.BOUNDARY,
'Content-Disposition: form-data; name="file"; filename="foo.txt"',
'Content-Type: application/octet-stream',
'',
'file contents'
'--' + client.BOUNDARY + '--',
'',
])
payload = payload[:-10]
r = {
'CONTENT_LENGTH': len(payload),
'CONTENT_TYPE': client.MULTIPART_CONTENT,
'PATH_INFO': '/file_uploads/echo/',
'REQUEST_METHOD': 'POST',
'wsgi.input': client.FakePayload(payload),
}
got = simplejson.loads(self.client.request(**r).content)
self.assertEquals(got, {})
def test_empty_multipart_handled_gracefully(self):
"""
If passed an empty multipart message, MultiPartParser will return
an empty QueryDict.
"""
r = {
'CONTENT_LENGTH': 0,
'CONTENT_TYPE': client.MULTIPART_CONTENT,
'PATH_INFO': '/file_uploads/echo/',
'REQUEST_METHOD': 'POST',
'wsgi.input': client.FakePayload(''),
}
got = simplejson.loads(self.client.request(**r).content)
self.assertEquals(got, {})
def test_custom_upload_handler(self):
# A small file (under the 5M quota)
smallfile = tempfile.NamedTemporaryFile()

View File

@ -195,7 +195,10 @@ class RequestsTests(unittest.TestCase):
self.assertEqual(stream.read(), '')
def test_stream(self):
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
payload = 'name=value'
request = WSGIRequest({'REQUEST_METHOD': 'POST',
'CONTENT_LENGTH': len(payload),
'wsgi.input': StringIO(payload)})
self.assertEqual(request.read(), 'name=value')
def test_read_after_value(self):
@ -203,7 +206,10 @@ class RequestsTests(unittest.TestCase):
Reading from request is allowed after accessing request contents as
POST or raw_post_data.
"""
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
payload = 'name=value'
request = WSGIRequest({'REQUEST_METHOD': 'POST',
'CONTENT_LENGTH': len(payload),
'wsgi.input': StringIO(payload)})
self.assertEqual(request.POST, {u'name': [u'value']})
self.assertEqual(request.raw_post_data, 'name=value')
self.assertEqual(request.read(), 'name=value')
@ -213,7 +219,10 @@ class RequestsTests(unittest.TestCase):
Construction of POST or raw_post_data is not allowed after reading
from request.
"""
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
payload = 'name=value'
request = WSGIRequest({'REQUEST_METHOD': 'POST',
'CONTENT_LENGTH': len(payload),
'wsgi.input': StringIO(payload)})
self.assertEqual(request.read(2), 'na')
self.assertRaises(Exception, lambda: request.raw_post_data)
self.assertEqual(request.POST, {})
@ -261,14 +270,20 @@ class RequestsTests(unittest.TestCase):
self.assertEqual(request.POST, {})
def test_read_by_lines(self):
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
payload = 'name=value'
request = WSGIRequest({'REQUEST_METHOD': 'POST',
'CONTENT_LENGTH': len(payload),
'wsgi.input': StringIO(payload)})
self.assertEqual(list(request), ['name=value'])
def test_POST_after_raw_post_data_read(self):
"""
POST should be populated even if raw_post_data is read first
"""
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
payload = 'name=value'
request = WSGIRequest({'REQUEST_METHOD': 'POST',
'CONTENT_LENGTH': len(payload),
'wsgi.input': StringIO(payload)})
raw_data = request.raw_post_data
self.assertEqual(request.POST, {u'name': [u'value']})
@ -277,7 +292,10 @@ class RequestsTests(unittest.TestCase):
POST should be populated even if raw_post_data is read first, and then
the stream is read second.
"""
request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')})
payload = 'name=value'
request = WSGIRequest({'REQUEST_METHOD': 'POST',
'CONTENT_LENGTH': len(payload),
'wsgi.input': StringIO(payload)})
raw_data = request.raw_post_data
self.assertEqual(request.read(1), u'n')
self.assertEqual(request.POST, {u'name': [u'value']})

View File

@ -913,14 +913,44 @@ class ResponseTemplateDeprecationTests(TestCase):
response = self.client.get("/test_client_regress/request_methods/")
self.assertEqual(response.template, None)
class RawPostDataTest(TestCase):
"Access to request.raw_post_data from the test client."
def test_raw_post_data(self):
# Refs #14753
try:
response = self.client.get("/test_client_regress/raw_post_data/")
except AssertionError:
self.fail("Accessing request.raw_post_data from a view fetched with GET by the test client shouldn't fail.")
class ReadLimitedStreamTest(TestCase):
"""
Tests that ensure that HttpRequest.raw_post_data, HttpRequest.read() and
HttpRequest.read(BUFFER) have proper LimitedStream behaviour.
Refs #14753, #15785
"""
def test_raw_post_data_from_empty_request(self):
"""HttpRequest.raw_post_data on a test client GET request should return
the empty string."""
self.assertEquals(self.client.get("/test_client_regress/raw_post_data/").content, '')
def test_read_from_empty_request(self):
"""HttpRequest.read() on a test client GET request should return the
empty string."""
self.assertEquals(self.client.get("/test_client_regress/read_all/").content, '')
def test_read_numbytes_from_empty_request(self):
"""HttpRequest.read(LARGE_BUFFER) on a test client GET request should
return the empty string."""
self.assertEquals(self.client.get("/test_client_regress/read_buffer/").content, '')
def test_read_from_nonempty_request(self):
"""HttpRequest.read() on a test client PUT request with some payload
should return that payload."""
payload = 'foobar'
self.assertEquals(self.client.put("/test_client_regress/read_all/",
data=payload,
content_type='text/plain').content, payload)
def test_read_numbytes_from_nonempty_request(self):
"""HttpRequest.read(LARGE_BUFFER) on a test client PUT request with
some payload should return that payload."""
payload = 'foobar'
self.assertEquals(self.client.put("/test_client_regress/read_buffer/",
data=payload,
content_type='text/plain').content, payload)
class RequestFactoryStateTest(TestCase):

View File

@ -27,5 +27,7 @@ urlpatterns = patterns('',
(r'^check_headers/$', views.check_headers),
(r'^check_headers_redirect/$', RedirectView.as_view(url='/test_client_regress/check_headers/')),
(r'^raw_post_data/$', views.raw_post_data),
(r'^read_all/$', views.read_all),
(r'^read_buffer/$', views.read_buffer),
(r'^request_context_view/$', views.request_context_view),
)

View File

@ -96,6 +96,14 @@ def raw_post_data(request):
"A view that is requested with GET and accesses request.raw_post_data. Refs #14753."
return HttpResponse(request.raw_post_data)
def read_all(request):
"A view that is requested with accesses request.read()."
return HttpResponse(request.read())
def read_buffer(request):
"A view that is requested with accesses request.read(LARGE_BUFFER)."
return HttpResponse(request.read(99999))
def request_context_view(request):
# Special attribute that won't be present on a plain HttpRequest
request.special_path = request.path