From a6cd78662e72188555027a459cbff2564d72a221 Mon Sep 17 00:00:00 2001 From: Jannis Leidel Date: Tue, 28 Jun 2011 10:17:56 +0000 Subject: [PATCH] Fixed #15785 -- Stopped HttpRequest.read() from reading beyond the end of a wsgi.input stream and removed some redundant code in the multipartparser. Thanks, tomchristie, grahamd and isagalaev. git-svn-id: http://code.djangoproject.com/svn/django/trunk@16479 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/core/handlers/wsgi.py | 25 ++-------- django/http/__init__.py | 12 +---- django/http/multipartparser.py | 36 ++------------- tests/regressiontests/file_uploads/tests.py | 43 ++++++++++++++++- tests/regressiontests/requests/tests.py | 30 +++++++++--- .../test_client_regress/models.py | 46 +++++++++++++++---- .../test_client_regress/urls.py | 2 + .../test_client_regress/views.py | 8 ++++ 8 files changed, 125 insertions(+), 77 deletions(-) diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index 2acb3b1ee75..14420fa7603 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -135,26 +135,11 @@ class WSGIRequest(http.HttpRequest): self.META['SCRIPT_NAME'] = script_name self.method = environ['REQUEST_METHOD'].upper() self._post_parse_error = False - if type(socket._fileobject) is type and isinstance(self.environ['wsgi.input'], socket._fileobject): - # Under development server 'wsgi.input' is an instance of - # socket._fileobject which hangs indefinitely on reading bytes past - # available count. To prevent this it's wrapped in LimitedStream - # that doesn't read past Content-Length bytes. - # - # This is not done for other kinds of inputs (like flup's FastCGI - # streams) beacuse they don't suffer from this problem and we can - # avoid using another wrapper with its own .read and .readline - # implementation. - # - # The type check is done because for some reason, AppEngine - # implements _fileobject as a function, not a class. - try: - content_length = int(self.environ.get('CONTENT_LENGTH', 0)) - except (ValueError, TypeError): - content_length = 0 - self._stream = LimitedStream(self.environ['wsgi.input'], content_length) - else: - self._stream = self.environ['wsgi.input'] + try: + content_length = int(self.environ.get('CONTENT_LENGTH')) + except (ValueError, TypeError): + content_length = 0 + self._stream = LimitedStream(self.environ['wsgi.input'], content_length) self._read_started = False def get_full_path(self): diff --git a/django/http/__init__.py b/django/http/__init__.py index cf1eb0d9e2a..e667f6a9fc3 100644 --- a/django/http/__init__.py +++ b/django/http/__init__.py @@ -308,17 +308,7 @@ class HttpRequest(object): if not hasattr(self, '_raw_post_data'): if self._read_started: raise Exception("You cannot access raw_post_data after reading from request's data stream") - try: - content_length = int(self.META.get('CONTENT_LENGTH', 0)) - except (ValueError, TypeError): - # If CONTENT_LENGTH was empty string or not an integer, don't - # error out. We've also seen None passed in here (against all - # specs, but see ticket #8259), so we handle TypeError as well. - content_length = 0 - if content_length: - self._raw_post_data = self.read(content_length) - else: - self._raw_post_data = self.read() + self._raw_post_data = self.read() self._stream = StringIO(self._raw_post_data) return self._raw_post_data raw_post_data = property(_get_raw_post_data) diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py index 63203a4581a..477a08c05a7 100644 --- a/django/http/multipartparser.py +++ b/django/http/multipartparser.py @@ -33,7 +33,7 @@ class MultiPartParser(object): A rfc2388 multipart/form-data parser. ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks - and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If + and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. """ def __init__(self, META, input_data, upload_handlers, encoding=None): """ @@ -65,14 +65,11 @@ class MultiPartParser(object): raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary) - # # Content-Length should contain the length of the body we are about # to receive. - # try: content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0))) except (ValueError, TypeError): - # For now set it to 0; we'll try again later on down. content_length = 0 if content_length < 0: @@ -110,12 +107,10 @@ class MultiPartParser(object): if self._content_length == 0: return QueryDict(MultiValueDict(), encoding=self._encoding), MultiValueDict() - limited_input_data = LimitBytes(self._input_data, self._content_length) - # See if the handler will want to take care of the parsing. # This allows overriding everything if somebody wants it. for handler in handlers: - result = handler.handle_raw_input(limited_input_data, + result = handler.handle_raw_input(self._input_data, self._meta, self._content_length, self._boundary, @@ -128,7 +123,7 @@ class MultiPartParser(object): self._files = MultiValueDict() # Instantiate the parser and stream: - stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size)) + stream = LazyStream(ChunkIter(self._input_data, self._chunk_size)) # Whether or not to signal a file-completion at the beginning of the loop. old_field_name = None @@ -225,10 +220,10 @@ class MultiPartParser(object): exhaust(stream) except StopUpload, e: if not e.connection_reset: - exhaust(limited_input_data) + exhaust(self._input_data) else: # Make sure that the request data is all fed - exhaust(limited_input_data) + exhaust(self._input_data) # Signal that the upload has completed. for handler in handlers: @@ -390,27 +385,6 @@ class ChunkIter(object): def __iter__(self): return self -class LimitBytes(object): - """ Limit bytes for a file object. """ - def __init__(self, fileobject, length): - self._file = fileobject - self.remaining = length - - def read(self, num_bytes=None): - """ - Read data from the underlying file. - If you ask for too much or there isn't anything left, - this will raise an InputStreamExhausted error. - """ - if self.remaining <= 0: - raise InputStreamExhausted() - if num_bytes is None: - num_bytes = self.remaining - else: - num_bytes = min(num_bytes, self.remaining) - self.remaining -= num_bytes - return self._file.read(num_bytes) - class InterBoundaryIter(object): """ A Producer that will iterate over boundaries. diff --git a/tests/regressiontests/file_uploads/tests.py b/tests/regressiontests/file_uploads/tests.py index d42e0279a74..6d79e2a451b 100644 --- a/tests/regressiontests/file_uploads/tests.py +++ b/tests/regressiontests/file_uploads/tests.py @@ -9,7 +9,7 @@ from StringIO import StringIO from django.core.files import temp as tempfile from django.core.files.uploadedfile import SimpleUploadedFile -from django.http.multipartparser import MultiPartParser +from django.http.multipartparser import MultiPartParser, MultiPartParserError from django.test import TestCase, client from django.utils import simplejson from django.utils import unittest @@ -176,6 +176,47 @@ class FileUploadTests(TestCase): got = simplejson.loads(self.client.request(**r).content) self.assertTrue(len(got['file']) < 256, "Got a long file name (%s characters)." % len(got['file'])) + def test_truncated_multipart_handled_gracefully(self): + """ + If passed an incomplete multipart message, MultiPartParser does not + attempt to read beyond the end of the stream, and simply will handle + the part that can be parsed gracefully. + """ + payload = "\r\n".join([ + '--' + client.BOUNDARY, + 'Content-Disposition: form-data; name="file"; filename="foo.txt"', + 'Content-Type: application/octet-stream', + '', + 'file contents' + '--' + client.BOUNDARY + '--', + '', + ]) + payload = payload[:-10] + r = { + 'CONTENT_LENGTH': len(payload), + 'CONTENT_TYPE': client.MULTIPART_CONTENT, + 'PATH_INFO': '/file_uploads/echo/', + 'REQUEST_METHOD': 'POST', + 'wsgi.input': client.FakePayload(payload), + } + got = simplejson.loads(self.client.request(**r).content) + self.assertEquals(got, {}) + + def test_empty_multipart_handled_gracefully(self): + """ + If passed an empty multipart message, MultiPartParser will return + an empty QueryDict. + """ + r = { + 'CONTENT_LENGTH': 0, + 'CONTENT_TYPE': client.MULTIPART_CONTENT, + 'PATH_INFO': '/file_uploads/echo/', + 'REQUEST_METHOD': 'POST', + 'wsgi.input': client.FakePayload(''), + } + got = simplejson.loads(self.client.request(**r).content) + self.assertEquals(got, {}) + def test_custom_upload_handler(self): # A small file (under the 5M quota) smallfile = tempfile.NamedTemporaryFile() diff --git a/tests/regressiontests/requests/tests.py b/tests/regressiontests/requests/tests.py index 91f8b2f8050..8bc81ffcf21 100644 --- a/tests/regressiontests/requests/tests.py +++ b/tests/regressiontests/requests/tests.py @@ -195,7 +195,10 @@ class RequestsTests(unittest.TestCase): self.assertEqual(stream.read(), '') def test_stream(self): - request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) + payload = 'name=value' + request = WSGIRequest({'REQUEST_METHOD': 'POST', + 'CONTENT_LENGTH': len(payload), + 'wsgi.input': StringIO(payload)}) self.assertEqual(request.read(), 'name=value') def test_read_after_value(self): @@ -203,7 +206,10 @@ class RequestsTests(unittest.TestCase): Reading from request is allowed after accessing request contents as POST or raw_post_data. """ - request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) + payload = 'name=value' + request = WSGIRequest({'REQUEST_METHOD': 'POST', + 'CONTENT_LENGTH': len(payload), + 'wsgi.input': StringIO(payload)}) self.assertEqual(request.POST, {u'name': [u'value']}) self.assertEqual(request.raw_post_data, 'name=value') self.assertEqual(request.read(), 'name=value') @@ -213,7 +219,10 @@ class RequestsTests(unittest.TestCase): Construction of POST or raw_post_data is not allowed after reading from request. """ - request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) + payload = 'name=value' + request = WSGIRequest({'REQUEST_METHOD': 'POST', + 'CONTENT_LENGTH': len(payload), + 'wsgi.input': StringIO(payload)}) self.assertEqual(request.read(2), 'na') self.assertRaises(Exception, lambda: request.raw_post_data) self.assertEqual(request.POST, {}) @@ -261,14 +270,20 @@ class RequestsTests(unittest.TestCase): self.assertEqual(request.POST, {}) def test_read_by_lines(self): - request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) + payload = 'name=value' + request = WSGIRequest({'REQUEST_METHOD': 'POST', + 'CONTENT_LENGTH': len(payload), + 'wsgi.input': StringIO(payload)}) self.assertEqual(list(request), ['name=value']) def test_POST_after_raw_post_data_read(self): """ POST should be populated even if raw_post_data is read first """ - request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) + payload = 'name=value' + request = WSGIRequest({'REQUEST_METHOD': 'POST', + 'CONTENT_LENGTH': len(payload), + 'wsgi.input': StringIO(payload)}) raw_data = request.raw_post_data self.assertEqual(request.POST, {u'name': [u'value']}) @@ -277,7 +292,10 @@ class RequestsTests(unittest.TestCase): POST should be populated even if raw_post_data is read first, and then the stream is read second. """ - request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) + payload = 'name=value' + request = WSGIRequest({'REQUEST_METHOD': 'POST', + 'CONTENT_LENGTH': len(payload), + 'wsgi.input': StringIO(payload)}) raw_data = request.raw_post_data self.assertEqual(request.read(1), u'n') self.assertEqual(request.POST, {u'name': [u'value']}) diff --git a/tests/regressiontests/test_client_regress/models.py b/tests/regressiontests/test_client_regress/models.py index d23ed614f58..8e840e49823 100644 --- a/tests/regressiontests/test_client_regress/models.py +++ b/tests/regressiontests/test_client_regress/models.py @@ -913,14 +913,44 @@ class ResponseTemplateDeprecationTests(TestCase): response = self.client.get("/test_client_regress/request_methods/") self.assertEqual(response.template, None) -class RawPostDataTest(TestCase): - "Access to request.raw_post_data from the test client." - def test_raw_post_data(self): - # Refs #14753 - try: - response = self.client.get("/test_client_regress/raw_post_data/") - except AssertionError: - self.fail("Accessing request.raw_post_data from a view fetched with GET by the test client shouldn't fail.") + +class ReadLimitedStreamTest(TestCase): + """ + Tests that ensure that HttpRequest.raw_post_data, HttpRequest.read() and + HttpRequest.read(BUFFER) have proper LimitedStream behaviour. + + Refs #14753, #15785 + """ + def test_raw_post_data_from_empty_request(self): + """HttpRequest.raw_post_data on a test client GET request should return + the empty string.""" + self.assertEquals(self.client.get("/test_client_regress/raw_post_data/").content, '') + + def test_read_from_empty_request(self): + """HttpRequest.read() on a test client GET request should return the + empty string.""" + self.assertEquals(self.client.get("/test_client_regress/read_all/").content, '') + + def test_read_numbytes_from_empty_request(self): + """HttpRequest.read(LARGE_BUFFER) on a test client GET request should + return the empty string.""" + self.assertEquals(self.client.get("/test_client_regress/read_buffer/").content, '') + + def test_read_from_nonempty_request(self): + """HttpRequest.read() on a test client PUT request with some payload + should return that payload.""" + payload = 'foobar' + self.assertEquals(self.client.put("/test_client_regress/read_all/", + data=payload, + content_type='text/plain').content, payload) + + def test_read_numbytes_from_nonempty_request(self): + """HttpRequest.read(LARGE_BUFFER) on a test client PUT request with + some payload should return that payload.""" + payload = 'foobar' + self.assertEquals(self.client.put("/test_client_regress/read_buffer/", + data=payload, + content_type='text/plain').content, payload) class RequestFactoryStateTest(TestCase): diff --git a/tests/regressiontests/test_client_regress/urls.py b/tests/regressiontests/test_client_regress/urls.py index 121f4df638c..454f35b8240 100644 --- a/tests/regressiontests/test_client_regress/urls.py +++ b/tests/regressiontests/test_client_regress/urls.py @@ -27,5 +27,7 @@ urlpatterns = patterns('', (r'^check_headers/$', views.check_headers), (r'^check_headers_redirect/$', RedirectView.as_view(url='/test_client_regress/check_headers/')), (r'^raw_post_data/$', views.raw_post_data), + (r'^read_all/$', views.read_all), + (r'^read_buffer/$', views.read_buffer), (r'^request_context_view/$', views.request_context_view), ) diff --git a/tests/regressiontests/test_client_regress/views.py b/tests/regressiontests/test_client_regress/views.py index c40f34fe563..b3982938b77 100644 --- a/tests/regressiontests/test_client_regress/views.py +++ b/tests/regressiontests/test_client_regress/views.py @@ -96,6 +96,14 @@ def raw_post_data(request): "A view that is requested with GET and accesses request.raw_post_data. Refs #14753." return HttpResponse(request.raw_post_data) +def read_all(request): + "A view that is requested with accesses request.read()." + return HttpResponse(request.read()) + +def read_buffer(request): + "A view that is requested with accesses request.read(LARGE_BUFFER)." + return HttpResponse(request.read(99999)) + def request_context_view(request): # Special attribute that won't be present on a plain HttpRequest request.special_path = request.path