mirror of https://github.com/django/django.git
Fixed #33865 -- Optimized LimitedStream wrapper.
The current implementation of LimitedStream is slow because .read() performs an extra copy into a buffer and .readline() performs two extra copies. The stream being wrapped is already typically a BytesIO object so this is unnecessary. This implementation has largely been untouched for 12 years and, inspired by a simpler implementation in werkzeug, it was possible to achieve the following performance improvement: LimitedStream.read() (single line): Mean +- std dev: [bench_limitedstream-main] 286 ns +- 6 ns -> [bench_limitedstream-patch] 227 ns +- 6 ns: 1.26x faster LimitedStream.readline() (single line): Mean +- std dev: [bench_limitedstream-main] 507 ns +- 11 ns -> [bench_limitedstream-patch] 232 ns +- 8 ns: 2.18x faster LimitedStream.read(8192) (single line): Mean +- std dev: [bench_limitedstream-main] 360 ns +- 8 ns -> [bench_limitedstream-patch] 297 ns +- 6 ns: 1.21x faster LimitedStream.readline(8192) (single line): Mean +- std dev: [bench_limitedstream-main] 602 ns +- 10 ns -> [bench_limitedstream-patch] 305 ns +- 10 ns: 1.98x faster LimitedStream.read() (multiple lines): Mean +- std dev: [bench_limitedstream-main] 290 ns +- 5 ns -> [bench_limitedstream-patch] 236 ns +- 6 ns: 1.23x faster LimitedStream.readline() (multiple lines): Mean +- std dev: [bench_limitedstream-main] 517 ns +- 19 ns -> [bench_limitedstream-patch] 239 ns +- 7 ns: 2.16x faster LimitedStream.read(8192) (multiple lines): Mean +- std dev: [bench_limitedstream-main] 363 ns +- 8 ns -> [bench_limitedstream-patch] 311 ns +- 11 ns: 1.17x faster LimitedStream.readline(8192) (multiple lines): Mean +- std dev: [bench_limitedstream-main] 601 ns +- 12 ns -> [bench_limitedstream-patch] 308 ns +- 7 ns: 1.95x faster Geometric mean: 1.59x faster
This commit is contained in:
parent
57f5669d23
commit
b47f2f5b90
|
@ -1,4 +1,4 @@
|
|||
from io import BytesIO
|
||||
from io import IOBase
|
||||
|
||||
from django.conf import settings
|
||||
from django.core import signals
|
||||
|
@ -12,56 +12,46 @@ from django.utils.regex_helper import _lazy_re_compile
|
|||
_slashes_re = _lazy_re_compile(rb"/+")
|
||||
|
||||
|
||||
class LimitedStream:
|
||||
"""Wrap another stream to disallow reading it past a number of bytes."""
|
||||
class LimitedStream(IOBase):
|
||||
"""
|
||||
Wrap another stream to disallow reading it past a number of bytes.
|
||||
|
||||
Based on the implementation from werkzeug.wsgi.LimitedStream
|
||||
See https://github.com/pallets/werkzeug/blob/dbf78f67/src/werkzeug/wsgi.py#L828
|
||||
"""
|
||||
|
||||
def __init__(self, stream, limit):
|
||||
self.stream = stream
|
||||
self.remaining = limit
|
||||
self.buffer = b""
|
||||
self._read = stream.read
|
||||
self._readline = stream.readline
|
||||
self._pos = 0
|
||||
self.limit = limit
|
||||
|
||||
def _read_limited(self, size=None):
|
||||
if size is None or size > self.remaining:
|
||||
size = self.remaining
|
||||
if size == 0:
|
||||
def read(self, size=-1, /):
|
||||
_pos = self._pos
|
||||
limit = self.limit
|
||||
if _pos >= limit:
|
||||
return b""
|
||||
result = self.stream.read(size)
|
||||
self.remaining -= len(result)
|
||||
return result
|
||||
|
||||
def read(self, size=None):
|
||||
if size is None:
|
||||
result = self.buffer + self._read_limited()
|
||||
self.buffer = b""
|
||||
elif size < len(self.buffer):
|
||||
result = self.buffer[:size]
|
||||
self.buffer = self.buffer[size:]
|
||||
else: # size >= len(self.buffer)
|
||||
result = self.buffer + self._read_limited(size - len(self.buffer))
|
||||
self.buffer = b""
|
||||
return result
|
||||
|
||||
def readline(self, size=None):
|
||||
while b"\n" not in self.buffer and (size is None or len(self.buffer) < size):
|
||||
if size:
|
||||
# since size is not None here, len(self.buffer) < size
|
||||
chunk = self._read_limited(size - len(self.buffer))
|
||||
if size == -1 or size is None:
|
||||
size = limit - _pos
|
||||
else:
|
||||
chunk = self._read_limited()
|
||||
if not chunk:
|
||||
break
|
||||
self.buffer += chunk
|
||||
sio = BytesIO(self.buffer)
|
||||
if size:
|
||||
line = sio.readline(size)
|
||||
size = min(size, limit - _pos)
|
||||
data = self._read(size)
|
||||
self._pos += len(data)
|
||||
return data
|
||||
|
||||
def readline(self, size=-1, /):
|
||||
_pos = self._pos
|
||||
limit = self.limit
|
||||
if _pos >= limit:
|
||||
return b""
|
||||
if size == -1 or size is None:
|
||||
size = limit - _pos
|
||||
else:
|
||||
line = sio.readline()
|
||||
self.buffer = sio.read()
|
||||
size = min(size, limit - _pos)
|
||||
line = self._readline(size)
|
||||
self._pos += len(line)
|
||||
return line
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
class WSGIRequest(HttpRequest):
|
||||
non_picklable_attrs = HttpRequest.non_picklable_attrs | frozenset(["environ"])
|
||||
|
|
|
@ -144,7 +144,7 @@ class ServerHandler(simple_server.ServerHandler):
|
|||
self.request_handler.close_connection = True
|
||||
|
||||
def close(self):
|
||||
self.get_stdin()._read_limited()
|
||||
self.get_stdin().read()
|
||||
super().close()
|
||||
|
||||
|
||||
|
|
|
@ -523,6 +523,10 @@ Miscellaneous
|
|||
|
||||
.. _`redis-py`: https://pypi.org/project/redis/
|
||||
|
||||
* Manually instantiated ``WSGIRequest`` objects must be provided a file-like
|
||||
object for ``wsgi.input``. Previously, Django was more lax than the expected
|
||||
behavior as specified by the WSGI specification.
|
||||
|
||||
.. _deprecated-features-4.2:
|
||||
|
||||
Features deprecated in 4.2
|
||||
|
|
|
@ -81,7 +81,7 @@ class WSGIFileWrapperTests(TestCase):
|
|||
|
||||
def test_file_wrapper_uses_sendfile(self):
|
||||
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
|
||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
||||
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||
handler.run(wsgi_app_file_wrapper)
|
||||
self.assertTrue(handler._used_sendfile)
|
||||
self.assertEqual(handler.stdout.getvalue(), b"")
|
||||
|
@ -89,7 +89,7 @@ class WSGIFileWrapperTests(TestCase):
|
|||
|
||||
def test_file_wrapper_no_sendfile(self):
|
||||
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
|
||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
||||
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||
handler.run(wsgi_app)
|
||||
self.assertFalse(handler._used_sendfile)
|
||||
self.assertEqual(handler.stdout.getvalue().splitlines()[-1], b"Hello World!")
|
||||
|
@ -102,7 +102,7 @@ class WSGIFileWrapperTests(TestCase):
|
|||
response when file_wrapper is used.
|
||||
"""
|
||||
env = RequestFactory().get("/fileresponse/").environ
|
||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
||||
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||
handler.run(get_internal_wsgi_application())
|
||||
# Sendfile is used only when file_wrapper has been used.
|
||||
self.assertTrue(handler._used_sendfile)
|
||||
|
@ -119,7 +119,7 @@ class WSGIFileWrapperTests(TestCase):
|
|||
@override_settings(ROOT_URLCONF="builtin_server.urls")
|
||||
def test_file_response_call_request_finished(self):
|
||||
env = RequestFactory().get("/fileresponse/").environ
|
||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
||||
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||
with mock.MagicMock() as signal_handler:
|
||||
request_finished.connect(signal_handler)
|
||||
handler.run(get_internal_wsgi_application())
|
||||
|
|
Loading…
Reference in New Issue