Fixed #33865 -- Optimized LimitedStream wrapper.

The current implementation of LimitedStream is slow because .read()
performs an extra copy into a buffer and .readline() performs two
extra copies. The stream being wrapped is already typically a BytesIO
object so this is unnecessary.

This implementation has largely been untouched for 12 years and,
inspired by a simpler implementation in werkzeug, it was possible to
achieve the following performance improvement:

LimitedStream.read() (single line):
  Mean +- std dev: [bench_limitedstream-main] 286 ns +- 6 ns
  -> [bench_limitedstream-patch] 227 ns +- 6 ns: 1.26x faster
LimitedStream.readline() (single line):
  Mean +- std dev: [bench_limitedstream-main] 507 ns +- 11 ns
  -> [bench_limitedstream-patch] 232 ns +- 8 ns: 2.18x faster
LimitedStream.read(8192) (single line):
  Mean +- std dev: [bench_limitedstream-main] 360 ns +- 8 ns
  -> [bench_limitedstream-patch] 297 ns +- 6 ns: 1.21x faster
LimitedStream.readline(8192) (single line):
  Mean +- std dev: [bench_limitedstream-main] 602 ns +- 10 ns
  -> [bench_limitedstream-patch] 305 ns +- 10 ns: 1.98x faster
LimitedStream.read() (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 290 ns +- 5 ns
  -> [bench_limitedstream-patch] 236 ns +- 6 ns: 1.23x faster
LimitedStream.readline() (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 517 ns +- 19 ns
  -> [bench_limitedstream-patch] 239 ns +- 7 ns: 2.16x faster
LimitedStream.read(8192) (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 363 ns +- 8 ns
  -> [bench_limitedstream-patch] 311 ns +- 11 ns: 1.17x faster
LimitedStream.readline(8192) (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 601 ns +- 12 ns
  -> [bench_limitedstream-patch] 308 ns +- 7 ns: 1.95x faster

Geometric mean: 1.59x faster
This commit is contained in:
Nick Pope 2022-07-22 21:08:10 +01:00 committed by Mariusz Felisiak
parent 57f5669d23
commit b47f2f5b90
4 changed files with 43 additions and 49 deletions

View File

@ -1,4 +1,4 @@
from io import BytesIO
from io import IOBase
from django.conf import settings
from django.core import signals
@ -12,56 +12,46 @@ from django.utils.regex_helper import _lazy_re_compile
_slashes_re = _lazy_re_compile(rb"/+")
class LimitedStream:
"""Wrap another stream to disallow reading it past a number of bytes."""
class LimitedStream(IOBase):
"""
Wrap another stream to disallow reading it past a number of bytes.
Based on the implementation from werkzeug.wsgi.LimitedStream
See https://github.com/pallets/werkzeug/blob/dbf78f67/src/werkzeug/wsgi.py#L828
"""
def __init__(self, stream, limit):
self.stream = stream
self.remaining = limit
self.buffer = b""
self._read = stream.read
self._readline = stream.readline
self._pos = 0
self.limit = limit
def _read_limited(self, size=None):
if size is None or size > self.remaining:
size = self.remaining
if size == 0:
def read(self, size=-1, /):
_pos = self._pos
limit = self.limit
if _pos >= limit:
return b""
result = self.stream.read(size)
self.remaining -= len(result)
return result
def read(self, size=None):
if size is None:
result = self.buffer + self._read_limited()
self.buffer = b""
elif size < len(self.buffer):
result = self.buffer[:size]
self.buffer = self.buffer[size:]
else: # size >= len(self.buffer)
result = self.buffer + self._read_limited(size - len(self.buffer))
self.buffer = b""
return result
def readline(self, size=None):
while b"\n" not in self.buffer and (size is None or len(self.buffer) < size):
if size:
# since size is not None here, len(self.buffer) < size
chunk = self._read_limited(size - len(self.buffer))
if size == -1 or size is None:
size = limit - _pos
else:
chunk = self._read_limited()
if not chunk:
break
self.buffer += chunk
sio = BytesIO(self.buffer)
if size:
line = sio.readline(size)
size = min(size, limit - _pos)
data = self._read(size)
self._pos += len(data)
return data
def readline(self, size=-1, /):
_pos = self._pos
limit = self.limit
if _pos >= limit:
return b""
if size == -1 or size is None:
size = limit - _pos
else:
line = sio.readline()
self.buffer = sio.read()
size = min(size, limit - _pos)
line = self._readline(size)
self._pos += len(line)
return line
def close(self):
pass
class WSGIRequest(HttpRequest):
non_picklable_attrs = HttpRequest.non_picklable_attrs | frozenset(["environ"])

View File

@ -144,7 +144,7 @@ class ServerHandler(simple_server.ServerHandler):
self.request_handler.close_connection = True
def close(self):
self.get_stdin()._read_limited()
self.get_stdin().read()
super().close()

View File

@ -523,6 +523,10 @@ Miscellaneous
.. _`redis-py`: https://pypi.org/project/redis/
* Manually instantiated ``WSGIRequest`` objects must be provided a file-like
object for ``wsgi.input``. Previously, Django was more lax than the expected
behavior as specified by the WSGI specification.
.. _deprecated-features-4.2:
Features deprecated in 4.2

View File

@ -81,7 +81,7 @@ class WSGIFileWrapperTests(TestCase):
def test_file_wrapper_uses_sendfile(self):
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
handler.run(wsgi_app_file_wrapper)
self.assertTrue(handler._used_sendfile)
self.assertEqual(handler.stdout.getvalue(), b"")
@ -89,7 +89,7 @@ class WSGIFileWrapperTests(TestCase):
def test_file_wrapper_no_sendfile(self):
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
handler.run(wsgi_app)
self.assertFalse(handler._used_sendfile)
self.assertEqual(handler.stdout.getvalue().splitlines()[-1], b"Hello World!")
@ -102,7 +102,7 @@ class WSGIFileWrapperTests(TestCase):
response when file_wrapper is used.
"""
env = RequestFactory().get("/fileresponse/").environ
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
handler.run(get_internal_wsgi_application())
# Sendfile is used only when file_wrapper has been used.
self.assertTrue(handler._used_sendfile)
@ -119,7 +119,7 @@ class WSGIFileWrapperTests(TestCase):
@override_settings(ROOT_URLCONF="builtin_server.urls")
def test_file_response_call_request_finished(self):
env = RequestFactory().get("/fileresponse/").environ
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
with mock.MagicMock() as signal_handler:
request_finished.connect(signal_handler)
handler.run(get_internal_wsgi_application())