Fixed #33865 -- Optimized LimitedStream wrapper.
The current implementation of `LimitedStream` is slow because `.read()` performs an extra copy into a buffer and `.readline()` performs two extra copies. We're already typically wrapping a `BytesIO` object so this is unnecessary. This implementation has largely been untouched for 12 years and, inspired by a simpler implementation in `werkzeug`, I was able to achieve the following performance improvement: ``` LimitedStream.read() (single line): Mean +- std dev: [bench_limitedstream-main] 286 ns +- 6 ns -> [bench_limitedstream-patch] 227 ns +- 6 ns: 1.26x faster LimitedStream.readline() (single line): Mean +- std dev: [bench_limitedstream-main] 507 ns +- 11 ns -> [bench_limitedstream-patch] 232 ns +- 8 ns: 2.18x faster LimitedStream.read(8192) (single line): Mean +- std dev: [bench_limitedstream-main] 360 ns +- 8 ns -> [bench_limitedstream-patch] 297 ns +- 6 ns: 1.21x faster LimitedStream.readline(8192) (single line): Mean +- std dev: [bench_limitedstream-main] 602 ns +- 10 ns -> [bench_limitedstream-patch] 305 ns +- 10 ns: 1.98x faster LimitedStream.read() (multiple lines): Mean +- std dev: [bench_limitedstream-main] 290 ns +- 5 ns -> [bench_limitedstream-patch] 236 ns +- 6 ns: 1.23x faster LimitedStream.readline() (multiple lines): Mean +- std dev: [bench_limitedstream-main] 517 ns +- 19 ns -> [bench_limitedstream-patch] 239 ns +- 7 ns: 2.16x faster LimitedStream.read(8192) (multiple lines): Mean +- std dev: [bench_limitedstream-main] 363 ns +- 8 ns -> [bench_limitedstream-patch] 311 ns +- 11 ns: 1.17x faster LimitedStream.readline(8192) (multiple lines): Mean +- std dev: [bench_limitedstream-main] 601 ns +- 12 ns -> [bench_limitedstream-patch] 308 ns +- 7 ns: 1.95x faster Geometric mean: 1.59x faster ```
This commit is contained in:
parent
a375f15b10
commit
ebec6f429f
|
@ -1,4 +1,4 @@
|
||||||
from io import BytesIO
|
from io import IOBase
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core import signals
|
from django.core import signals
|
||||||
|
@ -12,55 +12,45 @@ from django.utils.regex_helper import _lazy_re_compile
|
||||||
_slashes_re = _lazy_re_compile(rb"/+")
|
_slashes_re = _lazy_re_compile(rb"/+")
|
||||||
|
|
||||||
|
|
||||||
class LimitedStream:
|
class LimitedStream(IOBase):
|
||||||
"""Wrap another stream to disallow reading it past a number of bytes."""
|
"""
|
||||||
|
Wrap another stream to disallow reading it past a number of bytes.
|
||||||
|
|
||||||
|
Based on the implementation from werkzeug.wsgi.LimitedStream
|
||||||
|
See https://github.com/pallets/werkzeug/blob/dbf78f67/src/werkzeug/wsgi.py#L828
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, stream, limit):
|
def __init__(self, stream, limit):
|
||||||
self.stream = stream
|
self._read = stream.read
|
||||||
self.remaining = limit
|
self._readline = stream.readline
|
||||||
self.buffer = b""
|
self._pos = 0
|
||||||
|
self.limit = limit
|
||||||
|
|
||||||
def _read_limited(self, size=None):
|
def read(self, size=-1, /):
|
||||||
if size is None or size > self.remaining:
|
_pos = self._pos
|
||||||
size = self.remaining
|
limit = self.limit
|
||||||
if size == 0:
|
if _pos >= limit:
|
||||||
return b""
|
return b""
|
||||||
result = self.stream.read(size)
|
if size == -1 or size is None:
|
||||||
self.remaining -= len(result)
|
size = limit - _pos
|
||||||
return result
|
|
||||||
|
|
||||||
def read(self, size=None):
|
|
||||||
if size is None:
|
|
||||||
result = self.buffer + self._read_limited()
|
|
||||||
self.buffer = b""
|
|
||||||
elif size < len(self.buffer):
|
|
||||||
result = self.buffer[:size]
|
|
||||||
self.buffer = self.buffer[size:]
|
|
||||||
else: # size >= len(self.buffer)
|
|
||||||
result = self.buffer + self._read_limited(size - len(self.buffer))
|
|
||||||
self.buffer = b""
|
|
||||||
return result
|
|
||||||
|
|
||||||
def readline(self, size=None):
|
|
||||||
while b"\n" not in self.buffer and (size is None or len(self.buffer) < size):
|
|
||||||
if size:
|
|
||||||
# since size is not None here, len(self.buffer) < size
|
|
||||||
chunk = self._read_limited(size - len(self.buffer))
|
|
||||||
else:
|
|
||||||
chunk = self._read_limited()
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
self.buffer += chunk
|
|
||||||
sio = BytesIO(self.buffer)
|
|
||||||
if size:
|
|
||||||
line = sio.readline(size)
|
|
||||||
else:
|
else:
|
||||||
line = sio.readline()
|
size = min(size, limit - _pos)
|
||||||
self.buffer = sio.read()
|
data = self._read(size)
|
||||||
return line
|
self._pos += len(data)
|
||||||
|
return data
|
||||||
|
|
||||||
def close(self):
|
def readline(self, size=-1, /):
|
||||||
pass
|
_pos = self._pos
|
||||||
|
limit = self.limit
|
||||||
|
if _pos >= limit:
|
||||||
|
return b""
|
||||||
|
if size == -1 or size is None:
|
||||||
|
size = limit - _pos
|
||||||
|
else:
|
||||||
|
size = min(size, limit - _pos)
|
||||||
|
line = self._readline(size)
|
||||||
|
self._pos += len(line)
|
||||||
|
return line
|
||||||
|
|
||||||
|
|
||||||
class WSGIRequest(HttpRequest):
|
class WSGIRequest(HttpRequest):
|
||||||
|
|
|
@ -144,7 +144,7 @@ class ServerHandler(simple_server.ServerHandler):
|
||||||
self.request_handler.close_connection = True
|
self.request_handler.close_connection = True
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self.get_stdin()._read_limited()
|
self.get_stdin().read()
|
||||||
super().close()
|
super().close()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -81,7 +81,7 @@ class WSGIFileWrapperTests(TestCase):
|
||||||
|
|
||||||
def test_file_wrapper_uses_sendfile(self):
|
def test_file_wrapper_uses_sendfile(self):
|
||||||
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
|
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
|
||||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||||
handler.run(wsgi_app_file_wrapper)
|
handler.run(wsgi_app_file_wrapper)
|
||||||
self.assertTrue(handler._used_sendfile)
|
self.assertTrue(handler._used_sendfile)
|
||||||
self.assertEqual(handler.stdout.getvalue(), b"")
|
self.assertEqual(handler.stdout.getvalue(), b"")
|
||||||
|
@ -89,7 +89,7 @@ class WSGIFileWrapperTests(TestCase):
|
||||||
|
|
||||||
def test_file_wrapper_no_sendfile(self):
|
def test_file_wrapper_no_sendfile(self):
|
||||||
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
|
env = {"SERVER_PROTOCOL": "HTTP/1.0"}
|
||||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||||
handler.run(wsgi_app)
|
handler.run(wsgi_app)
|
||||||
self.assertFalse(handler._used_sendfile)
|
self.assertFalse(handler._used_sendfile)
|
||||||
self.assertEqual(handler.stdout.getvalue().splitlines()[-1], b"Hello World!")
|
self.assertEqual(handler.stdout.getvalue().splitlines()[-1], b"Hello World!")
|
||||||
|
@ -102,7 +102,7 @@ class WSGIFileWrapperTests(TestCase):
|
||||||
response when file_wrapper is used.
|
response when file_wrapper is used.
|
||||||
"""
|
"""
|
||||||
env = RequestFactory().get("/fileresponse/").environ
|
env = RequestFactory().get("/fileresponse/").environ
|
||||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||||
handler.run(get_internal_wsgi_application())
|
handler.run(get_internal_wsgi_application())
|
||||||
# Sendfile is used only when file_wrapper has been used.
|
# Sendfile is used only when file_wrapper has been used.
|
||||||
self.assertTrue(handler._used_sendfile)
|
self.assertTrue(handler._used_sendfile)
|
||||||
|
@ -119,7 +119,7 @@ class WSGIFileWrapperTests(TestCase):
|
||||||
@override_settings(ROOT_URLCONF="builtin_server.urls")
|
@override_settings(ROOT_URLCONF="builtin_server.urls")
|
||||||
def test_file_response_call_request_finished(self):
|
def test_file_response_call_request_finished(self):
|
||||||
env = RequestFactory().get("/fileresponse/").environ
|
env = RequestFactory().get("/fileresponse/").environ
|
||||||
handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env)
|
handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env)
|
||||||
with mock.MagicMock() as signal_handler:
|
with mock.MagicMock() as signal_handler:
|
||||||
request_finished.connect(signal_handler)
|
request_finished.connect(signal_handler)
|
||||||
handler.run(get_internal_wsgi_application())
|
handler.run(get_internal_wsgi_application())
|
||||||
|
|
Loading…
Reference in New Issue