From ebec6f429f241ef7537e410f4e5adc03d1548149 Mon Sep 17 00:00:00 2001 From: Nick Pope Date: Fri, 22 Jul 2022 21:08:10 +0100 Subject: [PATCH] Fixed #33865 -- Optimized LimitedStream wrapper. The current implementation of `LimitedStream` is slow because `.read()` performs an extra copy into a buffer and `.readline()` performs two extra copies. We're already typically wrapping a `BytesIO` object so this is unnecessary. This implementation has largely been untouched for 12 years and, inspired by a simpler implementation in `werkzeug`, I was able to achieve the following performance improvement: ``` LimitedStream.read() (single line): Mean +- std dev: [bench_limitedstream-main] 286 ns +- 6 ns -> [bench_limitedstream-patch] 227 ns +- 6 ns: 1.26x faster LimitedStream.readline() (single line): Mean +- std dev: [bench_limitedstream-main] 507 ns +- 11 ns -> [bench_limitedstream-patch] 232 ns +- 8 ns: 2.18x faster LimitedStream.read(8192) (single line): Mean +- std dev: [bench_limitedstream-main] 360 ns +- 8 ns -> [bench_limitedstream-patch] 297 ns +- 6 ns: 1.21x faster LimitedStream.readline(8192) (single line): Mean +- std dev: [bench_limitedstream-main] 602 ns +- 10 ns -> [bench_limitedstream-patch] 305 ns +- 10 ns: 1.98x faster LimitedStream.read() (multiple lines): Mean +- std dev: [bench_limitedstream-main] 290 ns +- 5 ns -> [bench_limitedstream-patch] 236 ns +- 6 ns: 1.23x faster LimitedStream.readline() (multiple lines): Mean +- std dev: [bench_limitedstream-main] 517 ns +- 19 ns -> [bench_limitedstream-patch] 239 ns +- 7 ns: 2.16x faster LimitedStream.read(8192) (multiple lines): Mean +- std dev: [bench_limitedstream-main] 363 ns +- 8 ns -> [bench_limitedstream-patch] 311 ns +- 11 ns: 1.17x faster LimitedStream.readline(8192) (multiple lines): Mean +- std dev: [bench_limitedstream-main] 601 ns +- 12 ns -> [bench_limitedstream-patch] 308 ns +- 7 ns: 1.95x faster Geometric mean: 1.59x faster ``` --- django/core/handlers/wsgi.py | 78 ++++++++++++++------------------- django/core/servers/basehttp.py | 2 +- tests/builtin_server/tests.py | 8 ++-- 3 files changed, 39 insertions(+), 49 deletions(-) diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index 1c43953717..d1f8fbf078 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -1,4 +1,4 @@ -from io import BytesIO +from io import IOBase from django.conf import settings from django.core import signals @@ -12,55 +12,45 @@ from django.utils.regex_helper import _lazy_re_compile _slashes_re = _lazy_re_compile(rb"/+") -class LimitedStream: - """Wrap another stream to disallow reading it past a number of bytes.""" +class LimitedStream(IOBase): + """ + Wrap another stream to disallow reading it past a number of bytes. + + Based on the implementation from werkzeug.wsgi.LimitedStream + See https://github.com/pallets/werkzeug/blob/dbf78f67/src/werkzeug/wsgi.py#L828 + """ def __init__(self, stream, limit): - self.stream = stream - self.remaining = limit - self.buffer = b"" + self._read = stream.read + self._readline = stream.readline + self._pos = 0 + self.limit = limit - def _read_limited(self, size=None): - if size is None or size > self.remaining: - size = self.remaining - if size == 0: + def read(self, size=-1, /): + _pos = self._pos + limit = self.limit + if _pos >= limit: return b"" - result = self.stream.read(size) - self.remaining -= len(result) - return result - - def read(self, size=None): - if size is None: - result = self.buffer + self._read_limited() - self.buffer = b"" - elif size < len(self.buffer): - result = self.buffer[:size] - self.buffer = self.buffer[size:] - else: # size >= len(self.buffer) - result = self.buffer + self._read_limited(size - len(self.buffer)) - self.buffer = b"" - return result - - def readline(self, size=None): - while b"\n" not in self.buffer and (size is None or len(self.buffer) < size): - if size: - # since size is not None here, len(self.buffer) < size - chunk = self._read_limited(size - len(self.buffer)) - else: - chunk = self._read_limited() - if not chunk: - break - self.buffer += chunk - sio = BytesIO(self.buffer) - if size: - line = sio.readline(size) + if size == -1 or size is None: + size = limit - _pos else: - line = sio.readline() - self.buffer = sio.read() - return line + size = min(size, limit - _pos) + data = self._read(size) + self._pos += len(data) + return data - def close(self): - pass + def readline(self, size=-1, /): + _pos = self._pos + limit = self.limit + if _pos >= limit: + return b"" + if size == -1 or size is None: + size = limit - _pos + else: + size = min(size, limit - _pos) + line = self._readline(size) + self._pos += len(line) + return line class WSGIRequest(HttpRequest): diff --git a/django/core/servers/basehttp.py b/django/core/servers/basehttp.py index 74a42966a6..fef5532e58 100644 --- a/django/core/servers/basehttp.py +++ b/django/core/servers/basehttp.py @@ -144,7 +144,7 @@ class ServerHandler(simple_server.ServerHandler): self.request_handler.close_connection = True def close(self): - self.get_stdin()._read_limited() + self.get_stdin().read() super().close() diff --git a/tests/builtin_server/tests.py b/tests/builtin_server/tests.py index 2777db1e13..f654fdd92c 100644 --- a/tests/builtin_server/tests.py +++ b/tests/builtin_server/tests.py @@ -81,7 +81,7 @@ class WSGIFileWrapperTests(TestCase): def test_file_wrapper_uses_sendfile(self): env = {"SERVER_PROTOCOL": "HTTP/1.0"} - handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) + handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) handler.run(wsgi_app_file_wrapper) self.assertTrue(handler._used_sendfile) self.assertEqual(handler.stdout.getvalue(), b"") @@ -89,7 +89,7 @@ class WSGIFileWrapperTests(TestCase): def test_file_wrapper_no_sendfile(self): env = {"SERVER_PROTOCOL": "HTTP/1.0"} - handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) + handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) handler.run(wsgi_app) self.assertFalse(handler._used_sendfile) self.assertEqual(handler.stdout.getvalue().splitlines()[-1], b"Hello World!") @@ -102,7 +102,7 @@ class WSGIFileWrapperTests(TestCase): response when file_wrapper is used. """ env = RequestFactory().get("/fileresponse/").environ - handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) + handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) handler.run(get_internal_wsgi_application()) # Sendfile is used only when file_wrapper has been used. self.assertTrue(handler._used_sendfile) @@ -119,7 +119,7 @@ class WSGIFileWrapperTests(TestCase): @override_settings(ROOT_URLCONF="builtin_server.urls") def test_file_response_call_request_finished(self): env = RequestFactory().get("/fileresponse/").environ - handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) + handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) with mock.MagicMock() as signal_handler: request_finished.connect(signal_handler) handler.run(get_internal_wsgi_application())