mirror of https://github.com/django/django.git
Fixed #29984 -- Added QuerySet.iterator() support for prefetching related objects.
Co-authored-by: Raphael Kimmig <raphael.kimmig@ampad.de> Co-authored-by: Simon Charette <charette.s@gmail.com>
This commit is contained in:
parent
c27932ec93
commit
edbf930287
|
@ -5,7 +5,7 @@ The main QuerySet implementation. This provides the public API for the ORM.
|
|||
import copy
|
||||
import operator
|
||||
import warnings
|
||||
from itertools import chain
|
||||
from itertools import chain, islice
|
||||
|
||||
import django
|
||||
from django.conf import settings
|
||||
|
@ -23,6 +23,7 @@ from django.db.models.query_utils import FilteredRelation, Q
|
|||
from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE
|
||||
from django.db.models.utils import create_namedtuple_class, resolve_callables
|
||||
from django.utils import timezone
|
||||
from django.utils.deprecation import RemovedInDjango50Warning
|
||||
from django.utils.functional import cached_property, partition
|
||||
|
||||
# The maximum number of results to fetch in a get() query.
|
||||
|
@ -356,14 +357,40 @@ class QuerySet:
|
|||
####################################
|
||||
|
||||
def _iterator(self, use_chunked_fetch, chunk_size):
|
||||
yield from self._iterable_class(self, chunked_fetch=use_chunked_fetch, chunk_size=chunk_size)
|
||||
iterable = self._iterable_class(
|
||||
self,
|
||||
chunked_fetch=use_chunked_fetch,
|
||||
chunk_size=chunk_size or 2000,
|
||||
)
|
||||
if not self._prefetch_related_lookups or chunk_size is None:
|
||||
yield from iterable
|
||||
return
|
||||
|
||||
def iterator(self, chunk_size=2000):
|
||||
iterator = iter(iterable)
|
||||
while results := list(islice(iterator, chunk_size)):
|
||||
prefetch_related_objects(results, *self._prefetch_related_lookups)
|
||||
yield from results
|
||||
|
||||
def iterator(self, chunk_size=None):
|
||||
"""
|
||||
An iterator over the results from applying this QuerySet to the
|
||||
database.
|
||||
database. chunk_size must be provided for QuerySets that prefetch
|
||||
related objects. Otherwise, a default chunk_size of 2000 is supplied.
|
||||
"""
|
||||
if chunk_size <= 0:
|
||||
if chunk_size is None:
|
||||
if self._prefetch_related_lookups:
|
||||
# When the deprecation ends, replace with:
|
||||
# raise ValueError(
|
||||
# 'chunk_size must be provided when using '
|
||||
# 'QuerySet.iterator() after prefetch_related().'
|
||||
# )
|
||||
warnings.warn(
|
||||
'Using QuerySet.iterator() after prefetch_related() '
|
||||
'without specifying chunk_size is deprecated.',
|
||||
category=RemovedInDjango50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
elif chunk_size <= 0:
|
||||
raise ValueError('Chunk size must be strictly positive.')
|
||||
use_chunked_fetch = not connections[self.db].settings_dict.get('DISABLE_SERVER_SIDE_CURSORS')
|
||||
return self._iterator(use_chunked_fetch, chunk_size)
|
||||
|
|
|
@ -81,6 +81,10 @@ details on these changes.
|
|||
|
||||
* ``django.contrib.sessions.serializers.PickleSerializer`` will be removed.
|
||||
|
||||
* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related
|
||||
objects without providing the ``chunk_size`` argument will no longer be
|
||||
allowed.
|
||||
|
||||
.. _deprecation-removed-in-4.1:
|
||||
|
||||
4.1
|
||||
|
|
|
@ -1215,8 +1215,10 @@ could be generated, which, depending on the database, might have performance
|
|||
problems of its own when it comes to parsing or executing the SQL query. Always
|
||||
profile for your use case!
|
||||
|
||||
Note that if you use ``iterator()`` to run the query, ``prefetch_related()``
|
||||
calls will be ignored since these two optimizations do not make sense together.
|
||||
.. versionchanged:: 4.1
|
||||
|
||||
If you use ``iterator()`` to run the query, ``prefetch_related()``
|
||||
calls will only be observed if a value for ``chunk_size`` is provided.
|
||||
|
||||
You can use the :class:`~django.db.models.Prefetch` object to further control
|
||||
the prefetch operation.
|
||||
|
@ -2341,7 +2343,7 @@ If you pass ``in_bulk()`` an empty list, you'll get an empty dictionary.
|
|||
``iterator()``
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
.. method:: iterator(chunk_size=2000)
|
||||
.. method:: iterator(chunk_size=None)
|
||||
|
||||
Evaluates the ``QuerySet`` (by performing the query) and returns an iterator
|
||||
(see :pep:`234`) over the results. A ``QuerySet`` typically caches its results
|
||||
|
@ -2355,12 +2357,34 @@ performance and a significant reduction in memory.
|
|||
Note that using ``iterator()`` on a ``QuerySet`` which has already been
|
||||
evaluated will force it to evaluate again, repeating the query.
|
||||
|
||||
Also, use of ``iterator()`` causes previous ``prefetch_related()`` calls to be
|
||||
ignored since these two optimizations do not make sense together.
|
||||
``iterator()`` is compatible with previous calls to ``prefetch_related()`` as
|
||||
long as ``chunk_size`` is given. Larger values will necessitate fewer queries
|
||||
to accomplish the prefetching at the cost of greater memory usage.
|
||||
|
||||
On some databases (e.g. Oracle, `SQLite
|
||||
<https://www.sqlite.org/limits.html#max_variable_number>`_), the maximum number
|
||||
of terms in an SQL ``IN`` clause might be limited. Hence values below this
|
||||
limit should be used. (In particular, when prefetching across two or more
|
||||
relations, a ``chunk_size`` should be small enough that the anticipated number
|
||||
of results for each prefetched relation still falls below the limit.)
|
||||
|
||||
So long as the QuerySet does not prefetch any related objects, providing no
|
||||
value for ``chunk_size`` will result in Django using an implicit default of
|
||||
2000.
|
||||
|
||||
Depending on the database backend, query results will either be loaded all at
|
||||
once or streamed from the database using server-side cursors.
|
||||
|
||||
.. versionchanged:: 4.1
|
||||
|
||||
Support for prefetching related objects was added.
|
||||
|
||||
.. deprecated:: 4.1
|
||||
|
||||
Using ``iterator()`` on a queryset that prefetches related objects without
|
||||
providing the ``chunk_size`` is deprecated. In Django 5.0, an exception
|
||||
will be raise.
|
||||
|
||||
With server-side cursors
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -2399,8 +2423,10 @@ The ``chunk_size`` parameter controls the size of batches Django retrieves from
|
|||
the database driver. Larger batches decrease the overhead of communicating with
|
||||
the database driver at the expense of a slight increase in memory consumption.
|
||||
|
||||
The default value of ``chunk_size``, 2000, comes from `a calculation on the
|
||||
psycopg mailing list <https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:
|
||||
So long as the QuerySet does not prefetch any related objects, providing no
|
||||
value for ``chunk_size`` will result in Django using an implicit default of
|
||||
2000, a value derived from `a calculation on the psycopg mailing list
|
||||
<https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:
|
||||
|
||||
Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000
|
||||
is going to fetch less than 100KB of data, which seems a good compromise
|
||||
|
|
|
@ -239,6 +239,10 @@ Models
|
|||
insertion fails uniqueness constraints. This is supported on MariaDB, MySQL,
|
||||
PostgreSQL, and SQLite 3.24+.
|
||||
|
||||
* :meth:`.QuerySet.iterator` now supports prefetching related objects as long
|
||||
as the ``chunk_size`` argument is provided. In older versions, no prefetching
|
||||
was done.
|
||||
|
||||
Requests and Responses
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -430,6 +434,11 @@ Miscellaneous
|
|||
* ``django.contrib.sessions.serializers.PickleSerializer`` is deprecated due to
|
||||
the risk of remote code execution.
|
||||
|
||||
* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related
|
||||
objects without providing the ``chunk_size`` argument is deprecated. In older
|
||||
versions, no prefetching was done. Providing a value for ``chunk_size``
|
||||
signifies that the additional query per chunk needed to prefetch is desired.
|
||||
|
||||
Features removed in 4.1
|
||||
=======================
|
||||
|
||||
|
|
|
@ -7,7 +7,8 @@ from django.db.models import Prefetch, QuerySet, prefetch_related_objects
|
|||
from django.db.models.query import get_prefetcher
|
||||
from django.db.models.sql import Query
|
||||
from django.test import TestCase, override_settings
|
||||
from django.test.utils import CaptureQueriesContext
|
||||
from django.test.utils import CaptureQueriesContext, ignore_warnings
|
||||
from django.utils.deprecation import RemovedInDjango50Warning
|
||||
|
||||
from .models import (
|
||||
Article, Author, Author2, AuthorAddress, AuthorWithAge, Bio, Book,
|
||||
|
@ -316,6 +317,38 @@ class PrefetchRelatedTests(TestDataMixin, TestCase):
|
|||
['Anne', 'Charlotte', 'Emily', 'Jane'],
|
||||
)
|
||||
|
||||
def test_m2m_prefetching_iterator_with_chunks(self):
|
||||
with self.assertNumQueries(3):
|
||||
authors = [
|
||||
b.authors.first()
|
||||
for b in Book.objects.prefetch_related('authors').iterator(chunk_size=2)
|
||||
]
|
||||
self.assertEqual(
|
||||
authors,
|
||||
[self.author1, self.author1, self.author3, self.author4],
|
||||
)
|
||||
|
||||
@ignore_warnings(category=RemovedInDjango50Warning)
|
||||
def test_m2m_prefetching_iterator_without_chunks(self):
|
||||
# prefetch_related() is ignored.
|
||||
with self.assertNumQueries(5):
|
||||
authors = [
|
||||
b.authors.first()
|
||||
for b in Book.objects.prefetch_related('authors').iterator()
|
||||
]
|
||||
self.assertEqual(
|
||||
authors,
|
||||
[self.author1, self.author1, self.author3, self.author4],
|
||||
)
|
||||
|
||||
def test_m2m_prefetching_iterator_without_chunks_warning(self):
|
||||
msg = (
|
||||
'Using QuerySet.iterator() after prefetch_related() without '
|
||||
'specifying chunk_size is deprecated.'
|
||||
)
|
||||
with self.assertWarnsMessage(RemovedInDjango50Warning, msg):
|
||||
Book.objects.prefetch_related('authors').iterator()
|
||||
|
||||
|
||||
class RawQuerySetTests(TestDataMixin, TestCase):
|
||||
def test_basic(self):
|
||||
|
|
Loading…
Reference in New Issue