Fixed #29984 -- Added QuerySet.iterator() support for prefetching related objects.
Co-authored-by: Raphael Kimmig <raphael.kimmig@ampad.de> Co-authored-by: Simon Charette <charette.s@gmail.com>
This commit is contained in:
parent
c27932ec93
commit
edbf930287
|
@ -5,7 +5,7 @@ The main QuerySet implementation. This provides the public API for the ORM.
|
||||||
import copy
|
import copy
|
||||||
import operator
|
import operator
|
||||||
import warnings
|
import warnings
|
||||||
from itertools import chain
|
from itertools import chain, islice
|
||||||
|
|
||||||
import django
|
import django
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
@ -23,6 +23,7 @@ from django.db.models.query_utils import FilteredRelation, Q
|
||||||
from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE
|
from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE
|
||||||
from django.db.models.utils import create_namedtuple_class, resolve_callables
|
from django.db.models.utils import create_namedtuple_class, resolve_callables
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from django.utils.deprecation import RemovedInDjango50Warning
|
||||||
from django.utils.functional import cached_property, partition
|
from django.utils.functional import cached_property, partition
|
||||||
|
|
||||||
# The maximum number of results to fetch in a get() query.
|
# The maximum number of results to fetch in a get() query.
|
||||||
|
@ -356,14 +357,40 @@ class QuerySet:
|
||||||
####################################
|
####################################
|
||||||
|
|
||||||
def _iterator(self, use_chunked_fetch, chunk_size):
|
def _iterator(self, use_chunked_fetch, chunk_size):
|
||||||
yield from self._iterable_class(self, chunked_fetch=use_chunked_fetch, chunk_size=chunk_size)
|
iterable = self._iterable_class(
|
||||||
|
self,
|
||||||
|
chunked_fetch=use_chunked_fetch,
|
||||||
|
chunk_size=chunk_size or 2000,
|
||||||
|
)
|
||||||
|
if not self._prefetch_related_lookups or chunk_size is None:
|
||||||
|
yield from iterable
|
||||||
|
return
|
||||||
|
|
||||||
def iterator(self, chunk_size=2000):
|
iterator = iter(iterable)
|
||||||
|
while results := list(islice(iterator, chunk_size)):
|
||||||
|
prefetch_related_objects(results, *self._prefetch_related_lookups)
|
||||||
|
yield from results
|
||||||
|
|
||||||
|
def iterator(self, chunk_size=None):
|
||||||
"""
|
"""
|
||||||
An iterator over the results from applying this QuerySet to the
|
An iterator over the results from applying this QuerySet to the
|
||||||
database.
|
database. chunk_size must be provided for QuerySets that prefetch
|
||||||
|
related objects. Otherwise, a default chunk_size of 2000 is supplied.
|
||||||
"""
|
"""
|
||||||
if chunk_size <= 0:
|
if chunk_size is None:
|
||||||
|
if self._prefetch_related_lookups:
|
||||||
|
# When the deprecation ends, replace with:
|
||||||
|
# raise ValueError(
|
||||||
|
# 'chunk_size must be provided when using '
|
||||||
|
# 'QuerySet.iterator() after prefetch_related().'
|
||||||
|
# )
|
||||||
|
warnings.warn(
|
||||||
|
'Using QuerySet.iterator() after prefetch_related() '
|
||||||
|
'without specifying chunk_size is deprecated.',
|
||||||
|
category=RemovedInDjango50Warning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
elif chunk_size <= 0:
|
||||||
raise ValueError('Chunk size must be strictly positive.')
|
raise ValueError('Chunk size must be strictly positive.')
|
||||||
use_chunked_fetch = not connections[self.db].settings_dict.get('DISABLE_SERVER_SIDE_CURSORS')
|
use_chunked_fetch = not connections[self.db].settings_dict.get('DISABLE_SERVER_SIDE_CURSORS')
|
||||||
return self._iterator(use_chunked_fetch, chunk_size)
|
return self._iterator(use_chunked_fetch, chunk_size)
|
||||||
|
|
|
@ -81,6 +81,10 @@ details on these changes.
|
||||||
|
|
||||||
* ``django.contrib.sessions.serializers.PickleSerializer`` will be removed.
|
* ``django.contrib.sessions.serializers.PickleSerializer`` will be removed.
|
||||||
|
|
||||||
|
* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related
|
||||||
|
objects without providing the ``chunk_size`` argument will no longer be
|
||||||
|
allowed.
|
||||||
|
|
||||||
.. _deprecation-removed-in-4.1:
|
.. _deprecation-removed-in-4.1:
|
||||||
|
|
||||||
4.1
|
4.1
|
||||||
|
|
|
@ -1215,8 +1215,10 @@ could be generated, which, depending on the database, might have performance
|
||||||
problems of its own when it comes to parsing or executing the SQL query. Always
|
problems of its own when it comes to parsing or executing the SQL query. Always
|
||||||
profile for your use case!
|
profile for your use case!
|
||||||
|
|
||||||
Note that if you use ``iterator()`` to run the query, ``prefetch_related()``
|
.. versionchanged:: 4.1
|
||||||
calls will be ignored since these two optimizations do not make sense together.
|
|
||||||
|
If you use ``iterator()`` to run the query, ``prefetch_related()``
|
||||||
|
calls will only be observed if a value for ``chunk_size`` is provided.
|
||||||
|
|
||||||
You can use the :class:`~django.db.models.Prefetch` object to further control
|
You can use the :class:`~django.db.models.Prefetch` object to further control
|
||||||
the prefetch operation.
|
the prefetch operation.
|
||||||
|
@ -2341,7 +2343,7 @@ If you pass ``in_bulk()`` an empty list, you'll get an empty dictionary.
|
||||||
``iterator()``
|
``iterator()``
|
||||||
~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. method:: iterator(chunk_size=2000)
|
.. method:: iterator(chunk_size=None)
|
||||||
|
|
||||||
Evaluates the ``QuerySet`` (by performing the query) and returns an iterator
|
Evaluates the ``QuerySet`` (by performing the query) and returns an iterator
|
||||||
(see :pep:`234`) over the results. A ``QuerySet`` typically caches its results
|
(see :pep:`234`) over the results. A ``QuerySet`` typically caches its results
|
||||||
|
@ -2355,12 +2357,34 @@ performance and a significant reduction in memory.
|
||||||
Note that using ``iterator()`` on a ``QuerySet`` which has already been
|
Note that using ``iterator()`` on a ``QuerySet`` which has already been
|
||||||
evaluated will force it to evaluate again, repeating the query.
|
evaluated will force it to evaluate again, repeating the query.
|
||||||
|
|
||||||
Also, use of ``iterator()`` causes previous ``prefetch_related()`` calls to be
|
``iterator()`` is compatible with previous calls to ``prefetch_related()`` as
|
||||||
ignored since these two optimizations do not make sense together.
|
long as ``chunk_size`` is given. Larger values will necessitate fewer queries
|
||||||
|
to accomplish the prefetching at the cost of greater memory usage.
|
||||||
|
|
||||||
|
On some databases (e.g. Oracle, `SQLite
|
||||||
|
<https://www.sqlite.org/limits.html#max_variable_number>`_), the maximum number
|
||||||
|
of terms in an SQL ``IN`` clause might be limited. Hence values below this
|
||||||
|
limit should be used. (In particular, when prefetching across two or more
|
||||||
|
relations, a ``chunk_size`` should be small enough that the anticipated number
|
||||||
|
of results for each prefetched relation still falls below the limit.)
|
||||||
|
|
||||||
|
So long as the ``QuerySet`` does not prefetch any related objects, providing no
|
||||||
|
value for ``chunk_size`` will result in Django using an implicit default of
|
||||||
|
2000.
|
||||||
|
|
||||||
Depending on the database backend, query results will either be loaded all at
|
Depending on the database backend, query results will either be loaded all at
|
||||||
once or streamed from the database using server-side cursors.
|
once or streamed from the database using server-side cursors.
|
||||||
|
|
||||||
|
.. versionchanged:: 4.1
|
||||||
|
|
||||||
|
Support for prefetching related objects was added.
|
||||||
|
|
||||||
|
.. deprecated:: 4.1
|
||||||
|
|
||||||
|
Using ``iterator()`` on a queryset that prefetches related objects without
|
||||||
|
providing the ``chunk_size`` is deprecated. In Django 5.0, an exception
|
||||||
|
will be raised.
|
||||||
|
|
||||||
With server-side cursors
|
With server-side cursors
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
@ -2399,8 +2423,10 @@ The ``chunk_size`` parameter controls the size of batches Django retrieves from
|
||||||
the database driver. Larger batches decrease the overhead of communicating with
|
the database driver. Larger batches decrease the overhead of communicating with
|
||||||
the database driver at the expense of a slight increase in memory consumption.
|
the database driver at the expense of a slight increase in memory consumption.
|
||||||
|
|
||||||
The default value of ``chunk_size``, 2000, comes from `a calculation on the
|
So long as the ``QuerySet`` does not prefetch any related objects, providing no
|
||||||
psycopg mailing list <https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:
|
value for ``chunk_size`` will result in Django using an implicit default of
|
||||||
|
2000, a value derived from `a calculation on the psycopg mailing list
|
||||||
|
<https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:
|
||||||
|
|
||||||
Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000
|
Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000
|
||||||
is going to fetch less than 100KB of data, which seems a good compromise
|
is going to fetch less than 100KB of data, which seems a good compromise
|
||||||
|
|
|
@ -239,6 +239,10 @@ Models
|
||||||
insertion fails uniqueness constraints. This is supported on MariaDB, MySQL,
|
insertion fails uniqueness constraints. This is supported on MariaDB, MySQL,
|
||||||
PostgreSQL, and SQLite 3.24+.
|
PostgreSQL, and SQLite 3.24+.
|
||||||
|
|
||||||
|
* :meth:`.QuerySet.iterator` now supports prefetching related objects as long
|
||||||
|
as the ``chunk_size`` argument is provided. In older versions, no prefetching
|
||||||
|
was done.
|
||||||
|
|
||||||
Requests and Responses
|
Requests and Responses
|
||||||
~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
@ -430,6 +434,11 @@ Miscellaneous
|
||||||
* ``django.contrib.sessions.serializers.PickleSerializer`` is deprecated due to
|
* ``django.contrib.sessions.serializers.PickleSerializer`` is deprecated due to
|
||||||
the risk of remote code execution.
|
the risk of remote code execution.
|
||||||
|
|
||||||
|
* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related
|
||||||
|
objects without providing the ``chunk_size`` argument is deprecated. In older
|
||||||
|
versions, no prefetching was done. Providing a value for ``chunk_size``
|
||||||
|
signifies that the additional query per chunk needed to prefetch is desired.
|
||||||
|
|
||||||
Features removed in 4.1
|
Features removed in 4.1
|
||||||
=======================
|
=======================
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,8 @@ from django.db.models import Prefetch, QuerySet, prefetch_related_objects
|
||||||
from django.db.models.query import get_prefetcher
|
from django.db.models.query import get_prefetcher
|
||||||
from django.db.models.sql import Query
|
from django.db.models.sql import Query
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
from django.test.utils import CaptureQueriesContext
|
from django.test.utils import CaptureQueriesContext, ignore_warnings
|
||||||
|
from django.utils.deprecation import RemovedInDjango50Warning
|
||||||
|
|
||||||
from .models import (
|
from .models import (
|
||||||
Article, Author, Author2, AuthorAddress, AuthorWithAge, Bio, Book,
|
Article, Author, Author2, AuthorAddress, AuthorWithAge, Bio, Book,
|
||||||
|
@ -316,6 +317,38 @@ class PrefetchRelatedTests(TestDataMixin, TestCase):
|
||||||
['Anne', 'Charlotte', 'Emily', 'Jane'],
|
['Anne', 'Charlotte', 'Emily', 'Jane'],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_m2m_prefetching_iterator_with_chunks(self):
|
||||||
|
with self.assertNumQueries(3):
|
||||||
|
authors = [
|
||||||
|
b.authors.first()
|
||||||
|
for b in Book.objects.prefetch_related('authors').iterator(chunk_size=2)
|
||||||
|
]
|
||||||
|
self.assertEqual(
|
||||||
|
authors,
|
||||||
|
[self.author1, self.author1, self.author3, self.author4],
|
||||||
|
)
|
||||||
|
|
||||||
|
@ignore_warnings(category=RemovedInDjango50Warning)
|
||||||
|
def test_m2m_prefetching_iterator_without_chunks(self):
|
||||||
|
# prefetch_related() is ignored.
|
||||||
|
with self.assertNumQueries(5):
|
||||||
|
authors = [
|
||||||
|
b.authors.first()
|
||||||
|
for b in Book.objects.prefetch_related('authors').iterator()
|
||||||
|
]
|
||||||
|
self.assertEqual(
|
||||||
|
authors,
|
||||||
|
[self.author1, self.author1, self.author3, self.author4],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_m2m_prefetching_iterator_without_chunks_warning(self):
|
||||||
|
msg = (
|
||||||
|
'Using QuerySet.iterator() after prefetch_related() without '
|
||||||
|
'specifying chunk_size is deprecated.'
|
||||||
|
)
|
||||||
|
with self.assertWarnsMessage(RemovedInDjango50Warning, msg):
|
||||||
|
Book.objects.prefetch_related('authors').iterator()
|
||||||
|
|
||||||
|
|
||||||
class RawQuerySetTests(TestDataMixin, TestCase):
|
class RawQuerySetTests(TestDataMixin, TestCase):
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
|
|
Loading…
Reference in New Issue