diff --git a/django/db/models/query.py b/django/db/models/query.py index 1874416928..f8d848e926 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -5,7 +5,7 @@ The main QuerySet implementation. This provides the public API for the ORM. import copy import operator import warnings -from itertools import chain +from itertools import chain, islice import django from django.conf import settings @@ -23,6 +23,7 @@ from django.db.models.query_utils import FilteredRelation, Q from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE from django.db.models.utils import create_namedtuple_class, resolve_callables from django.utils import timezone +from django.utils.deprecation import RemovedInDjango50Warning from django.utils.functional import cached_property, partition # The maximum number of results to fetch in a get() query. @@ -356,14 +357,40 @@ class QuerySet: #################################### def _iterator(self, use_chunked_fetch, chunk_size): - yield from self._iterable_class(self, chunked_fetch=use_chunked_fetch, chunk_size=chunk_size) + iterable = self._iterable_class( + self, + chunked_fetch=use_chunked_fetch, + chunk_size=chunk_size or 2000, + ) + if not self._prefetch_related_lookups or chunk_size is None: + yield from iterable + return - def iterator(self, chunk_size=2000): + iterator = iter(iterable) + while results := list(islice(iterator, chunk_size)): + prefetch_related_objects(results, *self._prefetch_related_lookups) + yield from results + + def iterator(self, chunk_size=None): """ An iterator over the results from applying this QuerySet to the - database. + database. chunk_size must be provided for QuerySets that prefetch + related objects. Otherwise, a default chunk_size of 2000 is supplied. """ - if chunk_size <= 0: + if chunk_size is None: + if self._prefetch_related_lookups: + # When the deprecation ends, replace with: + # raise ValueError( + # 'chunk_size must be provided when using ' + # 'QuerySet.iterator() after prefetch_related().' + # ) + warnings.warn( + 'Using QuerySet.iterator() after prefetch_related() ' + 'without specifying chunk_size is deprecated.', + category=RemovedInDjango50Warning, + stacklevel=2, + ) + elif chunk_size <= 0: raise ValueError('Chunk size must be strictly positive.') use_chunked_fetch = not connections[self.db].settings_dict.get('DISABLE_SERVER_SIDE_CURSORS') return self._iterator(use_chunked_fetch, chunk_size) diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index 23fa942ef9..4818846bea 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -81,6 +81,10 @@ details on these changes. * ``django.contrib.sessions.serializers.PickleSerializer`` will be removed. +* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related + objects without providing the ``chunk_size`` argument will no longer be + allowed. + .. _deprecation-removed-in-4.1: 4.1 diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index 60360ee163..25252b9cec 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -1215,8 +1215,10 @@ could be generated, which, depending on the database, might have performance problems of its own when it comes to parsing or executing the SQL query. Always profile for your use case! -Note that if you use ``iterator()`` to run the query, ``prefetch_related()`` -calls will be ignored since these two optimizations do not make sense together. +.. versionchanged:: 4.1 + + If you use ``iterator()`` to run the query, ``prefetch_related()`` + calls will only be observed if a value for ``chunk_size`` is provided. You can use the :class:`~django.db.models.Prefetch` object to further control the prefetch operation. @@ -2341,7 +2343,7 @@ If you pass ``in_bulk()`` an empty list, you'll get an empty dictionary. ``iterator()`` ~~~~~~~~~~~~~~ -.. method:: iterator(chunk_size=2000) +.. method:: iterator(chunk_size=None) Evaluates the ``QuerySet`` (by performing the query) and returns an iterator (see :pep:`234`) over the results. A ``QuerySet`` typically caches its results @@ -2355,12 +2357,34 @@ performance and a significant reduction in memory. Note that using ``iterator()`` on a ``QuerySet`` which has already been evaluated will force it to evaluate again, repeating the query. -Also, use of ``iterator()`` causes previous ``prefetch_related()`` calls to be -ignored since these two optimizations do not make sense together. +``iterator()`` is compatible with previous calls to ``prefetch_related()`` as +long as ``chunk_size`` is given. Larger values will necessitate fewer queries +to accomplish the prefetching at the cost of greater memory usage. + +On some databases (e.g. Oracle, `SQLite +`_), the maximum number +of terms in an SQL ``IN`` clause might be limited. Hence values below this +limit should be used. (In particular, when prefetching across two or more +relations, a ``chunk_size`` should be small enough that the anticipated number +of results for each prefetched relation still falls below the limit.) + +So long as the QuerySet does not prefetch any related objects, providing no +value for ``chunk_size`` will result in Django using an implicit default of +2000. Depending on the database backend, query results will either be loaded all at once or streamed from the database using server-side cursors. +.. versionchanged:: 4.1 + + Support for prefetching related objects was added. + +.. deprecated:: 4.1 + + Using ``iterator()`` on a queryset that prefetches related objects without + providing the ``chunk_size`` is deprecated. In Django 5.0, an exception + will be raise. + With server-side cursors ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2399,8 +2423,10 @@ The ``chunk_size`` parameter controls the size of batches Django retrieves from the database driver. Larger batches decrease the overhead of communicating with the database driver at the expense of a slight increase in memory consumption. -The default value of ``chunk_size``, 2000, comes from `a calculation on the -psycopg mailing list `_: +So long as the QuerySet does not prefetch any related objects, providing no +value for ``chunk_size`` will result in Django using an implicit default of +2000, a value derived from `a calculation on the psycopg mailing list +`_: Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000 is going to fetch less than 100KB of data, which seems a good compromise diff --git a/docs/releases/4.1.txt b/docs/releases/4.1.txt index cf4eb07b35..21530ff2a2 100644 --- a/docs/releases/4.1.txt +++ b/docs/releases/4.1.txt @@ -239,6 +239,10 @@ Models insertion fails uniqueness constraints. This is supported on MariaDB, MySQL, PostgreSQL, and SQLite 3.24+. +* :meth:`.QuerySet.iterator` now supports prefetching related objects as long + as the ``chunk_size`` argument is provided. In older versions, no prefetching + was done. + Requests and Responses ~~~~~~~~~~~~~~~~~~~~~~ @@ -430,6 +434,11 @@ Miscellaneous * ``django.contrib.sessions.serializers.PickleSerializer`` is deprecated due to the risk of remote code execution. +* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related + objects without providing the ``chunk_size`` argument is deprecated. In older + versions, no prefetching was done. Providing a value for ``chunk_size`` + signifies that the additional query per chunk needed to prefetch is desired. + Features removed in 4.1 ======================= diff --git a/tests/prefetch_related/tests.py b/tests/prefetch_related/tests.py index 5040f3d886..18f5de746f 100644 --- a/tests/prefetch_related/tests.py +++ b/tests/prefetch_related/tests.py @@ -7,7 +7,8 @@ from django.db.models import Prefetch, QuerySet, prefetch_related_objects from django.db.models.query import get_prefetcher from django.db.models.sql import Query from django.test import TestCase, override_settings -from django.test.utils import CaptureQueriesContext +from django.test.utils import CaptureQueriesContext, ignore_warnings +from django.utils.deprecation import RemovedInDjango50Warning from .models import ( Article, Author, Author2, AuthorAddress, AuthorWithAge, Bio, Book, @@ -316,6 +317,38 @@ class PrefetchRelatedTests(TestDataMixin, TestCase): ['Anne', 'Charlotte', 'Emily', 'Jane'], ) + def test_m2m_prefetching_iterator_with_chunks(self): + with self.assertNumQueries(3): + authors = [ + b.authors.first() + for b in Book.objects.prefetch_related('authors').iterator(chunk_size=2) + ] + self.assertEqual( + authors, + [self.author1, self.author1, self.author3, self.author4], + ) + + @ignore_warnings(category=RemovedInDjango50Warning) + def test_m2m_prefetching_iterator_without_chunks(self): + # prefetch_related() is ignored. + with self.assertNumQueries(5): + authors = [ + b.authors.first() + for b in Book.objects.prefetch_related('authors').iterator() + ] + self.assertEqual( + authors, + [self.author1, self.author1, self.author3, self.author4], + ) + + def test_m2m_prefetching_iterator_without_chunks_warning(self): + msg = ( + 'Using QuerySet.iterator() after prefetch_related() without ' + 'specifying chunk_size is deprecated.' + ) + with self.assertWarnsMessage(RemovedInDjango50Warning, msg): + Book.objects.prefetch_related('authors').iterator() + class RawQuerySetTests(TestDataMixin, TestCase): def test_basic(self):