Fixed #18702 -- Removed chunked reads from QuerySet iteration
commit 70679243d1
parent ea9a0857d4
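In outline: before this commit, QuerySet iteration kept a live iterator in self._iter and topped up _result_cache in ITER_CHUNK_SIZE (100-row) steps through _fill_cache(); after it, the first operation that needs results populates the whole cache in one go through the new _fetch_all(). A standalone sketch of the two strategies follows — the class names are illustrative only and do not appear in the patch:

ITER_CHUNK_SIZE = 100  # the old module-level constant this commit removes


class ChunkedCache:
    # Old protocol: a half-consumed iterator plus a partially filled cache.
    def __init__(self, rows):
        self._iter = iter(rows)
        self._result_cache = []

    def _fill_cache(self, num=None):
        # Append up to `num` (default: one chunk) more rows to the cache.
        if self._iter:
            try:
                for _ in range(num or ITER_CHUNK_SIZE):
                    self._result_cache.append(next(self._iter))
            except StopIteration:
                self._iter = None


class EagerCache:
    # New protocol: one helper populates the entire cache on first use.
    def __init__(self, rows):
        self._rows = rows
        self._result_cache = None

    def _fetch_all(self):
        if self._result_cache is None:
            self._result_cache = list(self._rows)

    def __iter__(self):
        self._fetch_all()
        return iter(self._result_cache)


old = ChunkedCache(range(250))
old._fill_cache()
print(len(old._result_cache))  # 100 -- only one chunk materialized
new = EagerCache(range(250))
print(len(list(new)))          # 250 -- everything materialized at once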
@@ -20,11 +20,6 @@ from django.utils.functional import partition
 from django.utils import six
 from django.utils import timezone
 
-# Used to control how many objects are worked with at once in some cases (e.g.
-# when deleting objects).
-CHUNK_SIZE = 100
-ITER_CHUNK_SIZE = CHUNK_SIZE
-
 # The maximum number of items to display in a QuerySet.__repr__
 REPR_OUTPUT_SIZE = 20
 
@@ -41,7 +36,6 @@ class QuerySet(object):
         self._db = using
         self.query = query or sql.Query(self.model)
         self._result_cache = None
-        self._iter = None
         self._sticky_filter = False
         self._for_write = False
         self._prefetch_related_lookups = []
@@ -57,8 +51,8 @@ class QuerySet(object):
         Deep copy of a QuerySet doesn't populate the cache
         """
         obj = self.__class__()
-        for k,v in self.__dict__.items():
-            if k in ('_iter','_result_cache'):
+        for k, v in self.__dict__.items():
+            if k == '_result_cache':
                 obj.__dict__[k] = None
             else:
                 obj.__dict__[k] = copy.deepcopy(v, memo)
@@ -69,10 +63,8 @@ class QuerySet(object):
         Allows the QuerySet to be pickled.
         """
         # Force the cache to be fully populated.
-        len(self)
-
+        self._fetch_all()
         obj_dict = self.__dict__.copy()
-        obj_dict['_iter'] = None
         return obj_dict
 
     def __repr__(self):
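The __getstate__ change above leans on the same helper: pickling forces a full fetch, and there is no longer a live cursor iterator to scrub from __dict__. A minimal plain-Python sketch of that pattern (the CachedSeq class is illustrative, not Django code):

import pickle


class CachedSeq:
    def __init__(self, factory):
        self._factory = factory          # e.g. a generator function; not picklable
        self._result_cache = None

    def _fetch_all(self):
        if self._result_cache is None:
            self._result_cache = list(self._factory())

    def __getstate__(self):
        # Force the cache to be fully populated (cf. QuerySet.__getstate__).
        self._fetch_all()
        state = self.__dict__.copy()
        state['_factory'] = None         # drop the unpicklable callable
        return state


s = CachedSeq(lambda: (i * i for i in range(3)))
print(pickle.loads(pickle.dumps(s))._result_cache)  # [0, 1, 4]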
@@ -82,95 +74,31 @@ class QuerySet(object):
         return repr(data)
 
     def __len__(self):
-        # Since __len__ is called quite frequently (for example, as part of
-        # list(qs), we make some effort here to be as efficient as possible
-        # whilst not messing up any existing iterators against the QuerySet.
-        if self._result_cache is None:
-            if self._iter:
-                self._result_cache = list(self._iter)
-            else:
-                self._result_cache = list(self.iterator())
-        elif self._iter:
-            self._result_cache.extend(self._iter)
-        if self._prefetch_related_lookups and not self._prefetch_done:
-            self._prefetch_related_objects()
+        self._fetch_all()
         return len(self._result_cache)
 
     def __iter__(self):
-        if self._prefetch_related_lookups and not self._prefetch_done:
-            # We need all the results in order to be able to do the prefetch
-            # in one go. To minimize code duplication, we use the __len__
-            # code path which also forces this, and also does the prefetch
-            len(self)
-
-        if self._result_cache is None:
-            self._iter = self.iterator()
-            self._result_cache = []
-        if self._iter:
-            return self._result_iter()
-        # Python's list iterator is better than our version when we're just
-        # iterating over the cache.
+        """
+        The queryset iterator protocol uses three nested iterators in the
+        default case:
+            1. sql.compiler:execute_sql()
+               - Returns 100 rows at time (constants.GET_ITERATOR_CHUNK_SIZE)
+                 using cursor.fetchmany(). This part is responsible for
+                 doing some column masking, and returning the rows in chunks.
+            2. sql/compiler.results_iter()
+               - Returns one row at time. At this point the rows are still just
+                 tuples. In some cases the return values are converted to
+                 Python values at this location (see resolve_columns(),
+                 resolve_aggregate()).
+            3. self.iterator()
+               - Responsible for turning the rows into model objects.
+        """
+        self._fetch_all()
         return iter(self._result_cache)
 
-    def _result_iter(self):
-        pos = 0
-        while 1:
-            upper = len(self._result_cache)
-            while pos < upper:
-                yield self._result_cache[pos]
-                pos = pos + 1
-            if not self._iter:
-                raise StopIteration
-            if len(self._result_cache) <= pos:
-                self._fill_cache()
-
-    def __bool__(self):
-        if self._prefetch_related_lookups and not self._prefetch_done:
-            # We need all the results in order to be able to do the prefetch
-            # in one go. To minimize code duplication, we use the __len__
-            # code path which also forces this, and also does the prefetch
-            len(self)
-
-        if self._result_cache is not None:
-            return bool(self._result_cache)
-        try:
-            next(iter(self))
-        except StopIteration:
-            return False
-        return True
-
-    def __nonzero__(self):      # Python 2 compatibility
-        return type(self).__bool__(self)
-
-    def __contains__(self, val):
-        # The 'in' operator works without this method, due to __iter__. This
-        # implementation exists only to shortcut the creation of Model
-        # instances, by bailing out early if we find a matching element.
-        pos = 0
-        if self._result_cache is not None:
-            if val in self._result_cache:
-                return True
-            elif self._iter is None:
-                # iterator is exhausted, so we have our answer
-                return False
-            # remember not to check these again:
-            pos = len(self._result_cache)
-        else:
-            # We need to start filling the result cache out. The following
-            # ensures that self._iter is not None and self._result_cache is not
-            # None
-            it = iter(self)
-
-        # Carry on, one result at a time.
-        while True:
-            if len(self._result_cache) <= pos:
-                self._fill_cache(num=1)
-            if self._iter is None:
-                # we ran out of items
-                return False
-            if self._result_cache[pos] == val:
-                return True
-            pos += 1
+    def __nonzero__(self):
+        self._fetch_all()
+        return bool(self._result_cache)
 
     def __getitem__(self, k):
         """
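The new __iter__ docstring names three nested layers. Below is a self-contained toy version of that pipeline using sqlite3; the function names mirror the docstring, while the real implementations live in Django's SQL compiler:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE tag (id INTEGER PRIMARY KEY, name TEXT)")
conn.executemany("INSERT INTO tag (name) VALUES (?)",
                 [("t%d" % i,) for i in range(1, 6)])


class Tag(object):
    def __init__(self, pk, name):
        self.pk, self.name = pk, name


def execute_sql(cursor, chunk_size=100):
    # Layer 1: rows come off the cursor in fetchmany() chunks.
    while True:
        chunk = cursor.fetchmany(chunk_size)
        if not chunk:
            break
        yield chunk


def results_iter(chunks):
    # Layer 2: flatten chunks into one raw tuple at a time.
    for chunk in chunks:
        for row in chunk:
            yield row


def model_iterator(rows):
    # Layer 3: turn each tuple into a model instance.
    for row in rows:
        yield Tag(*row)


cursor = conn.execute("SELECT id, name FROM tag")
print([t.name for t in model_iterator(results_iter(execute_sql(cursor)))])
# ['t1', 't2', 't3', 't4', 't5']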
@@ -184,19 +112,6 @@ class QuerySet(object):
                 "Negative indexing is not supported."
 
         if self._result_cache is not None:
-            if self._iter is not None:
-                # The result cache has only been partially populated, so we may
-                # need to fill it out a bit more.
-                if isinstance(k, slice):
-                    if k.stop is not None:
-                        # Some people insist on passing in strings here.
-                        bound = int(k.stop)
-                    else:
-                        bound = None
-                else:
-                    bound = k + 1
-                if len(self._result_cache) < bound:
-                    self._fill_cache(bound - len(self._result_cache))
             return self._result_cache[k]
 
         if isinstance(k, slice):
@@ -370,7 +285,7 @@ class QuerySet(object):
         If the QuerySet is already fully cached this simply returns the length
         of the cached results set to avoid multiple SELECT COUNT(*) calls.
         """
-        if self._result_cache is not None and not self._iter:
+        if self._result_cache is not None:
             return len(self._result_cache)
 
         return self.query.get_count(using=self.db)
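With no half-filled cache state left, count() can trust any non-None _result_cache. A hedged usage sketch in a Django project; Entry is a hypothetical stand-in model, not part of this patch:

from myapp.models import Entry  # hypothetical app and model

qs = Entry.objects.all()
qs.count()   # cache is empty: issues SELECT COUNT(*)
list(qs)     # fetches every row and fully populates the result cache
qs.count()   # answered from len(self._result_cache); no second query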
@@ -933,17 +848,11 @@ class QuerySet(object):
         c._setup_query()
         return c
 
-    def _fill_cache(self, num=None):
-        """
-        Fills the result cache with 'num' more entries (or until the results
-        iterator is exhausted).
-        """
-        if self._iter:
-            try:
-                for i in range(num or ITER_CHUNK_SIZE):
-                    self._result_cache.append(next(self._iter))
-            except StopIteration:
-                self._iter = None
+    def _fetch_all(self):
+        if self._result_cache is None:
+            self._result_cache = list(self.iterator())
+        if self._prefetch_related_lookups and not self._prefetch_done:
+            self._prefetch_related_objects()
 
     def _next_is_sticky(self):
         """
@@ -524,6 +524,25 @@ non-standard behavior has been preserved but moved to the model form field layer
 and occurs only when the associated widget is
 :class:`~django.forms.SelectMultiple` or a subclass.
 
+QuerySet iteration
+~~~~~~~~~~~~~~~~~~
+
+The ``QuerySet`` iteration was changed to immediately convert all fetched
+rows to ``Model`` objects. In Django 1.5 and earlier the fetched rows were
+converted to ``Model`` objects in chunks of 100.
+
+Existing code will work, but the amount of rows converted to objects
+might change in certain use cases. Such usages include partially looping
+over a queryset or any usage which ends up doing ``__bool__`` or
+``__contains__``.
+
+Notably most database backends did fetch all the rows in one go already in
+1.5.
+
+It is still possible to convert the fetched rows to ``Model`` objects
+lazily by using the :meth:`~django.db.models.query.QuerySet.iterator()`
+method.
+
 Miscellaneous
 ~~~~~~~~~~~~~
 
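As the release note says, iterator() remains the lazy path. A sketch of the difference as of Django 1.6, again with Entry as a hypothetical model:

from myapp.models import Entry  # hypothetical app and model

qs = Entry.objects.all()

objs = list(qs)   # one query; every fetched row becomes a Model object, then cached
if qs:            # __bool__/__nonzero__ reuse the fully populated cache
    print(len(objs))

for entry in qs.iterator():
    # iterator() skips the result cache and builds instances one row at a
    # time, keeping memory flat for large result sets.
    print(entry.pk)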
@@ -9,7 +9,6 @@ from django.conf import settings
 from django.core.exceptions import FieldError
 from django.db import DatabaseError, connection, connections, DEFAULT_DB_ALIAS
 from django.db.models import Count, F, Q
-from django.db.models.query import ITER_CHUNK_SIZE
 from django.db.models.sql.where import WhereNode, EverythingNode, NothingNode
 from django.db.models.sql.datastructures import EmptyResultSet
 from django.test import TestCase, skipUnlessDBFeature
@@ -1211,16 +1210,6 @@ class Queries2Tests(TestCase):
             ordered=False
         )
 
-    def test_ticket7411(self):
-        # Saving to db must work even with partially read result set in another
-        # cursor.
-        for num in range(2 * ITER_CHUNK_SIZE + 1):
-            _ = Number.objects.create(num=num)
-
-        for i, obj in enumerate(Number.objects.all()):
-            obj.save()
-            if i > 10: break
-
     def test_ticket7759(self):
         # Count should work with a partially read result set.
         count = Number.objects.count()
@@ -1700,31 +1689,6 @@ class Queries6Tests(TestCase):
         ann1.notes.add(n1)
         ann2 = Annotation.objects.create(name='a2', tag=t4)
 
-    # This next test used to cause really weird PostgreSQL behavior, but it was
-    # only apparent much later when the full test suite ran.
-    # - Yeah, it leaves global ITER_CHUNK_SIZE to 2 instead of 100...
-    #@unittest.expectedFailure
-    def test_slicing_and_cache_interaction(self):
-        # We can do slicing beyond what is currently in the result cache,
-        # too.
-
-        # We need to mess with the implementation internals a bit here to decrease the
-        # cache fill size so that we don't read all the results at once.
-        from django.db.models import query
-        query.ITER_CHUNK_SIZE = 2
-        qs = Tag.objects.all()
-
-        # Fill the cache with the first chunk.
-        self.assertTrue(bool(qs))
-        self.assertEqual(len(qs._result_cache), 2)
-
-        # Query beyond the end of the cache and check that it is filled out as required.
-        self.assertEqual(repr(qs[4]), '<Tag: t5>')
-        self.assertEqual(len(qs._result_cache), 5)
-
-        # But querying beyond the end of the result set will fail.
-        self.assertRaises(IndexError, lambda: qs[100])
-
     def test_parallel_iterators(self):
         # Test that parallel iterators work.
         qs = Tag.objects.all()
@@ -2533,6 +2497,14 @@ class WhereNodeTest(TestCase):
         w = WhereNode(children=[empty_w, NothingNode()], connector='OR')
         self.assertRaises(EmptyResultSet, w.as_sql, qn, connection)
 
+
+class IteratorExceptionsTest(TestCase):
+    def test_iter_exceptions(self):
+        qs = ExtraInfo.objects.only('author')
+        with self.assertRaises(AttributeError):
+            list(qs)
+
+
 class NullJoinPromotionOrTest(TestCase):
     def setUp(self):
         self.d1 = ModelD.objects.create(name='foo')