magic-removal: Created InBulkQuerySet, ValuesQuerySet and DateQuerySet, subclasses of QuerySet that provide custom iterator(). This lets you use iterator() with in_bulk(), values() and dates(). Also added unit tests.

git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@2200 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2006-01-31 02:21:31 +00:00 · 2006-01-31 02:21:31 +00:00 · e51fcf8ec3
parent 2983bd8989
commit e51fcf8ec3
4 changed files with 113 additions and 61 deletions
--- a/django/db/models/manager.py
+++ b/django/db/models/manager.py
@ -85,6 +85,9 @@ class Manager(object):
    def in_bulk(self, *args, **kwargs):
        return QuerySet(self.model).in_bulk(*args, **kwargs)
    def iterator(self, *args, **kwargs):
        return QuerySet(self.model).iterator(*args, **kwargs)
    def order_by(self, *args, **kwargs):
        return QuerySet(self.model).order_by(*args, **kwargs)
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@ -190,67 +190,33 @@ class QuerySet(object):
        _, sql, params = del_query._get_sql_clause(False)
        cursor.execute("DELETE " + sql, params)
    ##################################################
    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
    ##################################################
    def in_bulk(self, id_list):
        assert isinstance(id_list, list), "in_bulk() must be provided with a list of IDs."
        assert id_list != [], "in_bulk() cannot be passed an empty ID list."
-        bulk_query = self._clone()
+        return self._clone(klass=InBulkQuerySet, _id_list=id_list)
        bulk_query._where.append("%s.%s IN (%s)" % (backend.quote_name(self.model._meta.db_table), backend.quote_name(self.model._meta.pk.column), ",".join(['%s'] * len(id_list))))
        bulk_query._params.extend(id_list)
        return dict([(obj._get_pk_val(), obj) for obj in bulk_query.iterator()])
    def values(self, *fields):
-        # select_related and select aren't supported in values().
+        return self._clone(klass=ValuesQuerySet, _fields=fields)
        values_query = self._clone(_select_related=False, _select={})
        # 'fields' is a list of field names to fetch.
        if fields:
            columns = [self.model._meta.get_field(f, many_to_many=False).column for f in fields]
        else: # Default to all fields.
            columns = [f.column for f in self.model._meta.fields]
        cursor = connection.cursor()
        select, sql, params = values_query._get_sql_clause(True)
        select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
        while 1:
            rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
            if not rows:
                raise StopIteration
            for row in rows:
                yield dict(zip(columns, row))
    def dates(self, field_name, kind, order='ASC'):
        """
        Returns a list of datetime objects representing all available dates
        for the given field_name, scoped to 'kind'.
        """
        from django.db.backends.util import typecast_timestamp
        assert kind in ("month", "year", "day"), "'kind' must be one of 'year', 'month' or 'day'."
        assert order in ('ASC', 'DESC'), "'order' must be either 'ASC' or 'DESC'."
        # Let the FieldDoesNotExist exception propogate.
        field = self.model._meta.get_field(field_name, many_to_many=False)
        assert isinstance(field, DateField), "%r isn't a DateField." % field_name
        return self._clone(klass=DateQuerySet, _field=field, _kind=kind, _order=order)
-        date_query = self._clone()
+    ##################################################################
-        date_query._order_by = () # Clear this because it'll mess things up otherwise.
+    # PUBLIC METHODS THAT ALTER ATTRIBUTES AND RETURN A NEW QUERYSET #
-        if field.null:
+    ##################################################################
            date_query._where.append('%s.%s IS NOT NULL' % \
                (backend.quote_name(self.model._meta.db_table), backend.quote_name(field.column)))
        select, sql, params = date_query._get_sql_clause(True)
        sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
            (backend.get_date_trunc_sql(kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
            backend.quote_name(field.column))), sql, order)
        cursor = connection.cursor()
        cursor.execute(sql, params)
        # We have to manually run typecast_timestamp(str()) on the results, because
        # MySQL doesn't automatically cast the result of date functions as datetime
        # objects -- MySQL returns the values as strings, instead.
        return [typecast_timestamp(str(row[0])) for row in cursor.fetchall()]
    #############################################
    # PUBLIC METHODS THAT RETURN A NEW QUERYSET #
    #############################################
    def filter(self, *args, **kwargs):
        "Returns a new QuerySet instance with the args ANDed to the existing set."
@ -285,8 +251,10 @@ class QuerySet(object):
    # PRIVATE METHODS #
    ###################
-    def _clone(self, **kwargs):
+    def _clone(self, klass=None, **kwargs):
-        c = QuerySet()
+        if klass is None:
            klass = self.__class__
        c = klass()
        c.model = self.model
        c._filters = self._filters
        c._order_by = self._order_by
@ -402,6 +370,61 @@ class QuerySet(object):
        return select, " ".join(sql), params
 class InBulkQuerySet(QuerySet):
    def iterator(self):
        self._where.append("%s.%s IN (%s)" % (backend.quote_name(self.model._meta.db_table), backend.quote_name(self.model._meta.pk.column), ",".join(['%s'] * len(self._id_list))))
        self._params.extend(self._id_list)
        yield dict([(obj._get_pk_val(), obj) for obj in QuerySet.iterator(self)])
    def _get_data(self):
        if self._result_cache is None:
            for i in self.iterator():
                self._result_cache = i
        return self._result_cache
 class ValuesQuerySet(QuerySet):
    def iterator(self):
        # select_related and select aren't supported in values().
        self._select_related = False
        self._select = {}
        # self._fields is a list of field names to fetch.
        if self._fields:
            columns = [self.model._meta.get_field(f, many_to_many=False).column for f in self._fields]
            field_names = [f.attname for f in self._fields]
        else: # Default to all fields.
            columns = [f.column for f in self.model._meta.fields]
            field_names = [f.attname for f in self.model._meta.fields]
        cursor = connection.cursor()
        select, sql, params = self._get_sql_clause(True)
        select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
        while 1:
            rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
            if not rows:
                raise StopIteration
            for row in rows:
                yield dict(zip(field_names, row))
 class DateQuerySet(QuerySet):
    def iterator(self):
        from django.db.backends.util import typecast_timestamp
        self._order_by = () # Clear this because it'll mess things up otherwise.
        if self._field.null:
            date_query._where.append('%s.%s IS NOT NULL' % \
                (backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column)))
        select, sql, params = self._get_sql_clause(True)
        sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
            (backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
            backend.quote_name(self._field.column))), sql, self._order)
        cursor = connection.cursor()
        cursor.execute(sql, params)
        # We have to manually run typecast_timestamp(str()) on the results, because
        # MySQL doesn't automatically cast the result of date functions as datetime
        # objects -- MySQL returns the values as strings, instead.
        return [typecast_timestamp(str(row[0])) for row in cursor.fetchall()]
 class QOperator:
    "Base class for QAnd and QOr"
    def __init__(self, *args):
--- a/tests/modeltests/basic/models.py
+++ b/tests/modeltests/basic/models.py
@ -37,14 +37,13 @@ datetime.datetime(2005, 7, 28, 0, 0)
 >>> a.headline = 'Area woman programs in Python'
 >>> a.save()
-# Listing objects displays all the articles in the database. Note that the article
+# Article.objects.all() returns all the articles in the database. Note that
-# is represented by "<Article object>", because we haven't given the Article
+# the article is represented by "<Article object>", because we haven't given
-# model a __repr__() method.
+# the Article model a __repr__() method.
 >>> Article.objects.all()
 [<Article object>]
-# Django provides a rich database lookup API that's entirely driven by
+# Django provides a rich database lookup API.
 # keyword arguments.
 >>> Article.objects.get(id__exact=1)
 <Article object>
 >>> Article.objects.get(headline__startswith='Area woman')
@ -56,7 +55,7 @@ datetime.datetime(2005, 7, 28, 0, 0)
 >>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28)
 <Article object>
-# You can omit __exact if you want
+# The "__exact" lookup type can be omitted, as a shortcut.
 >>> Article.objects.get(id=1)
 <Article object>
 >>> Article.objects.get(headline='Area woman programs in Python')
@ -69,7 +68,8 @@ datetime.datetime(2005, 7, 28, 0, 0)
 >>> Article.objects.filter(pub_date__year=2005, pub_date__month=7)
 [<Article object>]
-# Django raises an ArticleDoesNotExist exception for get()
+# Django raises an Article.DoesNotExist exception for get() if the parameters
 # don't match any object.
 >>> Article.objects.get(id__exact=2)
 Traceback (most recent call last):
    ...
@ -82,7 +82,7 @@ DoesNotExist: Article does not exist for ...
 # Lookup by a primary key is the most common case, so Django provides a
 # shortcut for primary-key exact lookups.
-# The following is identical to articles.get(id__exact=1).
+# The following is identical to articles.get(id=1).
 >>> Article.objects.get(pk=1)
 <Article object>
@ -93,7 +93,7 @@ DoesNotExist: Article does not exist for ...
 True
 # You can initialize a model instance using positional arguments, which should
-# match the field order as defined in the model...
+# match the field order as defined in the model.
 >>> a2 = Article(None, 'Second article', datetime(2005, 7, 29))
 >>> a2.save()
 >>> a2.id
@ -126,7 +126,8 @@ Traceback (most recent call last):
    ...
 TypeError: 'foo' is an invalid keyword argument for this function
-# You can leave off the ID.
+# You can leave off the value for an AutoField when creating an object, because
 # it'll get filled in automatically when you save().
 >>> a5 = Article(headline='Article 6', pub_date=datetime(2005, 7, 31))
 >>> a5.save()
 >>> a5.id
@ -154,7 +155,7 @@ datetime.datetime(2005, 7, 31, 12, 30, 45)
 >>> a8.id
 8L
-# Saving an object again shouldn't create a new object -- it just saves the old one.
+# Saving an object again doesn't create a new object -- it just saves the old one.
 >>> a8.save()
 >>> a8.id
 8L
@ -174,6 +175,7 @@ True
 >>> Article.objects.get(id__exact=8) == Article.objects.get(id__exact=7)
 False
 # dates() returns a list of available dates of the given scope for the given field.
 >>> Article.objects.dates('pub_date', 'year')
 [datetime.datetime(2005, 1, 1, 0, 0)]
 >>> Article.objects.dates('pub_date', 'month')
@ -185,7 +187,7 @@ False
 >>> Article.objects.dates('pub_date', 'day', order='DESC')
 [datetime.datetime(2005, 7, 31, 0, 0), datetime.datetime(2005, 7, 30, 0, 0), datetime.datetime(2005, 7, 29, 0, 0), datetime.datetime(2005, 7, 28, 0, 0)]
-# Try some bad arguments to dates().
+# dates() requires valid arguments.
 >>> Article.objects.dates()
 Traceback (most recent call last):
@ -207,7 +209,16 @@ Traceback (most recent call last):
   ...
 AssertionError: 'order' must be either 'ASC' or 'DESC'.
-# You can combine queries with & and |
+# Use iterator() with dates() to return a generator that lazily requests each
 # result one at a time, to save memory.
 >>> for a in Article.objects.dates('pub_date', 'day', order='DESC').iterator():
 ...     print repr(a)
 datetime.datetime(2005, 7, 31, 0, 0)
 datetime.datetime(2005, 7, 30, 0, 0)
 datetime.datetime(2005, 7, 29, 0, 0)
 datetime.datetime(2005, 7, 28, 0, 0)
 # You can combine queries with & and |.
 >>> s1 = Article.objects.filter(id__exact=1)
 >>> s2 = Article.objects.filter(id__exact=2)
 >>> tmp = [a.id for a in list(s1 | s2)]
@ -231,7 +242,7 @@ AssertionError: 'order' must be either 'ASC' or 'DESC'.
 [<Article object>, <Article object>]
 # An Article instance doesn't have access to the "objects" attribute.
-# That is only available as a class method.
+# That's only available on the class.
 >>> a7.objects.all()
 Traceback (most recent call last):
    ...
--- a/tests/modeltests/lookup/models.py
+++ b/tests/modeltests/lookup/models.py
@ -33,7 +33,8 @@ API_TESTS = """
 >>> a7 = Article(headline='Article 7', pub_date=datetime(2005, 7, 27))
 >>> a7.save()
-# iterator() is a generator.
+# Each QuerySet gets iterator(), which is a generator that "lazily" returns
 # results using database-level iteration.
 >>> for a in Article.objects.iterator():
 ...     print a.headline
 Article 5
@ -103,6 +104,20 @@ True
 [('headline', 'Article 7'), ('id', 7)]
 [('headline', 'Article 1'), ('id', 1)]
 # You can use values() with iterator() for memory savings, because iterator()
 # uses database-level iteration.
 >>> for d in Article.objects.values('id', 'headline').iterator():
 ...     i = d.items()
 ...     i.sort()
 ...     i
 [('headline', 'Article 5'), ('id', 5)]
 [('headline', 'Article 6'), ('id', 6)]
 [('headline', 'Article 4'), ('id', 4)]
 [('headline', 'Article 2'), ('id', 2)]
 [('headline', 'Article 3'), ('id', 3)]
 [('headline', 'Article 7'), ('id', 7)]
 [('headline', 'Article 1'), ('id', 1)]
 # if you don't specify which fields, all are returned
 >>> list(Article.objects.filter(id=5).values()) == [{'id': 5, 'headline': 'Article 5', 'pub_date': datetime(2005, 8, 1, 9, 0)}]
 True