magic-removal: first stab at implementing Manager as a QuerySet

git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@2150 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2006-01-29 00:22:10 +00:00 · 2006-01-29 00:22:10 +00:00 · 02eeb8dd14
parent 3127e3befb
commit 02eeb8dd14
3 changed files with 372 additions and 28 deletions
--- a/django/db/models/fields/related.py
+++ b/django/db/models/fields/related.py
@ -114,9 +114,9 @@ class ManyRelatedObjectsDescriptor(object):
            rel_field = self.related.field

            if self.rel_type == 'o2m':
-                manager.core_filters = {'%s__%s__exact' % (rel_field.name, rel_field.rel.to._meta.pk.name): getattr(instance, rel_field.rel.get_related_field().attname)}
+                manager._set_core_filter({'%s__%s__exact' % (rel_field.name, rel_field.rel.to._meta.pk.name): getattr(instance, rel_field.rel.get_related_field().attname)})
            else:
-                manager.core_filters = {'%s__%s__exact' % (rel_field.name, instance_type._meta.pk.name): instance._get_pk_val()}
+                manager._set_core_filter({'%s__%s__exact' % (rel_field.name, instance_type._meta.pk.name): instance._get_pk_val()})

            # Prepare the manager.
            # TODO: Fix this hack?
--- a/django/db/models/manager.py
+++ b/django/db/models/manager.py
@ -1,11 +1,13 @@
 from django.db.models.fields import DateField
 from django.utils.functional import curry
 from django.db import backend, connection
-from django.db.models.query import Q, parse_lookup, fill_table_cache, get_cached_row
+from django.db.models.query import QuerySet
+from django.db.models.query import Q, fill_table_cache, get_cached_row # TODO - remove lots of these
 from django.db.models.query import handle_legacy_orderlist, orderlist2sql, orderfield2column
 from django.dispatch import dispatcher
 from django.db.models import signals
 from django.utils.datastructures import SortedDict
+import copy

 # Size of each "chunk" for get_iterator calls.
 # Larger values are slightly faster at the expense of more storage space.
@ -17,12 +19,90 @@ def ensure_default_manager(sender):
        # Create the default manager, if needed.
        if hasattr(cls, 'objects'):
            raise ValueError, "Model %s must specify a custom Manager, because it has a field named 'objects'" % name
+            
        cls.add_to_class('objects', Manager())
        cls.objects._prepare()

 dispatcher.connect(ensure_default_manager, signal=signals.class_prepared)

-class Manager(object):
+class Manager(QuerySet):
+    # Tracks each time a Manager instance is created. Used to retain order.
+    creation_counter = 0
+
+    def __init__(self):
+        super(Manager, self).__init__()
+        # Increase the creation counter, and save our local copy.
+        self.creation_counter = Manager.creation_counter
+        Manager.creation_counter += 1
+        self.klass = None
+        
+    def _prepare(self):
+        pass
+        # TODO
+        #if self.klass._meta.get_latest_by:
+        #    self.get_latest = self.__get_latest
+        #for f in self.klass._meta.fields:
+        #    if isinstance(f, DateField):
+        #        setattr(self, 'get_%s_list' % f.name, curry(self.__get_date_list, f))
+
+    def contribute_to_class(self, klass, name):
+        # TODO: Use weakref because of possible memory leak / circular reference.
+        self.klass = klass
+        dispatcher.connect(self._prepare, signal=signals.class_prepared, sender=klass)
+        setattr(klass, name, ManagerDescriptor(self))
+        if not hasattr(klass, '_default_manager') or self.creation_counter < klass._default_manager.creation_counter:
+            klass._default_manager = self
+
+    def get(self, **kwargs):
+        """Gets a single object, using a new query. Keyword arguments are filters."""
+        obj_list = list(self.filter(**kwargs))
+        if len(obj_list) < 1:
+            raise self.klass.DoesNotExist, "%s does not exist for %s" % (self.klass._meta.object_name, kwargs)
+        assert len(obj_list) == 1, "get_object() returned more than one %s -- it returned %s! Lookup parameters were %s" % (self.klass._meta.object_name, len(obj_list), kwargs)
+        return obj_list[0]
+        
+    def in_bulk(self, id_list, **kwargs):
+        assert isinstance(id_list, list), "in_bulk() must be provided with a list of IDs."
+        assert id_list != [], "in_bulk() cannot be passed an empty ID list."
+        new_query = self    # we have to do a copy later, so this is OK
+        if kwargs:
+            new_query = self.filter(**kwargs)
+        new_query = new_query.extras(where=
+                                      ["%s.%s IN (%s)" % (backend.quote_name(self.klass._meta.db_table), 
+                                                          backend.quote_name(self.klass._meta.pk.column), 
+                                                          ",".join(['%s'] * len(id_list)))],
+                                     params=id_list)
+        obj_list = list(new_query)
+        return dict([(obj._get_pk_val(), obj) for obj in obj_list])
+
+    def delete(self, **kwargs):
+        # Remove the DELETE_ALL argument, if it exists.
+        delete_all = kwargs.pop('DELETE_ALL', False)
+
+        # Check for at least one query argument.
+        if not kwargs and not delete_all:
+            raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data."
+        
+        if kwargs:
+            del_query = self.filter(**kwargs)
+        else:
+            del_query = self._clone()
+        # disable non-supported fields
+        del_query._select_related = False
+        del_query._select = {}
+        del_query._order_by = []
+        del_query._offset = None
+        del_query._limit = None
+
+        opts = self.klass._meta
+
+        # Perform the SQL delete
+        cursor = connection.cursor()
+        _, sql, params = del_query._get_sql_clause(False)
+        cursor.execute("DELETE " + sql, params)        
+             
+
+class OldManager(object):
    # Tracks each time a Manager instance is created. Used to retain order.
    creation_counter = 0

@ -279,6 +359,9 @@ class Manager(object):
        # objects -- MySQL returns the values as strings, instead.
        return [typecast_timestamp(str(row[0])) for row in cursor.fetchall()]

+# DEBUG - to go back to old manager:
+# Manager = OldManager
+
 class ManagerDescriptor(object):
    def __init__(self, manager):
        self.manager = manager
@ -286,4 +369,11 @@ class ManagerDescriptor(object):
    def __get__(self, instance, type=None):
        if instance != None:
            raise AttributeError, "Manager isn't accessible via %s instances" % type.__name__
-        return self.manager
+        
+        # HACK
+        # We need a new instance every time.  Otherwise the cache that the
+        # manager keeps is never dropped, which hurts memory usage and
+        # concurrency, and means that queries don't get updated when you do
+        # a model_obj.save(). (This hack helps some tests to pass, but isn't a real fix.)
+        #return self.manager.__class__()
+        return copy.deepcopy(self.manager)
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@ -1,9 +1,14 @@
 from django.db import backend, connection
 from django.db.models.fields import FieldDoesNotExist
 from django.utils.datastructures import SortedDict
+import copy

 LOOKUP_SEPARATOR = '__'

+# Size of each "chunk" for get_iterator calls.
+# Larger values are slightly faster at the expense of more storage space.
+GET_ITERATOR_CHUNK_SIZE = 100
+
 ####################
 # HELPER FUNCTIONS #
 ####################
@ -46,6 +51,259 @@ def orderlist2sql(order_list, opts, prefix=''):
            output.append('%s%s ASC' % (prefix, backend.quote_name(orderfield2column(f, opts))))
    return ', '.join(output)

+class QuerySet(object):
+    "Represents a lazy database lookup for a set of objects"
+    # Subclasses must provide a 'klass' attribute (the model class); its
+    # _meta options are what this class uses to build and run queries.
+    def __init__(self):
+        self._filter = Q()
+        self._order_by = ()
+        self._select_related = False
+        self._distinct = True
+        self._result_cache = None
+        self._params = None
+        self._select = None
+        self._where = None
+        self._tables = None
+        self._offset = None
+        self._limit = None
+        
+    def filter(self, **kwargs):
+        """Returns a new query instance with the query arguments
+        ANDed to the existing set"""
+        clone = self._clone()
+        clone._filter = self._filter & Q(**kwargs)
+        return clone
+
+    def unique(self, true_or_false):
+        """Returns a new query instance with the 'unique' qualifier modified"""
+        return self._clone(_distinct=true_or_false)
+
+    def order_by(self, *field_names):
+        """Returns a new query instance with the ordering changed."""
+        return self._clone(_order_by=field_names)
+        
+    def select_related(self, true_or_false):
+        """Returns a new query instance with the 'related' qualifier modified"""
+        return self._clone(_related=true_or_false)
+               
+    def count(self):
+        counter = self._clone()
+        counter._order_by = []
+        
+        # TODO - do we change these or not?
+        # e.g. if someone does objects[0:10].count()
+        # (which 
+        #counter._offset = None
+        #counter._limit = None
+        counter._select_related = False
+        _, sql, params = counter._get_sql_clause(True)
+        cursor = connection.cursor()
+        cursor.execute("SELECT COUNT(*)" + sql, params)
+        return cursor.fetchone()[0]        
+
+    # Convenience function for subclasses
+    def _set_core_filter(self, **kwargs):
+        """Sets the filters that should always be applied to queries"""
+        self._filter = Q(**kwargs)
+        
+
+    def _clone(self, **kwargs):
+        """Gets a clone of the object, with optional kwargs to alter the clone"""
+        # Don't clone (even temporarily) the cache
+        _result_cache_save = self._result_cache
+        self._result_cache = None
+        # Must ensure we get fully deep copies of all the query objects
+        clone = copy.deepcopy(self)
+        # apply changes to clone
+        clone.__dict__.update(kwargs)
+        # restore cache
+        self._result_cache = _result_cache_save
+        return clone
+        
+    def _ensure_compatible(self, other):
+        if self._distinct != other._distinct:
+            raise ValueException, "Can't combine a unique query with a non-unique query"
+            
+    def _combine(self, other):
+        self._ensure_compatible(other)
+        # get a deepcopy of 'other's order by
+        #  (so that A.filter(args1) & A.filter(args2) does the same as
+        #   A.filter(args1).filter(args2)
+        combined = other._clone() 
+        # If 'self' is ordered and 'other' isn't, propagate 'self's ordering
+        if len(self._order_by) > 0 and len(combined._order_by == 0):
+            combined._order_by = copy.deepcopy(self._order_by)
+        return combined
+            
+    def extras(self, params=None, select=None, where=None, tables=None):
+        return self._clone(_params=params, _select=select, _where=where, _tables=tables)
+    
+    def __and__(self, other):        
+        combined = self._combine(other)
+        combined._filter = self._filter & other._filter
+        return combined
+
+    def __or__(self, other):
+        combined = self._combine(other)
+        combined._filter = self._filter | other._filter
+        return combined
+      
+    # TODO - allow_joins - do we need it?
+    def _get_sql_clause(self, allow_joins):
+        def quote_only_if_word(word):
+            if ' ' in word:
+                return word
+            else:
+                return backend.quote_name(word)
+
+        # This is defined by sub-classes
+        # TODO - define a better accessor
+        opts = self.klass._meta
+
+        # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z.
+        select = ["%s.%s" % (backend.quote_name(opts.db_table), backend.quote_name(f.column)) for f in opts.fields]
+
+        tables = [quote_only_if_word(t) for t in (self._tables or [])]
+        joins = SortedDict()        
+        where = self._where or []
+        params = self._params or []
+
+        # Convert the Q object into SQL.
+        tables2, joins2, where2, params2 = self._filter.get_sql(opts)
+        
+        tables.extend(tables2)
+        joins.update(joins2)
+        where.extend(where2)
+        params.extend(params2)
+
+        # Add additional tables and WHERE clauses based on select_related.
+        if self._select_related is True:
+            fill_table_cache(opts, select, tables, where, opts.db_table, [opts.db_table])
+
+        # Add any additional SELECTs.
+        if self._select:
+            select.extend(['(%s) AS %s' % (quote_only_if_word(s[1]), backend.quote_name(s[0])) for s in self._select ])
+
+        # Start composing the body of the SQL statement.
+        sql = [" FROM", backend.quote_name(opts.db_table)]
+
+        # Check if extra tables are allowed. If not, throw an error
+        if (tables or joins) and not allow_joins:
+            raise TypeError, "Joins are not allowed in this type of query"
+
+        # Compose the join dictionary into SQL describing the joins.
+        if joins:
+            sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition)
+                            for (alias, (table, join_type, condition)) in joins.items()]))
+
+        # Compose the tables clause into SQL.
+        if tables:
+            sql.append(", " + ", ".join(tables))
+
+        # Compose the where clause into SQL.
+        if where:
+            sql.append(where and "WHERE " + " AND ".join(where))
+
+        # ORDER BY clause
+        order_by = []
+        for f in handle_legacy_orderlist(self._order_by):
+            if f == '?': # Special case.
+                order_by.append(backend.get_random_function_sql())
+            else:
+                if f.startswith('-'):
+                    col_name = f[1:]
+                    order = "DESC"
+                else:
+                    col_name = f
+                    order = "ASC"
+                if "." in col_name:
+                    table_prefix, col_name = col_name.split('.', 1)
+                    table_prefix = backend.quote_name(table_prefix) + '.'
+                else:
+                    # Use the database table as a column prefix if it wasn't given,
+                    # and if the requested column isn't a custom SELECT.
+                    if "." not in col_name and col_name not in [k[0] for k in (self._select or []) ]:
+                        table_prefix = backend.quote_name(opts.db_table) + '.'
+                    else:
+                        table_prefix = ''
+                order_by.append('%s%s %s' % (table_prefix, backend.quote_name(orderfield2column(col_name, opts)), order))
+        if order_by:
+            sql.append("ORDER BY " + ", ".join(order_by))
+
+        # LIMIT and OFFSET clauses
+        if self._limit is not None:
+            sql.append("%s " % backend.get_limit_offset_sql(self._limit, self._offset))
+        else:
+            assert self._offset is None, "'offset' is not allowed without 'limit'"
+
+        return select, " ".join(sql), params    
+        
+    def _fetch_data(self):
+        if self._result_cache is None:            
+            self._result_cache = list(self.get_iterator())
+            
+    def __iter__(self):
+        """Gets an iterator for the data"""
+        # Fetch the data or use get_iterator?  If not, we can't
+        # do sequence operations - or doing so will require re-fetching
+        # Also, lots of things in current template system break if
+        # don't get it all.
+        self._fetch_data()
+        return iter(self._result_cache)
+        
+    def __len__(self):
+        self._fetch_data()
+        return len(self._result_cache)
+
+    def __getitem__(self, k):
+        """Retrieve an item or slice from the set of results"""
+        # getitem can't return query instances, because  .filter() 
+        # and .order_by() methods on the result would break badly.
+        # This means we don't have to worry about arithmetic with
+        # self._limit or self._offset - they will both be None
+        # at this point
+        if isinstance(k, slice):
+            # Get a new query if we haven't already got data from db
+            if self._result_cache is None:
+                # slice.stop and slice.start 
+                clone = self._clone(_offset=k.start, _limit=k.stop)
+                return list(clone)[::k.step]
+                # TODO - we are throwing away this retrieved data.
+                # We could cache it if we had some kind of sparse
+                # list structure we could put it in.
+            else:
+                return self._result_cache[k]
+            
+        else:
+            # TODO: possibly use a new query which just gets one item
+            # if we haven't already got them all?
+            self._fetch_data()
+            return self._result_cache[k]
+        
+    def get_iterator(self):
+        # self._select is a dictionary, and dictionaries' key order is
+        # undefined, so we convert it to a list of tuples.
+        _extra_select = (self._select or {}).items()
+        
+        cursor = connection.cursor()
+        select, sql, params = self._get_sql_clause(True)
+        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
+        fill_cache = self._select_related
+        index_end = len(self.klass._meta.fields)
+        while 1:
+            rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
+            if not rows:
+                raise StopIteration
+            for row in rows:
+                if fill_cache:
+                    obj, index_end = get_cached_row(self.klass, row, 0)
+                else:
+                    obj = self.klass(*row[:index_end])
+                for i, k in enumerate(_extra_select):
+                    setattr(obj, k[0], row[index_end+i])
+                yield obj
+        
 class QOperator:
    "Base class for QAnd and QOr"
    def __init__(self, *args):
@ -99,7 +357,7 @@ class QOr(QOperator):
            raise TypeError, other

 class Q:
-    "Encapsulates queries for the 'complex' parameter to Django API functions."
+    "Encapsulates queries as objects that can be combined logically."
    def __init__(self, **kwargs):
        self.kwargs = kwargs

@ -192,42 +450,38 @@ def parse_lookup(kwarg_items, opts):
    # there for others to implement custom Q()s, etc that return other join
    # types.
    tables, joins, where, params = [], SortedDict(), [], []
+    
    for kwarg, value in kwarg_items:
-        if kwarg in ('order_by', 'limit', 'offset', 'select_related', 'distinct', 'select', 'tables', 'where', 'params'):
+        if value is None:
            pass
-        elif value is None:
-            pass
-        elif kwarg == 'complex':
-            if not hasattr(value, 'get_sql'):
-                raise TypeError, "'%s' is not a valid query argument" % str(arg)
-            tables2, joins2, where2, params2 = value.get_sql(opts)
-            tables.extend(tables2)
-            joins.update(joins2)
-            where.extend(where2)
-            params.extend(params2)
-        else: # Must be a search parameter.
+        else:        
            path = kwarg.split(LOOKUP_SEPARATOR)
-
            # Extract the last elements of the kwarg.
            # The very-last is the clause (equals, like, etc).
            # The second-last is the table column on which the clause is
            # to be performed.
-            # The only exception to this is "pk", which is an implicit
-            # id__exact; if we find "pk", make the clause "exact', and
-            # insert a dummy name of None, which we will replace when
-            # we know which table column to grab as the primary key.
+            # The exceptions to this are:
+            # 1)  "pk", which is an implicit id__exact;
+            #     if we find "pk", make the clause "exact", and insert
+            #     a dummy name of None, which we will replace when
+            #     we know which table column to grab as the primary key.
+            # 2)  If there is only one part, assume it to be an __exact
+            #     lookup on that name.
            clause = path.pop()
            if clause == 'pk':
                clause = 'exact'
                path.append(None)
+            elif len(path) == 0:
+                path.append(clause)
+                clause = 'exact'
+                
            if len(path) < 1:
                raise TypeError, "Cannot parse keyword query %r" % kwarg

-            tables2, joins2, where2, params2 = lookup_inner(path, clause, value, opts, opts.db_table, None)
-            tables.extend(tables2)
-            joins.update(joins2)
-            where.extend(where2)
-            params.extend(params2)
+        tables2, joins2, where2, params2 = lookup_inner(path, clause, value, opts, opts.db_table, None)
+        tables.extend(tables2)
+        joins.update(joins2)
+        where.extend(where2)
+        params.extend(params2)
    return tables, joins, where, params

 class FieldFound(Exception):