Added support for modifying the effect of ``DISTINCT`` clauses so they

only consider some fields (PostgreSQL only). For this, the ``distinct()`` QuerySet method now accepts an optional list of model fields names and generates ``DISTINCT ON`` clauses on these cases. Thanks Jeffrey Gelens and Anssi Kääriäinen for their work. Fixes #6422. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17244 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2011-12-22 20:42:40 +00:00 · 2011-12-22 20:42:40 +00:00 · 287565779d
parent 03eb2907d5
commit 287565779d
16 changed files with 374 additions and 43 deletions
--- a/2
+++ b/2
@ -203,6 +203,7 @@ answer newbie questions, and generally made Django that much better:
    Marc Garcia <marc.garcia@accopensys.com>
    Andy Gayton <andy-django@thecablelounge.com>
    geber@datacollect.com
    Jeffrey Gelens <jeffrey@gelens.org>
    Baishampayan Ghose
    Joshua Ginsberg <jag@flowtheory.net>
    Dimitris Glezos <dimitris@glezos.com>
@ -269,6 +270,7 @@ answer newbie questions, and generally made Django that much better:
    jpellerin@gmail.com
    junzhang.jn@gmail.com
    Xia Kai <http://blog.xiaket.org/>
    Anssi Kääriäinen
    Antti Kaihola <http://djangopeople.net/akaihola/>
    Peter van Kampen
    Bahadır Kandemir <bahadir@pardus.org.tr>
--- a/django/db/backends/init.py
+++ b/django/db/backends/init.py
@ -406,6 +406,9 @@ class BaseDatabaseFeatures(object):
    supports_stddev = None
    can_introspect_foreign_keys = None
    # Support for the DISTINCT ON clause
    can_distinct_on_fields = False
    def __init__(self, connection):
        self.connection = connection
@ -559,6 +562,17 @@ class BaseDatabaseOperations(object):
        """
        raise NotImplementedError('Full-text search is not implemented for this database backend')
    def distinct_sql(self, fields):
        """
        Returns an SQL DISTINCT clause which removes duplicate rows from the
        result set. If any fields are given, only the given fields are being
        checked for duplicates.
        """
        if fields:
            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
        else:
            return 'DISTINCT'
    def last_executed_query(self, cursor, sql, params):
        """
        Returns a string of the query last executed by the given cursor, with
--- a/django/db/backends/postgresql_psycopg2/base.py
+++ b/django/db/backends/postgresql_psycopg2/base.py
@ -82,6 +82,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
    has_select_for_update_nowait = True
    has_bulk_insert = True
    supports_tablespaces = True
    can_distinct_on_fields = True
 class DatabaseWrapper(BaseDatabaseWrapper):
    vendor = 'postgresql'
--- a/django/db/backends/postgresql_psycopg2/operations.py
+++ b/django/db/backends/postgresql_psycopg2/operations.py
@ -179,6 +179,12 @@ class DatabaseOperations(BaseDatabaseOperations):
        return 63
    def distinct_sql(self, fields):
        if fields:
            return 'DISTINCT ON (%s)' % ', '.join(fields)
        else:
            return 'DISTINCT'
    def last_executed_query(self, cursor, sql, params):
        # http://initd.org/psycopg/docs/cursor.html#cursor.query
        # The query attribute is a Psycopg extension to the DB API 2.0.
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@ -323,6 +323,8 @@ class QuerySet(object):
        If args is present the expression is passed as a kwarg using
        the Aggregate object's default alias.
        """
        if self.query.distinct_fields:
            raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
        for arg in args:
            kwargs[arg.default_alias] = arg
@ -751,12 +753,14 @@ class QuerySet(object):
        obj.query.add_ordering(*field_names)
        return obj
-    def distinct(self, true_or_false=True):
+    def distinct(self, *field_names):
        """
        Returns a new QuerySet instance that will select only distinct results.
        """
        assert self.query.can_filter(), \
                "Cannot create distinct fields once a slice has been taken."
        obj = self._clone()
-        obj.query.distinct = true_or_false
+        obj.query.add_distinct_fields(*field_names)
        return obj
    def extra(self, select=None, where=None, params=None, tables=None,
@ -1179,7 +1183,7 @@ class EmptyQuerySet(QuerySet):
        """
        return self
-    def distinct(self, true_or_false=True):
+    def distinct(self, fields=None):
        """
        Always returns EmptyQuerySet.
        """
--- a/django/db/models/sql/compiler.py
+++ b/django/db/models/sql/compiler.py
@ -23,6 +23,8 @@ class SQLCompiler(object):
        Does any necessary class setup immediately prior to producing SQL. This
        is for things that can't necessarily be done in __init__ because we
        might not have all the pieces in place at that time.
        # TODO: after the query has been executed, the altered state should be
        # cleaned. We are not using a clone() of the query here.
        """
        if not self.query.tables:
            self.query.join((None, self.query.model._meta.db_table, None, None))
@ -60,11 +62,19 @@ class SQLCompiler(object):
            return '', ()
        self.pre_sql_setup()
        # After executing the query, we must get rid of any joins the query
        # setup created. So, take note of alias counts before the query ran.
        # However we do not want to get rid of stuff done in pre_sql_setup(),
        # as the pre_sql_setup will modify query state in a way that forbids
        # another run of it.
        self.refcounts_before = self.query.alias_refcount.copy()
        out_cols = self.get_columns(with_col_aliases)
        ordering, ordering_group_by = self.get_ordering()
-        # This must come after 'select' and 'ordering' -- see docstring of
+        distinct_fields = self.get_distinct()
-        # get_from_clause() for details.
+
        # This must come after 'select', 'ordering' and 'distinct' -- see
        # docstring of get_from_clause() for details.
        from_, f_params = self.get_from_clause()
        qn = self.quote_name_unless_alias
@ -76,8 +86,10 @@ class SQLCompiler(object):
            params.extend(val[1])
        result = ['SELECT']
        if self.query.distinct:
-            result.append('DISTINCT')
+            result.append(self.connection.ops.distinct_sql(distinct_fields))
        result.append(', '.join(out_cols + self.query.ordering_aliases))
        result.append('FROM')
@ -90,6 +102,9 @@ class SQLCompiler(object):
        grouping, gb_params = self.get_grouping()
        if grouping:
            if distinct_fields:
                raise NotImplementedError(
                    "annotate() + distinct(fields) not implemented.")
            if ordering:
                # If the backend can't group by PK (i.e., any database
                # other than MySQL), then any fields mentioned in the
@ -129,6 +144,9 @@ class SQLCompiler(object):
                raise DatabaseError('NOWAIT is not supported on this database backend.')
            result.append(self.connection.ops.for_update_sql(nowait=nowait))
        # Finally do cleanup - get rid of the joins we created above.
        self.query.reset_refcounts(self.refcounts_before)
        return ' '.join(result), tuple(params)
    def as_nested_sql(self):
@ -292,6 +310,26 @@ class SQLCompiler(object):
                    col_aliases.add(field.column)
        return result, aliases
    def get_distinct(self):
        """
        Returns a quoted list of fields to use in DISTINCT ON part of the query.
        Note that this method can alter the tables in the query, and thus it
        must be called before get_from_clause().
        """
        qn = self.quote_name_unless_alias
        qn2 = self.connection.ops.quote_name
        result = []
        opts = self.query.model._meta
        for name in self.query.distinct_fields:
            parts = name.split(LOOKUP_SEP)
            field, col, alias, _, _ = self._setup_joins(parts, opts, None)
            col, alias = self._final_join_removal(col, alias)
            result.append("%s.%s" % (qn(alias), qn2(col)))
        return result
    def get_ordering(self):
        """
        Returns a tuple containing a list representing the SQL elements in the
@ -384,21 +422,7 @@ class SQLCompiler(object):
        """
        name, order = get_order_dir(name, default_order)
        pieces = name.split(LOOKUP_SEP)
-        if not alias:
+        field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
            alias = self.query.get_initial_alias()
        field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
                opts, alias, False)
        alias = joins[-1]
        col = target.column
        if not field.rel:
            # To avoid inadvertent trimming of a necessary alias, use the
            # refcount to show that we are referencing a non-relation field on
            # the model.
            self.query.ref_alias(alias)
        # Must use left outer joins for nullable fields and their relations.
        self.query.promote_alias_chain(joins,
            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
        # If we get to this point and the field is a relation to another model,
        # append the default ordering for that model.
@ -416,11 +440,47 @@ class SQLCompiler(object):
                results.extend(self.find_ordering_name(item, opts, alias,
                        order, already_seen))
            return results
        col, alias = self._final_join_removal(col, alias)
        return [(alias, col, order)]
    def _setup_joins(self, pieces, opts, alias):
        """
        A helper method for get_ordering and get_distinct. This method will
        call query.setup_joins, handle refcounts and then promote the joins.
        Note that get_ordering and get_distinct must produce same target
        columns on same input, as the prefixes of get_ordering and get_distinct
        must match. Executing SQL where this is not true is an error.
        """
        if not alias:
            alias = self.query.get_initial_alias()
        field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
                opts, alias, False)
        alias = joins[-1]
        col = target.column
        if not field.rel:
            # To avoid inadvertent trimming of a necessary alias, use the
            # refcount to show that we are referencing a non-relation field on
            # the model.
            self.query.ref_alias(alias)
        # Must use left outer joins for nullable fields and their relations.
        # Ordering or distinct must not affect the returned set, and INNER
        # JOINS for nullable fields could do this.
        self.query.promote_alias_chain(joins,
            self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
        return field, col, alias, joins, opts
    def _final_join_removal(self, col, alias):
        """
        A helper method for get_distinct and get_ordering. This method will
        trim extra not-needed joins from the tail of the join chain.
        This is very similar to what is done in trim_joins, but we will
        trim LEFT JOINS here. It would be a good idea to consolidate this
        method and query.trim_joins().
        """
        if alias:
            # We have to do the same "final join" optimisation as in
            # add_filter, since the final column might not otherwise be part of
            # the select set (so we can't order on it).
            while 1:
                join = self.query.alias_map[alias]
                if col != join[RHS_JOIN_COL]:
@ -428,7 +488,7 @@ class SQLCompiler(object):
                self.query.unref_alias(alias)
                alias = join[LHS_ALIAS]
                col = join[LHS_JOIN_COL]
-        return [(alias, col, order)]
+        return col, alias
    def get_from_clause(self):
        """
@ -438,8 +498,8 @@ class SQLCompiler(object):
        from-clause via a "select".
        This should only be called after any SQL construction methods that
-        might change the tables we need. This means the select columns and
+        might change the tables we need. This means the select columns,
-        ordering must be done first.
+        ordering and distinct must be done first.
        """
        result = []
        qn = self.quote_name_unless_alias
@ -984,6 +1044,7 @@ class SQLAggregateCompiler(SQLCompiler):
        """
        if qn is None:
            qn = self.quote_name_unless_alias
        sql = ('SELECT %s FROM (%s) subquery' % (
            ', '.join([
                aggregate.as_sql(qn, self.connection)
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@ -127,6 +127,7 @@ class Query(object):
        self.order_by = []
        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
        self.distinct = False
        self.distinct_fields = []
        self.select_for_update = False
        self.select_for_update_nowait = False
        self.select_related = False
@ -265,6 +266,7 @@ class Query(object):
        obj.order_by = self.order_by[:]
        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
        obj.distinct = self.distinct
        obj.distinct_fields = self.distinct_fields[:]
        obj.select_for_update = self.select_for_update
        obj.select_for_update_nowait = self.select_for_update_nowait
        obj.select_related = self.select_related
@ -298,6 +300,7 @@ class Query(object):
        else:
            obj.used_aliases = set()
        obj.filter_is_sticky = False
        obj.__dict__.update(kwargs)
        if hasattr(obj, '_setup_query'):
            obj._setup_query()
@ -393,7 +396,7 @@ class Query(object):
        Performs a COUNT() query using the current filter constraints.
        """
        obj = self.clone()
-        if len(self.select) > 1 or self.aggregate_select:
+        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
            # If a select clause exists, then the query has already started to
            # specify the columns that are to be returned.
            # In this case, we need to use a subquery to evaluate the count.
@ -452,6 +455,8 @@ class Query(object):
                "Cannot combine queries once a slice has been taken."
        assert self.distinct == rhs.distinct, \
            "Cannot combine a unique query with a non-unique query."
        assert self.distinct_fields == rhs.distinct_fields, \
            "Cannot combine queries with different distinct fields."
        self.remove_inherited_models()
        # Work out how to relabel the rhs aliases, if necessary.
@ -674,9 +679,9 @@ class Query(object):
        """ Increases the reference count for this alias. """
        self.alias_refcount[alias] += 1
-    def unref_alias(self, alias):
+    def unref_alias(self, alias, amount=1):
        """ Decreases the reference count for this alias. """
-        self.alias_refcount[alias] -= 1
+        self.alias_refcount[alias] -= amount
    def promote_alias(self, alias, unconditional=False):
        """
@ -705,6 +710,15 @@ class Query(object):
            if self.promote_alias(alias, must_promote):
                must_promote = True
    def reset_refcounts(self, to_counts):
        """
        This method will reset reference counts for aliases so that they match
        the value passed in :param to_counts:.
        """
        for alias, cur_refcount in self.alias_refcount.copy().items():
            unref_amount = cur_refcount - to_counts.get(alias, 0)
            self.unref_alias(alias, unref_amount)
    def promote_unused_aliases(self, initial_refcounts, used_aliases):
        """
        Given a "before" copy of the alias_refcounts dictionary (as
@ -832,7 +846,8 @@ class Query(object):
    def count_active_tables(self):
        """
        Returns the number of tables in this query with a non-zero reference
-        count.
+        count. Note that after execution, the reference counts are zeroed, so
        tables added in compiler will not be seen by this method.
        """
        return len([1 for count in self.alias_refcount.itervalues() if count])
@ -1596,6 +1611,13 @@ class Query(object):
        self.select = []
        self.select_fields = []
    def add_distinct_fields(self, *field_names):
        """
        Adds and resolves the given fields to the query's "distinct on" clause.
        """
        self.distinct_fields = field_names
        self.distinct = True
    def add_fields(self, field_names, allow_m2m=True):
        """
        Adds the given (model) fields to the select set. The field names are
--- a/docs/ref/models/querysets.txt
+++ b/docs/ref/models/querysets.txt
@ -345,7 +345,7 @@ remain undefined afterward).
 distinct
 ~~~~~~~~
-.. method:: distinct()
+.. method:: distinct([*fields])
 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
 eliminates duplicate rows from the query results.
@ -374,6 +374,43 @@ query spans multiple tables, it's possible to get duplicate results when a
    :meth:`values()` together, be careful when ordering by fields not in the
    :meth:`values()` call.
 .. versionadded:: 1.4
 The possibility to pass positional arguments (``*fields``) is new in Django 1.4.
 They are names of fields to which the ``DISTINCT`` should be limited. This
 translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
 duplicate rows not by comparing all fields in a row, but by comparing only the given
 fields.
 .. note::
    Note that the ability to specify field names is only available in PostgreSQL.
 .. note::
    When using the ``DISTINCT ON`` functionality it is required that the columns given
    to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT
    DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
    don't specify an order, then you'll get some arbitrary row.
 Examples::
    >>> Author.objects.distinct()
    [...]
    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
    [...]
    >>> Entry.objects.order_by('blog').distinct('blog')
    [...]
    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
    [...]
    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
    [...]
    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
    [...]
 values
 ~~~~~~
--- a/docs/releases/1.4-alpha-1.txt
+++ b/docs/releases/1.4-alpha-1.txt
@ -507,6 +507,16 @@ Django 1.4 also includes several smaller improvements worth noting:
  ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
  cache backends.
 * Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
  The ``distinct()`` ``Queryset`` method now accepts an optional list of model
  field names. If specified, then the ``DISTINCT`` statement is limited to these
  fields.  The PostgreSQL is the only of the database backends shipped with
  Django that supports this new functionality.
  For more details, see the documentation for
  :meth:`~django.db.models.query.QuerySet.distinct`.
 Backwards incompatible changes in 1.4
 =====================================
--- a/docs/releases/1.4.txt
+++ b/docs/releases/1.4.txt
@ -498,6 +498,16 @@ Django 1.4 also includes several smaller improvements worth noting:
  ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
  cache backends.
 * Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
  The ``distinct()`` ``Queryset`` method now accepts an optional list of model
  field names. If specified, then the ``DISTINCT`` statement is limited to these
  fields.  The PostgreSQL is the only of the database backends shipped with
  Django that supports this new functionality.
  For more details, see the documentation for
  :meth:`~django.db.models.query.QuerySet.distinct`.
 .. _backwards-incompatible-changes-1.4:
 Backwards incompatible changes in 1.4
--- a/tests/modeltests/distinct_on_fields/init.py
+++ b/tests/modeltests/distinct_on_fields/init.py
@ -0,0 +1 @@
 #
--- a/tests/modeltests/distinct_on_fields/models.py
+++ b/tests/modeltests/distinct_on_fields/models.py
@ -0,0 +1,39 @@
 from django.db import models
 class Tag(models.Model):
    name = models.CharField(max_length=10)
    parent = models.ForeignKey('self', blank=True, null=True,
            related_name='children')
    class Meta:
        ordering = ['name']
    def __unicode__(self):
        return self.name
 class Celebrity(models.Model):
    name = models.CharField("Name", max_length=20)
    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    def __unicode__(self):
        return self.name
 class Fan(models.Model):
    fan_of = models.ForeignKey(Celebrity)
 class Staff(models.Model):
    id = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=50)
    organisation = models.CharField(max_length=100)
    tags = models.ManyToManyField(Tag, through='StaffTag')
    coworkers = models.ManyToManyField('self')
    def __unicode__(self):
        return self.name
 class StaffTag(models.Model):
    staff = models.ForeignKey(Staff)
    tag = models.ForeignKey(Tag)
    def __unicode__(self):
        return u"%s -> %s" % (self.tag, self.staff)
--- a/tests/modeltests/distinct_on_fields/tests.py
+++ b/tests/modeltests/distinct_on_fields/tests.py
@ -0,0 +1,116 @@
 from __future__ import absolute_import, with_statement
 from django.db.models import Max
 from django.test import TestCase, skipUnlessDBFeature
 from .models import Tag, Celebrity, Fan, Staff, StaffTag
 class DistinctOnTests(TestCase):
    def setUp(self):
        t1 = Tag.objects.create(name='t1')
        t2 = Tag.objects.create(name='t2', parent=t1)
        t3 = Tag.objects.create(name='t3', parent=t1)
        t4 = Tag.objects.create(name='t4', parent=t3)
        t5 = Tag.objects.create(name='t5', parent=t3)
        p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
        p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
        p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
        p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
        p1_o1.coworkers.add(p2_o1, p3_o1)
        StaffTag.objects.create(staff=p1_o1, tag=t1)
        StaffTag.objects.create(staff=p1_o1, tag=t1)
        celeb1 = Celebrity.objects.create(name="c1")
        celeb2 = Celebrity.objects.create(name="c2")
        self.fan1 = Fan.objects.create(fan_of=celeb1)
        self.fan2 = Fan.objects.create(fan_of=celeb1)
        self.fan3 = Fan.objects.create(fan_of=celeb2)
    @skipUnlessDBFeature('can_distinct_on_fields')
    def test_basic_distinct_on(self):
        """QuerySet.distinct('field', ...) works"""
        # (qset, expected) tuples
        qsets = (
            (
                Staff.objects.distinct().order_by('name'),
                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
            ),
            (
                Staff.objects.distinct('name').order_by('name'),
                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
            ),
            (
                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
                ['<Staff: p1>', '<Staff: p1>'],
            ),
            (
                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
            ),
            (
                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
                    distinct('name').order_by('name'),
                ['<Celebrity: c1>', '<Celebrity: c2>'],
            ),
            # Does combining querysets work?
            (
                (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
                    distinct('name').order_by('name')
                |Celebrity.objects.filter(fan__in=[self.fan3]).\
                    distinct('name').order_by('name')),
                ['<Celebrity: c1>', '<Celebrity: c2>'],
            ),
            (
                StaffTag.objects.distinct('staff','tag'),
                ['<StaffTag: t1 -> p1>'],
            ),
            (
                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
            ),
            (
                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
                ['<StaffTag: t1 -> p1>'],
            ),
            # Fetch the alphabetically first coworker for each worker
            (
                (Staff.objects.distinct('id').order_by('id', 'coworkers__name').
                               values_list('id', 'coworkers__name')),
                ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
            ),
        )
        for qset, expected in qsets:
            self.assertQuerysetEqual(qset, expected)
            self.assertEqual(qset.count(), len(expected))
        # Combining queries with different distinct_fields is not allowed.
        base_qs = Celebrity.objects.all()
        self.assertRaisesMessage(
            AssertionError,
            "Cannot combine queries with different distinct fields.",
            lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
        )
        # Test join unreffing
        c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
        self.assertIn('OUTER JOIN', str(c1.query))
        c2 = c1.distinct('pk')
        self.assertNotIn('OUTER JOIN', str(c2.query))
    @skipUnlessDBFeature('can_distinct_on_fields')
    def test_distinct_not_implemented_checks(self):
        # distinct + annotate not allowed
        with self.assertRaises(NotImplementedError):
            Celebrity.objects.annotate(Max('id')).distinct('id')[0]
        with self.assertRaises(NotImplementedError):
            Celebrity.objects.distinct('id').annotate(Max('id'))[0]
        # However this check is done only when the query executes, so you
        # can use distinct() to remove the fields before execution.
        Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
        # distinct + aggregate not allowed
        with self.assertRaises(NotImplementedError):
            Celebrity.objects.distinct('id').aggregate(Max('id'))
--- a/tests/regressiontests/queries/models.py
+++ b/tests/regressiontests/queries/models.py
@ -209,6 +209,9 @@ class Celebrity(models.Model):
    name = models.CharField("Name", max_length=20)
    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    def __unicode__(self):
        return self.name
 class TvChef(Celebrity):
    pass
@ -343,4 +346,3 @@ class OneToOneCategory(models.Model):
    def __unicode__(self):
        return "one2one " + self.new_name
--- a/tests/regressiontests/queries/tests.py
+++ b/tests/regressiontests/queries/tests.py
@ -234,18 +234,22 @@ class Queries1Tests(BaseQuerysetTest):
            ['<Item: four>', '<Item: one>']
        )
    # FIXME: This is difficult to fix and very much an edge case, so punt for
    # now.  This is related to the order_by() tests for ticket #2253, but the
    # old bug exhibited itself here (q2 was pulling too many tables into the
    # combined query with the new ordering, but only because we have evaluated
    # q2 already).
    @unittest.expectedFailure
    def test_order_by_tables(self):
        q1 = Item.objects.order_by('name')
        q2 = Item.objects.filter(id=self.i1.id)
        list(q2)
        self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
    def test_order_by_join_unref(self):
        """
        This test is related to the above one, testing that there aren't
        old JOINs in the query.
        """
        qs = Celebrity.objects.order_by('greatest_fan__fan_of')
        self.assertIn('OUTER JOIN', str(qs.query))
        qs = qs.order_by('id')
        self.assertNotIn('OUTER JOIN', str(qs.query))
    def test_tickets_4088_4306(self):
        self.assertQuerysetEqual(
            Report.objects.filter(creator=1001),
@ -1728,7 +1732,7 @@ class ToFieldTests(TestCase):
 class ConditionalTests(BaseQuerysetTest):
-    """Tests whose execution depend on dfferent environment conditions like
+    """Tests whose execution depend on different environment conditions like
    Python version or DB backend features"""
    def setUp(self):
@ -1739,6 +1743,7 @@ class ConditionalTests(BaseQuerysetTest):
        t4 = Tag.objects.create(name='t4', parent=t3)
        t5 = Tag.objects.create(name='t5', parent=t3)
    # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
    # (Python issue 1242657), so these cases return an empty list, rather than
    # raising an exception. Not a lot we can do about that, unfortunately, due to
@ -1810,6 +1815,7 @@ class ConditionalTests(BaseQuerysetTest):
            2500
        )
 class UnionTests(unittest.TestCase):
    """
    Tests for the union of two querysets. Bug #12252.
--- a/tests/regressiontests/select_related_regress/tests.py
+++ b/tests/regressiontests/select_related_regress/tests.py
@ -40,9 +40,9 @@ class SelectRelatedRegressTests(TestCase):
        self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
            [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
-        # This final query should only join seven tables (port, device and building
+        # This final query should only have seven tables (port, device and building
-        # twice each, plus connection once).
+        # twice each, plus connection once). Thus, 6 joins plus the FROM table.
-        self.assertEqual(connections.query.count_active_tables(), 7)
+        self.assertEqual(str(connections.query).count(" JOIN "), 6)
    def test_regression_8106(self):