Added support for modifying the effect of ``DISTINCT`` clauses so they

only consider some fields (PostgreSQL only).

For this, the ``distinct()`` QuerySet method now accepts an optional
list of model fields names and generates ``DISTINCT ON`` clauses on
these cases. Thanks Jeffrey Gelens and Anssi Kääriäinen for their work.

Fixes #6422.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17244 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Ramiro Morales 2011-12-22 20:42:40 +00:00
parent 03eb2907d5
commit 287565779d
16 changed files with 374 additions and 43 deletions

View File

@ -203,6 +203,7 @@ answer newbie questions, and generally made Django that much better:
Marc Garcia <marc.garcia@accopensys.com> Marc Garcia <marc.garcia@accopensys.com>
Andy Gayton <andy-django@thecablelounge.com> Andy Gayton <andy-django@thecablelounge.com>
geber@datacollect.com geber@datacollect.com
Jeffrey Gelens <jeffrey@gelens.org>
Baishampayan Ghose Baishampayan Ghose
Joshua Ginsberg <jag@flowtheory.net> Joshua Ginsberg <jag@flowtheory.net>
Dimitris Glezos <dimitris@glezos.com> Dimitris Glezos <dimitris@glezos.com>
@ -269,6 +270,7 @@ answer newbie questions, and generally made Django that much better:
jpellerin@gmail.com jpellerin@gmail.com
junzhang.jn@gmail.com junzhang.jn@gmail.com
Xia Kai <http://blog.xiaket.org/> Xia Kai <http://blog.xiaket.org/>
Anssi Kääriäinen
Antti Kaihola <http://djangopeople.net/akaihola/> Antti Kaihola <http://djangopeople.net/akaihola/>
Peter van Kampen Peter van Kampen
Bahadır Kandemir <bahadir@pardus.org.tr> Bahadır Kandemir <bahadir@pardus.org.tr>

View File

@ -406,6 +406,9 @@ class BaseDatabaseFeatures(object):
supports_stddev = None supports_stddev = None
can_introspect_foreign_keys = None can_introspect_foreign_keys = None
# Support for the DISTINCT ON clause
can_distinct_on_fields = False
def __init__(self, connection): def __init__(self, connection):
self.connection = connection self.connection = connection
@ -559,6 +562,17 @@ class BaseDatabaseOperations(object):
""" """
raise NotImplementedError('Full-text search is not implemented for this database backend') raise NotImplementedError('Full-text search is not implemented for this database backend')
def distinct_sql(self, fields):
"""
Returns an SQL DISTINCT clause which removes duplicate rows from the
result set. If any fields are given, only the given fields are being
checked for duplicates.
"""
if fields:
raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
else:
return 'DISTINCT'
def last_executed_query(self, cursor, sql, params): def last_executed_query(self, cursor, sql, params):
""" """
Returns a string of the query last executed by the given cursor, with Returns a string of the query last executed by the given cursor, with

View File

@ -82,6 +82,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
has_select_for_update_nowait = True has_select_for_update_nowait = True
has_bulk_insert = True has_bulk_insert = True
supports_tablespaces = True supports_tablespaces = True
can_distinct_on_fields = True
class DatabaseWrapper(BaseDatabaseWrapper): class DatabaseWrapper(BaseDatabaseWrapper):
vendor = 'postgresql' vendor = 'postgresql'

View File

@ -179,6 +179,12 @@ class DatabaseOperations(BaseDatabaseOperations):
return 63 return 63
def distinct_sql(self, fields):
if fields:
return 'DISTINCT ON (%s)' % ', '.join(fields)
else:
return 'DISTINCT'
def last_executed_query(self, cursor, sql, params): def last_executed_query(self, cursor, sql, params):
# http://initd.org/psycopg/docs/cursor.html#cursor.query # http://initd.org/psycopg/docs/cursor.html#cursor.query
# The query attribute is a Psycopg extension to the DB API 2.0. # The query attribute is a Psycopg extension to the DB API 2.0.

View File

@ -323,6 +323,8 @@ class QuerySet(object):
If args is present the expression is passed as a kwarg using If args is present the expression is passed as a kwarg using
the Aggregate object's default alias. the Aggregate object's default alias.
""" """
if self.query.distinct_fields:
raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
for arg in args: for arg in args:
kwargs[arg.default_alias] = arg kwargs[arg.default_alias] = arg
@ -751,12 +753,14 @@ class QuerySet(object):
obj.query.add_ordering(*field_names) obj.query.add_ordering(*field_names)
return obj return obj
def distinct(self, true_or_false=True): def distinct(self, *field_names):
""" """
Returns a new QuerySet instance that will select only distinct results. Returns a new QuerySet instance that will select only distinct results.
""" """
assert self.query.can_filter(), \
"Cannot create distinct fields once a slice has been taken."
obj = self._clone() obj = self._clone()
obj.query.distinct = true_or_false obj.query.add_distinct_fields(*field_names)
return obj return obj
def extra(self, select=None, where=None, params=None, tables=None, def extra(self, select=None, where=None, params=None, tables=None,
@ -1179,7 +1183,7 @@ class EmptyQuerySet(QuerySet):
""" """
return self return self
def distinct(self, true_or_false=True): def distinct(self, fields=None):
""" """
Always returns EmptyQuerySet. Always returns EmptyQuerySet.
""" """

View File

@ -23,6 +23,8 @@ class SQLCompiler(object):
Does any necessary class setup immediately prior to producing SQL. This Does any necessary class setup immediately prior to producing SQL. This
is for things that can't necessarily be done in __init__ because we is for things that can't necessarily be done in __init__ because we
might not have all the pieces in place at that time. might not have all the pieces in place at that time.
# TODO: after the query has been executed, the altered state should be
# cleaned. We are not using a clone() of the query here.
""" """
if not self.query.tables: if not self.query.tables:
self.query.join((None, self.query.model._meta.db_table, None, None)) self.query.join((None, self.query.model._meta.db_table, None, None))
@ -60,11 +62,19 @@ class SQLCompiler(object):
return '', () return '', ()
self.pre_sql_setup() self.pre_sql_setup()
# After executing the query, we must get rid of any joins the query
# setup created. So, take note of alias counts before the query ran.
# However we do not want to get rid of stuff done in pre_sql_setup(),
# as the pre_sql_setup will modify query state in a way that forbids
# another run of it.
self.refcounts_before = self.query.alias_refcount.copy()
out_cols = self.get_columns(with_col_aliases) out_cols = self.get_columns(with_col_aliases)
ordering, ordering_group_by = self.get_ordering() ordering, ordering_group_by = self.get_ordering()
# This must come after 'select' and 'ordering' -- see docstring of distinct_fields = self.get_distinct()
# get_from_clause() for details.
# This must come after 'select', 'ordering' and 'distinct' -- see
# docstring of get_from_clause() for details.
from_, f_params = self.get_from_clause() from_, f_params = self.get_from_clause()
qn = self.quote_name_unless_alias qn = self.quote_name_unless_alias
@ -76,8 +86,10 @@ class SQLCompiler(object):
params.extend(val[1]) params.extend(val[1])
result = ['SELECT'] result = ['SELECT']
if self.query.distinct: if self.query.distinct:
result.append('DISTINCT') result.append(self.connection.ops.distinct_sql(distinct_fields))
result.append(', '.join(out_cols + self.query.ordering_aliases)) result.append(', '.join(out_cols + self.query.ordering_aliases))
result.append('FROM') result.append('FROM')
@ -90,6 +102,9 @@ class SQLCompiler(object):
grouping, gb_params = self.get_grouping() grouping, gb_params = self.get_grouping()
if grouping: if grouping:
if distinct_fields:
raise NotImplementedError(
"annotate() + distinct(fields) not implemented.")
if ordering: if ordering:
# If the backend can't group by PK (i.e., any database # If the backend can't group by PK (i.e., any database
# other than MySQL), then any fields mentioned in the # other than MySQL), then any fields mentioned in the
@ -129,6 +144,9 @@ class SQLCompiler(object):
raise DatabaseError('NOWAIT is not supported on this database backend.') raise DatabaseError('NOWAIT is not supported on this database backend.')
result.append(self.connection.ops.for_update_sql(nowait=nowait)) result.append(self.connection.ops.for_update_sql(nowait=nowait))
# Finally do cleanup - get rid of the joins we created above.
self.query.reset_refcounts(self.refcounts_before)
return ' '.join(result), tuple(params) return ' '.join(result), tuple(params)
def as_nested_sql(self): def as_nested_sql(self):
@ -292,6 +310,26 @@ class SQLCompiler(object):
col_aliases.add(field.column) col_aliases.add(field.column)
return result, aliases return result, aliases
def get_distinct(self):
"""
Returns a quoted list of fields to use in DISTINCT ON part of the query.
Note that this method can alter the tables in the query, and thus it
must be called before get_from_clause().
"""
qn = self.quote_name_unless_alias
qn2 = self.connection.ops.quote_name
result = []
opts = self.query.model._meta
for name in self.query.distinct_fields:
parts = name.split(LOOKUP_SEP)
field, col, alias, _, _ = self._setup_joins(parts, opts, None)
col, alias = self._final_join_removal(col, alias)
result.append("%s.%s" % (qn(alias), qn2(col)))
return result
def get_ordering(self): def get_ordering(self):
""" """
Returns a tuple containing a list representing the SQL elements in the Returns a tuple containing a list representing the SQL elements in the
@ -384,21 +422,7 @@ class SQLCompiler(object):
""" """
name, order = get_order_dir(name, default_order) name, order = get_order_dir(name, default_order)
pieces = name.split(LOOKUP_SEP) pieces = name.split(LOOKUP_SEP)
if not alias: field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
alias = self.query.get_initial_alias()
field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
opts, alias, False)
alias = joins[-1]
col = target.column
if not field.rel:
# To avoid inadvertent trimming of a necessary alias, use the
# refcount to show that we are referencing a non-relation field on
# the model.
self.query.ref_alias(alias)
# Must use left outer joins for nullable fields and their relations.
self.query.promote_alias_chain(joins,
self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
# If we get to this point and the field is a relation to another model, # If we get to this point and the field is a relation to another model,
# append the default ordering for that model. # append the default ordering for that model.
@ -416,11 +440,47 @@ class SQLCompiler(object):
results.extend(self.find_ordering_name(item, opts, alias, results.extend(self.find_ordering_name(item, opts, alias,
order, already_seen)) order, already_seen))
return results return results
col, alias = self._final_join_removal(col, alias)
return [(alias, col, order)]
def _setup_joins(self, pieces, opts, alias):
"""
A helper method for get_ordering and get_distinct. This method will
call query.setup_joins, handle refcounts and then promote the joins.
Note that get_ordering and get_distinct must produce same target
columns on same input, as the prefixes of get_ordering and get_distinct
must match. Executing SQL where this is not true is an error.
"""
if not alias:
alias = self.query.get_initial_alias()
field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
opts, alias, False)
alias = joins[-1]
col = target.column
if not field.rel:
# To avoid inadvertent trimming of a necessary alias, use the
# refcount to show that we are referencing a non-relation field on
# the model.
self.query.ref_alias(alias)
# Must use left outer joins for nullable fields and their relations.
# Ordering or distinct must not affect the returned set, and INNER
# JOINS for nullable fields could do this.
self.query.promote_alias_chain(joins,
self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
return field, col, alias, joins, opts
def _final_join_removal(self, col, alias):
"""
A helper method for get_distinct and get_ordering. This method will
trim extra not-needed joins from the tail of the join chain.
This is very similar to what is done in trim_joins, but we will
trim LEFT JOINS here. It would be a good idea to consolidate this
method and query.trim_joins().
"""
if alias: if alias:
# We have to do the same "final join" optimisation as in
# add_filter, since the final column might not otherwise be part of
# the select set (so we can't order on it).
while 1: while 1:
join = self.query.alias_map[alias] join = self.query.alias_map[alias]
if col != join[RHS_JOIN_COL]: if col != join[RHS_JOIN_COL]:
@ -428,7 +488,7 @@ class SQLCompiler(object):
self.query.unref_alias(alias) self.query.unref_alias(alias)
alias = join[LHS_ALIAS] alias = join[LHS_ALIAS]
col = join[LHS_JOIN_COL] col = join[LHS_JOIN_COL]
return [(alias, col, order)] return col, alias
def get_from_clause(self): def get_from_clause(self):
""" """
@ -438,8 +498,8 @@ class SQLCompiler(object):
from-clause via a "select". from-clause via a "select".
This should only be called after any SQL construction methods that This should only be called after any SQL construction methods that
might change the tables we need. This means the select columns and might change the tables we need. This means the select columns,
ordering must be done first. ordering and distinct must be done first.
""" """
result = [] result = []
qn = self.quote_name_unless_alias qn = self.quote_name_unless_alias
@ -984,6 +1044,7 @@ class SQLAggregateCompiler(SQLCompiler):
""" """
if qn is None: if qn is None:
qn = self.quote_name_unless_alias qn = self.quote_name_unless_alias
sql = ('SELECT %s FROM (%s) subquery' % ( sql = ('SELECT %s FROM (%s) subquery' % (
', '.join([ ', '.join([
aggregate.as_sql(qn, self.connection) aggregate.as_sql(qn, self.connection)

View File

@ -127,6 +127,7 @@ class Query(object):
self.order_by = [] self.order_by = []
self.low_mark, self.high_mark = 0, None # Used for offset/limit self.low_mark, self.high_mark = 0, None # Used for offset/limit
self.distinct = False self.distinct = False
self.distinct_fields = []
self.select_for_update = False self.select_for_update = False
self.select_for_update_nowait = False self.select_for_update_nowait = False
self.select_related = False self.select_related = False
@ -265,6 +266,7 @@ class Query(object):
obj.order_by = self.order_by[:] obj.order_by = self.order_by[:]
obj.low_mark, obj.high_mark = self.low_mark, self.high_mark obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
obj.distinct = self.distinct obj.distinct = self.distinct
obj.distinct_fields = self.distinct_fields[:]
obj.select_for_update = self.select_for_update obj.select_for_update = self.select_for_update
obj.select_for_update_nowait = self.select_for_update_nowait obj.select_for_update_nowait = self.select_for_update_nowait
obj.select_related = self.select_related obj.select_related = self.select_related
@ -298,6 +300,7 @@ class Query(object):
else: else:
obj.used_aliases = set() obj.used_aliases = set()
obj.filter_is_sticky = False obj.filter_is_sticky = False
obj.__dict__.update(kwargs) obj.__dict__.update(kwargs)
if hasattr(obj, '_setup_query'): if hasattr(obj, '_setup_query'):
obj._setup_query() obj._setup_query()
@ -393,7 +396,7 @@ class Query(object):
Performs a COUNT() query using the current filter constraints. Performs a COUNT() query using the current filter constraints.
""" """
obj = self.clone() obj = self.clone()
if len(self.select) > 1 or self.aggregate_select: if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
# If a select clause exists, then the query has already started to # If a select clause exists, then the query has already started to
# specify the columns that are to be returned. # specify the columns that are to be returned.
# In this case, we need to use a subquery to evaluate the count. # In this case, we need to use a subquery to evaluate the count.
@ -452,6 +455,8 @@ class Query(object):
"Cannot combine queries once a slice has been taken." "Cannot combine queries once a slice has been taken."
assert self.distinct == rhs.distinct, \ assert self.distinct == rhs.distinct, \
"Cannot combine a unique query with a non-unique query." "Cannot combine a unique query with a non-unique query."
assert self.distinct_fields == rhs.distinct_fields, \
"Cannot combine queries with different distinct fields."
self.remove_inherited_models() self.remove_inherited_models()
# Work out how to relabel the rhs aliases, if necessary. # Work out how to relabel the rhs aliases, if necessary.
@ -674,9 +679,9 @@ class Query(object):
""" Increases the reference count for this alias. """ """ Increases the reference count for this alias. """
self.alias_refcount[alias] += 1 self.alias_refcount[alias] += 1
def unref_alias(self, alias): def unref_alias(self, alias, amount=1):
""" Decreases the reference count for this alias. """ """ Decreases the reference count for this alias. """
self.alias_refcount[alias] -= 1 self.alias_refcount[alias] -= amount
def promote_alias(self, alias, unconditional=False): def promote_alias(self, alias, unconditional=False):
""" """
@ -705,6 +710,15 @@ class Query(object):
if self.promote_alias(alias, must_promote): if self.promote_alias(alias, must_promote):
must_promote = True must_promote = True
def reset_refcounts(self, to_counts):
"""
This method will reset reference counts for aliases so that they match
the value passed in :param to_counts:.
"""
for alias, cur_refcount in self.alias_refcount.copy().items():
unref_amount = cur_refcount - to_counts.get(alias, 0)
self.unref_alias(alias, unref_amount)
def promote_unused_aliases(self, initial_refcounts, used_aliases): def promote_unused_aliases(self, initial_refcounts, used_aliases):
""" """
Given a "before" copy of the alias_refcounts dictionary (as Given a "before" copy of the alias_refcounts dictionary (as
@ -832,7 +846,8 @@ class Query(object):
def count_active_tables(self): def count_active_tables(self):
""" """
Returns the number of tables in this query with a non-zero reference Returns the number of tables in this query with a non-zero reference
count. count. Note that after execution, the reference counts are zeroed, so
tables added in compiler will not be seen by this method.
""" """
return len([1 for count in self.alias_refcount.itervalues() if count]) return len([1 for count in self.alias_refcount.itervalues() if count])
@ -1596,6 +1611,13 @@ class Query(object):
self.select = [] self.select = []
self.select_fields = [] self.select_fields = []
def add_distinct_fields(self, *field_names):
"""
Adds and resolves the given fields to the query's "distinct on" clause.
"""
self.distinct_fields = field_names
self.distinct = True
def add_fields(self, field_names, allow_m2m=True): def add_fields(self, field_names, allow_m2m=True):
""" """
Adds the given (model) fields to the select set. The field names are Adds the given (model) fields to the select set. The field names are

View File

@ -345,7 +345,7 @@ remain undefined afterward).
distinct distinct
~~~~~~~~ ~~~~~~~~
.. method:: distinct() .. method:: distinct([*fields])
Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
eliminates duplicate rows from the query results. eliminates duplicate rows from the query results.
@ -374,6 +374,43 @@ query spans multiple tables, it's possible to get duplicate results when a
:meth:`values()` together, be careful when ordering by fields not in the :meth:`values()` together, be careful when ordering by fields not in the
:meth:`values()` call. :meth:`values()` call.
.. versionadded:: 1.4
The possibility to pass positional arguments (``*fields``) is new in Django 1.4.
They are names of fields to which the ``DISTINCT`` should be limited. This
translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
duplicate rows not by comparing all fields in a row, but by comparing only the given
fields.
.. note::
Note that the ability to specify field names is only available in PostgreSQL.
.. note::
When using the ``DISTINCT ON`` functionality it is required that the columns given
to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT
DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
don't specify an order, then you'll get some arbitrary row.
Examples::
>>> Author.objects.distinct()
[...]
>>> Entry.objects.order_by('pub_date').distinct('pub_date')
[...]
>>> Entry.objects.order_by('blog').distinct('blog')
[...]
>>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
[...]
>>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
[...]
>>> Entry.objects.order_by('author', 'pub_date').distinct('author')
[...]
values values
~~~~~~ ~~~~~~

View File

@ -507,6 +507,16 @@ Django 1.4 also includes several smaller improvements worth noting:
``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
cache backends. cache backends.
* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
The ``distinct()`` ``Queryset`` method now accepts an optional list of model
field names. If specified, then the ``DISTINCT`` statement is limited to these
fields. The PostgreSQL is the only of the database backends shipped with
Django that supports this new functionality.
For more details, see the documentation for
:meth:`~django.db.models.query.QuerySet.distinct`.
Backwards incompatible changes in 1.4 Backwards incompatible changes in 1.4
===================================== =====================================

View File

@ -498,6 +498,16 @@ Django 1.4 also includes several smaller improvements worth noting:
``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
cache backends. cache backends.
* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
The ``distinct()`` ``Queryset`` method now accepts an optional list of model
field names. If specified, then the ``DISTINCT`` statement is limited to these
fields. The PostgreSQL is the only of the database backends shipped with
Django that supports this new functionality.
For more details, see the documentation for
:meth:`~django.db.models.query.QuerySet.distinct`.
.. _backwards-incompatible-changes-1.4: .. _backwards-incompatible-changes-1.4:
Backwards incompatible changes in 1.4 Backwards incompatible changes in 1.4

View File

@ -0,0 +1 @@
#

View File

@ -0,0 +1,39 @@
from django.db import models
class Tag(models.Model):
name = models.CharField(max_length=10)
parent = models.ForeignKey('self', blank=True, null=True,
related_name='children')
class Meta:
ordering = ['name']
def __unicode__(self):
return self.name
class Celebrity(models.Model):
name = models.CharField("Name", max_length=20)
greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
def __unicode__(self):
return self.name
class Fan(models.Model):
fan_of = models.ForeignKey(Celebrity)
class Staff(models.Model):
id = models.IntegerField(primary_key=True)
name = models.CharField(max_length=50)
organisation = models.CharField(max_length=100)
tags = models.ManyToManyField(Tag, through='StaffTag')
coworkers = models.ManyToManyField('self')
def __unicode__(self):
return self.name
class StaffTag(models.Model):
staff = models.ForeignKey(Staff)
tag = models.ForeignKey(Tag)
def __unicode__(self):
return u"%s -> %s" % (self.tag, self.staff)

View File

@ -0,0 +1,116 @@
from __future__ import absolute_import, with_statement
from django.db.models import Max
from django.test import TestCase, skipUnlessDBFeature
from .models import Tag, Celebrity, Fan, Staff, StaffTag
class DistinctOnTests(TestCase):
def setUp(self):
t1 = Tag.objects.create(name='t1')
t2 = Tag.objects.create(name='t2', parent=t1)
t3 = Tag.objects.create(name='t3', parent=t1)
t4 = Tag.objects.create(name='t4', parent=t3)
t5 = Tag.objects.create(name='t5', parent=t3)
p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
p1_o1.coworkers.add(p2_o1, p3_o1)
StaffTag.objects.create(staff=p1_o1, tag=t1)
StaffTag.objects.create(staff=p1_o1, tag=t1)
celeb1 = Celebrity.objects.create(name="c1")
celeb2 = Celebrity.objects.create(name="c2")
self.fan1 = Fan.objects.create(fan_of=celeb1)
self.fan2 = Fan.objects.create(fan_of=celeb1)
self.fan3 = Fan.objects.create(fan_of=celeb2)
@skipUnlessDBFeature('can_distinct_on_fields')
def test_basic_distinct_on(self):
"""QuerySet.distinct('field', ...) works"""
# (qset, expected) tuples
qsets = (
(
Staff.objects.distinct().order_by('name'),
['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
),
(
Staff.objects.distinct('name').order_by('name'),
['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
),
(
Staff.objects.distinct('organisation').order_by('organisation', 'name'),
['<Staff: p1>', '<Staff: p1>'],
),
(
Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
),
(
Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
distinct('name').order_by('name'),
['<Celebrity: c1>', '<Celebrity: c2>'],
),
# Does combining querysets work?
(
(Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
distinct('name').order_by('name')
|Celebrity.objects.filter(fan__in=[self.fan3]).\
distinct('name').order_by('name')),
['<Celebrity: c1>', '<Celebrity: c2>'],
),
(
StaffTag.objects.distinct('staff','tag'),
['<StaffTag: t1 -> p1>'],
),
(
Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
),
(
StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
['<StaffTag: t1 -> p1>'],
),
# Fetch the alphabetically first coworker for each worker
(
(Staff.objects.distinct('id').order_by('id', 'coworkers__name').
values_list('id', 'coworkers__name')),
["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
),
)
for qset, expected in qsets:
self.assertQuerysetEqual(qset, expected)
self.assertEqual(qset.count(), len(expected))
# Combining queries with different distinct_fields is not allowed.
base_qs = Celebrity.objects.all()
self.assertRaisesMessage(
AssertionError,
"Cannot combine queries with different distinct fields.",
lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
)
# Test join unreffing
c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
self.assertIn('OUTER JOIN', str(c1.query))
c2 = c1.distinct('pk')
self.assertNotIn('OUTER JOIN', str(c2.query))
@skipUnlessDBFeature('can_distinct_on_fields')
def test_distinct_not_implemented_checks(self):
# distinct + annotate not allowed
with self.assertRaises(NotImplementedError):
Celebrity.objects.annotate(Max('id')).distinct('id')[0]
with self.assertRaises(NotImplementedError):
Celebrity.objects.distinct('id').annotate(Max('id'))[0]
# However this check is done only when the query executes, so you
# can use distinct() to remove the fields before execution.
Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
# distinct + aggregate not allowed
with self.assertRaises(NotImplementedError):
Celebrity.objects.distinct('id').aggregate(Max('id'))

View File

@ -209,6 +209,9 @@ class Celebrity(models.Model):
name = models.CharField("Name", max_length=20) name = models.CharField("Name", max_length=20)
greatest_fan = models.ForeignKey("Fan", null=True, unique=True) greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
def __unicode__(self):
return self.name
class TvChef(Celebrity): class TvChef(Celebrity):
pass pass
@ -343,4 +346,3 @@ class OneToOneCategory(models.Model):
def __unicode__(self): def __unicode__(self):
return "one2one " + self.new_name return "one2one " + self.new_name

View File

@ -234,18 +234,22 @@ class Queries1Tests(BaseQuerysetTest):
['<Item: four>', '<Item: one>'] ['<Item: four>', '<Item: one>']
) )
# FIXME: This is difficult to fix and very much an edge case, so punt for
# now. This is related to the order_by() tests for ticket #2253, but the
# old bug exhibited itself here (q2 was pulling too many tables into the
# combined query with the new ordering, but only because we have evaluated
# q2 already).
@unittest.expectedFailure
def test_order_by_tables(self): def test_order_by_tables(self):
q1 = Item.objects.order_by('name') q1 = Item.objects.order_by('name')
q2 = Item.objects.filter(id=self.i1.id) q2 = Item.objects.filter(id=self.i1.id)
list(q2) list(q2)
self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1) self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
def test_order_by_join_unref(self):
"""
This test is related to the above one, testing that there aren't
old JOINs in the query.
"""
qs = Celebrity.objects.order_by('greatest_fan__fan_of')
self.assertIn('OUTER JOIN', str(qs.query))
qs = qs.order_by('id')
self.assertNotIn('OUTER JOIN', str(qs.query))
def test_tickets_4088_4306(self): def test_tickets_4088_4306(self):
self.assertQuerysetEqual( self.assertQuerysetEqual(
Report.objects.filter(creator=1001), Report.objects.filter(creator=1001),
@ -1728,7 +1732,7 @@ class ToFieldTests(TestCase):
class ConditionalTests(BaseQuerysetTest): class ConditionalTests(BaseQuerysetTest):
"""Tests whose execution depend on dfferent environment conditions like """Tests whose execution depend on different environment conditions like
Python version or DB backend features""" Python version or DB backend features"""
def setUp(self): def setUp(self):
@ -1739,6 +1743,7 @@ class ConditionalTests(BaseQuerysetTest):
t4 = Tag.objects.create(name='t4', parent=t3) t4 = Tag.objects.create(name='t4', parent=t3)
t5 = Tag.objects.create(name='t5', parent=t3) t5 = Tag.objects.create(name='t5', parent=t3)
# In Python 2.6 beta releases, exceptions raised in __len__ are swallowed # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
# (Python issue 1242657), so these cases return an empty list, rather than # (Python issue 1242657), so these cases return an empty list, rather than
# raising an exception. Not a lot we can do about that, unfortunately, due to # raising an exception. Not a lot we can do about that, unfortunately, due to
@ -1810,6 +1815,7 @@ class ConditionalTests(BaseQuerysetTest):
2500 2500
) )
class UnionTests(unittest.TestCase): class UnionTests(unittest.TestCase):
""" """
Tests for the union of two querysets. Bug #12252. Tests for the union of two querysets. Bug #12252.

View File

@ -40,9 +40,9 @@ class SelectRelatedRegressTests(TestCase):
self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections], self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
[(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')]) [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
# This final query should only join seven tables (port, device and building # This final query should only have seven tables (port, device and building
# twice each, plus connection once). # twice each, plus connection once). Thus, 6 joins plus the FROM table.
self.assertEqual(connections.query.count_active_tables(), 7) self.assertEqual(str(connections.query).count(" JOIN "), 6)
def test_regression_8106(self): def test_regression_8106(self):