Added support for modifying the effect of ``DISTINCT`` clauses so they
only consider some fields (PostgreSQL only). For this, the ``distinct()`` QuerySet method now accepts an optional list of model fields names and generates ``DISTINCT ON`` clauses on these cases. Thanks Jeffrey Gelens and Anssi Kääriäinen for their work. Fixes #6422. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17244 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
03eb2907d5
commit
287565779d
2
AUTHORS
2
AUTHORS
|
@ -203,6 +203,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
Marc Garcia <marc.garcia@accopensys.com>
|
Marc Garcia <marc.garcia@accopensys.com>
|
||||||
Andy Gayton <andy-django@thecablelounge.com>
|
Andy Gayton <andy-django@thecablelounge.com>
|
||||||
geber@datacollect.com
|
geber@datacollect.com
|
||||||
|
Jeffrey Gelens <jeffrey@gelens.org>
|
||||||
Baishampayan Ghose
|
Baishampayan Ghose
|
||||||
Joshua Ginsberg <jag@flowtheory.net>
|
Joshua Ginsberg <jag@flowtheory.net>
|
||||||
Dimitris Glezos <dimitris@glezos.com>
|
Dimitris Glezos <dimitris@glezos.com>
|
||||||
|
@ -269,6 +270,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
jpellerin@gmail.com
|
jpellerin@gmail.com
|
||||||
junzhang.jn@gmail.com
|
junzhang.jn@gmail.com
|
||||||
Xia Kai <http://blog.xiaket.org/>
|
Xia Kai <http://blog.xiaket.org/>
|
||||||
|
Anssi Kääriäinen
|
||||||
Antti Kaihola <http://djangopeople.net/akaihola/>
|
Antti Kaihola <http://djangopeople.net/akaihola/>
|
||||||
Peter van Kampen
|
Peter van Kampen
|
||||||
Bahadır Kandemir <bahadir@pardus.org.tr>
|
Bahadır Kandemir <bahadir@pardus.org.tr>
|
||||||
|
|
|
@ -406,6 +406,9 @@ class BaseDatabaseFeatures(object):
|
||||||
supports_stddev = None
|
supports_stddev = None
|
||||||
can_introspect_foreign_keys = None
|
can_introspect_foreign_keys = None
|
||||||
|
|
||||||
|
# Support for the DISTINCT ON clause
|
||||||
|
can_distinct_on_fields = False
|
||||||
|
|
||||||
def __init__(self, connection):
|
def __init__(self, connection):
|
||||||
self.connection = connection
|
self.connection = connection
|
||||||
|
|
||||||
|
@ -559,6 +562,17 @@ class BaseDatabaseOperations(object):
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError('Full-text search is not implemented for this database backend')
|
raise NotImplementedError('Full-text search is not implemented for this database backend')
|
||||||
|
|
||||||
|
def distinct_sql(self, fields):
|
||||||
|
"""
|
||||||
|
Returns an SQL DISTINCT clause which removes duplicate rows from the
|
||||||
|
result set. If any fields are given, only the given fields are being
|
||||||
|
checked for duplicates.
|
||||||
|
"""
|
||||||
|
if fields:
|
||||||
|
raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
|
||||||
|
else:
|
||||||
|
return 'DISTINCT'
|
||||||
|
|
||||||
def last_executed_query(self, cursor, sql, params):
|
def last_executed_query(self, cursor, sql, params):
|
||||||
"""
|
"""
|
||||||
Returns a string of the query last executed by the given cursor, with
|
Returns a string of the query last executed by the given cursor, with
|
||||||
|
|
|
@ -82,6 +82,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
|
||||||
has_select_for_update_nowait = True
|
has_select_for_update_nowait = True
|
||||||
has_bulk_insert = True
|
has_bulk_insert = True
|
||||||
supports_tablespaces = True
|
supports_tablespaces = True
|
||||||
|
can_distinct_on_fields = True
|
||||||
|
|
||||||
class DatabaseWrapper(BaseDatabaseWrapper):
|
class DatabaseWrapper(BaseDatabaseWrapper):
|
||||||
vendor = 'postgresql'
|
vendor = 'postgresql'
|
||||||
|
|
|
@ -179,6 +179,12 @@ class DatabaseOperations(BaseDatabaseOperations):
|
||||||
|
|
||||||
return 63
|
return 63
|
||||||
|
|
||||||
|
def distinct_sql(self, fields):
|
||||||
|
if fields:
|
||||||
|
return 'DISTINCT ON (%s)' % ', '.join(fields)
|
||||||
|
else:
|
||||||
|
return 'DISTINCT'
|
||||||
|
|
||||||
def last_executed_query(self, cursor, sql, params):
|
def last_executed_query(self, cursor, sql, params):
|
||||||
# http://initd.org/psycopg/docs/cursor.html#cursor.query
|
# http://initd.org/psycopg/docs/cursor.html#cursor.query
|
||||||
# The query attribute is a Psycopg extension to the DB API 2.0.
|
# The query attribute is a Psycopg extension to the DB API 2.0.
|
||||||
|
|
|
@ -323,6 +323,8 @@ class QuerySet(object):
|
||||||
If args is present the expression is passed as a kwarg using
|
If args is present the expression is passed as a kwarg using
|
||||||
the Aggregate object's default alias.
|
the Aggregate object's default alias.
|
||||||
"""
|
"""
|
||||||
|
if self.query.distinct_fields:
|
||||||
|
raise NotImplementedError("aggregate() + distinct(fields) not implemented.")
|
||||||
for arg in args:
|
for arg in args:
|
||||||
kwargs[arg.default_alias] = arg
|
kwargs[arg.default_alias] = arg
|
||||||
|
|
||||||
|
@ -751,12 +753,14 @@ class QuerySet(object):
|
||||||
obj.query.add_ordering(*field_names)
|
obj.query.add_ordering(*field_names)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def distinct(self, true_or_false=True):
|
def distinct(self, *field_names):
|
||||||
"""
|
"""
|
||||||
Returns a new QuerySet instance that will select only distinct results.
|
Returns a new QuerySet instance that will select only distinct results.
|
||||||
"""
|
"""
|
||||||
|
assert self.query.can_filter(), \
|
||||||
|
"Cannot create distinct fields once a slice has been taken."
|
||||||
obj = self._clone()
|
obj = self._clone()
|
||||||
obj.query.distinct = true_or_false
|
obj.query.add_distinct_fields(*field_names)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def extra(self, select=None, where=None, params=None, tables=None,
|
def extra(self, select=None, where=None, params=None, tables=None,
|
||||||
|
@ -1179,7 +1183,7 @@ class EmptyQuerySet(QuerySet):
|
||||||
"""
|
"""
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def distinct(self, true_or_false=True):
|
def distinct(self, fields=None):
|
||||||
"""
|
"""
|
||||||
Always returns EmptyQuerySet.
|
Always returns EmptyQuerySet.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -23,6 +23,8 @@ class SQLCompiler(object):
|
||||||
Does any necessary class setup immediately prior to producing SQL. This
|
Does any necessary class setup immediately prior to producing SQL. This
|
||||||
is for things that can't necessarily be done in __init__ because we
|
is for things that can't necessarily be done in __init__ because we
|
||||||
might not have all the pieces in place at that time.
|
might not have all the pieces in place at that time.
|
||||||
|
# TODO: after the query has been executed, the altered state should be
|
||||||
|
# cleaned. We are not using a clone() of the query here.
|
||||||
"""
|
"""
|
||||||
if not self.query.tables:
|
if not self.query.tables:
|
||||||
self.query.join((None, self.query.model._meta.db_table, None, None))
|
self.query.join((None, self.query.model._meta.db_table, None, None))
|
||||||
|
@ -60,11 +62,19 @@ class SQLCompiler(object):
|
||||||
return '', ()
|
return '', ()
|
||||||
|
|
||||||
self.pre_sql_setup()
|
self.pre_sql_setup()
|
||||||
|
# After executing the query, we must get rid of any joins the query
|
||||||
|
# setup created. So, take note of alias counts before the query ran.
|
||||||
|
# However we do not want to get rid of stuff done in pre_sql_setup(),
|
||||||
|
# as the pre_sql_setup will modify query state in a way that forbids
|
||||||
|
# another run of it.
|
||||||
|
self.refcounts_before = self.query.alias_refcount.copy()
|
||||||
out_cols = self.get_columns(with_col_aliases)
|
out_cols = self.get_columns(with_col_aliases)
|
||||||
ordering, ordering_group_by = self.get_ordering()
|
ordering, ordering_group_by = self.get_ordering()
|
||||||
|
|
||||||
# This must come after 'select' and 'ordering' -- see docstring of
|
distinct_fields = self.get_distinct()
|
||||||
# get_from_clause() for details.
|
|
||||||
|
# This must come after 'select', 'ordering' and 'distinct' -- see
|
||||||
|
# docstring of get_from_clause() for details.
|
||||||
from_, f_params = self.get_from_clause()
|
from_, f_params = self.get_from_clause()
|
||||||
|
|
||||||
qn = self.quote_name_unless_alias
|
qn = self.quote_name_unless_alias
|
||||||
|
@ -76,8 +86,10 @@ class SQLCompiler(object):
|
||||||
params.extend(val[1])
|
params.extend(val[1])
|
||||||
|
|
||||||
result = ['SELECT']
|
result = ['SELECT']
|
||||||
|
|
||||||
if self.query.distinct:
|
if self.query.distinct:
|
||||||
result.append('DISTINCT')
|
result.append(self.connection.ops.distinct_sql(distinct_fields))
|
||||||
|
|
||||||
result.append(', '.join(out_cols + self.query.ordering_aliases))
|
result.append(', '.join(out_cols + self.query.ordering_aliases))
|
||||||
|
|
||||||
result.append('FROM')
|
result.append('FROM')
|
||||||
|
@ -90,6 +102,9 @@ class SQLCompiler(object):
|
||||||
|
|
||||||
grouping, gb_params = self.get_grouping()
|
grouping, gb_params = self.get_grouping()
|
||||||
if grouping:
|
if grouping:
|
||||||
|
if distinct_fields:
|
||||||
|
raise NotImplementedError(
|
||||||
|
"annotate() + distinct(fields) not implemented.")
|
||||||
if ordering:
|
if ordering:
|
||||||
# If the backend can't group by PK (i.e., any database
|
# If the backend can't group by PK (i.e., any database
|
||||||
# other than MySQL), then any fields mentioned in the
|
# other than MySQL), then any fields mentioned in the
|
||||||
|
@ -129,6 +144,9 @@ class SQLCompiler(object):
|
||||||
raise DatabaseError('NOWAIT is not supported on this database backend.')
|
raise DatabaseError('NOWAIT is not supported on this database backend.')
|
||||||
result.append(self.connection.ops.for_update_sql(nowait=nowait))
|
result.append(self.connection.ops.for_update_sql(nowait=nowait))
|
||||||
|
|
||||||
|
# Finally do cleanup - get rid of the joins we created above.
|
||||||
|
self.query.reset_refcounts(self.refcounts_before)
|
||||||
|
|
||||||
return ' '.join(result), tuple(params)
|
return ' '.join(result), tuple(params)
|
||||||
|
|
||||||
def as_nested_sql(self):
|
def as_nested_sql(self):
|
||||||
|
@ -292,6 +310,26 @@ class SQLCompiler(object):
|
||||||
col_aliases.add(field.column)
|
col_aliases.add(field.column)
|
||||||
return result, aliases
|
return result, aliases
|
||||||
|
|
||||||
|
def get_distinct(self):
|
||||||
|
"""
|
||||||
|
Returns a quoted list of fields to use in DISTINCT ON part of the query.
|
||||||
|
|
||||||
|
Note that this method can alter the tables in the query, and thus it
|
||||||
|
must be called before get_from_clause().
|
||||||
|
"""
|
||||||
|
qn = self.quote_name_unless_alias
|
||||||
|
qn2 = self.connection.ops.quote_name
|
||||||
|
result = []
|
||||||
|
opts = self.query.model._meta
|
||||||
|
|
||||||
|
for name in self.query.distinct_fields:
|
||||||
|
parts = name.split(LOOKUP_SEP)
|
||||||
|
field, col, alias, _, _ = self._setup_joins(parts, opts, None)
|
||||||
|
col, alias = self._final_join_removal(col, alias)
|
||||||
|
result.append("%s.%s" % (qn(alias), qn2(col)))
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def get_ordering(self):
|
def get_ordering(self):
|
||||||
"""
|
"""
|
||||||
Returns a tuple containing a list representing the SQL elements in the
|
Returns a tuple containing a list representing the SQL elements in the
|
||||||
|
@ -384,21 +422,7 @@ class SQLCompiler(object):
|
||||||
"""
|
"""
|
||||||
name, order = get_order_dir(name, default_order)
|
name, order = get_order_dir(name, default_order)
|
||||||
pieces = name.split(LOOKUP_SEP)
|
pieces = name.split(LOOKUP_SEP)
|
||||||
if not alias:
|
field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
|
||||||
alias = self.query.get_initial_alias()
|
|
||||||
field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
|
|
||||||
opts, alias, False)
|
|
||||||
alias = joins[-1]
|
|
||||||
col = target.column
|
|
||||||
if not field.rel:
|
|
||||||
# To avoid inadvertent trimming of a necessary alias, use the
|
|
||||||
# refcount to show that we are referencing a non-relation field on
|
|
||||||
# the model.
|
|
||||||
self.query.ref_alias(alias)
|
|
||||||
|
|
||||||
# Must use left outer joins for nullable fields and their relations.
|
|
||||||
self.query.promote_alias_chain(joins,
|
|
||||||
self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
|
|
||||||
|
|
||||||
# If we get to this point and the field is a relation to another model,
|
# If we get to this point and the field is a relation to another model,
|
||||||
# append the default ordering for that model.
|
# append the default ordering for that model.
|
||||||
|
@ -416,11 +440,47 @@ class SQLCompiler(object):
|
||||||
results.extend(self.find_ordering_name(item, opts, alias,
|
results.extend(self.find_ordering_name(item, opts, alias,
|
||||||
order, already_seen))
|
order, already_seen))
|
||||||
return results
|
return results
|
||||||
|
col, alias = self._final_join_removal(col, alias)
|
||||||
|
return [(alias, col, order)]
|
||||||
|
|
||||||
|
def _setup_joins(self, pieces, opts, alias):
|
||||||
|
"""
|
||||||
|
A helper method for get_ordering and get_distinct. This method will
|
||||||
|
call query.setup_joins, handle refcounts and then promote the joins.
|
||||||
|
|
||||||
|
Note that get_ordering and get_distinct must produce same target
|
||||||
|
columns on same input, as the prefixes of get_ordering and get_distinct
|
||||||
|
must match. Executing SQL where this is not true is an error.
|
||||||
|
"""
|
||||||
|
if not alias:
|
||||||
|
alias = self.query.get_initial_alias()
|
||||||
|
field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
|
||||||
|
opts, alias, False)
|
||||||
|
alias = joins[-1]
|
||||||
|
col = target.column
|
||||||
|
if not field.rel:
|
||||||
|
# To avoid inadvertent trimming of a necessary alias, use the
|
||||||
|
# refcount to show that we are referencing a non-relation field on
|
||||||
|
# the model.
|
||||||
|
self.query.ref_alias(alias)
|
||||||
|
|
||||||
|
# Must use left outer joins for nullable fields and their relations.
|
||||||
|
# Ordering or distinct must not affect the returned set, and INNER
|
||||||
|
# JOINS for nullable fields could do this.
|
||||||
|
self.query.promote_alias_chain(joins,
|
||||||
|
self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
|
||||||
|
return field, col, alias, joins, opts
|
||||||
|
|
||||||
|
def _final_join_removal(self, col, alias):
|
||||||
|
"""
|
||||||
|
A helper method for get_distinct and get_ordering. This method will
|
||||||
|
trim extra not-needed joins from the tail of the join chain.
|
||||||
|
|
||||||
|
This is very similar to what is done in trim_joins, but we will
|
||||||
|
trim LEFT JOINS here. It would be a good idea to consolidate this
|
||||||
|
method and query.trim_joins().
|
||||||
|
"""
|
||||||
if alias:
|
if alias:
|
||||||
# We have to do the same "final join" optimisation as in
|
|
||||||
# add_filter, since the final column might not otherwise be part of
|
|
||||||
# the select set (so we can't order on it).
|
|
||||||
while 1:
|
while 1:
|
||||||
join = self.query.alias_map[alias]
|
join = self.query.alias_map[alias]
|
||||||
if col != join[RHS_JOIN_COL]:
|
if col != join[RHS_JOIN_COL]:
|
||||||
|
@ -428,7 +488,7 @@ class SQLCompiler(object):
|
||||||
self.query.unref_alias(alias)
|
self.query.unref_alias(alias)
|
||||||
alias = join[LHS_ALIAS]
|
alias = join[LHS_ALIAS]
|
||||||
col = join[LHS_JOIN_COL]
|
col = join[LHS_JOIN_COL]
|
||||||
return [(alias, col, order)]
|
return col, alias
|
||||||
|
|
||||||
def get_from_clause(self):
|
def get_from_clause(self):
|
||||||
"""
|
"""
|
||||||
|
@ -438,8 +498,8 @@ class SQLCompiler(object):
|
||||||
from-clause via a "select".
|
from-clause via a "select".
|
||||||
|
|
||||||
This should only be called after any SQL construction methods that
|
This should only be called after any SQL construction methods that
|
||||||
might change the tables we need. This means the select columns and
|
might change the tables we need. This means the select columns,
|
||||||
ordering must be done first.
|
ordering and distinct must be done first.
|
||||||
"""
|
"""
|
||||||
result = []
|
result = []
|
||||||
qn = self.quote_name_unless_alias
|
qn = self.quote_name_unless_alias
|
||||||
|
@ -984,6 +1044,7 @@ class SQLAggregateCompiler(SQLCompiler):
|
||||||
"""
|
"""
|
||||||
if qn is None:
|
if qn is None:
|
||||||
qn = self.quote_name_unless_alias
|
qn = self.quote_name_unless_alias
|
||||||
|
|
||||||
sql = ('SELECT %s FROM (%s) subquery' % (
|
sql = ('SELECT %s FROM (%s) subquery' % (
|
||||||
', '.join([
|
', '.join([
|
||||||
aggregate.as_sql(qn, self.connection)
|
aggregate.as_sql(qn, self.connection)
|
||||||
|
|
|
@ -127,6 +127,7 @@ class Query(object):
|
||||||
self.order_by = []
|
self.order_by = []
|
||||||
self.low_mark, self.high_mark = 0, None # Used for offset/limit
|
self.low_mark, self.high_mark = 0, None # Used for offset/limit
|
||||||
self.distinct = False
|
self.distinct = False
|
||||||
|
self.distinct_fields = []
|
||||||
self.select_for_update = False
|
self.select_for_update = False
|
||||||
self.select_for_update_nowait = False
|
self.select_for_update_nowait = False
|
||||||
self.select_related = False
|
self.select_related = False
|
||||||
|
@ -265,6 +266,7 @@ class Query(object):
|
||||||
obj.order_by = self.order_by[:]
|
obj.order_by = self.order_by[:]
|
||||||
obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
|
obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
|
||||||
obj.distinct = self.distinct
|
obj.distinct = self.distinct
|
||||||
|
obj.distinct_fields = self.distinct_fields[:]
|
||||||
obj.select_for_update = self.select_for_update
|
obj.select_for_update = self.select_for_update
|
||||||
obj.select_for_update_nowait = self.select_for_update_nowait
|
obj.select_for_update_nowait = self.select_for_update_nowait
|
||||||
obj.select_related = self.select_related
|
obj.select_related = self.select_related
|
||||||
|
@ -298,6 +300,7 @@ class Query(object):
|
||||||
else:
|
else:
|
||||||
obj.used_aliases = set()
|
obj.used_aliases = set()
|
||||||
obj.filter_is_sticky = False
|
obj.filter_is_sticky = False
|
||||||
|
|
||||||
obj.__dict__.update(kwargs)
|
obj.__dict__.update(kwargs)
|
||||||
if hasattr(obj, '_setup_query'):
|
if hasattr(obj, '_setup_query'):
|
||||||
obj._setup_query()
|
obj._setup_query()
|
||||||
|
@ -393,7 +396,7 @@ class Query(object):
|
||||||
Performs a COUNT() query using the current filter constraints.
|
Performs a COUNT() query using the current filter constraints.
|
||||||
"""
|
"""
|
||||||
obj = self.clone()
|
obj = self.clone()
|
||||||
if len(self.select) > 1 or self.aggregate_select:
|
if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
|
||||||
# If a select clause exists, then the query has already started to
|
# If a select clause exists, then the query has already started to
|
||||||
# specify the columns that are to be returned.
|
# specify the columns that are to be returned.
|
||||||
# In this case, we need to use a subquery to evaluate the count.
|
# In this case, we need to use a subquery to evaluate the count.
|
||||||
|
@ -452,6 +455,8 @@ class Query(object):
|
||||||
"Cannot combine queries once a slice has been taken."
|
"Cannot combine queries once a slice has been taken."
|
||||||
assert self.distinct == rhs.distinct, \
|
assert self.distinct == rhs.distinct, \
|
||||||
"Cannot combine a unique query with a non-unique query."
|
"Cannot combine a unique query with a non-unique query."
|
||||||
|
assert self.distinct_fields == rhs.distinct_fields, \
|
||||||
|
"Cannot combine queries with different distinct fields."
|
||||||
|
|
||||||
self.remove_inherited_models()
|
self.remove_inherited_models()
|
||||||
# Work out how to relabel the rhs aliases, if necessary.
|
# Work out how to relabel the rhs aliases, if necessary.
|
||||||
|
@ -674,9 +679,9 @@ class Query(object):
|
||||||
""" Increases the reference count for this alias. """
|
""" Increases the reference count for this alias. """
|
||||||
self.alias_refcount[alias] += 1
|
self.alias_refcount[alias] += 1
|
||||||
|
|
||||||
def unref_alias(self, alias):
|
def unref_alias(self, alias, amount=1):
|
||||||
""" Decreases the reference count for this alias. """
|
""" Decreases the reference count for this alias. """
|
||||||
self.alias_refcount[alias] -= 1
|
self.alias_refcount[alias] -= amount
|
||||||
|
|
||||||
def promote_alias(self, alias, unconditional=False):
|
def promote_alias(self, alias, unconditional=False):
|
||||||
"""
|
"""
|
||||||
|
@ -705,6 +710,15 @@ class Query(object):
|
||||||
if self.promote_alias(alias, must_promote):
|
if self.promote_alias(alias, must_promote):
|
||||||
must_promote = True
|
must_promote = True
|
||||||
|
|
||||||
|
def reset_refcounts(self, to_counts):
|
||||||
|
"""
|
||||||
|
This method will reset reference counts for aliases so that they match
|
||||||
|
the value passed in :param to_counts:.
|
||||||
|
"""
|
||||||
|
for alias, cur_refcount in self.alias_refcount.copy().items():
|
||||||
|
unref_amount = cur_refcount - to_counts.get(alias, 0)
|
||||||
|
self.unref_alias(alias, unref_amount)
|
||||||
|
|
||||||
def promote_unused_aliases(self, initial_refcounts, used_aliases):
|
def promote_unused_aliases(self, initial_refcounts, used_aliases):
|
||||||
"""
|
"""
|
||||||
Given a "before" copy of the alias_refcounts dictionary (as
|
Given a "before" copy of the alias_refcounts dictionary (as
|
||||||
|
@ -832,7 +846,8 @@ class Query(object):
|
||||||
def count_active_tables(self):
|
def count_active_tables(self):
|
||||||
"""
|
"""
|
||||||
Returns the number of tables in this query with a non-zero reference
|
Returns the number of tables in this query with a non-zero reference
|
||||||
count.
|
count. Note that after execution, the reference counts are zeroed, so
|
||||||
|
tables added in compiler will not be seen by this method.
|
||||||
"""
|
"""
|
||||||
return len([1 for count in self.alias_refcount.itervalues() if count])
|
return len([1 for count in self.alias_refcount.itervalues() if count])
|
||||||
|
|
||||||
|
@ -1596,6 +1611,13 @@ class Query(object):
|
||||||
self.select = []
|
self.select = []
|
||||||
self.select_fields = []
|
self.select_fields = []
|
||||||
|
|
||||||
|
def add_distinct_fields(self, *field_names):
|
||||||
|
"""
|
||||||
|
Adds and resolves the given fields to the query's "distinct on" clause.
|
||||||
|
"""
|
||||||
|
self.distinct_fields = field_names
|
||||||
|
self.distinct = True
|
||||||
|
|
||||||
def add_fields(self, field_names, allow_m2m=True):
|
def add_fields(self, field_names, allow_m2m=True):
|
||||||
"""
|
"""
|
||||||
Adds the given (model) fields to the select set. The field names are
|
Adds the given (model) fields to the select set. The field names are
|
||||||
|
|
|
@ -345,7 +345,7 @@ remain undefined afterward).
|
||||||
distinct
|
distinct
|
||||||
~~~~~~~~
|
~~~~~~~~
|
||||||
|
|
||||||
.. method:: distinct()
|
.. method:: distinct([*fields])
|
||||||
|
|
||||||
Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
|
Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
|
||||||
eliminates duplicate rows from the query results.
|
eliminates duplicate rows from the query results.
|
||||||
|
@ -374,6 +374,43 @@ query spans multiple tables, it's possible to get duplicate results when a
|
||||||
:meth:`values()` together, be careful when ordering by fields not in the
|
:meth:`values()` together, be careful when ordering by fields not in the
|
||||||
:meth:`values()` call.
|
:meth:`values()` call.
|
||||||
|
|
||||||
|
.. versionadded:: 1.4
|
||||||
|
|
||||||
|
The possibility to pass positional arguments (``*fields``) is new in Django 1.4.
|
||||||
|
They are names of fields to which the ``DISTINCT`` should be limited. This
|
||||||
|
translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
|
||||||
|
duplicate rows not by comparing all fields in a row, but by comparing only the given
|
||||||
|
fields.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
Note that the ability to specify field names is only available in PostgreSQL.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
When using the ``DISTINCT ON`` functionality it is required that the columns given
|
||||||
|
to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT
|
||||||
|
DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
|
||||||
|
don't specify an order, then you'll get some arbitrary row.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
>>> Author.objects.distinct()
|
||||||
|
[...]
|
||||||
|
|
||||||
|
>>> Entry.objects.order_by('pub_date').distinct('pub_date')
|
||||||
|
[...]
|
||||||
|
|
||||||
|
>>> Entry.objects.order_by('blog').distinct('blog')
|
||||||
|
[...]
|
||||||
|
|
||||||
|
>>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
|
||||||
|
[...]
|
||||||
|
|
||||||
|
>>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
|
||||||
|
[...]
|
||||||
|
|
||||||
|
>>> Entry.objects.order_by('author', 'pub_date').distinct('author')
|
||||||
|
[...]
|
||||||
|
|
||||||
values
|
values
|
||||||
~~~~~~
|
~~~~~~
|
||||||
|
|
||||||
|
|
|
@ -507,6 +507,16 @@ Django 1.4 also includes several smaller improvements worth noting:
|
||||||
``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
|
``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
|
||||||
cache backends.
|
cache backends.
|
||||||
|
|
||||||
|
* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
|
||||||
|
|
||||||
|
The ``distinct()`` ``Queryset`` method now accepts an optional list of model
|
||||||
|
field names. If specified, then the ``DISTINCT`` statement is limited to these
|
||||||
|
fields. The PostgreSQL is the only of the database backends shipped with
|
||||||
|
Django that supports this new functionality.
|
||||||
|
|
||||||
|
For more details, see the documentation for
|
||||||
|
:meth:`~django.db.models.query.QuerySet.distinct`.
|
||||||
|
|
||||||
Backwards incompatible changes in 1.4
|
Backwards incompatible changes in 1.4
|
||||||
=====================================
|
=====================================
|
||||||
|
|
||||||
|
|
|
@ -498,6 +498,16 @@ Django 1.4 also includes several smaller improvements worth noting:
|
||||||
``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
|
``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other
|
||||||
cache backends.
|
cache backends.
|
||||||
|
|
||||||
|
* Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON``
|
||||||
|
|
||||||
|
The ``distinct()`` ``Queryset`` method now accepts an optional list of model
|
||||||
|
field names. If specified, then the ``DISTINCT`` statement is limited to these
|
||||||
|
fields. The PostgreSQL is the only of the database backends shipped with
|
||||||
|
Django that supports this new functionality.
|
||||||
|
|
||||||
|
For more details, see the documentation for
|
||||||
|
:meth:`~django.db.models.query.QuerySet.distinct`.
|
||||||
|
|
||||||
.. _backwards-incompatible-changes-1.4:
|
.. _backwards-incompatible-changes-1.4:
|
||||||
|
|
||||||
Backwards incompatible changes in 1.4
|
Backwards incompatible changes in 1.4
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
#
|
|
@ -0,0 +1,39 @@
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
class Tag(models.Model):
|
||||||
|
name = models.CharField(max_length=10)
|
||||||
|
parent = models.ForeignKey('self', blank=True, null=True,
|
||||||
|
related_name='children')
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
ordering = ['name']
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
|
class Celebrity(models.Model):
|
||||||
|
name = models.CharField("Name", max_length=20)
|
||||||
|
greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
|
class Fan(models.Model):
|
||||||
|
fan_of = models.ForeignKey(Celebrity)
|
||||||
|
|
||||||
|
class Staff(models.Model):
|
||||||
|
id = models.IntegerField(primary_key=True)
|
||||||
|
name = models.CharField(max_length=50)
|
||||||
|
organisation = models.CharField(max_length=100)
|
||||||
|
tags = models.ManyToManyField(Tag, through='StaffTag')
|
||||||
|
coworkers = models.ManyToManyField('self')
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
|
class StaffTag(models.Model):
|
||||||
|
staff = models.ForeignKey(Staff)
|
||||||
|
tag = models.ForeignKey(Tag)
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return u"%s -> %s" % (self.tag, self.staff)
|
|
@ -0,0 +1,116 @@
|
||||||
|
from __future__ import absolute_import, with_statement
|
||||||
|
|
||||||
|
from django.db.models import Max
|
||||||
|
from django.test import TestCase, skipUnlessDBFeature
|
||||||
|
|
||||||
|
from .models import Tag, Celebrity, Fan, Staff, StaffTag
|
||||||
|
|
||||||
|
class DistinctOnTests(TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
t1 = Tag.objects.create(name='t1')
|
||||||
|
t2 = Tag.objects.create(name='t2', parent=t1)
|
||||||
|
t3 = Tag.objects.create(name='t3', parent=t1)
|
||||||
|
t4 = Tag.objects.create(name='t4', parent=t3)
|
||||||
|
t5 = Tag.objects.create(name='t5', parent=t3)
|
||||||
|
|
||||||
|
p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1")
|
||||||
|
p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1")
|
||||||
|
p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1")
|
||||||
|
p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2")
|
||||||
|
p1_o1.coworkers.add(p2_o1, p3_o1)
|
||||||
|
StaffTag.objects.create(staff=p1_o1, tag=t1)
|
||||||
|
StaffTag.objects.create(staff=p1_o1, tag=t1)
|
||||||
|
|
||||||
|
celeb1 = Celebrity.objects.create(name="c1")
|
||||||
|
celeb2 = Celebrity.objects.create(name="c2")
|
||||||
|
|
||||||
|
self.fan1 = Fan.objects.create(fan_of=celeb1)
|
||||||
|
self.fan2 = Fan.objects.create(fan_of=celeb1)
|
||||||
|
self.fan3 = Fan.objects.create(fan_of=celeb2)
|
||||||
|
|
||||||
|
@skipUnlessDBFeature('can_distinct_on_fields')
|
||||||
|
def test_basic_distinct_on(self):
|
||||||
|
"""QuerySet.distinct('field', ...) works"""
|
||||||
|
# (qset, expected) tuples
|
||||||
|
qsets = (
|
||||||
|
(
|
||||||
|
Staff.objects.distinct().order_by('name'),
|
||||||
|
['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Staff.objects.distinct('name').order_by('name'),
|
||||||
|
['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Staff.objects.distinct('organisation').order_by('organisation', 'name'),
|
||||||
|
['<Staff: p1>', '<Staff: p1>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
|
||||||
|
['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
|
||||||
|
distinct('name').order_by('name'),
|
||||||
|
['<Celebrity: c1>', '<Celebrity: c2>'],
|
||||||
|
),
|
||||||
|
# Does combining querysets work?
|
||||||
|
(
|
||||||
|
(Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\
|
||||||
|
distinct('name').order_by('name')
|
||||||
|
|Celebrity.objects.filter(fan__in=[self.fan3]).\
|
||||||
|
distinct('name').order_by('name')),
|
||||||
|
['<Celebrity: c1>', '<Celebrity: c2>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
StaffTag.objects.distinct('staff','tag'),
|
||||||
|
['<StaffTag: t1 -> p1>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
|
||||||
|
['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
|
||||||
|
['<StaffTag: t1 -> p1>'],
|
||||||
|
),
|
||||||
|
# Fetch the alphabetically first coworker for each worker
|
||||||
|
(
|
||||||
|
(Staff.objects.distinct('id').order_by('id', 'coworkers__name').
|
||||||
|
values_list('id', 'coworkers__name')),
|
||||||
|
["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
for qset, expected in qsets:
|
||||||
|
self.assertQuerysetEqual(qset, expected)
|
||||||
|
self.assertEqual(qset.count(), len(expected))
|
||||||
|
|
||||||
|
# Combining queries with different distinct_fields is not allowed.
|
||||||
|
base_qs = Celebrity.objects.all()
|
||||||
|
self.assertRaisesMessage(
|
||||||
|
AssertionError,
|
||||||
|
"Cannot combine queries with different distinct fields.",
|
||||||
|
lambda: (base_qs.distinct('id') & base_qs.distinct('name'))
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test join unreffing
|
||||||
|
c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
|
||||||
|
self.assertIn('OUTER JOIN', str(c1.query))
|
||||||
|
c2 = c1.distinct('pk')
|
||||||
|
self.assertNotIn('OUTER JOIN', str(c2.query))
|
||||||
|
|
||||||
|
@skipUnlessDBFeature('can_distinct_on_fields')
|
||||||
|
def test_distinct_not_implemented_checks(self):
|
||||||
|
# distinct + annotate not allowed
|
||||||
|
with self.assertRaises(NotImplementedError):
|
||||||
|
Celebrity.objects.annotate(Max('id')).distinct('id')[0]
|
||||||
|
with self.assertRaises(NotImplementedError):
|
||||||
|
Celebrity.objects.distinct('id').annotate(Max('id'))[0]
|
||||||
|
|
||||||
|
# However this check is done only when the query executes, so you
|
||||||
|
# can use distinct() to remove the fields before execution.
|
||||||
|
Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]
|
||||||
|
# distinct + aggregate not allowed
|
||||||
|
with self.assertRaises(NotImplementedError):
|
||||||
|
Celebrity.objects.distinct('id').aggregate(Max('id'))
|
||||||
|
|
|
@ -209,6 +209,9 @@ class Celebrity(models.Model):
|
||||||
name = models.CharField("Name", max_length=20)
|
name = models.CharField("Name", max_length=20)
|
||||||
greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
|
greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
|
||||||
|
|
||||||
|
def __unicode__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
class TvChef(Celebrity):
|
class TvChef(Celebrity):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -343,4 +346,3 @@ class OneToOneCategory(models.Model):
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return "one2one " + self.new_name
|
return "one2one " + self.new_name
|
||||||
|
|
||||||
|
|
|
@ -234,18 +234,22 @@ class Queries1Tests(BaseQuerysetTest):
|
||||||
['<Item: four>', '<Item: one>']
|
['<Item: four>', '<Item: one>']
|
||||||
)
|
)
|
||||||
|
|
||||||
# FIXME: This is difficult to fix and very much an edge case, so punt for
|
|
||||||
# now. This is related to the order_by() tests for ticket #2253, but the
|
|
||||||
# old bug exhibited itself here (q2 was pulling too many tables into the
|
|
||||||
# combined query with the new ordering, but only because we have evaluated
|
|
||||||
# q2 already).
|
|
||||||
@unittest.expectedFailure
|
|
||||||
def test_order_by_tables(self):
|
def test_order_by_tables(self):
|
||||||
q1 = Item.objects.order_by('name')
|
q1 = Item.objects.order_by('name')
|
||||||
q2 = Item.objects.filter(id=self.i1.id)
|
q2 = Item.objects.filter(id=self.i1.id)
|
||||||
list(q2)
|
list(q2)
|
||||||
self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
|
self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1)
|
||||||
|
|
||||||
|
def test_order_by_join_unref(self):
|
||||||
|
"""
|
||||||
|
This test is related to the above one, testing that there aren't
|
||||||
|
old JOINs in the query.
|
||||||
|
"""
|
||||||
|
qs = Celebrity.objects.order_by('greatest_fan__fan_of')
|
||||||
|
self.assertIn('OUTER JOIN', str(qs.query))
|
||||||
|
qs = qs.order_by('id')
|
||||||
|
self.assertNotIn('OUTER JOIN', str(qs.query))
|
||||||
|
|
||||||
def test_tickets_4088_4306(self):
|
def test_tickets_4088_4306(self):
|
||||||
self.assertQuerysetEqual(
|
self.assertQuerysetEqual(
|
||||||
Report.objects.filter(creator=1001),
|
Report.objects.filter(creator=1001),
|
||||||
|
@ -1728,7 +1732,7 @@ class ToFieldTests(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class ConditionalTests(BaseQuerysetTest):
|
class ConditionalTests(BaseQuerysetTest):
|
||||||
"""Tests whose execution depend on dfferent environment conditions like
|
"""Tests whose execution depend on different environment conditions like
|
||||||
Python version or DB backend features"""
|
Python version or DB backend features"""
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -1739,6 +1743,7 @@ class ConditionalTests(BaseQuerysetTest):
|
||||||
t4 = Tag.objects.create(name='t4', parent=t3)
|
t4 = Tag.objects.create(name='t4', parent=t3)
|
||||||
t5 = Tag.objects.create(name='t5', parent=t3)
|
t5 = Tag.objects.create(name='t5', parent=t3)
|
||||||
|
|
||||||
|
|
||||||
# In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
|
# In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
|
||||||
# (Python issue 1242657), so these cases return an empty list, rather than
|
# (Python issue 1242657), so these cases return an empty list, rather than
|
||||||
# raising an exception. Not a lot we can do about that, unfortunately, due to
|
# raising an exception. Not a lot we can do about that, unfortunately, due to
|
||||||
|
@ -1810,6 +1815,7 @@ class ConditionalTests(BaseQuerysetTest):
|
||||||
2500
|
2500
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class UnionTests(unittest.TestCase):
|
class UnionTests(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
Tests for the union of two querysets. Bug #12252.
|
Tests for the union of two querysets. Bug #12252.
|
||||||
|
|
|
@ -40,9 +40,9 @@ class SelectRelatedRegressTests(TestCase):
|
||||||
self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
|
self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections],
|
||||||
[(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
|
[(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')])
|
||||||
|
|
||||||
# This final query should only join seven tables (port, device and building
|
# This final query should only have seven tables (port, device and building
|
||||||
# twice each, plus connection once).
|
# twice each, plus connection once). Thus, 6 joins plus the FROM table.
|
||||||
self.assertEqual(connections.query.count_active_tables(), 7)
|
self.assertEqual(str(connections.query).count(" JOIN "), 6)
|
||||||
|
|
||||||
|
|
||||||
def test_regression_8106(self):
|
def test_regression_8106(self):
|
||||||
|
|
Loading…
Reference in New Issue