Fixed #27149 -- Added Subquery and Exists database expressions.

Thanks Josh Smeaton for Oracle fixes.
This commit is contained in:
Matthew Schinckel 2016-04-20 16:26:51 +09:30 committed by Tim Graham
parent 84c1826ded
commit 236ebe94bf
6 changed files with 479 additions and 11 deletions

View File

@ -31,10 +31,17 @@ class SQLCompiler(compiler.SQLCompiler):
high_where = ''
if self.query.high_mark is not None:
high_where = 'WHERE ROWNUM <= %d' % (self.query.high_mark,)
sql = (
'SELECT * FROM (SELECT "_SUB".*, ROWNUM AS "_RN" FROM (%s) '
'"_SUB" %s) WHERE "_RN" > %d' % (sql, high_where, self.query.low_mark)
)
if self.query.low_mark:
sql = (
'SELECT * FROM (SELECT "_SUB".*, ROWNUM AS "_RN" FROM (%s) '
'"_SUB" %s) WHERE "_RN" > %d' % (sql, high_where, self.query.low_mark)
)
else:
# Simplify the query to support subqueries if there's no offset.
sql = (
'SELECT * FROM (SELECT "_SUB".* FROM (%s) "_SUB" %s)' % (sql, high_where)
)
return sql, params

View File

@ -6,7 +6,8 @@ from django.db.models.deletion import (
CASCADE, DO_NOTHING, PROTECT, SET, SET_DEFAULT, SET_NULL, ProtectedError,
)
from django.db.models.expressions import (
Case, Expression, ExpressionWrapper, F, Func, Value, When,
Case, Exists, Expression, ExpressionWrapper, F, Func, OuterRef, Subquery,
Value, When,
)
from django.db.models.fields import * # NOQA
from django.db.models.fields import __all__ as fields_all
@ -62,7 +63,8 @@ __all__ += [
'ObjectDoesNotExist', 'signals',
'CASCADE', 'DO_NOTHING', 'PROTECT', 'SET', 'SET_DEFAULT', 'SET_NULL',
'ProtectedError',
'Case', 'Expression', 'ExpressionWrapper', 'F', 'Func', 'Value', 'When',
'Case', 'Exists', 'Expression', 'ExpressionWrapper', 'F', 'Func',
'OuterRef', 'Subquery', 'Value', 'When',
'FileField', 'ImageField', 'OrderWrt', 'Lookup', 'Transform', 'Manager',
'Prefetch', 'Q', 'QuerySet', 'prefetch_related_objects', 'DEFERRED', 'Model',
'ForeignKey', 'ForeignObject', 'OneToOneField', 'ManyToManyField',

View File

@ -477,6 +477,33 @@ class F(Combinable):
return OrderBy(self, descending=True, **kwargs)
class ResolvedOuterRef(F):
"""
An object that contains a reference to an outer query.
In this case, the reference to the outer query has been resolved because
the inner query has been used as a subquery.
"""
def as_sql(self, *args, **kwargs):
raise ValueError(
'This queryset contains a reference to an outer query and may '
'only be used in a subquery.'
)
def _prepare(self, output_field=None):
return self
class OuterRef(F):
def resolve_expression(self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False):
if isinstance(self.name, self.__class__):
return self.name
return ResolvedOuterRef(self.name)
def _prepare(self, output_field=None):
return self
class Func(Expression):
"""
An SQL function call.
@ -873,6 +900,128 @@ class Case(Expression):
return sql, sql_params
class Subquery(Expression):
"""
An explicit subquery. It may contain OuterRef() references to the outer
query which will be resolved when it is applied to that query.
"""
template = '(%(subquery)s)'
def __init__(self, queryset, output_field=None, **extra):
self.queryset = queryset
self.extra = extra
if output_field is None and len(self.queryset.query.select) == 1:
output_field = self.queryset.query.select[0].field
super(Subquery, self).__init__(output_field)
def copy(self):
clone = super(Subquery, self).copy()
clone.queryset = clone.queryset.all()
return clone
def resolve_expression(self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False):
clone = self.copy()
clone.is_summary = summarize
clone.queryset.query.bump_prefix(query)
# Need to recursively resolve these.
def resolve_all(child):
if hasattr(child, 'children'):
[resolve_all(_child) for _child in child.children]
if hasattr(child, 'rhs'):
child.rhs = resolve(child.rhs)
def resolve(child):
if hasattr(child, 'resolve_expression'):
return child.resolve_expression(
query=query, allow_joins=allow_joins, reuse=reuse,
summarize=summarize, for_save=for_save,
)
return child
resolve_all(clone.queryset.query.where)
for key, value in clone.queryset.query.annotations.items():
if isinstance(value, Subquery):
clone.queryset.query.annotations[key] = resolve(value)
return clone
def get_source_expressions(self):
return [
x for x in [
getattr(expr, 'lhs', None)
for expr in self.queryset.query.where.children
] if x
]
def relabeled_clone(self, change_map):
clone = self.copy()
clone.queryset.query = clone.queryset.query.relabeled_clone(change_map)
clone.queryset.query.external_aliases.update(
alias for alias in change_map.values()
if alias not in clone.queryset.query.tables
)
return clone
def as_sql(self, compiler, connection, template=None, **extra_context):
connection.ops.check_expression_support(self)
template_params = self.extra.copy()
template_params.update(extra_context)
template_params['subquery'], sql_params = self.queryset.query.get_compiler(connection=connection).as_sql()
template = template or template_params.get('template', self.template)
sql = template % template_params
sql = connection.ops.unification_cast_sql(self.output_field) % sql
return sql, sql_params
def _prepare(self, output_field):
# This method will only be called if this instance is the "rhs" in an
# expression: the wrapping () must be removed (as the expression that
# contains this will provide them). SQLite evaluates ((subquery))
# differently than the other databases.
if self.template == '(%(subquery)s)':
clone = self.copy()
clone.template = '%(subquery)s'
return clone
return self
class Exists(Subquery):
template = 'EXISTS(%(subquery)s)'
def __init__(self, *args, **kwargs):
self.negated = kwargs.pop('negated', False)
super(Exists, self).__init__(*args, **kwargs)
def __invert__(self):
return type(self)(self.queryset, self.output_field, negated=(not self.negated), **self.extra)
@property
def output_field(self):
return fields.BooleanField()
def resolve_expression(self, query=None, **kwargs):
# As a performance optimization, remove ordering since EXISTS doesn't
# care about it, just whether or not a row matches.
self.queryset = self.queryset.order_by()
return super(Exists, self).resolve_expression(query, **kwargs)
def as_sql(self, compiler, connection, template=None, **extra_context):
sql, params = super(Exists, self).as_sql(compiler, connection, template, **extra_context)
if self.negated:
sql = 'NOT {}'.format(sql)
return sql, params
def as_oracle(self, compiler, connection, template=None, **extra_context):
# Oracle doesn't allow EXISTS() in the SELECT list, so wrap it with a
# CASE WHEN expression. Change the template since the When expression
# requires a left hand side (column) to compare against.
sql, params = self.as_sql(compiler, connection, template, **extra_context)
sql = 'CASE WHEN {} THEN 1 ELSE 0 END'.format(sql)
return sql, params
class OrderBy(BaseExpression):
template = '%(expression)s %(ordering)s'

View File

@ -450,6 +450,178 @@ Conditional expressions allow you to use :keyword:`if` ... :keyword:`elif` ...
:keyword:`else` logic in queries. Django natively supports SQL ``CASE``
expressions. For more details see :doc:`conditional-expressions`.
``Subquery()`` expressions
--------------------------
.. class:: Subquery(queryset, output_field=None)
.. versionadded:: 1.11
You can add an explicit subquery to a ``QuerySet`` using the ``Subquery``
expression.
For example, to annotate each post with the email address of the author of the
newest comment on that post::
>>> from django.db.models import OuterRef, Subquery
>>> newest = Comment.objects.filter(post=OuterRef('pk')).order_by('-created_at')
>>> Post.objects.annotate(newest_commenter_email=Subquery(newest.values('email')[:1]))
On PostgreSQL, the SQL looks like:
.. code-block:: sql
SELECT "post"."id", (
SELECT U0."email"
FROM "comment" U0
WHERE U0."post_id" = ("post"."id")
ORDER BY U0."created_at" DESC LIMIT 1
) AS "newest_commenter_email" FROM "post"
.. note::
The examples in this section are designed to show how to force
Django to execute a subquery. In some cases it may be possible to
write an equivalent queryset that performs the same task more
clearly or efficiently.
Referencing columns from the outer queryset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. class:: OuterRef(field)
.. versionadded:: 1.11
Use ``OuterRef`` when a queryset in a ``Subquery`` needs to refer to a field
from the outer query. It acts like an :class:`F` expression except that the
check to see if it refers to a valid field isn't made until the outer queryset
is resolved.
Instances of ``OuterRef`` may be used in conjunction with nested instances
of ``Subquery`` to refer to a containing queryset that isn't the immediate
parent. For example, this queryset would need to be within a nested pair of
``Subquery`` instances to resolve correctly::
>>> Book.objects.filter(author=OuterRef(OuterRef('pk')))
Limiting a subquery to a single column
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are times when a single column must be returned from a ``Subquery``, for
instance, to use a ``Subquery`` as the target of an ``__in`` lookup. To return
all comments for posts published within the last day::
>>> from datetime import timedelta
>>> from django.utils import timezone
>>> one_day_ago = timezone.now() - timedelta(days=1)
>>> posts = Post.objects.filter(published_at__gte=one_day_ago)
>>> Comment.objects.filter(post__in=Subquery(posts.values('pk')))
In this case, the subquery must use :meth:`~.QuerySet.values`
to return only a single column: the primary key of the post.
Limiting the subquery to a single row
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To prevent a subquery from returning multiple rows, a slice (``[:1]``) of the
queryset is used::
>>> subquery = Subquery(newest.values('email')[:1])
>>> Post.objects.annotate(newest_commenter_email=subquery)
In this case, the subquery must only return a single column *and* a single
row: the email address of the most recently created comment.
(Using :meth:`~.QuerySet.get` instead of a slice would fail because the
``OuterRef`` cannot be resolved until the queryset is used within a
``Subquery``.)
``Exists()`` subqueries
~~~~~~~~~~~~~~~~~~~~~~~
.. class:: Exists(queryset)
.. versionadded:: 1.11
``Exists`` is a ``Subquery`` subclass that uses an SQL ``EXISTS`` statement. In
many cases it will perform better than a subquery since the database is able to
stop evaluation of the subquery when a first matching row is found.
For example, to annotate each post with whether or not it has a comment from
within the last day::
>>> from django.db.models import Exists, OuterRef
>>> from datetime import timedelta
>>> from django.utils import timezone
>>> one_day_ago = timezone.now() - timedelta(days=1)
>>> recent_comments = Comment.objects.filter(
... post=OuterRef('pk'),
... created_at__gte=one_day_ago,
... )
>>> Post.objects.annotate(recent_comment=Exists(recent_comments)
On PostgreSQL, the SQL looks like:
.. code-block:: sql
SELECT "post"."id", "post"."published_at", EXISTS(
SELECT U0."id", U0."post_id", U0."email", U0."created_at"
FROM "comment" U0
WHERE (
U0."created_at" >= YYYY-MM-DD HH:MM:SS AND
U0."post_id" = ("post"."id")
)
) AS "recent_comment" FROM "post"
It's unnecessary to force ``Exists`` to refer to a single column, since the
columns are discarded and a boolean result is returned. Similarly, since
ordering is unimportant within an SQL ``EXISTS`` subquery and would only
degrade performance, it's automatically removed.
You can query using ``NOT EXISTS`` with ``~Exists()``.
Filtering on a ``Subquery`` expression
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It's not possible to filter directly using ``Subquery`` and ``Exists``, e.g.::
>>> Post.objects.filter(Exists(recent_comments))
...
TypeError: 'Exists' object is not iterable
You must filter on a subquery expression by first annotating the queryset
and then filtering based on that annotation::
>>> Post.objects.annotate(
... recent_comment=Exists(recent_comments),
... ).filter(recent_comment=True)
Using aggregates within a ``Subquery`` expression
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Aggregates may be used within a ``Subquery``, but they require a specific
combination of :meth:`~.QuerySet.filter`, :meth:`~.QuerySet.values`, and
:meth:`~.QuerySet.annotate` to get the subquery grouping correct.
Assuming both models have a ``length`` field, to find posts where the post
length is greater than the total length of all combined comments::
>>> from django.db.models import OuterRef, Subquery, Sum
>>> comments = Comment.objects.filter(post=OuterRef('pk')).values('post')
>>> total_comments = comments.annotate(total=Sum('length')).values('total')
>>> Post.objects.filter(length__gt=Subquery(total_comments))
The initial ``filter(...)`` limits the subquery to the relevant parameters.
``values('post')`` aggregates comments by ``Post``. Finally, ``annotate(...)``
performs the aggregation. The order in which these queryset methods are applied
is important. In this case, since the subquery must be limited to a single
column, ``values('total')`` is required.
This is the only way to perform an aggregation within a ``Subquery``, as
using :meth:`~.QuerySet.aggregate` attempts to evaluate the queryset (and if
there is an ``OuterRef``, this will not be possible to resolve).
Raw SQL expressions
-------------------

View File

@ -70,6 +70,14 @@ template system rather than in Python. See :doc:`/ref/forms/renderers`.
You may need to adjust any custom widgets that you've written for a few
:ref:`backwards incompatible changes <template-widget-incompatibilities-1-11>`.
``Subquery`` expressions
------------------------
The new :class:`~django.db.models.Subquery` and
:class:`~django.db.models.Exists` database expressions allow creating
explicit subqueries. Subqueries may refer to fields from the outer queryset
using the :class:`~django.db.models.OuterRef` class.
Minor features
--------------

View File

@ -12,8 +12,8 @@ from django.db.models.aggregates import (
Avg, Count, Max, Min, StdDev, Sum, Variance,
)
from django.db.models.expressions import (
Case, Col, ExpressionWrapper, F, Func, OrderBy, Random, RawSQL, Ref, Value,
When,
Case, Col, Exists, ExpressionWrapper, F, Func, OrderBy, OuterRef, Random,
RawSQL, Ref, Subquery, Value, When,
)
from django.db.models.functions import (
Coalesce, Concat, Length, Lower, Substr, Upper,
@ -32,15 +32,15 @@ from .models import (
class BasicExpressionsTests(TestCase):
@classmethod
def setUpTestData(cls):
Company.objects.create(
cls.example_inc = Company.objects.create(
name="Example Inc.", num_employees=2300, num_chairs=5,
ceo=Employee.objects.create(firstname="Joe", lastname="Smith", salary=10)
)
Company.objects.create(
cls.foobar_ltd = Company.objects.create(
name="Foobar Ltd.", num_employees=3, num_chairs=4,
ceo=Employee.objects.create(firstname="Frank", lastname="Meyer", salary=20)
)
Company.objects.create(
cls.gmbh = Company.objects.create(
name="Test GmbH", num_employees=32, num_chairs=1,
ceo=Employee.objects.create(firstname="Max", lastname="Mustermann", salary=30)
)
@ -387,6 +387,136 @@ class BasicExpressionsTests(TestCase):
)
self.assertEqual(str(qs.query).count('JOIN'), 2)
def test_outerref(self):
inner = Company.objects.filter(point_of_contact=OuterRef('pk'))
msg = (
'This queryset contains a reference to an outer query and may only '
'be used in a subquery.'
)
with self.assertRaisesMessage(ValueError, msg):
inner.exists()
outer = Employee.objects.annotate(is_point_of_contact=Exists(inner))
self.assertIs(outer.exists(), True)
def test_subquery(self):
Company.objects.filter(name='Example Inc.').update(
point_of_contact=Employee.objects.get(firstname='Joe', lastname='Smith'),
ceo=Employee.objects.get(firstname='Max', lastname='Mustermann'),
)
Employee.objects.create(firstname='Bob', lastname='Brown', salary=40)
qs = Employee.objects.annotate(
is_point_of_contact=Exists(Company.objects.filter(point_of_contact=OuterRef('pk'))),
is_not_point_of_contact=~Exists(Company.objects.filter(point_of_contact=OuterRef('pk'))),
is_ceo_of_small_company=Exists(Company.objects.filter(num_employees__lt=200, ceo=OuterRef('pk'))),
is_ceo_small_2=~~Exists(Company.objects.filter(num_employees__lt=200, ceo=OuterRef('pk'))),
largest_company=Subquery(Company.objects.order_by('-num_employees').filter(
models.Q(ceo=OuterRef('pk')) | models.Q(point_of_contact=OuterRef('pk'))
).values('name')[:1], output_field=models.CharField())
).values(
'firstname',
'is_point_of_contact',
'is_not_point_of_contact',
'is_ceo_of_small_company',
'is_ceo_small_2',
'largest_company',
).order_by('firstname')
results = list(qs)
# Could use Coalesce(subq, Value('')) instead except for the bug in
# cx_Oracle mentioned in #23843.
bob = results[0]
if bob['largest_company'] == '' and connection.features.interprets_empty_strings_as_nulls:
bob['largest_company'] = None
self.assertEqual(results, [
{
'firstname': 'Bob',
'is_point_of_contact': False,
'is_not_point_of_contact': True,
'is_ceo_of_small_company': False,
'is_ceo_small_2': False,
'largest_company': None,
},
{
'firstname': 'Frank',
'is_point_of_contact': False,
'is_not_point_of_contact': True,
'is_ceo_of_small_company': True,
'is_ceo_small_2': True,
'largest_company': 'Foobar Ltd.',
},
{
'firstname': 'Joe',
'is_point_of_contact': True,
'is_not_point_of_contact': False,
'is_ceo_of_small_company': False,
'is_ceo_small_2': False,
'largest_company': 'Example Inc.',
},
{
'firstname': 'Max',
'is_point_of_contact': False,
'is_not_point_of_contact': True,
'is_ceo_of_small_company': True,
'is_ceo_small_2': True,
'largest_company': 'Example Inc.'
}
])
# A less elegant way to write the same query: this uses a LEFT OUTER
# JOIN and an IS NULL, inside a WHERE NOT IN which is probably less
# efficient than EXISTS.
self.assertCountEqual(
qs.filter(is_point_of_contact=True).values('pk'),
Employee.objects.exclude(company_point_of_contact_set=None).values('pk')
)
def test_in_subquery(self):
# This is a contrived test (and you really wouldn't write this query),
# but it is a succinct way to test the __in=Subquery() construct.
small_companies = Company.objects.filter(num_employees__lt=200).values('pk')
subquery_test = Company.objects.filter(pk__in=Subquery(small_companies))
self.assertCountEqual(subquery_test, [self.foobar_ltd, self.gmbh])
subquery_test2 = Company.objects.filter(pk=Subquery(small_companies.filter(num_employees=3)))
self.assertCountEqual(subquery_test2, [self.foobar_ltd])
def test_nested_subquery(self):
inner = Company.objects.filter(point_of_contact=OuterRef('pk'))
outer = Employee.objects.annotate(is_point_of_contact=Exists(inner))
contrived = Employee.objects.annotate(
is_point_of_contact=Subquery(
outer.filter(pk=OuterRef('pk')).values('is_point_of_contact'),
output_field=models.BooleanField(),
),
)
self.assertCountEqual(contrived.values_list(), outer.values_list())
def test_nested_subquery_outer_ref_2(self):
first = Time.objects.create(time='09:00')
second = Time.objects.create(time='17:00')
third = Time.objects.create(time='21:00')
SimulationRun.objects.bulk_create([
SimulationRun(start=first, end=second, midpoint='12:00'),
SimulationRun(start=first, end=third, midpoint='15:00'),
SimulationRun(start=second, end=first, midpoint='00:00'),
])
inner = Time.objects.filter(time=OuterRef(OuterRef('time')), pk=OuterRef('start')).values('time')
middle = SimulationRun.objects.annotate(other=Subquery(inner)).values('other')[:1]
outer = Time.objects.annotate(other=Subquery(middle, output_field=models.TimeField()))
# This is a contrived example. It exercises the double OuterRef form.
self.assertCountEqual(outer, [first, second, third])
def test_annotations_within_subquery(self):
Company.objects.filter(num_employees__lt=50).update(ceo=Employee.objects.get(firstname='Frank'))
inner = Company.objects.filter(
ceo=OuterRef('pk')
).values('ceo').annotate(total_employees=models.Sum('num_employees')).values('total_employees')
outer = Employee.objects.annotate(total_employees=Subquery(inner)).filter(salary__lte=Subquery(inner))
self.assertSequenceEqual(
outer.order_by('-total_employees').values('salary', 'total_employees'),
[{'salary': 10, 'total_employees': 2300}, {'salary': 20, 'total_employees': 35}],
)
class IterableLookupInnerExpressionsTests(TestCase):
@classmethod