From 8593e162c9cb63a6c0b06daf045bc1c21eb4d7c1 Mon Sep 17 00:00:00 2001 From: Simon Charette Date: Sun, 25 Oct 2020 16:04:21 -0400 Subject: [PATCH] Fixed #32143 -- Used EXISTS to exclude multi-valued relationships. As mentioned in the pre-existing split_exclude() docstring EXISTS is easier to optimize for query planers and circumvents the IN (NULL) handling issue. --- django/db/models/expressions.py | 9 ++++----- django/db/models/sql/query.py | 32 ++++++++++++++------------------ tests/queries/tests.py | 12 ++++++++++-- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/django/db/models/expressions.py b/django/db/models/expressions.py index 13918467ee..c596ccfe5d 100644 --- a/django/db/models/expressions.py +++ b/django/db/models/expressions.py @@ -1083,19 +1083,18 @@ class Subquery(Expression): contains_aggregate = False def __init__(self, queryset, output_field=None, **extra): - self.query = queryset.query + # Allow the usage of both QuerySet and sql.Query objects. + self.query = getattr(queryset, 'query', queryset) self.extra = extra - # Prevent the QuerySet from being evaluated. - self.queryset = queryset._chain(_result_cache=[], prefetch_done=True) super().__init__(output_field) def __getstate__(self): state = super().__getstate__() args, kwargs = state['_constructor_args'] if args: - args = (self.queryset, *args[1:]) + args = (self.query, *args[1:]) else: - kwargs['queryset'] = self.queryset + kwargs['queryset'] = self.query state['_constructor_args'] = args, kwargs return state diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index 8d76b436ee..abb545eaa4 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -23,7 +23,9 @@ from django.core.exceptions import ( from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections from django.db.models.aggregates import Count from django.db.models.constants import LOOKUP_SEP -from django.db.models.expressions import BaseExpression, Col, F, OuterRef, Ref +from django.db.models.expressions import ( + BaseExpression, Col, Exists, F, OuterRef, Ref, ResolvedOuterRef, +) from django.db.models.fields import Field from django.db.models.fields.related_lookups import MultiColSource from django.db.models.lookups import Lookup @@ -1765,12 +1767,12 @@ class Query(BaseExpression): filters in the original query. We will turn this into equivalent of: - WHERE NOT (pk IN (SELECT parent_id FROM thetable - WHERE name = 'foo' AND parent_id IS NOT NULL)) - - It might be worth it to consider using WHERE NOT EXISTS as that has - saner null handling, and is easier for the backend's optimizer to - handle. + WHERE NOT EXISTS( + SELECT 1 + FROM child + WHERE name = 'foo' AND child.parent_id = parent.id + LIMIT 1 + ) """ filter_lhs, filter_rhs = filter_expr if isinstance(filter_rhs, OuterRef): @@ -1786,17 +1788,9 @@ class Query(BaseExpression): # the subquery. trimmed_prefix, contains_louter = query.trim_start(names_with_path) - # Add extra check to make sure the selected field will not be null - # since we are adding an IN clause. This prevents the - # database from tripping over IN (...,NULL,...) selects and returning - # nothing col = query.select[0] select_field = col.target alias = col.alias - if self.is_nullable(select_field): - lookup_class = select_field.get_lookup('isnull') - lookup = lookup_class(select_field.get_col(alias), False) - query.where.add(lookup, AND) if alias in can_reuse: pk = select_field.model._meta.pk # Need to add a restriction so that outer query's filters are in effect for @@ -1810,9 +1804,11 @@ class Query(BaseExpression): query.where.add(lookup, AND) query.external_aliases[alias] = True - condition, needed_inner = self.build_filter( - ('%s__in' % trimmed_prefix, query), - current_negated=True, branch_negated=True, can_reuse=can_reuse) + lookup_class = select_field.get_lookup('exact') + lookup = lookup_class(col, ResolvedOuterRef(trimmed_prefix)) + query.where.add(lookup, AND) + condition, needed_inner = self.build_filter(Exists(query)) + if contains_louter: or_null_condition, _ = self.build_filter( ('%s__isnull' % trimmed_prefix, True), diff --git a/tests/queries/tests.py b/tests/queries/tests.py index c98a6adeba..c3e3a40340 100644 --- a/tests/queries/tests.py +++ b/tests/queries/tests.py @@ -2807,11 +2807,11 @@ class ExcludeTests(TestCase): f1 = Food.objects.create(name='apples') Food.objects.create(name='oranges') Eaten.objects.create(food=f1, meal='dinner') - j1 = Job.objects.create(name='Manager') + cls.j1 = Job.objects.create(name='Manager') cls.r1 = Responsibility.objects.create(description='Playing golf') j2 = Job.objects.create(name='Programmer') r2 = Responsibility.objects.create(description='Programming') - JobResponsibilities.objects.create(job=j1, responsibility=cls.r1) + JobResponsibilities.objects.create(job=cls.j1, responsibility=cls.r1) JobResponsibilities.objects.create(job=j2, responsibility=r2) def test_to_field(self): @@ -2884,6 +2884,14 @@ class ExcludeTests(TestCase): [number], ) + def test_exclude_multivalued_exists(self): + with CaptureQueriesContext(connection) as captured_queries: + self.assertSequenceEqual( + Job.objects.exclude(responsibilities__description='Programming'), + [self.j1], + ) + self.assertIn('exists', captured_queries[0]['sql'].lower()) + class ExcludeTest17600(TestCase): """