From 33c0f0de676ae8fd2c406beebb2f113aeb95fee7 Mon Sep 17 00:00:00 2001
From: Malcolm Tredinnick <malcolm.tredinnick@gmail.com>
Date: Mon, 8 Dec 2008 02:39:51 +0000
Subject: [PATCH] This fixes a group of problems in the SQL created by
 QuerySet.exclude() when used in a few situations where NULL results can
 appear.

Fixed #8921 (the only ticket I know of that noticed any of these).


git-svn-id: http://code.djangoproject.com/svn/django/trunk@9590 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/db/models/sql/query.py           | 48 +++++++++++++++++--------
 tests/regressiontests/queries/models.py | 25 ++++++++++---
 2 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
index f0ed1105c8..bc32ead930 100644
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@@ -54,7 +54,6 @@ class BaseQuery(object):
         self.default_ordering = True
         self.standard_ordering = True
         self.ordering_aliases = []
-        self.start_meta = None
         self.select_fields = []
         self.related_select_fields = []
         self.dupe_avoidance = {}
@@ -125,10 +124,9 @@ class BaseQuery(object):
     def get_meta(self):
         """
         Returns the Options instance (the model._meta) from which to start
-        processing. Normally, this is self.model._meta, but it can change.
+        processing. Normally, this is self.model._meta, but it can be changed
+        by subclasses.
         """
-        if self.start_meta:
-            return self.start_meta
         return self.model._meta
 
     def quote_name_unless_alias(self, name):
@@ -166,7 +164,6 @@ class BaseQuery(object):
         obj.default_ordering = self.default_ordering
         obj.standard_ordering = self.standard_ordering
         obj.ordering_aliases = []
-        obj.start_meta = self.start_meta
         obj.select_fields = self.select_fields[:]
         obj.related_select_fields = self.related_select_fields[:]
         obj.dupe_avoidance = self.dupe_avoidance.copy()
@@ -1485,11 +1482,24 @@ class BaseQuery(object):
         query = Query(self.model, self.connection)
         query.add_filter(filter_expr, can_reuse=can_reuse)
         query.bump_prefix()
-        query.set_start(prefix)
         query.clear_ordering(True)
+        query.set_start(prefix)
         self.add_filter(('%s__in' % prefix, query), negate=True, trim=True,
                 can_reuse=can_reuse)
 
+        # If there's more than one join in the inner query (before any initial
+        # bits were trimmed -- which means the last active table is more than
+        # two places into the alias list), we need to also handle the
+        # possibility that the earlier joins don't match anything by adding a
+        # comparison to NULL (e.g. in
+        # Tag.objects.exclude(parent__parent__name='t1'), a tag with no parent
+        # would otherwise be overlooked).
+        active_positions = [pos for (pos, count) in
+                enumerate(query.alias_refcount.itervalues()) if count]
+        if active_positions[-1] > 1:
+            self.add_filter(('%s__isnull' % prefix, False), negate=True,
+                    trim=True, can_reuse=can_reuse)
+
     def set_limits(self, low=None, high=None):
         """
         Adjusts the limits on the rows retrieved. We use low/high to set these,
@@ -1695,18 +1705,26 @@ class BaseQuery(object):
         alias = self.get_initial_alias()
         field, col, opts, joins, last, extra = self.setup_joins(
                 start.split(LOOKUP_SEP), opts, alias, False)
-        alias = joins[last[-1]]
-        self.select = [(alias, self.alias_map[alias][RHS_JOIN_COL])]
-        self.select_fields = [field]
-        self.start_meta = opts
+        select_col = self.alias_map[joins[1]][LHS_JOIN_COL]
+        select_alias = alias
 
-        # The call to setup_joins add an extra reference to everything in
-        # joins. So we need to unref everything once, and everything prior to
-        # the final join a second time.
+        # The call to setup_joins added an extra reference to everything in
+        # joins. Reverse that.
         for alias in joins:
             self.unref_alias(alias)
-        for alias in joins[:last[-1]]:
-            self.unref_alias(alias)
+
+        # We might be able to trim some joins from the front of this query,
+        # providing that we only traverse "always equal" connections (i.e. rhs
+        # is *always* the same value as lhs).
+        for alias in joins[1:]:
+            join_info = self.alias_map[alias]
+            if (join_info[LHS_JOIN_COL] != select_col
+                    or join_info[JOIN_TYPE] != self.INNER):
+                break
+            self.unref_alias(select_alias)
+            select_alias = join_info[RHS_ALIAS]
+            select_col = join_info[RHS_JOIN_COL]
+        self.select = [(select_alias, select_col)]
 
     def execute_sql(self, result_type=MULTI):
         """
diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
index 5a41af8dc6..868200f60c 100644
--- a/tests/regressiontests/queries/models.py
+++ b/tests/regressiontests/queries/models.py
@@ -675,7 +675,7 @@ thus fail.)
 ...     s.reverse()
 ...     params.reverse()
 
-# This slightly odd comparison works aorund the fact that PostgreSQL will
+# This slightly odd comparison works around the fact that PostgreSQL will
 # return 'one' and 'two' as strings, not Unicode objects. It's a side-effect of
 # using constants here and not a real concern.
 >>> d = Item.objects.extra(select=SortedDict(s), select_params=params).values('a', 'b')[0]
@@ -742,7 +742,7 @@ We can do slicing beyond what is currently in the result cache, too.
 ## only apparent much later when the full test suite runs. I don't understand
 ## what's going on here yet.
 ##
-## # We need to mess with the implemenation internals a bit here to decrease the
+## # We need to mess with the implementation internals a bit here to decrease the
 ## # cache fill size so that we don't read all the results at once.
 ## >>> from django.db.models import query
 ## >>> query.ITER_CHUNK_SIZE = 2
@@ -795,7 +795,7 @@ More twisted cases, involving nested negations.
 [<Item: four>, <Item: one>, <Item: three>]
 
 Bug #7095
-Updates that are filtered on the model being updated are somewhat tricky to get
+Updates that are filtered on the model being updated are somewhat tricky
 in MySQL. This exercises that case.
 >>> mm = ManagedModel.objects.create(data='mm1', tag=t1, public=True)
 >>> ManagedModel.objects.update(data='mm')
@@ -998,13 +998,28 @@ model. But it should still be possible to add new ordering after that.
 >>> 'ORDER BY' in qs.query.as_sql()[0]
 True
 
-Bug #9188 -- incorrect SQL was being generated for certain types of
-exclude() queries that crossed multi-valued relations.
+Incorrect SQL was being generated for certain types of exclude() queries that
+crossed multi-valued relations (#8921, #9188 and some pre-emptively discovered
+cases).
 
 >>> PointerA.objects.filter(connection__pointerb__id=1)
 []
 >>> PointerA.objects.exclude(connection__pointerb__id=1)
 []
+
+>>> Tag.objects.exclude(children=None)
+[<Tag: t1>, <Tag: t3>]
+
+# This example is tricky because the parent could be NULL, so only checking
+# parents with annotations omits some results (tag t1, in this case).
+>>> Tag.objects.exclude(parent__annotation__name="a1")
+[<Tag: t1>, <Tag: t4>, <Tag: t5>]
+
+# The annotation->tag link is single values and tag->children links is
+# multi-valued. So we have to split the exclude filter in the middle and then
+# optimise the inner query without losing results.
+>>> Annotation.objects.exclude(tag__children__name="t2")
+[<Annotation: a2>]
 """}
 
 # In Python 2.3 and the Python 2.6 beta releases, exceptions raised in __len__