Fixed #34176 -- Fixed grouping by ambiguous aliases.

Regression in b7b28c7c18. Refs #31377. Thanks Shai Berger for the report and reviews. test_aggregation_subquery_annotation_values_collision() has been updated as queries that are explicitly grouped by a subquery should always be grouped by it and not its outer columns even if its alias collides with referenced table columns. This was not possible to accomplish at the time 10866a10 landed because we didn't have compiler level handling of colliding aliases.
2023-01-06 09:10:16 -05:00 · 2023-01-06 09:10:16 -05:00 · dd68af62b2
parent 016bead6a2
commit dd68af62b2
8 changed files with 186 additions and 62 deletions
--- a/django/db/backends/base/features.py
+++ b/django/db/backends/base/features.py
@ -9,7 +9,7 @@ class BaseDatabaseFeatures:
    # Oracle can't group by LOB (large object) data types.
    allows_group_by_lob = True
    allows_group_by_selected_pks = False
-    allows_group_by_refs = True
+    allows_group_by_select_index = True
    empty_fetchmany_value = []
    update_can_self_select = True
--- a/django/db/backends/oracle/features.py
+++ b/django/db/backends/oracle/features.py
@ -8,7 +8,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
    # Oracle crashes with "ORA-00932: inconsistent datatypes: expected - got
    # BLOB" when grouping by LOBs (#24096).
    allows_group_by_lob = False
-    allows_group_by_refs = False
+    allows_group_by_select_index = False
    interprets_empty_strings_as_nulls = True
    has_select_for_update = True
    has_select_for_update_nowait = True
--- a/django/db/models/sql/compiler.py
+++ b/django/db/models/sql/compiler.py
@ -123,7 +123,7 @@ class SQLCompiler:
        if self.query.group_by is None:
            return []
        expressions = []
-        allows_group_by_refs = self.connection.features.allows_group_by_refs
+        group_by_refs = set()
        if self.query.group_by is not True:
            # If the group by is set to a list (by .values() call most likely),
            # then we need to add everything in it to the GROUP BY clause.
@ -133,21 +133,23 @@ class SQLCompiler:
            for expr in self.query.group_by:
                if not hasattr(expr, "as_sql"):
                    expr = self.query.resolve_ref(expr)
-                if not allows_group_by_refs and isinstance(expr, Ref):
+                if isinstance(expr, Ref):
-                    expr = expr.source
+                    if expr.refs not in group_by_refs:
-                expressions.append(expr)
+                        group_by_refs.add(expr.refs)
                        expressions.append(expr.source)
                else:
                    expressions.append(expr)
        # Note that even if the group_by is set, it is only the minimal
        # set to group by. So, we need to add cols in select, order_by, and
        # having into the select in any case.
-        ref_sources = {expr.source for expr in expressions if isinstance(expr, Ref)}
+        selected_expr_indices = {}
-        aliased_exprs = {}
+        for index, (expr, _, alias) in enumerate(select, start=1):
        for expr, _, alias in select:
            # Skip members of the select clause that are already included
            # by reference.
            if expr in ref_sources:
                continue
            if alias:
-                aliased_exprs[expr] = alias
+                selected_expr_indices[expr] = index
            # Skip members of the select clause that are already explicitly
            # grouped against.
            if alias in group_by_refs:
                continue
            expressions.extend(expr.get_group_by_cols())
        if not self._meta_ordering:
            for expr, (sql, params, is_ref) in order_by:
@ -162,14 +164,21 @@ class SQLCompiler:
        seen = set()
        expressions = self.collapse_group_by(expressions, having_group_by)
        allows_group_by_select_index = (
            self.connection.features.allows_group_by_select_index
        )
        for expr in expressions:
            if allows_group_by_refs and (alias := aliased_exprs.get(expr)):
                expr = Ref(alias, expr)
            try:
                sql, params = self.compile(expr)
            except (EmptyResultSet, FullResultSet):
                continue
-            sql, params = expr.select_format(self, sql, params)
+            if (
                allows_group_by_select_index
                and (select_index := selected_expr_indices.get(expr)) is not None
            ):
                sql, params = str(select_index), ()
            else:
                sql, params = expr.select_format(self, sql, params)
            params_hash = make_hashable(params)
            if (sql, params_hash) not in seen:
                result.append((sql, params))
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@ -2211,40 +2211,25 @@ class Query(BaseExpression):
        primary key, and the query would be equivalent, the optimization
        will be made automatically.
        """
-        if allow_aliases:
+        if allow_aliases and self.values_select:
-            # Column names from JOINs to check collisions with aliases.
+            # If grouping by aliases is allowed assign selected value aliases
-            column_names = set()
+            # by moving them to annotations.
-            seen_models = set()
+            group_by_annotations = {}
-            for join in list(self.alias_map.values())[1:]:  # Skip base table.
+            values_select = {}
-                model = join.join_field.related_model
+            for alias, expr in zip(self.values_select, self.select):
-                if model not in seen_models:
+                if isinstance(expr, Col):
-                    column_names.update(
+                    values_select[alias] = expr
-                        {field.column for field in model._meta.local_concrete_fields}
+                else:
-                    )
+                    group_by_annotations[alias] = expr
-                    seen_models.add(model)
+            self.annotations = {**group_by_annotations, **self.annotations}
-            if self.values_select:
+            self.append_annotation_mask(group_by_annotations)
-                # If grouping by aliases is allowed assign selected values
+            self.select = tuple(values_select.values())
-                # aliases by moving them to annotations.
+            self.values_select = tuple(values_select)
                group_by_annotations = {}
                values_select = {}
                for alias, expr in zip(self.values_select, self.select):
                    if isinstance(expr, Col):
                        values_select[alias] = expr
                    else:
                        group_by_annotations[alias] = expr
                self.annotations = {**group_by_annotations, **self.annotations}
                self.append_annotation_mask(group_by_annotations)
                self.select = tuple(values_select.values())
                self.values_select = tuple(values_select)
        group_by = list(self.select)
        for alias, annotation in self.annotation_select.items():
            if not (group_by_cols := annotation.get_group_by_cols()):
                continue
-            if (
+            if allow_aliases and not annotation.contains_aggregate:
                allow_aliases
                and alias not in column_names
                and not annotation.contains_aggregate
            ):
                group_by.append(Ref(alias, annotation))
            else:
                group_by.extend(group_by_cols)
--- a/tests/aggregation/tests.py
+++ b/tests/aggregation/tests.py
@ -1500,27 +1500,29 @@ class AggregateTestCase(TestCase):
            ],
        )
    @skipUnlessDBFeature("supports_subqueries_in_group_by")
    def test_aggregation_subquery_annotation_values_collision(self):
        books_rating_qs = Book.objects.filter(
-            publisher=OuterRef("pk"),
+            pk=OuterRef("book"),
            price=Decimal("29.69"),
        ).values("rating")
        publisher_qs = (
            Publisher.objects.filter(
                book__contact__age__gt=20,
                name=self.p1.name,
            )
            .annotate(
                rating=Subquery(books_rating_qs),
                contacts_count=Count("book__contact"),
            )
            .values("rating")
-            .annotate(total_count=Count("rating"))
+            .annotate(total_count=Count("*"))
            .order_by("rating")
        )
        self.assertEqual(
            list(publisher_qs),
            [
-                {"rating": 4.0, "total_count": 2},
+                {"rating": 3.0, "total_count": 1},
                {"rating": 4.0, "total_count": 3},
                {"rating": 4.5, "total_count": 1},
                {"rating": 5.0, "total_count": 1},
            ],
        )
@ -1642,9 +1644,7 @@ class AggregateTestCase(TestCase):
        )
        with self.assertNumQueries(1) as ctx:
            self.assertSequenceEqual(books_qs, [book])
-        # Outerquery SELECT, annotation SELECT, and WHERE SELECT but GROUP BY
+        if connection.features.allows_group_by_select_index:
        # selected alias, if allowed.
        if connection.features.allows_group_by_refs:
            self.assertEqual(ctx[0]["sql"].count("SELECT"), 3)
    @skipUnlessDBFeature("supports_subqueries_in_group_by")
--- a/tests/aggregation_regress/models.py
+++ b/tests/aggregation_regress/models.py
@ -89,3 +89,49 @@ class SelfRefFK(models.Model):
    parent = models.ForeignKey(
        "self", models.SET_NULL, null=True, blank=True, related_name="children"
    )
 class AuthorProxy(Author):
    class Meta:
        proxy = True
 class Recipe(models.Model):
    name = models.CharField(max_length=20)
    author = models.ForeignKey(AuthorProxy, models.CASCADE)
    tasters = models.ManyToManyField(AuthorProxy, related_name="recipes")
 class RecipeProxy(Recipe):
    class Meta:
        proxy = True
 class AuthorUnmanaged(models.Model):
    age = models.IntegerField()
    class Meta:
        db_table = Author._meta.db_table
        managed = False
 class RecipeTasterUnmanaged(models.Model):
    recipe = models.ForeignKey("RecipeUnmanaged", models.CASCADE)
    author = models.ForeignKey(
        AuthorUnmanaged, models.CASCADE, db_column="authorproxy_id"
    )
    class Meta:
        managed = False
        db_table = Recipe.tasters.through._meta.db_table
 class RecipeUnmanaged(models.Model):
    author = models.ForeignKey(AuthorUnmanaged, models.CASCADE)
    tasters = models.ManyToManyField(
        AuthorUnmanaged, through=RecipeTasterUnmanaged, related_name="+"
    )
    class Meta:
        managed = False
        db_table = Recipe._meta.db_table
--- a/tests/aggregation_regress/tests.py
+++ b/tests/aggregation_regress/tests.py
@ -11,6 +11,7 @@ from django.db.models import (
    Aggregate,
    Avg,
    Case,
    CharField,
    Count,
    DecimalField,
    F,
@ -23,12 +24,15 @@ from django.db.models import (
    Variance,
    When,
 )
 from django.db.models.functions import Cast, Concat
 from django.test import TestCase, skipUnlessDBFeature
 from django.test.utils import Approximate
 from .models import (
    Alfa,
    Author,
    AuthorProxy,
    AuthorUnmanaged,
    Book,
    Bravo,
    Charlie,
@ -37,6 +41,8 @@ from .models import (
    HardbackBook,
    ItemTag,
    Publisher,
    RecipeProxy,
    RecipeUnmanaged,
    SelfRefFK,
    Store,
    WithManualPK,
@ -188,9 +194,8 @@ class AggregationTests(TestCase):
                    }
                ],
            )
-        if connection.features.allows_group_by_refs:
+        if connection.features.allows_group_by_select_index:
-            alias = connection.ops.quote_name("discount_price")
+            self.assertIn("GROUP BY 1", ctx[0]["sql"])
            self.assertIn(f"GROUP BY {alias}", ctx[0]["sql"])
    def test_aggregates_in_where_clause(self):
        """
@ -1827,6 +1832,85 @@ class AggregationTests(TestCase):
        )
        self.assertEqual(set(books), {self.b1, self.b4})
    def test_aggregate_and_annotate_duplicate_columns(self):
        books = (
            Book.objects.values("isbn")
            .annotate(
                name=F("publisher__name"),
                num_authors=Count("authors"),
            )
            .order_by("isbn")
        )
        self.assertSequenceEqual(
            books,
            [
                {"isbn": "013235613", "name": "Prentice Hall", "num_authors": 3},
                {"isbn": "013790395", "name": "Prentice Hall", "num_authors": 2},
                {"isbn": "067232959", "name": "Sams", "num_authors": 1},
                {"isbn": "155860191", "name": "Morgan Kaufmann", "num_authors": 1},
                {"isbn": "159059725", "name": "Apress", "num_authors": 2},
                {"isbn": "159059996", "name": "Apress", "num_authors": 1},
            ],
        )
    def test_aggregate_and_annotate_duplicate_columns_proxy(self):
        author = AuthorProxy.objects.latest("pk")
        recipe = RecipeProxy.objects.create(name="Dahl", author=author)
        recipe.tasters.add(author)
        recipes = RecipeProxy.objects.values("pk").annotate(
            name=F("author__name"),
            num_tasters=Count("tasters"),
        )
        self.assertSequenceEqual(
            recipes,
            [{"pk": recipe.pk, "name": "Stuart Russell", "num_tasters": 1}],
        )
    def test_aggregate_and_annotate_duplicate_columns_unmanaged(self):
        author = AuthorProxy.objects.latest("pk")
        recipe = RecipeProxy.objects.create(name="Dahl", author=author)
        recipe.tasters.add(author)
        recipes = RecipeUnmanaged.objects.values("pk").annotate(
            name=F("author__age"),
            num_tasters=Count("tasters"),
        )
        self.assertSequenceEqual(
            recipes,
            [{"pk": recipe.pk, "name": 46, "num_tasters": 1}],
        )
    def test_aggregate_group_by_unseen_columns_unmanaged(self):
        author = AuthorProxy.objects.latest("pk")
        shadow_author = AuthorProxy.objects.create(name=author.name, age=author.age - 2)
        recipe = RecipeProxy.objects.create(name="Dahl", author=author)
        shadow_recipe = RecipeProxy.objects.create(
            name="Shadow Dahl",
            author=shadow_author,
        )
        recipe.tasters.add(shadow_author)
        shadow_recipe.tasters.add(author)
        # This selects how many tasters each author had according to a
        # calculated field "name". The table has a column "name" that Django is
        # unaware of, and is equal for the two authors. The grouping column
        # cannot be referenced by its name ("name"), as it'd return one result
        # which is incorrect.
        author_recipes = (
            AuthorUnmanaged.objects.annotate(
                name=Concat(
                    Value("Writer at "),
                    Cast(F("age"), output_field=CharField()),
                )
            )
            .values("name")  # Field used for grouping.
            .annotate(num_recipes=Count("recipeunmanaged"))
            .filter(num_recipes__gt=0)
            .values("num_recipes")  # Drop grouping column.
        )
        self.assertSequenceEqual(
            author_recipes,
            [{"num_recipes": 1}, {"num_recipes": 1}],
        )
 class JoinPromotionTests(TestCase):
    def test_ticket_21150(self):
--- a/tests/postgres_tests/test_array.py
+++ b/tests/postgres_tests/test_array.py
@ -449,8 +449,8 @@ class TestQuerying(PostgreSQLTestCase):
                    expected,
                )
-    @skipUnlessDBFeature("allows_group_by_refs")
+    @skipUnlessDBFeature("allows_group_by_select_index")
-    def test_group_by_order_by_aliases(self):
+    def test_group_by_order_by_select_index(self):
        with self.assertNumQueries(1) as ctx:
            self.assertSequenceEqual(
                NullableIntegerArrayModel.objects.filter(
@ -467,7 +467,7 @@ class TestQuerying(PostgreSQLTestCase):
            )
        alias = connection.ops.quote_name("field__0")
        sql = ctx[0]["sql"]
-        self.assertIn(f"GROUP BY {alias}", sql)
+        self.assertIn("GROUP BY 1", sql)
        self.assertIn(f"ORDER BY {alias}", sql)
    def test_index(self):