Fixed #31150 -- Included subqueries that reference related fields in GROUP BY clauses.

Thanks Johannes Hoppe for the report.

Regression in fb3f034f1c.

Co-authored-by: Simon Charette <charette.s@gmail.com>
This commit is contained in:
Mariusz Felisiak 2020-03-02 13:20:36 +01:00
parent 3bd29a8a97
commit 7b8fa1653f
3 changed files with 44 additions and 1 deletions

View File

@ -6,6 +6,7 @@ from decimal import Decimal
from django.core.exceptions import EmptyResultSet, FieldError from django.core.exceptions import EmptyResultSet, FieldError
from django.db import NotSupportedError, connection from django.db import NotSupportedError, connection
from django.db.models import fields from django.db.models import fields
from django.db.models.constants import LOOKUP_SEP
from django.db.models.query_utils import Q from django.db.models.query_utils import Q
from django.utils.deconstruct import deconstructible from django.utils.deconstruct import deconstructible
from django.utils.functional import cached_property from django.utils.functional import cached_property
@ -559,6 +560,14 @@ class ResolvedOuterRef(F):
'only be used in a subquery.' 'only be used in a subquery.'
) )
def resolve_expression(self, *args, **kwargs):
col = super().resolve_expression(*args, **kwargs)
# FIXME: Rename possibly_multivalued to multivalued and fix detection
# for non-multivalued JOINs (e.g. foreign key fields). This should take
# into account only many-to-many and one-to-many relationships.
col.possibly_multivalued = LOOKUP_SEP in self.name
return col
def relabeled_clone(self, relabels): def relabeled_clone(self, relabels):
return self return self
@ -747,6 +756,7 @@ class Random(Expression):
class Col(Expression): class Col(Expression):
contains_column_references = True contains_column_references = True
possibly_multivalued = False
def __init__(self, alias, target, output_field=None): def __init__(self, alias, target, output_field=None):
if output_field is None: if output_field is None:
@ -1042,7 +1052,10 @@ class Subquery(Expression):
def get_group_by_cols(self, alias=None): def get_group_by_cols(self, alias=None):
if alias: if alias:
return [Ref(alias, self)] return [Ref(alias, self)]
return self.query.get_external_cols() external_cols = self.query.get_external_cols()
if any(col.possibly_multivalued for col in external_cols):
return [self]
return external_cols
class Exists(Subquery): class Exists(Subquery):

View File

@ -27,3 +27,6 @@ Bugfixes
* Fixed a regression in Django 3.0.3 that caused misplacing parameters of SQL * Fixed a regression in Django 3.0.3 that caused misplacing parameters of SQL
queries when subtracting ``DateField`` or ``DateTimeField`` expressions on queries when subtracting ``DateField`` or ``DateTimeField`` expressions on
MySQL (:ticket:`31312`). MySQL (:ticket:`31312`).
* Fixed a regression in Django 3.0 that didn't include subqueries spanning
multivalued relations in the ``GROUP BY`` clause (:ticket:`31150`).

View File

@ -1,6 +1,7 @@
import datetime import datetime
import re import re
from decimal import Decimal from decimal import Decimal
from unittest import skipIf
from django.core.exceptions import FieldError from django.core.exceptions import FieldError
from django.db import connection from django.db import connection
@ -1190,6 +1191,26 @@ class AggregateTestCase(TestCase):
}, },
]) ])
@skipUnlessDBFeature('supports_subqueries_in_group_by')
@skipIf(
connection.vendor == 'mysql',
'GROUP BY optimization does not work properly when ONLY_FULL_GROUP_BY '
'mode is enabled on MySQL, see #31331.',
)
def test_aggregation_subquery_annotation_multivalued(self):
"""
Subquery annotations must be included in the GROUP BY if they use
potentially multivalued relations (contain the LOOKUP_SEP).
"""
subquery_qs = Author.objects.filter(
pk=OuterRef('pk'),
book__name=OuterRef('book__name'),
).values('pk')
author_qs = Author.objects.annotate(
subquery_id=Subquery(subquery_qs),
).annotate(count=Count('book'))
self.assertEqual(author_qs.count(), Author.objects.count())
def test_aggregation_order_by_not_selected_annotation_values(self): def test_aggregation_order_by_not_selected_annotation_values(self):
result_asc = [ result_asc = [
self.b4.pk, self.b4.pk,
@ -1248,6 +1269,7 @@ class AggregateTestCase(TestCase):
).annotate(total=Count('*')) ).annotate(total=Count('*'))
self.assertEqual(dict(has_long_books_breakdown), {True: 2, False: 3}) self.assertEqual(dict(has_long_books_breakdown), {True: 2, False: 3})
@skipUnlessDBFeature('supports_subqueries_in_group_by')
def test_aggregation_subquery_annotation_related_field(self): def test_aggregation_subquery_annotation_related_field(self):
publisher = Publisher.objects.create(name=self.a9.name, num_awards=2) publisher = Publisher.objects.create(name=self.a9.name, num_awards=2)
book = Book.objects.create( book = Book.objects.create(
@ -1267,3 +1289,8 @@ class AggregateTestCase(TestCase):
contact_publisher__isnull=False, contact_publisher__isnull=False,
).annotate(count=Count('authors')) ).annotate(count=Count('authors'))
self.assertSequenceEqual(books_qs, [book]) self.assertSequenceEqual(books_qs, [book])
# FIXME: GROUP BY doesn't need to include a subquery with
# non-multivalued JOINs, see Col.possibly_multivalued (refs #31150):
# with self.assertNumQueries(1) as ctx:
# self.assertSequenceEqual(books_qs, [book])
# self.assertEqual(ctx[0]['sql'].count('SELECT'), 2)