Fixed #28194 -- Added support for normalization and cover density to SearchRank.

This commit is contained in:
Hannes Ljungberg 2020-03-20 22:01:26 +01:00 committed by Mariusz Felisiak
parent 4ed534758c
commit 0b51a4f894
4 changed files with 106 additions and 3 deletions

View File

@ -208,7 +208,10 @@ class SearchRank(Func):
function = 'ts_rank' function = 'ts_rank'
output_field = FloatField() output_field = FloatField()
def __init__(self, vector, query, weights=None): def __init__(
self, vector, query, weights=None, normalization=None,
cover_density=False,
):
if not hasattr(vector, 'resolve_expression'): if not hasattr(vector, 'resolve_expression'):
vector = SearchVector(vector) vector = SearchVector(vector)
if not hasattr(query, 'resolve_expression'): if not hasattr(query, 'resolve_expression'):
@ -218,6 +221,12 @@ class SearchRank(Func):
if not hasattr(weights, 'resolve_expression'): if not hasattr(weights, 'resolve_expression'):
weights = Value(weights) weights = Value(weights)
expressions = (weights,) + expressions expressions = (weights,) + expressions
if normalization is not None:
if not hasattr(normalization, 'resolve_expression'):
normalization = Value(normalization)
expressions += (normalization,)
if cover_density:
self.function = 'ts_rank_cd'
super().__init__(*expressions) super().__init__(*expressions)

View File

@ -118,7 +118,7 @@ See :ref:`postgresql-fts-search-configuration` for an explanation of the
``SearchRank`` ``SearchRank``
============== ==============
.. class:: SearchRank(vector, query, weights=None) .. class:: SearchRank(vector, query, weights=None, normalization=None, cover_density=False)
So far, we've returned the results for which any match between the vector and So far, we've returned the results for which any match between the vector and
the query is possible. It's likely you may wish to order the results by some the query is possible. It's likely you may wish to order the results by some
@ -137,6 +137,32 @@ order by relevancy::
See :ref:`postgresql-fts-weighting-queries` for an explanation of the See :ref:`postgresql-fts-weighting-queries` for an explanation of the
``weights`` parameter. ``weights`` parameter.
Set the ``cover_density`` parameter to ``True`` to enable the cover density
ranking, which means that the proximity of matching query terms is taken into
account.
Provide an integer to the ``normalization`` parameter to control rank
normalization. This integer is a bit mask, so you can combine multiple
behaviors::
>>> from django.db.models import Value
>>> Entry.objects.annotate(
... rank=SearchRank(
... vector,
... query,
... normalization=Value(2).bitor(Value(4)),
... )
... )
The PostgreSQL documentation has more details about `different rank
normalization options`_.
.. _different rank normalization options: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING
.. versionadded:: 3.1
The ``normalization`` and ``cover_density`` parameters were added.
``SearchHeadline`` ``SearchHeadline``
================== ==================

View File

@ -160,6 +160,14 @@ Minor features
* :lookup:`search` lookup now supports query expressions. * :lookup:`search` lookup now supports query expressions.
* The new ``cover_density`` parameter of
:class:`~django.contrib.postgres.search.SearchRank` allows ranking by cover
density.
* The new ``normalization`` parameter of
:class:`~django.contrib.postgres.search.SearchRank` allows rank
normalization.
:mod:`django.contrib.redirects` :mod:`django.contrib.redirects`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -6,7 +6,7 @@ All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
transcript. transcript.
""" """
from django.db import connection from django.db import connection
from django.db.models import F from django.db.models import F, Value
from django.test import modify_settings, skipUnlessDBFeature from django.test import modify_settings, skipUnlessDBFeature
from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
@ -449,6 +449,66 @@ class TestRankingAndWeights(GrailTestData, PostgreSQLTestCase):
).filter(rank__gt=0.3) ).filter(rank__gt=0.3)
self.assertSequenceEqual(searched, [self.verse0]) self.assertSequenceEqual(searched, [self.verse0])
def test_cover_density_ranking(self):
    """
    Annotate the minstrel's lines with a cover-density SearchRank and
    check the resulting order.
    """
    dialogue = (
        'Bravely taking to his feet, he beat a very brave retreat. '
        'A brave retreat brave Sir Robin.'
    )
    # Extra line created for this test; the assertion below expects it to
    # sort between verse2 and verse1.
    not_dense_verse = Line.objects.create(
        scene=self.robin,
        character=self.minstrel,
        dialogue=dialogue,
    )
    rank = SearchRank(
        SearchVector('dialogue'),
        SearchQuery('brave robin'),
        cover_density=True,
    )
    minstrel_lines = Line.objects.filter(character=self.minstrel)
    # '-pk' is the secondary ordering key after 'rank'.
    searched = minstrel_lines.annotate(rank=rank).order_by('rank', '-pk')
    expected = [self.verse2, not_dense_verse, self.verse1, self.verse0]
    self.assertSequenceEqual(searched, expected)
def test_ranking_with_normalization(self):
    """
    Annotate the minstrel's lines with a SearchRank that uses a plain
    integer ``normalization`` and check the resulting ascending order.
    """
    short_verse = Line.objects.create(
        scene=self.robin,
        character=self.minstrel,
        dialogue='A brave retreat brave Sir Robin.',
    )
    rank = SearchRank(
        SearchVector('dialogue'),
        SearchQuery('brave sir robin'),
        # Divide the rank by the document length.
        normalization=2,
    )
    minstrel_lines = Line.objects.filter(character=self.minstrel)
    searched = minstrel_lines.annotate(rank=rank).order_by('rank')
    # The short verse is expected last, i.e. with the highest rank.
    expected = [self.verse2, self.verse1, self.verse0, short_verse]
    self.assertSequenceEqual(searched, expected)
def test_ranking_with_masked_normalization(self):
    """
    Annotate the minstrel's lines with a SearchRank whose ``normalization``
    is an expression combining two flags, and check the resulting
    ascending order.
    """
    short_verse = Line.objects.create(
        scene=self.robin,
        character=self.minstrel,
        dialogue='A brave retreat brave Sir Robin.',
    )
    # Divide the rank by the document length and by the number of unique
    # words in the document.
    normalization = Value(2).bitor(Value(8))
    rank = SearchRank(
        SearchVector('dialogue'),
        SearchQuery('brave sir robin'),
        normalization=normalization,
    )
    minstrel_lines = Line.objects.filter(character=self.minstrel)
    searched = minstrel_lines.annotate(rank=rank).order_by('rank')
    # The short verse is expected last, i.e. with the highest rank.
    expected = [self.verse2, self.verse1, self.verse0, short_verse]
    self.assertSequenceEqual(searched, expected)
class SearchVectorIndexTests(PostgreSQLTestCase): class SearchVectorIndexTests(PostgreSQLTestCase):
def test_search_vector_index(self): def test_search_vector_index(self):