Fixed #31147 -- Added SearchHeadline to django.contrib.postgres.

This commit is contained in:
Hannes Ljungberg 2019-11-19 14:59:06 +01:00 committed by Mariusz Felisiak
parent aee0bebc2f
commit 65ab4f9f03
5 changed files with 239 additions and 6 deletions

View File

@ -1,3 +1,5 @@
import psycopg2
from django.db.models import (
CharField, Expression, Field, FloatField, Func, Lookup, TextField, Value,
)
@ -230,6 +232,57 @@ class SearchRank(Func):
super().__init__(*expressions)
class SearchHeadline(Func):
    """
    Database function wrapping PostgreSQL's ``ts_headline()``.

    Takes the expression to highlight, a search query and, optionally, a
    search configuration plus keyword-only highlighting options. Options
    left as ``None`` are omitted from the generated SQL.
    """
    function = 'ts_headline'
    template = '%(function)s(%(expressions)s%(options)s)'
    output_field = TextField()

    def __init__(
        self, expression, query, *, config=None, start_sel=None, stop_sel=None,
        max_words=None, min_words=None, short_word=None, highlight_all=None,
        max_fragments=None, fragment_delimiter=None,
    ):
        """
        Store the explicitly provided options and register the source
        expressions.

        ``query`` is wrapped in a ``SearchQuery`` unless it already exposes
        ``resolve_expression``. When ``config`` is given it is normalized via
        ``SearchConfig.from_parameter()`` and placed before the other
        expressions.
        """
        if not hasattr(query, 'resolve_expression'):
            query = SearchQuery(query)
        # Option names as spelled in the ts_headline() options string, paired
        # with the keyword argument that feeds each of them.
        candidates = (
            ('StartSel', start_sel),
            ('StopSel', stop_sel),
            ('MaxWords', max_words),
            ('MinWords', min_words),
            ('ShortWord', short_word),
            ('HighlightAll', highlight_all),
            ('MaxFragments', max_fragments),
            ('FragmentDelimiter', fragment_delimiter),
        )
        self.options = {
            name: setting for name, setting in candidates
            if setting is not None
        }
        if config is None:
            source_expressions = (expression, query)
        else:
            source_expressions = (
                SearchConfig.from_parameter(config), expression, query,
            )
        super().__init__(*source_expressions)

    def as_sql(self, compiler, connection, function=None, template=None):
        """
        Compile the function call, appending the options string as one extra
        query parameter when any option was provided.
        """
        if self.options:
            # getquoted() returns a quoted bytestring of the adapted value.
            rendered = [
                '%s=%s' % (
                    name,
                    psycopg2.extensions.adapt(setting).getquoted().decode(),
                )
                for name, setting in self.options.items()
            ]
            options_sql = ', %s'
            options_params = [', '.join(rendered)]
        else:
            options_sql = ''
            options_params = []
        sql, params = super().as_sql(
            compiler, connection, function=function, template=template,
            options=options_sql,
        )
        return sql, params + options_params
SearchVectorField.register_lookup(SearchVectorExact)

View File

@ -132,6 +132,60 @@ order by relevancy::
See :ref:`postgresql-fts-weighting-queries` for an explanation of the
``weights`` parameter.
``SearchHeadline``
==================
.. versionadded:: 3.1
.. class:: SearchHeadline(expression, query, config=None, start_sel=None, stop_sel=None, max_words=None, min_words=None, short_word=None, highlight_all=None, max_fragments=None, fragment_delimiter=None)
Accepts a single text field or an expression, a query, a config, and a set of
options. Returns highlighted search results.
Set the ``start_sel`` and ``stop_sel`` parameters to the string values to be
used to wrap highlighted query terms in the document. PostgreSQL's defaults are
``<b>`` and ``</b>``.
Provide integer values to the ``max_words`` and ``min_words`` parameters to
determine the longest and shortest headlines. PostgreSQL's defaults are 35 and
15.
Provide an integer value to the ``short_word`` parameter to discard words of
this length or less in each headline. PostgreSQL's default is 3.
Set the ``highlight_all`` parameter to ``True`` to use the whole document in
place of a fragment and ignore ``max_words``, ``min_words``, and ``short_word``
parameters. That's disabled by default in PostgreSQL.
Provide a non-zero integer value to the ``max_fragments`` parameter to set the
maximum number of fragments to display. That's disabled by default in
PostgreSQL.
Set the ``fragment_delimiter`` string parameter to configure the delimiter
between fragments. PostgreSQL's default is ``" ... "``.
The PostgreSQL documentation has more details on `highlighting search
results`_.
Usage example::
>>> from django.contrib.postgres.search import SearchHeadline, SearchQuery
>>> query = SearchQuery('red tomato')
>>> entry = Entry.objects.annotate(
... headline=SearchHeadline(
... 'body_text',
... query,
... start_sel='<span>',
... stop_sel='</span>',
... ),
... ).get()
>>> print(entry.headline)
Sandwich with <span>tomato</span> and <span>red</span> cheese.
See :ref:`postgresql-fts-search-configuration` for an explanation of the
``config`` parameter.
.. _highlighting search results: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-HEADLINE
.. _postgresql-fts-search-configuration:
Changing the search configuration

View File

@ -108,6 +108,9 @@ Minor features
* :class:`~django.contrib.postgres.search.SearchQuery` now supports
``'websearch'`` search type on PostgreSQL 11+.
* The new :class:`~django.contrib.postgres.search.SearchHeadline` class allows
highlighting search results.
:mod:`django.contrib.redirects`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -5,16 +5,20 @@ These tests use dialogue from the 1975 film Monty Python and the Holy Grail.
All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
transcript.
"""
from django.contrib.postgres.search import (
SearchConfig, SearchQuery, SearchRank, SearchVector,
)
from django.db import connection
from django.db.models import F
from django.test import SimpleTestCase, modify_settings, skipUnlessDBFeature
from django.test import modify_settings, skipUnlessDBFeature
from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
from .models import Character, Line, Scene
try:
from django.contrib.postgres.search import (
SearchConfig, SearchHeadline, SearchQuery, SearchRank, SearchVector,
)
except ImportError:
pass
class GrailTestData:
@ -436,7 +440,7 @@ class SearchVectorIndexTests(PostgreSQLTestCase):
)
class SearchQueryTests(SimpleTestCase):
class SearchQueryTests(PostgreSQLSimpleTestCase):
def test_str(self):
tests = (
(~SearchQuery('a'), '~SearchQuery(a)'),
@ -460,3 +464,118 @@ class SearchQueryTests(SimpleTestCase):
for query, expected_str in tests:
with self.subTest(query=query):
self.assertEqual(str(query), expected_str)
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class SearchHeadlineTests(GrailTestData, PostgreSQLTestCase):
    """Tests for SearchHeadline annotations on the Line model."""

    def _headline(self, line, *args, **kwargs):
        """
        Annotate Line with SearchHeadline(*args, **kwargs) and return the
        resulting headline for the given line.
        """
        annotated = Line.objects.annotate(
            headline=SearchHeadline(*args, **kwargs),
        )
        return annotated.get(pk=line.pk).headline

    def test_headline(self):
        # Expression, query, and config all passed as typed objects.
        headline = self._headline(
            self.verse0,
            F('dialogue'),
            SearchQuery('brave sir robin'),
            config=SearchConfig('english'),
        )
        expected = (
            '<b>Robin</b>. He was not at all afraid to be killed in nasty '
            'ways. <b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> '
            '<b>Sir</b> <b>Robin</b>'
        )
        self.assertEqual(headline, expected)

    def test_headline_untyped_args(self):
        # Plain strings are accepted for the expression, query, and config.
        headline = self._headline(
            self.verse0, 'dialogue', 'killed', config='english',
        )
        expected = (
            'Robin. He was not at all afraid to be <b>killed</b> in nasty '
            'ways. Brave, brave, brave, brave Sir Robin!'
        )
        self.assertEqual(headline, expected)

    def test_headline_with_config(self):
        headline = self._headline(
            self.french,
            'dialogue',
            SearchQuery('cadeaux', config='french'),
            config='french',
        )
        self.assertEqual(headline, 'Oh. Un beau <b>cadeau</b>. Oui oui.')

    def test_headline_with_config_from_field(self):
        # The configuration is read from a model field via F().
        headline = self._headline(
            self.french,
            'dialogue',
            SearchQuery('cadeaux', config=F('dialogue_config')),
            config=F('dialogue_config'),
        )
        self.assertEqual(headline, 'Oh. Un beau <b>cadeau</b>. Oui oui.')

    def test_headline_separator_options(self):
        headline = self._headline(
            self.verse0,
            'dialogue',
            'brave sir robin',
            start_sel='<span>',
            stop_sel='</span>',
        )
        expected = (
            '<span>Robin</span>. He was not at all afraid to be killed in '
            'nasty ways. <span>Brave</span>, <span>brave</span>, <span>brave'
            '</span>, <span>brave</span> <span>Sir</span> <span>Robin</span>'
        )
        self.assertEqual(headline, expected)

    def test_headline_highlight_all_option(self):
        headline = self._headline(
            self.verse0,
            'dialogue',
            SearchQuery('brave sir robin', config='english'),
            highlight_all=True,
        )
        expected_fragment = (
            '<b>Bravely</b> bold <b>Sir</b> <b>Robin</b>, rode forth from '
            'Camelot. He was not afraid to die, o '
        )
        self.assertIn(expected_fragment, headline)

    def test_headline_short_word_option(self):
        headline = self._headline(
            self.verse0,
            'dialogue',
            SearchQuery('brave sir robin', config='english'),
            short_word=6,
        )
        expected_ending = (
            '<b>Brave</b>, <b>brave</b>, <b>brave</b>, <b>brave</b> <b>Sir</b>'
        )
        self.assertIs(headline.endswith(expected_ending), True)

    def test_headline_fragments_words_options(self):
        headline = self._headline(
            self.verse0,
            'dialogue',
            SearchQuery('brave sir robin', config='english'),
            fragment_delimiter='...<br>',
            max_fragments=4,
            max_words=3,
            min_words=1,
        )
        expected = (
            '<b>Sir</b> <b>Robin</b>, rode...<br>'
            '<b>Brave</b> <b>Sir</b> <b>Robin</b>...<br>'
            '<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>'
            '<b>brave</b> <b>Sir</b> <b>Robin</b>'
        )
        self.assertEqual(headline, expected)

View File

@ -1,9 +1,13 @@
from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
from django.test import modify_settings
from . import PostgreSQLTestCase
from .models import CharFieldModel, TextFieldModel
try:
from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
except ImportError:
pass
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TrigramTest(PostgreSQLTestCase):