"""
Test PostgreSQL full text search.
These tests use dialogue from the 1975 film Monty Python and the Holy Grail.
All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
transcript.
"""
from django.db import connection
from django.db.models import F, Value
from django.test import modify_settings, skipUnlessDBFeature
from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
from .models import Character, Line, LineSavedSearch, Scene
try:
from django.contrib.postgres.search import (
SearchConfig, SearchHeadline, SearchQuery, SearchRank, SearchVector,
)
except ImportError:
pass
class GrailTestData:
@classmethod
def setUpTestData(cls):
cls.robin = Scene.objects.create(scene='Scene 10', setting='The dark forest of Ewing')
cls.minstrel = Character.objects.create(name='Minstrel')
verses = [
(
'Bravely bold Sir Robin, rode forth from Camelot. '
'He was not afraid to die, o Brave Sir Robin. '
'He was not at all afraid to be killed in nasty ways. '
'Brave, brave, brave, brave Sir Robin'
),
(
'He was not in the least bit scared to be mashed into a pulp, '
'Or to have his eyes gouged out, and his elbows broken. '
'To have his kneecaps split, and his body burned away, '
'And his limbs all hacked and mangled, brave Sir Robin!'
),
(
'His head smashed in and his heart cut out, '
'And his liver removed and his bowels unplugged, '
'And his nostrils ripped and his bottom burned off,'
'And his --'
),
]
cls.verses = [Line.objects.create(
scene=cls.robin,
character=cls.minstrel,
dialogue=verse,
) for verse in verses]
cls.verse0, cls.verse1, cls.verse2 = cls.verses
cls.witch_scene = Scene.objects.create(scene='Scene 5', setting="Sir Bedemir's Castle")
bedemir = Character.objects.create(name='Bedemir')
crowd = Character.objects.create(name='Crowd')
witch = Character.objects.create(name='Witch')
duck = Character.objects.create(name='Duck')
cls.bedemir0 = Line.objects.create(
scene=cls.witch_scene,
character=bedemir,
dialogue='We shall use my larger scales!',
dialogue_config='english',
)
cls.bedemir1 = Line.objects.create(
scene=cls.witch_scene,
character=bedemir,
dialogue='Right, remove the supports!',
dialogue_config='english',
)
cls.duck = Line.objects.create(scene=cls.witch_scene, character=duck, dialogue=None)
cls.crowd = Line.objects.create(scene=cls.witch_scene, character=crowd, dialogue='A witch! A witch!')
cls.witch = Line.objects.create(scene=cls.witch_scene, character=witch, dialogue="It's a fair cop.")
trojan_rabbit = Scene.objects.create(scene='Scene 8', setting="The castle of Our Master Ruiz' de lu la Ramper")
guards = Character.objects.create(name='French Guards')
cls.french = Line.objects.create(
scene=trojan_rabbit,
character=guards,
dialogue='Oh. Un beau cadeau. Oui oui.',
dialogue_config='french',
)
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class SimpleSearchTest(GrailTestData, PostgreSQLTestCase):
def test_simple(self):
searched = Line.objects.filter(dialogue__search='elbows')
self.assertSequenceEqual(searched, [self.verse1])
def test_non_exact_match(self):
searched = Line.objects.filter(dialogue__search='hearts')
self.assertSequenceEqual(searched, [self.verse2])
def test_search_two_terms(self):
searched = Line.objects.filter(dialogue__search='heart bowel')
self.assertSequenceEqual(searched, [self.verse2])
def test_search_two_terms_with_partial_match(self):
searched = Line.objects.filter(dialogue__search='Robin killed')
self.assertSequenceEqual(searched, [self.verse0])
def test_search_query_config(self):
searched = Line.objects.filter(
dialogue__search=SearchQuery('nostrils', config='simple'),
)
self.assertSequenceEqual(searched, [self.verse2])
def test_search_with_F_expression(self):
# Non-matching query.
LineSavedSearch.objects.create(line=self.verse1, query='hearts')
# Matching query.
match = LineSavedSearch.objects.create(line=self.verse1, query='elbows')
for query_expression in [F('query'), SearchQuery(F('query'))]:
with self.subTest(query_expression):
searched = LineSavedSearch.objects.filter(
line__dialogue__search=query_expression,
)
self.assertSequenceEqual(searched, [match])
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class SearchVectorFieldTest(GrailTestData, PostgreSQLTestCase):
def test_existing_vector(self):
Line.objects.update(dialogue_search_vector=SearchVector('dialogue'))
searched = Line.objects.filter(dialogue_search_vector=SearchQuery('Robin killed'))
self.assertSequenceEqual(searched, [self.verse0])
def test_existing_vector_config_explicit(self):
Line.objects.update(dialogue_search_vector=SearchVector('dialogue'))
searched = Line.objects.filter(dialogue_search_vector=SearchQuery('cadeaux', config='french'))
self.assertSequenceEqual(searched, [self.french])
def test_single_coalesce_expression(self):
searched = Line.objects.annotate(search=SearchVector('dialogue')).filter(search='cadeaux')
self.assertNotIn('COALESCE(COALESCE', str(searched.query))
class SearchConfigTests(PostgreSQLSimpleTestCase):
def test_from_parameter(self):
self.assertIsNone(SearchConfig.from_parameter(None))
self.assertEqual(SearchConfig.from_parameter('foo'), SearchConfig('foo'))
self.assertEqual(SearchConfig.from_parameter(SearchConfig('bar')), SearchConfig('bar'))
class MultipleFieldsTest(GrailTestData, PostgreSQLTestCase):
def test_simple_on_dialogue(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search='elbows')
self.assertSequenceEqual(searched, [self.verse1])
def test_simple_on_scene(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search='Forest')
self.assertCountEqual(searched, self.verses)
def test_non_exact_match(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search='heart')
self.assertSequenceEqual(searched, [self.verse2])
def test_search_two_terms(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search='heart forest')
self.assertSequenceEqual(searched, [self.verse2])
def test_terms_adjacent(self):
searched = Line.objects.annotate(
search=SearchVector('character__name', 'dialogue'),
).filter(search='minstrel')
self.assertCountEqual(searched, self.verses)
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search='minstrelbravely')
self.assertSequenceEqual(searched, [])
def test_search_with_null(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search='bedemir')
self.assertCountEqual(searched, [self.bedemir0, self.bedemir1, self.crowd, self.witch, self.duck])
def test_search_with_non_text(self):
searched = Line.objects.annotate(
search=SearchVector('id'),
).filter(search=str(self.crowd.id))
self.assertSequenceEqual(searched, [self.crowd])
def test_phrase_search(self):
line_qs = Line.objects.annotate(search=SearchVector('dialogue'))
searched = line_qs.filter(search=SearchQuery('burned body his away', search_type='phrase'))
self.assertSequenceEqual(searched, [])
searched = line_qs.filter(search=SearchQuery('his body burned away', search_type='phrase'))
self.assertSequenceEqual(searched, [self.verse1])
def test_phrase_search_with_config(self):
line_qs = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue', config='french'),
)
searched = line_qs.filter(
search=SearchQuery('cadeau beau un', search_type='phrase', config='french'),
)
self.assertSequenceEqual(searched, [])
searched = line_qs.filter(
search=SearchQuery('un beau cadeau', search_type='phrase', config='french'),
)
self.assertSequenceEqual(searched, [self.french])
def test_raw_search(self):
line_qs = Line.objects.annotate(search=SearchVector('dialogue'))
searched = line_qs.filter(search=SearchQuery('Robin', search_type='raw'))
self.assertCountEqual(searched, [self.verse0, self.verse1])
searched = line_qs.filter(search=SearchQuery("Robin & !'Camelot'", search_type='raw'))
self.assertSequenceEqual(searched, [self.verse1])
def test_raw_search_with_config(self):
line_qs = Line.objects.annotate(search=SearchVector('dialogue', config='french'))
searched = line_qs.filter(
search=SearchQuery("'cadeaux' & 'beaux'", search_type='raw', config='french'),
)
self.assertSequenceEqual(searched, [self.french])
@skipUnlessDBFeature('has_websearch_to_tsquery')
def test_web_search(self):
line_qs = Line.objects.annotate(search=SearchVector('dialogue'))
searched = line_qs.filter(
search=SearchQuery(
'"burned body" "split kneecaps"',
search_type='websearch',
),
)
self.assertSequenceEqual(searched, [])
searched = line_qs.filter(
search=SearchQuery(
'"body burned" "kneecaps split" -"nostrils"',
search_type='websearch',
),
)
self.assertSequenceEqual(searched, [self.verse1])
searched = line_qs.filter(
search=SearchQuery(
'"Sir Robin" ("kneecaps" OR "Camelot")',
search_type='websearch',
),
)
self.assertSequenceEqual(searched, [self.verse0, self.verse1])
@skipUnlessDBFeature('has_websearch_to_tsquery')
def test_web_search_with_config(self):
line_qs = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue', config='french'),
)
searched = line_qs.filter(
search=SearchQuery('cadeau -beau', search_type='websearch', config='french'),
)
self.assertSequenceEqual(searched, [])
searched = line_qs.filter(
search=SearchQuery('beau cadeau', search_type='websearch', config='french'),
)
self.assertSequenceEqual(searched, [self.french])
def test_bad_search_type(self):
with self.assertRaisesMessage(ValueError, "Unknown search_type argument 'foo'."):
SearchQuery('kneecaps', search_type='foo')
def test_config_query_explicit(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue', config='french'),
).filter(search=SearchQuery('cadeaux', config='french'))
self.assertSequenceEqual(searched, [self.french])
def test_config_query_implicit(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue', config='french'),
).filter(search='cadeaux')
self.assertSequenceEqual(searched, [self.french])
def test_config_from_field_explicit(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue', config=F('dialogue_config')),
).filter(search=SearchQuery('cadeaux', config=F('dialogue_config')))
self.assertSequenceEqual(searched, [self.french])
def test_config_from_field_implicit(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue', config=F('dialogue_config')),
).filter(search='cadeaux')
self.assertSequenceEqual(searched, [self.french])
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TestCombinations(GrailTestData, PostgreSQLTestCase):
def test_vector_add(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting') + SearchVector('character__name'),
).filter(search='bedemir')
self.assertCountEqual(searched, [self.bedemir0, self.bedemir1, self.crowd, self.witch, self.duck])
def test_vector_add_multi(self):
searched = Line.objects.annotate(
search=(
SearchVector('scene__setting') +
SearchVector('character__name') +
SearchVector('dialogue')
),
).filter(search='bedemir')
self.assertCountEqual(searched, [self.bedemir0, self.bedemir1, self.crowd, self.witch, self.duck])
def test_vector_combined_mismatch(self):
msg = (
'SearchVector can only be combined with other SearchVector '
'instances, got NoneType.'
)
with self.assertRaisesMessage(TypeError, msg):
Line.objects.filter(dialogue__search=None + SearchVector('character__name'))
def test_combine_different_vector_configs(self):
searched = Line.objects.annotate(
search=(
SearchVector('dialogue', config='english') +
SearchVector('dialogue', config='french')
),
).filter(
search=SearchQuery('cadeaux', config='french') | SearchQuery('nostrils')
)
self.assertCountEqual(searched, [self.french, self.verse2])
def test_query_and(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search=SearchQuery('bedemir') & SearchQuery('scales'))
self.assertSequenceEqual(searched, [self.bedemir0])
def test_query_multiple_and(self):
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search=SearchQuery('bedemir') & SearchQuery('scales') & SearchQuery('nostrils'))
self.assertSequenceEqual(searched, [])
searched = Line.objects.annotate(
search=SearchVector('scene__setting', 'dialogue'),
).filter(search=SearchQuery('shall') & SearchQuery('use') & SearchQuery('larger'))
self.assertSequenceEqual(searched, [self.bedemir0])
def test_query_or(self):
searched = Line.objects.filter(dialogue__search=SearchQuery('kneecaps') | SearchQuery('nostrils'))
self.assertCountEqual(searched, [self.verse1, self.verse2])
def test_query_multiple_or(self):
searched = Line.objects.filter(
dialogue__search=SearchQuery('kneecaps') | SearchQuery('nostrils') | SearchQuery('Sir Robin')
)
self.assertCountEqual(searched, [self.verse1, self.verse2, self.verse0])
def test_query_invert(self):
searched = Line.objects.filter(character=self.minstrel, dialogue__search=~SearchQuery('kneecaps'))
self.assertCountEqual(searched, [self.verse0, self.verse2])
def test_combine_different_configs(self):
searched = Line.objects.filter(
dialogue__search=(
SearchQuery('cadeau', config='french') |
SearchQuery('nostrils', config='english')
)
)
self.assertCountEqual(searched, [self.french, self.verse2])
def test_combined_configs(self):
searched = Line.objects.filter(
dialogue__search=(
SearchQuery('nostrils', config='simple') &
SearchQuery('bowels', config='simple')
),
)
self.assertSequenceEqual(searched, [self.verse2])
def test_combine_raw_phrase(self):
searched = Line.objects.filter(
dialogue__search=(
SearchQuery('burn:*', search_type='raw', config='simple') |
SearchQuery('rode forth from Camelot', search_type='phrase')
)
)
self.assertCountEqual(searched, [self.verse0, self.verse1, self.verse2])
def test_query_combined_mismatch(self):
msg = (
'SearchQuery can only be combined with other SearchQuery '
'instances, got NoneType.'
)
with self.assertRaisesMessage(TypeError, msg):
Line.objects.filter(dialogue__search=None | SearchQuery('kneecaps'))
with self.assertRaisesMessage(TypeError, msg):
Line.objects.filter(dialogue__search=None & SearchQuery('kneecaps'))
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TestRankingAndWeights(GrailTestData, PostgreSQLTestCase):
def test_ranking(self):
searched = Line.objects.filter(character=self.minstrel).annotate(
rank=SearchRank(SearchVector('dialogue'), SearchQuery('brave sir robin')),
).order_by('rank')
self.assertSequenceEqual(searched, [self.verse2, self.verse1, self.verse0])
def test_rank_passing_untyped_args(self):
searched = Line.objects.filter(character=self.minstrel).annotate(
rank=SearchRank('dialogue', 'brave sir robin'),
).order_by('rank')
self.assertSequenceEqual(searched, [self.verse2, self.verse1, self.verse0])
def test_weights_in_vector(self):
vector = SearchVector('dialogue', weight='A') + SearchVector('character__name', weight='D')
searched = Line.objects.filter(scene=self.witch_scene).annotate(
rank=SearchRank(vector, SearchQuery('witch')),
).order_by('-rank')[:2]
self.assertSequenceEqual(searched, [self.crowd, self.witch])
vector = SearchVector('dialogue', weight='D') + SearchVector('character__name', weight='A')
searched = Line.objects.filter(scene=self.witch_scene).annotate(
rank=SearchRank(vector, SearchQuery('witch')),
).order_by('-rank')[:2]
self.assertSequenceEqual(searched, [self.witch, self.crowd])
def test_ranked_custom_weights(self):
vector = SearchVector('dialogue', weight='D') + SearchVector('character__name', weight='A')
searched = Line.objects.filter(scene=self.witch_scene).annotate(
rank=SearchRank(vector, SearchQuery('witch'), weights=[1, 0, 0, 0.5]),
).order_by('-rank')[:2]
self.assertSequenceEqual(searched, [self.crowd, self.witch])
def test_ranking_chaining(self):
searched = Line.objects.filter(character=self.minstrel).annotate(
rank=SearchRank(SearchVector('dialogue'), SearchQuery('brave sir robin')),
).filter(rank__gt=0.3)
self.assertSequenceEqual(searched, [self.verse0])
def test_cover_density_ranking(self):
not_dense_verse = Line.objects.create(
scene=self.robin,
character=self.minstrel,
dialogue=(
'Bravely taking to his feet, he beat a very brave retreat. '
'A brave retreat brave Sir Robin.'
)
)
searched = Line.objects.filter(character=self.minstrel).annotate(
rank=SearchRank(
SearchVector('dialogue'),
SearchQuery('brave robin'),
cover_density=True,
),
).order_by('rank', '-pk')
self.assertSequenceEqual(
searched,
[self.verse2, not_dense_verse, self.verse1, self.verse0],
)
def test_ranking_with_normalization(self):
short_verse = Line.objects.create(
scene=self.robin,
character=self.minstrel,
dialogue='A brave retreat brave Sir Robin.',
)
searched = Line.objects.filter(character=self.minstrel).annotate(
rank=SearchRank(
SearchVector('dialogue'),
SearchQuery('brave sir robin'),
# Divide the rank by the document length.
normalization=2,
),
).order_by('rank')
self.assertSequenceEqual(
searched,
[self.verse2, self.verse1, self.verse0, short_verse],
)
def test_ranking_with_masked_normalization(self):
short_verse = Line.objects.create(
scene=self.robin,
character=self.minstrel,
dialogue='A brave retreat brave Sir Robin.',
)
searched = Line.objects.filter(character=self.minstrel).annotate(
rank=SearchRank(
SearchVector('dialogue'),
SearchQuery('brave sir robin'),
# Divide the rank by the document length and by the number of
# unique words in document.
normalization=Value(2).bitor(Value(8)),
),
).order_by('rank')
self.assertSequenceEqual(
searched,
[self.verse2, self.verse1, self.verse0, short_verse],
)
class SearchVectorIndexTests(PostgreSQLTestCase):
def test_search_vector_index(self):
"""SearchVector generates IMMUTABLE SQL in order to be indexable."""
# This test should be moved to test_indexes and use a functional
# index instead once support lands (see #26167).
query = Line.objects.all().query
resolved = SearchVector('id', 'dialogue', config='english').resolve_expression(query)
compiler = query.get_compiler(connection.alias)
sql, params = resolved.as_sql(compiler, connection)
# Indexed function must be IMMUTABLE.
with connection.cursor() as cursor:
cursor.execute(
'CREATE INDEX search_vector_index ON %s USING GIN (%s)' % (Line._meta.db_table, sql),
params,
)
class SearchQueryTests(PostgreSQLSimpleTestCase):
def test_str(self):
tests = (
(~SearchQuery('a'), '~SearchQuery(Value(a))'),
(
(SearchQuery('a') | SearchQuery('b')) & (SearchQuery('c') | SearchQuery('d')),
'((SearchQuery(Value(a)) || SearchQuery(Value(b))) && '
'(SearchQuery(Value(c)) || SearchQuery(Value(d))))',
),
(
SearchQuery('a') & (SearchQuery('b') | SearchQuery('c')),
'(SearchQuery(Value(a)) && (SearchQuery(Value(b)) || '
'SearchQuery(Value(c))))',
),
(
(SearchQuery('a') | SearchQuery('b')) & SearchQuery('c'),
'((SearchQuery(Value(a)) || SearchQuery(Value(b))) && '
'SearchQuery(Value(c)))'
),
(
SearchQuery('a') & (SearchQuery('b') & (SearchQuery('c') | SearchQuery('d'))),
'(SearchQuery(Value(a)) && (SearchQuery(Value(b)) && '
'(SearchQuery(Value(c)) || SearchQuery(Value(d)))))',
),
)
for query, expected_str in tests:
with self.subTest(query=query):
self.assertEqual(str(query), expected_str)
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class SearchHeadlineTests(GrailTestData, PostgreSQLTestCase):
def test_headline(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
F('dialogue'),
SearchQuery('brave sir robin'),
config=SearchConfig('english'),
),
).get(pk=self.verse0.pk)
self.assertEqual(
searched.headline,
'Robin. He was not at all afraid to be killed in nasty '
'ways. Brave, brave, brave, brave '
'Sir Robin',
)
def test_headline_untyped_args(self):
searched = Line.objects.annotate(
headline=SearchHeadline('dialogue', 'killed', config='english'),
).get(pk=self.verse0.pk)
self.assertEqual(
searched.headline,
'Robin. He was not at all afraid to be killed in nasty '
'ways. Brave, brave, brave, brave Sir Robin',
)
def test_headline_with_config(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
'dialogue',
SearchQuery('cadeaux', config='french'),
config='french',
),
).get(pk=self.french.pk)
self.assertEqual(
searched.headline,
'Oh. Un beau cadeau. Oui oui.',
)
def test_headline_with_config_from_field(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
'dialogue',
SearchQuery('cadeaux', config=F('dialogue_config')),
config=F('dialogue_config'),
),
).get(pk=self.french.pk)
self.assertEqual(
searched.headline,
'Oh. Un beau cadeau. Oui oui.',
)
def test_headline_separator_options(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
'dialogue',
'brave sir robin',
start_sel='',
stop_sel='',
),
).get(pk=self.verse0.pk)
self.assertEqual(
searched.headline,
'Robin. He was not at all afraid to be killed in '
'nasty ways. Brave, brave, brave'
', brave Sir Robin',
)
def test_headline_highlight_all_option(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
'dialogue',
SearchQuery('brave sir robin', config='english'),
highlight_all=True,
),
).get(pk=self.verse0.pk)
self.assertIn(
'Bravely bold Sir Robin, rode forth from '
'Camelot. He was not afraid to die, o ',
searched.headline,
)
def test_headline_short_word_option(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
'dialogue',
SearchQuery('Camelot', config='english'),
short_word=5,
min_words=8,
),
).get(pk=self.verse0.pk)
self.assertEqual(searched.headline, (
'Camelot. He was not afraid to die, o Brave Sir Robin. He '
'was not at all afraid'
))
def test_headline_fragments_words_options(self):
searched = Line.objects.annotate(
headline=SearchHeadline(
'dialogue',
SearchQuery('brave sir robin', config='english'),
fragment_delimiter='...
',
max_fragments=4,
max_words=3,
min_words=1,
),
).get(pk=self.verse0.pk)
self.assertEqual(
searched.headline,
'Sir Robin, rode...
'
'Brave Sir Robin...
'
'Brave, brave, brave...
'
'brave Sir Robin',
)