Fixed #33788 -- Added TrigramStrictWordSimilarity() and TrigramStrictWordDistance() on PostgreSQL.
This commit is contained in:
parent
3ef37a5245
commit
8d160f154f
1
AUTHORS
1
AUTHORS
|
@ -636,6 +636,7 @@ answer newbie questions, and generally made Django that much better:
|
||||||
Mathieu Agopian <mathieu.agopian@gmail.com>
|
Mathieu Agopian <mathieu.agopian@gmail.com>
|
||||||
Matías Bordese
|
Matías Bordese
|
||||||
Matt Boersma <matt@sprout.org>
|
Matt Boersma <matt@sprout.org>
|
||||||
|
Matt Brewer <matt.brewer693@gmail.com>
|
||||||
Matt Croydon <http://www.postneo.com/>
|
Matt Croydon <http://www.postneo.com/>
|
||||||
Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
|
Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
|
||||||
Matt Dennenbaum
|
Matt Dennenbaum
|
||||||
|
|
|
@ -11,7 +11,13 @@ from django.db.models.indexes import IndexExpression
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
from .indexes import OpClass
|
from .indexes import OpClass
|
||||||
from .lookups import SearchLookup, TrigramSimilar, TrigramWordSimilar, Unaccent
|
from .lookups import (
|
||||||
|
SearchLookup,
|
||||||
|
TrigramSimilar,
|
||||||
|
TrigramStrictWordSimilar,
|
||||||
|
TrigramWordSimilar,
|
||||||
|
Unaccent,
|
||||||
|
)
|
||||||
from .serializers import RangeSerializer
|
from .serializers import RangeSerializer
|
||||||
from .signals import register_type_handlers
|
from .signals import register_type_handlers
|
||||||
|
|
||||||
|
@ -37,6 +43,8 @@ def uninstall_if_needed(setting, value, enter, **kwargs):
|
||||||
TextField._unregister_lookup(TrigramSimilar)
|
TextField._unregister_lookup(TrigramSimilar)
|
||||||
CharField._unregister_lookup(TrigramWordSimilar)
|
CharField._unregister_lookup(TrigramWordSimilar)
|
||||||
TextField._unregister_lookup(TrigramWordSimilar)
|
TextField._unregister_lookup(TrigramWordSimilar)
|
||||||
|
CharField._unregister_lookup(TrigramStrictWordSimilar)
|
||||||
|
TextField._unregister_lookup(TrigramStrictWordSimilar)
|
||||||
# Disconnect this receiver until the next time this app is installed
|
# Disconnect this receiver until the next time this app is installed
|
||||||
# and ready() connects it again to prevent unnecessary processing on
|
# and ready() connects it again to prevent unnecessary processing on
|
||||||
# each setting change.
|
# each setting change.
|
||||||
|
@ -73,5 +81,7 @@ class PostgresConfig(AppConfig):
|
||||||
TextField.register_lookup(TrigramSimilar)
|
TextField.register_lookup(TrigramSimilar)
|
||||||
CharField.register_lookup(TrigramWordSimilar)
|
CharField.register_lookup(TrigramWordSimilar)
|
||||||
TextField.register_lookup(TrigramWordSimilar)
|
TextField.register_lookup(TrigramWordSimilar)
|
||||||
|
CharField.register_lookup(TrigramStrictWordSimilar)
|
||||||
|
TextField.register_lookup(TrigramStrictWordSimilar)
|
||||||
MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
|
MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
|
||||||
IndexExpression.register_wrappers(OrderBy, OpClass, Collate)
|
IndexExpression.register_wrappers(OrderBy, OpClass, Collate)
|
||||||
|
|
|
@ -63,3 +63,8 @@ class TrigramSimilar(PostgresOperatorLookup):
|
||||||
class TrigramWordSimilar(PostgresOperatorLookup):
|
class TrigramWordSimilar(PostgresOperatorLookup):
|
||||||
lookup_name = "trigram_word_similar"
|
lookup_name = "trigram_word_similar"
|
||||||
postgres_operator = "%%>"
|
postgres_operator = "%%>"
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramStrictWordSimilar(PostgresOperatorLookup):
|
||||||
|
lookup_name = "trigram_strict_word_similar"
|
||||||
|
postgres_operator = "%%>>"
|
||||||
|
|
|
@ -366,5 +366,14 @@ class TrigramWordDistance(TrigramWordBase):
|
||||||
arg_joiner = " <<-> "
|
arg_joiner = " <<-> "
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramStrictWordDistance(TrigramWordBase):
|
||||||
|
function = ""
|
||||||
|
arg_joiner = " <<<-> "
|
||||||
|
|
||||||
|
|
||||||
class TrigramWordSimilarity(TrigramWordBase):
|
class TrigramWordSimilarity(TrigramWordBase):
|
||||||
function = "WORD_SIMILARITY"
|
function = "WORD_SIMILARITY"
|
||||||
|
|
||||||
|
|
||||||
|
class TrigramStrictWordSimilarity(TrigramWordBase):
|
||||||
|
function = "STRICT_WORD_SIMILARITY"
|
||||||
|
|
|
@ -7,6 +7,9 @@ Trigram similarity
|
||||||
|
|
||||||
.. fieldlookup:: trigram_similar
|
.. fieldlookup:: trigram_similar
|
||||||
|
|
||||||
|
``trigram_similar``
|
||||||
|
-------------------
|
||||||
|
|
||||||
The ``trigram_similar`` lookup allows you to perform trigram lookups,
|
The ``trigram_similar`` lookup allows you to perform trigram lookups,
|
||||||
measuring the number of trigrams (three consecutive characters) shared, using a
|
measuring the number of trigrams (three consecutive characters) shared, using a
|
||||||
dedicated PostgreSQL extension. A trigram lookup is given an expression and
|
dedicated PostgreSQL extension. A trigram lookup is given an expression and
|
||||||
|
@ -27,6 +30,9 @@ The ``trigram_similar`` lookup can be used on
|
||||||
|
|
||||||
.. fieldlookup:: trigram_word_similar
|
.. fieldlookup:: trigram_word_similar
|
||||||
|
|
||||||
|
``trigram_word_similar``
|
||||||
|
------------------------
|
||||||
|
|
||||||
The ``trigram_word_similar`` lookup allows you to perform trigram word
|
The ``trigram_word_similar`` lookup allows you to perform trigram word
|
||||||
similarity lookups using a dedicated PostgreSQL extension. It can be
|
similarity lookups using a dedicated PostgreSQL extension. It can be
|
||||||
approximately understood as measuring the greatest number of trigrams shared
|
approximately understood as measuring the greatest number of trigrams shared
|
||||||
|
@ -46,6 +52,25 @@ The ``trigram_word_similar`` lookup can be used on
|
||||||
>>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
|
>>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
|
||||||
['<Sentence: Gumby rides on the path of Middlesbrough>']
|
['<Sentence: Gumby rides on the path of Middlesbrough>']
|
||||||
|
|
||||||
|
.. fieldlookup:: trigram_strict_word_similar
|
||||||
|
|
||||||
|
``trigram_strict_word_similar``
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
.. versionadded:: 4.2
|
||||||
|
|
||||||
|
Similar to :lookup:`trigram_word_similar`, except that it forces extent
|
||||||
|
boundaries to match word boundaries.
|
||||||
|
|
||||||
|
To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
|
||||||
|
and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
|
||||||
|
extension using the
|
||||||
|
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
|
||||||
|
operation.
|
||||||
|
|
||||||
|
The ``trigram_strict_word_similar`` lookup can be used on
|
||||||
|
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`.
|
||||||
|
|
||||||
.. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html
|
.. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html
|
||||||
|
|
||||||
``Unaccent``
|
``Unaccent``
|
||||||
|
|
|
@ -286,9 +286,9 @@ Trigram similarity
|
||||||
==================
|
==================
|
||||||
|
|
||||||
Another approach to searching is trigram similarity. A trigram is a group of
|
Another approach to searching is trigram similarity. A trigram is a group of
|
||||||
three consecutive characters. In addition to the :lookup:`trigram_similar` and
|
three consecutive characters. In addition to the :lookup:`trigram_similar`,
|
||||||
:lookup:`trigram_word_similar` lookups, you can use a couple of other
|
:lookup:`trigram_word_similar`, and :lookup:`trigram_strict_word_similar`
|
||||||
expressions.
|
lookups, you can use a couple of other expressions.
|
||||||
|
|
||||||
To use them, you need to activate the `pg_trgm extension
|
To use them, you need to activate the `pg_trgm extension
|
||||||
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
|
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
|
||||||
|
@ -334,6 +334,18 @@ Usage example::
|
||||||
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
... ).filter(similarity__gt=0.3).order_by('-similarity')
|
||||||
[<Author: Katy Stevens>]
|
[<Author: Katy Stevens>]
|
||||||
|
|
||||||
|
``TrigramStrictWordSimilarity``
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
.. class:: TrigramStrictWordSimilarity(string, expression, **extra)
|
||||||
|
|
||||||
|
.. versionadded:: 4.2
|
||||||
|
|
||||||
|
Accepts a string or expression, and a field name or expression. Returns the
|
||||||
|
trigram strict word similarity between the two arguments. Similar to
|
||||||
|
:class:`TrigramWordSimilarity() <TrigramWordSimilarity>`, except that it forces
|
||||||
|
extent boundaries to match word boundaries.
|
||||||
|
|
||||||
``TrigramDistance``
|
``TrigramDistance``
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
|
@ -371,3 +383,13 @@ Usage example::
|
||||||
... distance=TrigramWordDistance(test, 'name'),
|
... distance=TrigramWordDistance(test, 'name'),
|
||||||
... ).filter(distance__lte=0.7).order_by('distance')
|
... ).filter(distance__lte=0.7).order_by('distance')
|
||||||
[<Author: Katy Stevens>]
|
[<Author: Katy Stevens>]
|
||||||
|
|
||||||
|
``TrigramStrictWordDistance``
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
.. class:: TrigramStrictWordDistance(string, expression, **extra)
|
||||||
|
|
||||||
|
.. versionadded:: 4.2
|
||||||
|
|
||||||
|
Accepts a string or expression, and a field name or expression. Returns the
|
||||||
|
trigram strict word distance between the two arguments.
|
||||||
|
|
|
@ -65,7 +65,12 @@ Minor features
|
||||||
:mod:`django.contrib.postgres`
|
:mod:`django.contrib.postgres`
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
* ...
|
* The new :lookup:`trigram_strict_word_similar` lookup, and the
|
||||||
|
:class:`TrigramStrictWordSimilarity()
|
||||||
|
<django.contrib.postgres.search.TrigramStrictWordSimilarity>` and
|
||||||
|
:class:`TrigramStrictWordDistance()
|
||||||
|
<django.contrib.postgres.search.TrigramStrictWordDistance>` expressions allow
|
||||||
|
using trigram strict word similarity.
|
||||||
|
|
||||||
:mod:`django.contrib.redirects`
|
:mod:`django.contrib.redirects`
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
|
@ -7,6 +7,8 @@ try:
|
||||||
from django.contrib.postgres.search import (
|
from django.contrib.postgres.search import (
|
||||||
TrigramDistance,
|
TrigramDistance,
|
||||||
TrigramSimilarity,
|
TrigramSimilarity,
|
||||||
|
TrigramStrictWordDistance,
|
||||||
|
TrigramStrictWordSimilarity,
|
||||||
TrigramWordDistance,
|
TrigramWordDistance,
|
||||||
TrigramWordSimilarity,
|
TrigramWordSimilarity,
|
||||||
)
|
)
|
||||||
|
@ -43,6 +45,25 @@ class TrigramTest(PostgreSQLTestCase):
|
||||||
self.Model.objects.filter(field__trigram_word_similar="Middlesborough"),
|
self.Model.objects.filter(field__trigram_word_similar="Middlesborough"),
|
||||||
[obj],
|
[obj],
|
||||||
)
|
)
|
||||||
|
self.assertSequenceEqual(
|
||||||
|
self.Model.objects.filter(field__trigram_word_similar="Middle"),
|
||||||
|
[obj],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_trigram_strict_word_search_matched(self):
|
||||||
|
obj = self.Model.objects.create(
|
||||||
|
field="Gumby rides on the path of Middlesbrough",
|
||||||
|
)
|
||||||
|
self.assertSequenceEqual(
|
||||||
|
self.Model.objects.filter(
|
||||||
|
field__trigram_strict_word_similar="Middlesborough"
|
||||||
|
),
|
||||||
|
[obj],
|
||||||
|
)
|
||||||
|
self.assertSequenceEqual(
|
||||||
|
self.Model.objects.filter(field__trigram_strict_word_similar="Middle"),
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_trigram_similarity(self):
|
def test_trigram_similarity(self):
|
||||||
search = "Bat sat on cat."
|
search = "Bat sat on cat."
|
||||||
|
@ -75,6 +96,19 @@ class TrigramTest(PostgreSQLTestCase):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_trigram_strict_word_similarity(self):
|
||||||
|
search = "matt"
|
||||||
|
self.assertSequenceEqual(
|
||||||
|
self.Model.objects.filter(field__trigram_word_similar=search)
|
||||||
|
.annotate(word_similarity=TrigramStrictWordSimilarity(search, "field"))
|
||||||
|
.values("field", "word_similarity")
|
||||||
|
.order_by("-word_similarity"),
|
||||||
|
[
|
||||||
|
{"field": "Cat sat on mat.", "word_similarity": 0.5},
|
||||||
|
{"field": "Matthew", "word_similarity": 0.44444445},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
def test_trigram_similarity_alternate(self):
|
def test_trigram_similarity_alternate(self):
|
||||||
# Round result of distance because PostgreSQL uses greater precision.
|
# Round result of distance because PostgreSQL uses greater precision.
|
||||||
self.assertQuerysetEqual(
|
self.assertQuerysetEqual(
|
||||||
|
@ -104,6 +138,20 @@ class TrigramTest(PostgreSQLTestCase):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_trigram_strict_word_distance(self):
|
||||||
|
self.assertSequenceEqual(
|
||||||
|
self.Model.objects.annotate(
|
||||||
|
word_distance=TrigramStrictWordDistance("matt", "field"),
|
||||||
|
)
|
||||||
|
.filter(word_distance__lte=0.7)
|
||||||
|
.values("field", "word_distance")
|
||||||
|
.order_by("word_distance"),
|
||||||
|
[
|
||||||
|
{"field": "Cat sat on mat.", "word_distance": 0.5},
|
||||||
|
{"field": "Matthew", "word_distance": 0.5555556},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TrigramTextFieldTest(TrigramTest):
|
class TrigramTextFieldTest(TrigramTest):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue