diff --git a/AUTHORS b/AUTHORS index 8d344d4241..e9e2a597b3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -642,6 +642,7 @@ answer newbie questions, and generally made Django that much better: Terry Huang thebjorn Thejaswi Puthraya + Thomas Chaumeny Thomas Güttler Thomas Kerpe Thomas Sorrel diff --git a/django/contrib/postgres/apps.py b/django/contrib/postgres/apps.py index f6fa263a84..c172281cce 100644 --- a/django/contrib/postgres/apps.py +++ b/django/contrib/postgres/apps.py @@ -1,7 +1,9 @@ from django.apps import AppConfig from django.db.backends.signals import connection_created +from django.db.models import CharField, TextField from django.utils.translation import ugettext_lazy as _ +from .lookups import Unaccent from .signals import register_hstore_handler @@ -11,3 +13,5 @@ class PostgresConfig(AppConfig): def ready(self): connection_created.connect(register_hstore_handler) + CharField.register_lookup(Unaccent) + TextField.register_lookup(Unaccent) diff --git a/django/contrib/postgres/lookups.py b/django/contrib/postgres/lookups.py new file mode 100644 index 0000000000..4cf51dbd9c --- /dev/null +++ b/django/contrib/postgres/lookups.py @@ -0,0 +1,10 @@ +from django.db.models import Transform + + +class Unaccent(Transform): + bilateral = True + lookup_name = 'unaccent' + + def as_postgresql(self, compiler, connection): + lhs, params = compiler.compile(self.lhs) + return "UNACCENT(%s)" % lhs, params diff --git a/django/contrib/postgres/operations.py b/django/contrib/postgres/operations.py index e39d63ffa0..5b3bd2a3cc 100644 --- a/django/contrib/postgres/operations.py +++ b/django/contrib/postgres/operations.py @@ -32,3 +32,9 @@ class HStoreExtension(CreateExtension): # extension is installed, a subsequent data migration would use the # same connection register_hstore_handler(schema_editor.connection) + + +class UnaccentExtension(CreateExtension): + + def __init__(self): + self.name = 'unaccent' diff --git a/docs/ref/contrib/postgres/index.txt 
b/docs/ref/contrib/postgres/index.txt index b23db125f2..4c04d48d32 100644 --- a/docs/ref/contrib/postgres/index.txt +++ b/docs/ref/contrib/postgres/index.txt @@ -26,5 +26,6 @@ a number of PostgreSQL specific data types. fields forms + lookups operations validators diff --git a/docs/ref/contrib/postgres/lookups.txt b/docs/ref/contrib/postgres/lookups.txt new file mode 100644 index 0000000000..83477a61fe --- /dev/null +++ b/docs/ref/contrib/postgres/lookups.txt @@ -0,0 +1,36 @@ +=========================== +PostgreSQL specific lookups +=========================== + +Unaccent +======== + +.. fieldlookup:: unaccent + +The ``unaccent`` lookup allows you to perform accent-insensitive lookups using +a dedicated PostgreSQL extension. + +This lookup is implemented using :class:`~django.db.models.Transform`, so it +can be chained with other lookup functions. To use it, you need to add +``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS` and activate +the `unaccent extension on PostgreSQL`_. The +:class:`~django.contrib.postgres.operations.UnaccentExtension` migration +operation is available if you want to perform this activation using migrations. + +.. _unaccent extension on PostgreSQL: http://www.postgresql.org/docs/current/interactive/unaccent.html + +The ``unaccent`` lookup can be used on +:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`:: + + >>> City.objects.filter(name__unaccent="México") + ['<City: Mexico>'] + + >>> User.objects.filter(first_name__unaccent__startswith="Jerem") + ['<User: Jeremy>', '<User: Jérémy>', '<User: Jérémie>', '<User: Jeremie>'] + +.. warning:: + + ``unaccent`` lookups should perform fine in most use cases. However, queries + using this filter will generally perform full table scans, which can be slow + on large tables. In those cases, using dedicated full text indexing tools + might be appropriate. 
diff --git a/docs/ref/contrib/postgres/operations.txt b/docs/ref/contrib/postgres/operations.txt index 4b9b7f5c44..79c2021c39 100644 --- a/docs/ref/contrib/postgres/operations.txt +++ b/docs/ref/contrib/postgres/operations.txt @@ -25,3 +25,11 @@ HStoreExtension A subclass of :class:`~django.contrib.postgres.operations.CreateExtension` which will install the ``hstore`` extension and also immediately set up the connection to interpret hstore data. + +UnaccentExtension +----------------- + +.. class:: UnaccentExtension() + + A subclass of :class:`~django.contrib.postgres.operations.CreateExtension` + which will install the ``unaccent`` extension. diff --git a/docs/releases/1.8.txt b/docs/releases/1.8.txt index 8f4053168d..2a1ec35873 100644 --- a/docs/releases/1.8.txt +++ b/docs/releases/1.8.txt @@ -45,9 +45,10 @@ New PostgreSQL specific functionality ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Django now has a module with extensions for PostgreSQL specific features, such -as :class:`~django.contrib.postgres.fields.ArrayField` and -:class:`~django.contrib.postgres.fields.HStoreField`. A full breakdown of the -features is available :doc:`in the documentation </ref/contrib/postgres/index>`. +as :class:`~django.contrib.postgres.fields.ArrayField`, +:class:`~django.contrib.postgres.fields.HStoreField`, and the :lookup:`unaccent` +lookup. A full breakdown of the features is available :doc:`in the +documentation </ref/contrib/postgres/index>`. 
New data types ~~~~~~~~~~~~~~ diff --git a/tests/postgres_tests/migrations/0001_setup_extensions.py b/tests/postgres_tests/migrations/0001_setup_extensions.py index d4153b6556..0c43974954 100644 --- a/tests/postgres_tests/migrations/0001_setup_extensions.py +++ b/tests/postgres_tests/migrations/0001_setup_extensions.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from django.contrib.postgres.operations import HStoreExtension +from django.contrib.postgres.operations import HStoreExtension, UnaccentExtension from django.db import migrations @@ -12,4 +12,5 @@ class Migration(migrations.Migration): operations = [ HStoreExtension(), + UnaccentExtension(), ] diff --git a/tests/postgres_tests/migrations/0002_create_test_models.py b/tests/postgres_tests/migrations/0002_create_test_models.py index 073e62b1d3..334bcaf0a6 100644 --- a/tests/postgres_tests/migrations/0002_create_test_models.py +++ b/tests/postgres_tests/migrations/0002_create_test_models.py @@ -73,4 +73,22 @@ class Migration(migrations.Migration): }, bases=(models.Model,), ), + migrations.CreateModel( + name='CharFieldModel', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('field', models.CharField(max_length=16)), + ], + options=None, + bases=None, + ), + migrations.CreateModel( + name='TextFieldModel', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('field', models.TextField()), + ], + options=None, + bases=None, + ), ] diff --git a/tests/postgres_tests/models.py b/tests/postgres_tests/models.py index cadab474af..a119b12f04 100644 --- a/tests/postgres_tests/models.py +++ b/tests/postgres_tests/models.py @@ -24,3 +24,11 @@ class NestedIntegerArrayModel(models.Model): class HStoreModel(models.Model): field = HStoreField(blank=True, null=True) + + +class CharFieldModel(models.Model): + field = models.CharField(max_length=16) + + +class 
TextFieldModel(models.Model): + field = models.TextField() diff --git a/tests/postgres_tests/test_unaccent.py b/tests/postgres_tests/test_unaccent.py new file mode 100644 index 0000000000..47ccbda519 --- /dev/null +++ b/tests/postgres_tests/test_unaccent.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import unittest + +from django.db import connection +from django.test import TestCase, modify_settings + +from .models import CharFieldModel, TextFieldModel + + +@unittest.skipUnless(connection.vendor == 'postgresql', 'PostgreSQL required') +@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) +class UnaccentTest(TestCase): + + Model = CharFieldModel + + def setUp(self): + self.Model.objects.bulk_create([ + self.Model(field="àéÖ"), + self.Model(field="aeO"), + self.Model(field="aeo"), + ]) + + def test_unaccent(self): + self.assertQuerysetEqual( + self.Model.objects.filter(field__unaccent="aeO"), + ["àéÖ", "aeO"], + transform=lambda instance: instance.field, + ordered=False + ) + + def test_unaccent_chained(self): + """ + Check that unaccent can be used chained with a lookup (which should be + the case since unaccent implements the Transform API) + """ + self.assertQuerysetEqual( + self.Model.objects.filter(field__unaccent__iexact="aeO"), + ["àéÖ", "aeO", "aeo"], + transform=lambda instance: instance.field, + ordered=False + ) + self.assertQuerysetEqual( + self.Model.objects.filter(field__unaccent__endswith="éÖ"), + ["àéÖ", "aeO"], + transform=lambda instance: instance.field, + ordered=False + ) + + def test_unaccent_accentuated_needle(self): + self.assertQuerysetEqual( + self.Model.objects.filter(field__unaccent="aéÖ"), + ["àéÖ", "aeO"], + transform=lambda instance: instance.field, + ordered=False + ) + + +class UnaccentTextFieldTest(UnaccentTest): + """ + TextField should have the exact same behavior as CharField + regarding unaccent lookups. + """ + Model = TextFieldModel