From 24512a74befc6282a1d299cab452ee9463cc2baa Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss Date: Wed, 27 Jun 2007 18:58:10 +0000 Subject: [PATCH] Fixed #1465: added support for regex lookups. Thanks, Tom Tobin. git-svn-id: http://code.djangoproject.com/svn/django/trunk@5555 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/backends/mysql/base.py | 2 + django/db/backends/mysql_old/base.py | 2 + django/db/backends/postgresql/base.py | 2 + .../db/backends/postgresql_psycopg2/base.py | 2 + django/db/backends/sqlite3/base.py | 12 ++- django/db/models/fields/__init__.py | 2 +- django/db/models/query.py | 11 +++ docs/db-api.txt | 42 +++++++++ tests/modeltests/lookup/models.py | 94 +++++++++++++++++++ tests/regressiontests/templates/tests.py | 3 + 10 files changed, 170 insertions(+), 2 deletions(-) diff --git a/django/db/backends/mysql/base.py b/django/db/backends/mysql/base.py index b0ca7994b7..4c64134118 100644 --- a/django/db/backends/mysql/base.py +++ b/django/db/backends/mysql/base.py @@ -247,6 +247,8 @@ OPERATOR_MAPPING = { 'iexact': 'LIKE %s', 'contains': 'LIKE BINARY %s', 'icontains': 'LIKE %s', + 'regex': 'REGEXP BINARY %s', + 'iregex': 'REGEXP %s', 'gt': '> %s', 'gte': '>= %s', 'lt': '< %s', diff --git a/django/db/backends/mysql_old/base.py b/django/db/backends/mysql_old/base.py index 33960827ee..ca9d2c8b50 100644 --- a/django/db/backends/mysql_old/base.py +++ b/django/db/backends/mysql_old/base.py @@ -248,6 +248,8 @@ OPERATOR_MAPPING = { 'iexact': 'LIKE %s', 'contains': 'LIKE BINARY %s', 'icontains': 'LIKE %s', + 'regex': 'REGEXP BINARY %s', + 'iregex': 'REGEXP %s', 'gt': '> %s', 'gte': '>= %s', 'lt': '< %s', diff --git a/django/db/backends/postgresql/base.py b/django/db/backends/postgresql/base.py index 351b553506..611852e0dc 100644 --- a/django/db/backends/postgresql/base.py +++ b/django/db/backends/postgresql/base.py @@ -280,6 +280,8 @@ OPERATOR_MAPPING = { 'iexact': 'ILIKE %s', 'contains': 'LIKE %s', 'icontains': 'ILIKE %s', + 'regex': '~ %s', + 
'iregex': '~* %s', 'gt': '> %s', 'gte': '>= %s', 'lt': '< %s', diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py index 36f4d97a22..17d36a8613 100644 --- a/django/db/backends/postgresql_psycopg2/base.py +++ b/django/db/backends/postgresql_psycopg2/base.py @@ -225,6 +225,8 @@ OPERATOR_MAPPING = { 'iexact': 'ILIKE %s', 'contains': 'LIKE %s', 'icontains': 'ILIKE %s', + 'regex': '~ %s', + 'iregex': '~* %s', 'gt': '> %s', 'gte': '>= %s', 'lt': '< %s', diff --git a/django/db/backends/sqlite3/base.py b/django/db/backends/sqlite3/base.py index b753879d7a..c4ecf5f578 100644 --- a/django/db/backends/sqlite3/base.py +++ b/django/db/backends/sqlite3/base.py @@ -64,9 +64,10 @@ class DatabaseWrapper(local): } kwargs.update(self.options) self.connection = Database.connect(**kwargs) - # Register extract and date_trunc functions. + # Register extract, date_trunc, and regexp functions. self.connection.create_function("django_extract", 2, _sqlite_extract) self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc) + self.connection.create_function("regexp", 2, _sqlite_regexp) cursor = self.connection.cursor(factory=SQLiteCursorWrapper) cursor.row_factory = utf8rowFactory if settings.DEBUG: @@ -214,6 +215,13 @@ def _sqlite_date_trunc(lookup_type, dt): elif lookup_type == 'day': return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day) +def _sqlite_regexp(re_pattern, re_string): + import re + try: + return bool(re.search(re_pattern, re_string)) + except: + return False + # SQLite requires LIKE statements to include an ESCAPE clause if the value # being escaped has a percent or underscore in it. # See http://www.sqlite.org/lang_expr.html for an explanation. 
@@ -222,6 +230,8 @@ OPERATOR_MAPPING = { 'iexact': "LIKE %s ESCAPE '\\'", 'contains': "LIKE %s ESCAPE '\\'", 'icontains': "LIKE %s ESCAPE '\\'", + 'regex': 'REGEXP %s', + 'iregex': "REGEXP '(?i)' || %s", 'gt': '> %s', 'gte': '>= %s', 'lt': '< %s', diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index 3af8a41adc..14154fd6f7 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -174,7 +174,7 @@ class Field(object): def get_db_prep_lookup(self, lookup_type, value): "Returns field's value prepared for database lookup." - if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'): + if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'): return [value] elif lookup_type in ('range', 'in'): return value diff --git a/django/db/models/query.py b/django/db/models/query.py index 24d701b10d..92bc9d78ed 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -1,3 +1,4 @@ +from django.conf import settings from django.db import backend, connection, transaction from django.db.models.fields import DateField, FieldDoesNotExist from django.db.models import signals, loading @@ -22,6 +23,7 @@ QUERY_TERMS = ( 'gt', 'gte', 'lt', 'lte', 'in', 'startswith', 'istartswith', 'endswith', 'iendswith', 'range', 'year', 'month', 'day', 'isnull', 'search', + 'regex', 'iregex', ) # Size of each "chunk" for get_iterator calls. 
@@ -797,6 +799,15 @@ def get_where_clause(lookup_type, table_prefix, field_name, value): return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or '')) elif lookup_type == 'search': return backend.get_fulltext_search_sql(table_prefix + field_name) + elif lookup_type in ('regex', 'iregex'): + if settings.DATABASE_ENGINE == 'oracle': + if lookup_type == 'regex': + match_option = 'c' + else: + match_option = 'i' + return "REGEXP_LIKE(%s%s, %s, '%s')" % (table_prefix, field_name, cast_sql, match_option) + else: + raise NotImplementedError raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type) def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0): diff --git a/docs/db-api.txt b/docs/db-api.txt index e7b8183f6c..9284c9994c 100644 --- a/docs/db-api.txt +++ b/docs/db-api.txt @@ -1173,6 +1173,48 @@ like ``contains`` but is significantly faster due to full-text indexing. Note this is only available in MySQL and requires direct manipulation of the database to add the full-text index. +regex +~~~~~ + +Case-sensitive regular expression match. + +The regular expression syntax is that of the database backend in use; for the +``sqlite`` backend, the syntax is that of Python's ``re`` module. + +Example:: + + Entry.objects.get(title__regex=r'^(An?|The) +') + +SQL equivalents:: + + SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL + + SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL + + SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite + +Using raw strings for passing in the regular expression syntax is recommended. + +Regular expression matching is not supported on the ``ado_mssql`` backend, +which will raise a ``NotImplementedError``. + +iregex +~~~~~~ + +Case-insensitive regular expression match. + +Example:: + + Entry.objects.get(title__iregex=r'^(an?|the) +') + +SQL equivalents:: + + SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL + + SELECT ... 
WHERE title ~* '^(an?|the) +'; -- PostgreSQL + + SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite + Default lookups are exact ------------------------- diff --git a/tests/modeltests/lookup/models.py b/tests/modeltests/lookup/models.py index 6af70f8351..60ed6f7685 100644 --- a/tests/modeltests/lookup/models.py +++ b/tests/modeltests/lookup/models.py @@ -251,4 +251,98 @@ Traceback (most recent call last): ... TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date +# Create some articles with a bit more interesting headlines for testing field lookups: +>>> now = datetime.now() +>>> for a in Article.objects.all(): +... a.delete() +>>> a1 = Article(pub_date=now, headline='f') +>>> a1.save() +>>> a2 = Article(pub_date=now, headline='fo') +>>> a2.save() +>>> a3 = Article(pub_date=now, headline='foo') +>>> a3.save() +>>> a4 = Article(pub_date=now, headline='fooo') +>>> a4.save() +>>> a5 = Article(pub_date=now, headline='Foo') +>>> a5.save() + +# zero-or-more +>>> Article.objects.filter(headline__regex=r'fo*') +[, , , ] +>>> Article.objects.filter(headline__iregex=r'fo*') +[, , , , ] + +# one-or-more +>>> Article.objects.filter(headline__regex=r'fo+') +[, , ] + +# wildcard +>>> Article.objects.filter(headline__regex=r'fooo?') +[, ] + +# and some more: +>>> a6 = Article(pub_date=now, headline='bar') +>>> a6.save() +>>> a7 = Article(pub_date=now, headline='Bar') +>>> a7.save() +>>> a8 = Article(pub_date=now, headline='baz') +>>> a8.save() +>>> a9 = Article(pub_date=now, headline='baZ') +>>> a9.save() + +# leading anchor +>>> Article.objects.filter(headline__regex=r'^b') +[, , ] +>>> Article.objects.filter(headline__iregex=r'^b') +[, , , ] + +# trailing anchor +>>> Article.objects.filter(headline__regex=r'z$') +[] +>>> Article.objects.filter(headline__iregex=r'z$') +[, ] + +# character sets +>>> Article.objects.filter(headline__regex=r'ba[rz]') +[, ] +>>> Article.objects.filter(headline__regex=r'ba[RZ]') +[] +>>> 
Article.objects.filter(headline__iregex=r'ba[RZ]') +[, , , ] + +# and yet more: +>>> a10 = Article(pub_date=now, headline='foobar') +>>> a10.save() +>>> a11 = Article(pub_date=now, headline='foobaz') +>>> a11.save() +>>> a12 = Article(pub_date=now, headline='FooBarBaz') +>>> a12.save() +>>> a13 = Article(pub_date=now, headline='foobarbaz') +>>> a13.save() +>>> a14 = Article(pub_date=now, headline='zoocarfaz') +>>> a14.save() +>>> a15 = Article(pub_date=now, headline='barfoobaz') +>>> a15.save() +>>> a16 = Article(pub_date=now, headline='BAZBARFOO') +>>> a16.save() + +# alternation +>>> Article.objects.filter(headline__regex=r'foo(bar|baz)') +[, , , ] +>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)') +[, , , , ] +>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)') +[, , ] + +# greedy matching +>>> Article.objects.filter(headline__regex=r'f.*z') +[, , , ] +>>> Article.objects.filter(headline__iregex=r'f.*z') +[, , , , ] + +# grouping and backreferences +>>> Article.objects.filter(headline__regex=r'b(.).*b\1') +[, ] +>>> Article.objects.filter(headline__iregex=r'b(.).*b\1') +[, , , ] """} diff --git a/tests/regressiontests/templates/tests.py b/tests/regressiontests/templates/tests.py index 8c2389b28a..8801100bcc 100644 --- a/tests/regressiontests/templates/tests.py +++ b/tests/regressiontests/templates/tests.py @@ -219,6 +219,9 @@ class Templates(unittest.TestCase): # value will be converted to a bytestring. 'filter-syntax18': (r'{{ var }}', {'var': UnicodeInStrClass()}, '\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91'), + # Numbers as filter arguments should work + 'filter-syntax19': ('{{ var|truncatewords:1 }}', {"var": "hello world"}, "hello ..."), + ### COMMENT SYNTAX ######################################################## 'comment-syntax01': ("{# this is hidden #}hello", {}, "hello"), 'comment-syntax02': ("{# this is hidden #}hello{# foo #}", {}, "hello"),