From 24512a74befc6282a1d299cab452ee9463cc2baa Mon Sep 17 00:00:00 2001
From: Jacob Kaplan-Moss <jacob@jacobian.org>
Date: Wed, 27 Jun 2007 18:58:10 +0000
Subject: [PATCH] Fixed #1465: added support for regex lookups. Thanks, Tom
 Tobin.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@5555 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/db/backends/mysql/base.py              |  2 +
 django/db/backends/mysql_old/base.py          |  2 +
 django/db/backends/postgresql/base.py         |  2 +
 .../db/backends/postgresql_psycopg2/base.py   |  2 +
 django/db/backends/sqlite3/base.py            | 12 ++-
 django/db/models/fields/__init__.py           |  2 +-
 django/db/models/query.py                     | 11 +++
 docs/db-api.txt                               | 42 +++++++++
 tests/modeltests/lookup/models.py             | 94 +++++++++++++++++++
 tests/regressiontests/templates/tests.py      |  3 +
 10 files changed, 170 insertions(+), 2 deletions(-)

diff --git a/django/db/backends/mysql/base.py b/django/db/backends/mysql/base.py
index b0ca7994b7..4c64134118 100644
--- a/django/db/backends/mysql/base.py
+++ b/django/db/backends/mysql/base.py
@@ -247,6 +247,8 @@ OPERATOR_MAPPING = {
     'iexact': 'LIKE %s',
     'contains': 'LIKE BINARY %s',
     'icontains': 'LIKE %s',
+    'regex': 'REGEXP BINARY %s',
+    'iregex': 'REGEXP %s',
     'gt': '> %s',
     'gte': '>= %s',
     'lt': '< %s',
diff --git a/django/db/backends/mysql_old/base.py b/django/db/backends/mysql_old/base.py
index 33960827ee..ca9d2c8b50 100644
--- a/django/db/backends/mysql_old/base.py
+++ b/django/db/backends/mysql_old/base.py
@@ -248,6 +248,8 @@ OPERATOR_MAPPING = {
     'iexact': 'LIKE %s',
     'contains': 'LIKE BINARY %s',
     'icontains': 'LIKE %s',
+    'regex': 'REGEXP BINARY %s',
+    'iregex': 'REGEXP %s',
     'gt': '> %s',
     'gte': '>= %s',
     'lt': '< %s',
diff --git a/django/db/backends/postgresql/base.py b/django/db/backends/postgresql/base.py
index 351b553506..611852e0dc 100644
--- a/django/db/backends/postgresql/base.py
+++ b/django/db/backends/postgresql/base.py
@@ -280,6 +280,8 @@ OPERATOR_MAPPING = {
     'iexact': 'ILIKE %s',
     'contains': 'LIKE %s',
     'icontains': 'ILIKE %s',
+    'regex': '~ %s',
+    'iregex': '~* %s',
     'gt': '> %s',
     'gte': '>= %s',
     'lt': '< %s',
diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
index 36f4d97a22..17d36a8613 100644
--- a/django/db/backends/postgresql_psycopg2/base.py
+++ b/django/db/backends/postgresql_psycopg2/base.py
@@ -225,6 +225,8 @@ OPERATOR_MAPPING = {
     'iexact': 'ILIKE %s',
     'contains': 'LIKE %s',
     'icontains': 'ILIKE %s',
+    'regex': '~ %s',
+    'iregex': '~* %s',
     'gt': '> %s',
     'gte': '>= %s',
     'lt': '< %s',
diff --git a/django/db/backends/sqlite3/base.py b/django/db/backends/sqlite3/base.py
index b753879d7a..c4ecf5f578 100644
--- a/django/db/backends/sqlite3/base.py
+++ b/django/db/backends/sqlite3/base.py
@@ -64,9 +64,10 @@ class DatabaseWrapper(local):
             }
             kwargs.update(self.options)
             self.connection = Database.connect(**kwargs)
-            # Register extract and date_trunc functions.
+            # Register extract, date_trunc, and regexp functions.
             self.connection.create_function("django_extract", 2, _sqlite_extract)
             self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
+            self.connection.create_function("regexp", 2, _sqlite_regexp)
         cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
         cursor.row_factory = utf8rowFactory
         if settings.DEBUG:
@@ -214,6 +215,13 @@ def _sqlite_date_trunc(lookup_type, dt):
     elif lookup_type == 'day':
         return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day)
 
+def _sqlite_regexp(re_pattern, re_string):  # backs SQLite's REGEXP operator
+    import re
+    try:
+        return bool(re.search(re_pattern, re_string))
+    except (re.error, TypeError):  # invalid pattern, or NULL/non-text operand
+        return False
+
 # SQLite requires LIKE statements to include an ESCAPE clause if the value
 # being escaped has a percent or underscore in it.
 # See http://www.sqlite.org/lang_expr.html for an explanation.
@@ -222,6 +230,8 @@ OPERATOR_MAPPING = {
     'iexact': "LIKE %s ESCAPE '\\'",
     'contains': "LIKE %s ESCAPE '\\'",
     'icontains': "LIKE %s ESCAPE '\\'",
+    'regex': 'REGEXP %s',
+    'iregex': "REGEXP '(?i)' || %s",
     'gt': '> %s',
     'gte': '>= %s',
     'lt': '< %s',
diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py
index 3af8a41adc..14154fd6f7 100644
--- a/django/db/models/fields/__init__.py
+++ b/django/db/models/fields/__init__.py
@@ -174,7 +174,7 @@ class Field(object):
 
     def get_db_prep_lookup(self, lookup_type, value):
         "Returns field's value prepared for database lookup."
-        if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
+        if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
             return [value]
         elif lookup_type in ('range', 'in'):
             return value
diff --git a/django/db/models/query.py b/django/db/models/query.py
index 24d701b10d..92bc9d78ed 100644
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@@ -1,3 +1,4 @@
+from django.conf import settings
 from django.db import backend, connection, transaction
 from django.db.models.fields import DateField, FieldDoesNotExist
 from django.db.models import signals, loading
@@ -22,6 +23,7 @@ QUERY_TERMS = (
     'gt', 'gte', 'lt', 'lte', 'in',
     'startswith', 'istartswith', 'endswith', 'iendswith',
     'range', 'year', 'month', 'day', 'isnull', 'search',
+    'regex', 'iregex',
 )
 
 # Size of each "chunk" for get_iterator calls.
@@ -797,6 +799,15 @@ def get_where_clause(lookup_type, table_prefix, field_name, value):
         return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or ''))
     elif lookup_type == 'search':
         return backend.get_fulltext_search_sql(table_prefix + field_name)
+    elif lookup_type in ('regex', 'iregex'):
+        if settings.DATABASE_ENGINE == 'oracle':
+            if lookup_type == 'regex':
+                match_option = 'c'
+            else:
+                match_option = 'i'
+            return "REGEXP_LIKE(%s%s, %s, '%s')" % (table_prefix, field_name, cast_sql, match_option)
+        else:
+            raise NotImplementedError
     raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type)
 
 def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0):
diff --git a/docs/db-api.txt b/docs/db-api.txt
index e7b8183f6c..9284c9994c 100644
--- a/docs/db-api.txt
+++ b/docs/db-api.txt
@@ -1173,6 +1173,48 @@ like ``contains`` but is significantly faster due to full-text indexing.
 Note this is only available in MySQL and requires direct manipulation of the
 database to add the full-text index.
 
+regex
+~~~~~
+
+Case-sensitive regular expression match.
+
+The regular expression syntax is that of the database backend in use; for the
+``sqlite3`` backend, the syntax is that of Python's ``re`` module.
+
+Example::
+
+    Entry.objects.get(title__regex=r'^(An?|The) +')
+
+SQL equivalents::
+
+    SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL
+
+    SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL
+
+    SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite
+
+Using raw strings for passing in the regular expression syntax is recommended.
+
+Regular expression matching is not supported on the ``ado_mssql`` backend,
+which raises ``NotImplementedError``; ``oracle`` uses ``REGEXP_LIKE``.
+
+iregex
+~~~~~~
+
+Case-insensitive regular expression match.
+
+Example::
+
+    Entry.objects.get(title__iregex=r'^(an?|the) +')
+
+SQL equivalents::
+
+    SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL
+
+    SELECT ... WHERE title ~* '^(an?|the) +'; -- PostgreSQL
+
+    SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite
+
 Default lookups are exact
 -------------------------
 
diff --git a/tests/modeltests/lookup/models.py b/tests/modeltests/lookup/models.py
index 6af70f8351..60ed6f7685 100644
--- a/tests/modeltests/lookup/models.py
+++ b/tests/modeltests/lookup/models.py
@@ -251,4 +251,98 @@ Traceback (most recent call last):
     ...
 TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date
 
+# Create some articles with a bit more interesting headlines for testing field lookups:
+>>> now = datetime.now()
+>>> for a in Article.objects.all():
+...     a.delete()
+>>> a1 = Article(pub_date=now, headline='f')
+>>> a1.save()
+>>> a2 = Article(pub_date=now, headline='fo')
+>>> a2.save()
+>>> a3 = Article(pub_date=now, headline='foo')
+>>> a3.save()
+>>> a4 = Article(pub_date=now, headline='fooo')
+>>> a4.save()
+>>> a5 = Article(pub_date=now, headline='Foo')
+>>> a5.save()
+
+# zero-or-more
+>>> Article.objects.filter(headline__regex=r'fo*')
+[<Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
+>>> Article.objects.filter(headline__iregex=r'fo*')
+[<Article: Foo>, <Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
+
+# one-or-more
+>>> Article.objects.filter(headline__regex=r'fo+')
+[<Article: fo>, <Article: foo>, <Article: fooo>]
+
+# zero-or-one
+>>> Article.objects.filter(headline__regex=r'fooo?')
+[<Article: foo>, <Article: fooo>]
+
+# and some more:
+>>> a6 = Article(pub_date=now, headline='bar')
+>>> a6.save()
+>>> a7 = Article(pub_date=now, headline='Bar')
+>>> a7.save()
+>>> a8 = Article(pub_date=now, headline='baz')
+>>> a8.save()
+>>> a9 = Article(pub_date=now, headline='baZ')
+>>> a9.save()
+
+# leading anchor
+>>> Article.objects.filter(headline__regex=r'^b')
+[<Article: baZ>, <Article: bar>, <Article: baz>]
+>>> Article.objects.filter(headline__iregex=r'^b')
+[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
+
+# trailing anchor
+>>> Article.objects.filter(headline__regex=r'z$')
+[<Article: baz>]
+>>> Article.objects.filter(headline__iregex=r'z$')
+[<Article: baZ>, <Article: baz>]
+
+# character sets
+>>> Article.objects.filter(headline__regex=r'ba[rz]')
+[<Article: bar>, <Article: baz>]
+>>> Article.objects.filter(headline__regex=r'ba[RZ]')
+[<Article: baZ>]
+>>> Article.objects.filter(headline__iregex=r'ba[RZ]')
+[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
+
+# and yet more:
+>>> a10 = Article(pub_date=now, headline='foobar')
+>>> a10.save()
+>>> a11 = Article(pub_date=now, headline='foobaz')
+>>> a11.save()
+>>> a12 = Article(pub_date=now, headline='FooBarBaz')
+>>> a12.save()
+>>> a13 = Article(pub_date=now, headline='foobarbaz')
+>>> a13.save()
+>>> a14 = Article(pub_date=now, headline='zoocarfaz')
+>>> a14.save()
+>>> a15 = Article(pub_date=now, headline='barfoobaz')
+>>> a15.save()
+>>> a16 = Article(pub_date=now, headline='BAZBARFOO')
+>>> a16.save()
+
+# alternation
+>>> Article.objects.filter(headline__regex=r'foo(bar|baz)')
+[<Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
+>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)')
+[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
+>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)')
+[<Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
+
+# greedy matching
+>>> Article.objects.filter(headline__regex=r'f.*z')
+[<Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
+>>> Article.objects.filter(headline__iregex=r'f.*z')
+[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
+
+# grouping and backreferences
+>>> Article.objects.filter(headline__regex=r'b(.).*b\1')
+[<Article: barfoobaz>, <Article: foobarbaz>]
+>>> Article.objects.filter(headline__iregex=r'b(.).*b\1')
+[<Article: BAZBARFOO>, <Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>]
 """}
diff --git a/tests/regressiontests/templates/tests.py b/tests/regressiontests/templates/tests.py
index 8c2389b28a..8801100bcc 100644
--- a/tests/regressiontests/templates/tests.py
+++ b/tests/regressiontests/templates/tests.py
@@ -219,6 +219,9 @@ class Templates(unittest.TestCase):
             # value will be converted to a bytestring.
             'filter-syntax18': (r'{{ var }}', {'var': UnicodeInStrClass()}, '\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91'),
 
+            # Numbers as filter arguments should work
+            'filter-syntax19': ('{{ var|truncatewords:1 }}', {"var": "hello world"}, "hello ..."),
+
             ### COMMENT SYNTAX ########################################################
             'comment-syntax01': ("{# this is hidden #}hello", {}, "hello"),
             'comment-syntax02': ("{# this is hidden #}hello{# foo #}", {}, "hello"),