Fixed #11863: added a `Model.objects.raw()` method for executing raw SQL queries and yielding model instances.

See `docs/topics/db/raw.txt` for details.

Thanks to seanoc for getting the ball rolling, and to Russ for wrapping things up.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@11921 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss 2009-12-20 02:46:58 +00:00
parent 25ab93457c
commit 20ad30713e
9 changed files with 627 additions and 23 deletions

View File

@ -1,5 +1,5 @@
import django.utils.copycompat as copy
from django.db.models.query import QuerySet, EmptyQuerySet, insert_query
from django.db.models.query import QuerySet, EmptyQuerySet, insert_query, RawQuerySet
from django.db.models import signals
from django.db.models.fields import FieldDoesNotExist
@ -181,6 +181,9 @@ class Manager(object):
def _update(self, values, **kwargs):
    """
    Hands ``values`` and any keyword arguments, unchanged, to the
    ``_update()`` method of the query set returned by ``get_query_set()``
    and returns its result.
    """
    queryset = self.get_query_set()
    return queryset._update(values, **kwargs)
def raw(self, query, params=None, *args, **kwargs):
    """
    Returns a ``RawQuerySet`` that runs the raw SQL ``query`` against this
    manager's model.

    ``params`` holds the values for the query's placeholders. Any further
    positional arguments are passed to ``RawQuerySet`` right after
    ``params``, so ``raw(query, params, translations)`` works as well as the
    keyword form ``raw(query, translations=...)``. Remaining keyword
    arguments are forwarded untouched.
    """
    # The leading RawQuerySet parameters (query, model, query_obj, params)
    # are supplied positionally. Passing them as keywords alongside *args
    # would bind the first extra positional argument to ``query`` and fail
    # with "got multiple values for keyword argument 'query'".
    query_obj = kwargs.pop('query_obj', None)
    return RawQuerySet(query, self.model, query_obj, params, *args, **kwargs)
class ManagerDescriptor(object):
# This class ensures managers aren't accessible via model instances.
# For example, Poll.objects works, but poll_obj.objects raises AttributeError.

View File

@ -5,7 +5,7 @@ The main QuerySet implementation. This provides the public API for the ORM.
from django.db import connection, transaction, IntegrityError
from django.db.models.aggregates import Aggregate
from django.db.models.fields import DateField
from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory, InvalidQuery
from django.db.models import signals, sql
from django.utils.copycompat import deepcopy
@ -287,7 +287,7 @@ class QuerySet(object):
Returns a dictionary containing the calculations (aggregation)
over the current queryset
If args is present the expression is passed as a kwarg ussing
If args is present the expression is passed as a kwarg using
the Aggregate object's default alias.
"""
for arg in args:
@ -1107,6 +1107,89 @@ def delete_objects(seen_objs):
if forced_managed:
transaction.leave_transaction_management()
class RawQuerySet(object):
    """
    Provides an iterator which converts the results of raw SQL queries into
    annotated model instances.
    """
    def __init__(self, query, model=None, query_obj=None, params=None, translations=None):
        """
        ``query`` is the raw SQL string and ``model`` the model class that
        result rows are turned into. ``query_obj``, when given, is used
        instead of building a new ``sql.RawQuery`` from ``query`` and
        ``params``. ``translations`` maps column names in the query results
        to the names they should be treated as.
        """
        self.model = model
        self.query = query_obj or sql.RawQuery(sql=query, connection=connection, params=params)
        self.params = params or ()
        self.translations = translations or {}

    def __iter__(self):
        """
        Yields one model instance per row produced by the underlying query.
        """
        for row in self.query:
            yield self.transform_results(row)

    def __repr__(self):
        params = self.params
        # "%" only spreads a *tuple* across several placeholders; a list
        # would be formatted as one single value (or raise TypeError when the
        # query has more than one placeholder).
        if isinstance(params, list):
            params = tuple(params)
        return "<RawQuerySet: %r>" % (self.query.sql % params)

    @property
    def columns(self):
        """
        A list of column names in the order they'll appear in the query
        results, with any ``translations`` already applied.
        """
        if not hasattr(self, '_columns'):
            # Work on a copy so the list handed out by the query object is
            # never modified in place.
            self._columns = list(self.query.get_columns())

            # Adjust any column names which don't match field names
            for (query_name, model_name) in self.translations.items():
                try:
                    index = self._columns.index(query_name)
                except ValueError:
                    # Ignore translations for non-existent column names
                    continue
                self._columns[index] = model_name

        return self._columns

    @property
    def model_fields(self):
        """
        A dict mapping database column names to the attnames of the model's
        fields.
        """
        if not hasattr(self, '_model_fields'):
            self._model_fields = {}
            for field in self.model._meta.fields:
                name, column = field.get_attname_column()
                self._model_fields[column] = name
        return self._model_fields

    def transform_results(self, values):
        """
        Builds a model instance from one result row.

        Values whose column belongs to a model field are passed to the model
        constructor; all other values are set on the instance as plain
        attributes (annotations). Model fields that are absent from the row
        are deferred. Raises ``InvalidQuery`` if the primary key is among the
        absent fields.
        """
        model_init_kwargs = {}
        annotations = []
        columns = self.columns
        model_fields = self.model_fields

        # Associate fields to values
        for pos, value in enumerate(values):
            column = columns[pos]

            # Separate properties from annotations
            if column in model_fields:
                model_init_kwargs[model_fields[column]] = value
            else:
                annotations.append((column, value))

        # Work out which fields the query did not supply. The constructor
        # kwargs are keyed by attname, so the comparison has to use the
        # attname too -- comparing against field.name would flag every
        # foreign key ("author" vs. "author_id") as missing.
        skip = set()
        for field in self.model._meta.fields:
            if field.attname not in model_init_kwargs:
                skip.add(field.attname)

        # Construct model instance and apply annotations
        if skip:
            if self.model._meta.pk.attname in skip:
                raise InvalidQuery('Raw query must include the primary key')
            model_cls = deferred_class_factory(self.model, skip)
        else:
            model_cls = self.model

        instance = model_cls(**model_init_kwargs)

        for field, value in annotations:
            setattr(instance, field, value)

        return instance
def insert_query(model, values, return_id=False, raw_values=False):
"""

View File

@ -20,6 +20,13 @@ class CyclicDependency(Exception):
"""
pass
class InvalidQuery(Exception):
    """
    Raised when the query handed to raw() is not one that is safe to use
    with raw().
    """
class CollectedObjects(object):
"""
A container that stores keys and lists of values along with remembering the

View File

@ -15,7 +15,7 @@ from django.db.backends.util import truncate_name
from django.db import connection
from django.db.models import signals
from django.db.models.fields import FieldDoesNotExist
from django.db.models.query_utils import select_related_descend
from django.db.models.query_utils import select_related_descend, InvalidQuery
from django.db.models.sql import aggregates as base_aggregates_module
from django.db.models.sql.expressions import SQLEvaluator
from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR
@ -23,7 +23,42 @@ from django.core.exceptions import FieldError
from datastructures import EmptyResultSet, Empty, MultiJoin
from constants import *
__all__ = ['Query', 'BaseQuery']
__all__ = ['Query', 'BaseQuery', 'RawQuery']
class RawQuery(object):
    """
    A single raw SQL query
    """

    def __init__(self, sql, connection, params=None):
        """
        Stores the SQL string, the connection to run it on and its
        parameters. The query is validated immediately but not executed.
        Raises ``InvalidQuery`` if ``sql`` is not a SELECT statement.
        """
        self.validate_sql(sql)
        self.params = params or ()
        self.sql = sql
        self.connection = connection
        self.cursor = None

    def get_columns(self):
        """
        Returns the column names of the result set, taken from the cursor's
        ``description``. Executes the query first if it hasn't been run yet.
        """
        if self.cursor is None:
            self._execute_query()
        return [column_meta[0] for column_meta in self.cursor.description]

    def validate_sql(self, sql):
        """
        Raises ``InvalidQuery`` unless ``sql``, ignoring case and surrounding
        whitespace, starts with "select".
        """
        if not sql.lower().strip().startswith('select'):
            raise InvalidQuery('Raw queries are limited to SELECT queries. Use '
                               'connection.cursor directly for other types of queries.')

    def __iter__(self):
        # Always execute a new query for a new iterator.
        # This could be optimized with a cache at the expense of RAM.
        self._execute_query()
        return self.cursor

    def __repr__(self):
        params = self.params
        # "%" only spreads a *tuple* across several placeholders; a list
        # would be formatted as one single value (or raise TypeError when the
        # query has more than one placeholder).
        if isinstance(params, list):
            params = tuple(params)
        return "<RawQuery: %r>" % (self.sql % params)

    def _execute_query(self):
        """
        Opens a new cursor on the connection and runs the query on it.
        """
        self.cursor = self.connection.cursor()
        self.cursor.execute(self.sql, self.params)
class BaseQuery(object):
"""

View File

@ -1059,14 +1059,9 @@ Falling back to raw SQL
=======================
If you find yourself needing to write an SQL query that is too complex for
Django's database-mapper to handle, you can fall back into raw-SQL statement
mode.
The preferred way to do this is by giving your model custom methods or custom
manager methods that execute queries. Although there's nothing in Django that
*requires* database queries to live in the model layer, this approach keeps all
your data-access logic in one place, which is smart from an code-organization
standpoint. For instructions, see :ref:`topics-db-sql`.
Django's database-mapper to handle, you can fall back on writing SQL by hand.
Django has a couple of options for writing raw SQL queries; see
:ref:`topics-db-sql`.
Finally, it's important to note that the Django database layer is merely an
interface to your database. You can access your database via other tools,

View File

@ -1,10 +1,183 @@
.. _topics-db-sql:
==========================
Performing raw SQL queries
==========================
Feel free to write custom SQL statements in custom model methods and
module-level methods. The object ``django.db.connection`` represents the
.. currentmodule:: django.db.models
When the :ref:`model query APIs <topics-db-queries>` don't go far enough, you
can fall back to writing raw SQL. Django gives you two ways of performing raw
SQL queries: you can use :meth:`Manager.raw()` to `perform raw queries and
return model instances`__, or you can avoid the model layer entirely and
`execute custom SQL directly`__.
__ `performing raw queries`_
__ `executing custom SQL directly`_
Performing raw queries
======================
.. versionadded:: 1.2
The ``raw()`` manager method can be used to perform raw SQL queries that
return model instances:
.. method:: Manager.raw(query, params=None, translations=None)
This method takes a raw SQL query, executes it, and returns model
instances.
This is best illustrated with an example. Suppose you've got the following model::
class Person(models.Model):
first_name = models.CharField(...)
last_name = models.CharField(...)
birth_date = models.DateField(...)
You could then execute custom SQL like so::
>>> Person.objects.raw('SELECT * from myapp_person')
[<Person: John Doe>, <Person: Jane Doe>, ...]
.. admonition:: Model table names
Where'd the name of the ``Person`` table come from in that example?
By default, Django figures out a database table name by joining the
model's "app label" -- the name you used in ``manage.py startapp`` -- to
the model's class name, with an underscore between them. In the example
we've assumed that the ``Person`` model lives in an app named ``myapp``,
so its table would be ``myapp_person``.
For more details check out the documentation for the
:attr:`~Options.db_table` option, which also lets you manually set the
database table name.
Of course, this example isn't very exciting -- it's exactly the same as
running ``Person.objects.all()``. However, ``raw()`` has a bunch of other
options that make it very powerful.
Mapping query fields to model fields
------------------------------------
``raw()`` automatically maps fields in the query to fields on the model.
The order of fields in your query doesn't matter. In other words, both
of the following queries work identically::
>>> Person.objects.raw('SELECT id, first_name, last_name, birth_date FROM myapp_person')
...
>>> Person.objects.raw('SELECT last_name, birth_date, first_name, id FROM myapp_person')
...
Matching is done by name. This means that you can use SQL's ``AS`` clauses to
map fields in the query to model fields. So if you had some other table that
had ``Person`` data in it, you could easily map it into ``Person`` instances::
>>> Person.objects.raw('''SELECT first AS first_name,
... last AS last_name,
... bd AS birth_date,
... pk as id
... FROM some_other_table''')
As long as the names match, the model instances will be created correctly.
Alternatively, you can map fields in the query to model fields using the
``translations`` argument to ``raw()``. This is a dictionary mapping names of
fields in the query to names of fields on the model. For example, the above
query could also be written::
>>> name_map = {'first': 'first_name', 'last': 'last_name', 'bd': 'birth_date', 'pk': 'id'}
>>> Person.objects.raw('SELECT * FROM some_other_table', translations=name_map)
Deferring model fields
----------------------
Fields may also be left out::
>>> people = Person.objects.raw('SELECT id, first_name FROM myapp_person')
The ``Person`` objects returned by this query will be :ref:`deferred
<queryset-defer>` model instances. This means that the fields that are omitted
from the query will be loaded on demand. For example::
>>> for p in Person.objects.raw('SELECT id, first_name FROM myapp_person'):
... print p.first_name, # This will be retrieved by the original query
... print p.last_name # This will be retrieved on demand
...
John Smith
Jane Jones
From outward appearances, this looks like the query has retrieved both
the first name and last name. However, this example actually issued 3
queries. Only the first names were retrieved by the raw() query -- the
last names were both retrieved on demand when they were printed.
There is only one field that you can't leave out - the primary key
field. Django uses the primary key to identify model instances, so it
must always be included in a raw query. An ``InvalidQuery`` exception
will be raised if you forget to include the primary key.
Adding annotations
------------------
You can also execute queries containing fields that aren't defined on the
model. For example, we could use `PostgreSQL's age() function`__ to get a list
of people with their ages calculated by the database::
>>> people = Person.objects.raw('SELECT *, age(birth_date) AS age FROM myapp_person')
>>> for p in people:
... print "%s is %s." % (p.first_name, p.age)
John is 37.
Jane is 42.
...
__ http://www.postgresql.org/docs/8.4/static/functions-datetime.html
Passing parameters into ``raw()``
---------------------------------
If you need to perform parameterized queries, you can use the ``params``
argument to ``raw()``::
>>> lname = 'Doe'
>>> Person.objects.raw('SELECT * FROM myapp_person WHERE last_name = %s', [lname])
``params`` is a list of parameters. You'll use ``%s`` placeholders in the
query string (regardless of your database engine); they'll be replaced with
parameters from the ``params`` list.
.. warning::
**Do not use string formatting on raw queries!**
It's tempting to write the above query as::
>>> query = 'SELECT * FROM myapp_person WHERE last_name = %s' % lname
>>> Person.objects.raw(query)
**Don't.**
Using the ``params`` list completely protects you from `SQL injection
attacks`__, a common exploit where attackers inject arbitrary SQL into
your database. If you use string interpolation, sooner or later you'll
fall victim to SQL injection. As long as you remember to always use the
``params`` list you'll be protected.
__ http://en.wikipedia.org/wiki/SQL_injection
Executing custom SQL directly
=============================
Sometimes even :meth:`Manager.raw` isn't quite enough: you might need to
perform queries that don't map cleanly to models, or directly execute
``UPDATE``, ``INSERT``, or ``DELETE`` queries.
In these cases, you can always access the database directly, routing around
the model layer entirely.
The object ``django.db.connection`` represents the
current database connection, and ``django.db.transaction`` represents the
current database transaction. To use the database connection, call
``connection.cursor()`` to get a cursor object. Then, call
@ -15,7 +188,7 @@ changing operation, you should then call
to the database. If your query is purely a data retrieval operation, no commit
is required. For example::
def my_custom_sql(self):
def my_custom_sql():
from django.db import connection, transaction
cursor = connection.cursor()
@ -78,12 +251,5 @@ necessary. (Also note that Django expects the ``"%s"`` placeholder, *not* the
``"?"`` placeholder, which is used by the SQLite Python bindings. This is for
the sake of consistency and sanity.)
An easier option?
-----------------
A final note: If all you want to do is a custom ``WHERE`` clause, you can just
use the ``where``, ``tables`` and ``params`` arguments to the
:ref:`extra clause <extra>` in the standard queryset API.
.. _Python DB-API: http://www.python.org/peps/pep-0249.html

View File

@ -0,0 +1,102 @@
[
{
"pk": 1,
"model": "raw_query.author",
"fields": {
"dob": "1950-09-20",
"first_name": "Joe",
"last_name": "Smith"
}
},
{
"pk": 2,
"model": "raw_query.author",
"fields": {
"dob": "1920-04-02",
"first_name": "Jill",
"last_name": "Doe"
}
},
{
"pk": 3,
"model": "raw_query.author",
"fields": {
"dob": "1986-01-25",
"first_name": "Bob",
"last_name": "Smith"
}
},
{
"pk": 4,
"model": "raw_query.author",
"fields": {
"dob": "1932-05-10",
"first_name": "Bill",
"last_name": "Jones"
}
},
{
"pk": 1,
"model": "raw_query.book",
"fields": {
"author": 1,
"title": "The awesome book"
}
},
{
"pk": 2,
"model": "raw_query.book",
"fields": {
"author": 1,
"title": "The horrible book"
}
},
{
"pk": 3,
"model": "raw_query.book",
"fields": {
"author": 1,
"title": "Another awesome book"
}
},
{
"pk": 4,
"model": "raw_query.book",
"fields": {
"author": 3,
"title": "Some other book"
}
},
{
"pk": 1,
"model": "raw_query.coffee",
"fields": {
"brand": "dunkin doughnuts"
}
},
{
"pk": 2,
"model": "raw_query.coffee",
"fields": {
"brand": "starbucks"
}
},
{
"pk": 1,
"model": "raw_query.reviewer",
"fields": {
"reviewed": [
2,
3,
4
]
}
},
{
"pk": 2,
"model": "raw_query.reviewer",
"fields": {
"reviewed": []
}
}
]

View File

@ -0,0 +1,25 @@
from django.db import models
class Author(models.Model):
    """
    Test model whose constructor refuses any keyword argument that is not
    the attname of one of its fields.
    """
    first_name = models.CharField(max_length=255)
    last_name = models.CharField(max_length=255)
    dob = models.DateField()

    def __init__(self, *args, **kwargs):
        super(Author, self).__init__(*args, **kwargs)
        # Annotations must be applied to the instance separately from the
        # real fields. If one ever reaches the constructor as a keyword
        # argument, fail loudly so the test suite notices.
        known_attnames = [f.attname for f in self._meta.fields]
        for key in kwargs:
            assert key in known_attnames, \
                "Author.__init__ got an unexpected paramater: %s" % key
class Book(models.Model):
    """
    Test model with a title and a foreign key to ``Author``.
    """
    title = models.CharField(max_length=255)
    author = models.ForeignKey(Author)
class Coffee(models.Model):
    """
    Test model whose only declared field uses an explicit ``db_column``.
    """
    # The field is called "brand" but is given the database column "name".
    brand = models.CharField(max_length=255, db_column="name")
class Reviewer(models.Model):
    """
    Test model whose only declared field is a many-to-many relation to
    ``Book``.
    """
    reviewed = models.ManyToManyField(Book)

View File

@ -0,0 +1,188 @@
from django.test import TestCase
from datetime import datetime
from models import Author, Book, Coffee, Reviewer
from django.db.models.sql.query import InvalidQuery
class RawQueryTests(TestCase):
    """
    Tests for the ``raw()`` manager method, run against the ``Author``,
    ``Book``, ``Coffee`` and ``Reviewer`` test models.
    """

    def assertSuccessfulRawQuery(self, model, query, expected_results,
            expected_annotations=(), params=None, translations=None):
        """
        Execute the passed query against the passed model and check the output
        """
        # Use a fresh list per call rather than a shared mutable default.
        if params is None:
            params = []
        results = list(model.objects.raw(query=query, params=params, translations=translations))
        self.assertProcessed(results, expected_results, expected_annotations)
        self.assertAnnotations(results, expected_annotations)

    def assertProcessed(self, results, orig, expected_annotations=()):
        """
        Compare the results of a raw query against expected results.

        Checks that both sequences have the same length and that items at the
        same position have the same ``id``.
        """
        self.assertEqual(len(results), len(orig))
        for index, item in enumerate(results):
            orig_item = orig[index]
            for annotation in expected_annotations:
                # Each annotation is a (name, value) pair.
                setattr(orig_item, *annotation)

            self.assertEqual(item.id, orig_item.id)

    def assertNoAnnotations(self, results):
        """
        Check that the results of a raw query contain no annotations
        """
        self.assertAnnotations(results, ())

    def assertAnnotations(self, results, expected_annotations):
        """
        Check that the passed raw query results contain the expected
        annotations.

        ``expected_annotations`` holds one (name, value) pair per result, in
        result order. Nothing is checked when it is empty.
        """
        if expected_annotations:
            for index, result in enumerate(results):
                annotation, value = expected_annotations[index]
                self.assertTrue(hasattr(result, annotation))
                self.assertEqual(getattr(result, annotation), value)

    def testSimpleRawQuery(self):
        """
        Basic test of raw query with a simple database query
        """
        query = "SELECT * FROM raw_query_author"
        authors = Author.objects.all()
        self.assertSuccessfulRawQuery(Author, query, authors)

    def testRawQueryLazy(self):
        """
        Raw queries are lazy: they aren't actually executed until they're
        iterated over.
        """
        q = Author.objects.raw('SELECT * FROM raw_query_author')
        self.assertTrue(q.query.cursor is None)
        list(q)
        self.assertTrue(q.query.cursor is not None)

    def testFkeyRawQuery(self):
        """
        Test of a simple raw query against a model containing a foreign key
        """
        query = "SELECT * FROM raw_query_book"
        books = Book.objects.all()
        self.assertSuccessfulRawQuery(Book, query, books)

    def testDBColumnHandler(self):
        """
        Test of a simple raw query against a model containing a field with
        db_column defined.
        """
        query = "SELECT * FROM raw_query_coffee"
        coffees = Coffee.objects.all()
        self.assertSuccessfulRawQuery(Coffee, query, coffees)

    def testOrderHandler(self):
        """
        Test of raw query's tolerance for columns being returned in any
        order
        """
        selects = (
            ('dob, last_name, first_name, id'),
            ('last_name, dob, first_name, id'),
            ('first_name, last_name, dob, id'),
        )

        for select in selects:
            query = "SELECT %s FROM raw_query_author" % select
            authors = Author.objects.all()
            self.assertSuccessfulRawQuery(Author, query, authors)

    def testTranslations(self):
        """
        Test of raw query's optional ability to translate unexpected result
        column names to specific model fields
        """
        query = "SELECT first_name AS first, last_name AS last, dob, id FROM raw_query_author"
        translations = {'first': 'first_name', 'last': 'last_name'}
        authors = Author.objects.all()
        self.assertSuccessfulRawQuery(Author, query, authors, translations=translations)

    def testParams(self):
        """
        Test passing optional query parameters
        """
        query = "SELECT * FROM raw_query_author WHERE first_name = %s"
        author = Author.objects.all()[2]
        params = [author.first_name]
        results = list(Author.objects.raw(query=query, params=params))
        self.assertProcessed(results, [author])
        self.assertNoAnnotations(results)
        self.assertEqual(len(results), 1)

    def testManyToMany(self):
        """
        Test of a simple raw query against a model containing a m2m field
        """
        query = "SELECT * FROM raw_query_reviewer"
        reviewers = Reviewer.objects.all()
        self.assertSuccessfulRawQuery(Reviewer, query, reviewers)

    def testExtraConversions(self):
        """
        Test to ensure that extra translations are ignored.
        """
        query = "SELECT * FROM raw_query_author"
        translations = {'something': 'else'}
        authors = Author.objects.all()
        self.assertSuccessfulRawQuery(Author, query, authors, translations=translations)

    def testMissingFields(self):
        """
        Fields left out of the query are still readable on the instances.
        """
        query = "SELECT id, first_name, dob FROM raw_query_author"
        for author in Author.objects.raw(query):
            self.assertNotEqual(author.first_name, None)
            # last_name isn't given, but it will be retrieved on demand
            self.assertNotEqual(author.last_name, None)

    def testMissingFieldsWithoutPK(self):
        """
        A query that leaves out the primary key raises InvalidQuery once it
        is iterated.
        """
        query = "SELECT first_name, dob FROM raw_query_author"
        # Creating the raw query set does not raise; the error appears when
        # list() consumes it.
        self.assertRaises(InvalidQuery, list, Author.objects.raw(query))

    def testAnnotations(self):
        """
        Extra columns in the query show up as attributes on the instances.
        """
        query = "SELECT a.*, count(b.id) as book_count FROM raw_query_author a LEFT JOIN raw_query_book b ON a.id = b.author_id GROUP BY a.id, a.first_name, a.last_name, a.dob ORDER BY a.id"
        expected_annotations = (
            ('book_count', 3),
            ('book_count', 0),
            ('book_count', 1),
            ('book_count', 0),
        )
        authors = Author.objects.all()
        self.assertSuccessfulRawQuery(Author, query, authors, expected_annotations)

    def testInvalidQuery(self):
        """
        A non-SELECT statement is rejected when raw() is called.
        """
        query = "UPDATE raw_query_author SET first_name='thing' WHERE first_name='Joe'"
        self.assertRaises(InvalidQuery, Author.objects.raw, query)

    def testWhiteSpaceQuery(self):
        """
        Leading whitespace before SELECT is accepted.
        """
        query = "    SELECT * FROM raw_query_author"
        authors = Author.objects.all()
        self.assertSuccessfulRawQuery(Author, query, authors)

    def testMultipleIterations(self):
        """
        Iterating the same raw query set twice gives the same number of
        results, each equal to the regular query set's item at that position.
        """
        query = "SELECT * FROM raw_query_author"
        normal_authors = Author.objects.all()
        raw_authors = Author.objects.raw(query)

        # First Iteration
        first_iterations = 0
        for index, raw_author in enumerate(raw_authors):
            self.assertEqual(normal_authors[index], raw_author)
            first_iterations += 1

        # Second Iteration
        second_iterations = 0
        for index, raw_author in enumerate(raw_authors):
            self.assertEqual(normal_authors[index], raw_author)
            second_iterations += 1

        self.assertEqual(first_iterations, second_iterations)