diff --git a/django/db/models/manager.py b/django/db/models/manager.py index 2eeb98bfbc..c4d47e0d36 100644 --- a/django/db/models/manager.py +++ b/django/db/models/manager.py @@ -1,5 +1,5 @@ import django.utils.copycompat as copy -from django.db.models.query import QuerySet, EmptyQuerySet, insert_query +from django.db.models.query import QuerySet, EmptyQuerySet, insert_query, RawQuerySet from django.db.models import signals from django.db.models.fields import FieldDoesNotExist @@ -181,6 +181,9 @@ class Manager(object): def _update(self, values, **kwargs): return self.get_query_set()._update(values, **kwargs) + def raw(self, query, params=None, *args, **kwargs): + return RawQuerySet(model=self.model, query=query, params=params, *args, **kwargs) + class ManagerDescriptor(object): # This class ensures managers aren't accessible via model instances. # For example, Poll.objects works, but poll_obj.objects raises AttributeError. diff --git a/django/db/models/query.py b/django/db/models/query.py index 84af1bec60..8c71155c0e 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -5,7 +5,7 @@ The main QuerySet implementation. This provides the public API for the ORM. 
from django.db import connection, transaction, IntegrityError from django.db.models.aggregates import Aggregate from django.db.models.fields import DateField -from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory +from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory, InvalidQuery from django.db.models import signals, sql from django.utils.copycompat import deepcopy @@ -287,7 +287,7 @@ class QuerySet(object): Returns a dictionary containing the calculations (aggregation) over the current queryset - If args is present the expression is passed as a kwarg ussing + If args is present the expression is passed as a kwarg using the Aggregate object's default alias. """ for arg in args: @@ -1107,6 +1107,89 @@ def delete_objects(seen_objs): if forced_managed: transaction.leave_transaction_management() +class RawQuerySet(object): + """ + Provides an iterator which converts the results of raw SQL queries into + annotated model instances. + """ + def __init__(self, query, model=None, query_obj=None, params=None, translations=None): + self.model = model + self.query = query_obj or sql.RawQuery(sql=query, connection=connection, params=params) + self.params = params or () + self.translations = translations or {} + + def __iter__(self): + for row in self.query: + yield self.transform_results(row) + + def __repr__(self): + return "" % (self.query.sql % self.params) + + @property + def columns(self): + """ + A list of model field names in the order they'll appear in the + query results. 
+ """ + if not hasattr(self, '_columns'): + self._columns = self.query.get_columns() + + # Adjust any column names which don't match field names + for (query_name, model_name) in self.translations.items(): + try: + index = self._columns.index(query_name) + self._columns[index] = model_name + except ValueError: + # Ignore translations for non-existant column names + pass + + return self._columns + + @property + def model_fields(self): + """ + A dict mapping column names to model field names. + """ + if not hasattr(self, '_model_fields'): + self._model_fields = {} + for field in self.model._meta.fields: + name, column = field.get_attname_column() + self._model_fields[column] = name + return self._model_fields + + def transform_results(self, values): + model_init_kwargs = {} + annotations = () + + # Associate fields to values + for pos, value in enumerate(values): + column = self.columns[pos] + + # Separate properties from annotations + if column in self.model_fields.keys(): + model_init_kwargs[self.model_fields[column]] = value + else: + annotations += (column, value), + + # Construct model instance and apply annotations + skip = set() + for field in self.model._meta.fields: + if field.name not in model_init_kwargs.keys(): + skip.add(field.attname) + + if skip: + if self.model._meta.pk.attname in skip: + raise InvalidQuery('Raw query must include the primary key') + model_cls = deferred_class_factory(self.model, skip) + else: + model_cls = self.model + + instance = model_cls(**model_init_kwargs) + + for field, value in annotations: + setattr(instance, field, value) + + return instance def insert_query(model, values, return_id=False, raw_values=False): """ diff --git a/django/db/models/query_utils.py b/django/db/models/query_utils.py index c5cb336f4e..746b04d4fb 100644 --- a/django/db/models/query_utils.py +++ b/django/db/models/query_utils.py @@ -20,6 +20,13 @@ class CyclicDependency(Exception): """ pass +class InvalidQuery(Exception): + """ + The query passed to raw 
isn't a safe query to use with raw. + """ + pass + + class CollectedObjects(object): """ A container that stores keys and lists of values along with remembering the diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index f3e24ed8ee..9ecf273be3 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -15,7 +15,7 @@ from django.db.backends.util import truncate_name from django.db import connection from django.db.models import signals from django.db.models.fields import FieldDoesNotExist -from django.db.models.query_utils import select_related_descend +from django.db.models.query_utils import select_related_descend, InvalidQuery from django.db.models.sql import aggregates as base_aggregates_module from django.db.models.sql.expressions import SQLEvaluator from django.db.models.sql.where import WhereNode, Constraint, EverythingNode, AND, OR @@ -23,7 +23,42 @@ from django.core.exceptions import FieldError from datastructures import EmptyResultSet, Empty, MultiJoin from constants import * -__all__ = ['Query', 'BaseQuery'] +__all__ = ['Query', 'BaseQuery', 'RawQuery'] + +class RawQuery(object): + """ + A single raw SQL query + """ + + def __init__(self, sql, connection, params=None): + self.validate_sql(sql) + self.params = params or () + self.sql = sql + self.connection = connection + self.cursor = None + + def get_columns(self): + if self.cursor is None: + self._execute_query() + return [column_meta[0] for column_meta in self.cursor.description] + + def validate_sql(self, sql): + if not sql.lower().strip().startswith('select'): + raise InvalidQuery('Raw queries are limited to SELECT queries. Use ' + 'connection.cursor directly for types of queries.') + + def __iter__(self): + # Always execute a new query for a new iterator. + # This could be optomized with a cache at the expense of RAM. 
+ self._execute_query() + return self.cursor + + def __repr__(self): + return "" % (self.sql % self.params) + + def _execute_query(self): + self.cursor = self.connection.cursor() + self.cursor.execute(self.sql, self.params) class BaseQuery(object): """ diff --git a/docs/topics/db/queries.txt b/docs/topics/db/queries.txt index 968ea7fbc8..1f7e2a1a3c 100644 --- a/docs/topics/db/queries.txt +++ b/docs/topics/db/queries.txt @@ -1059,14 +1059,9 @@ Falling back to raw SQL ======================= If you find yourself needing to write an SQL query that is too complex for -Django's database-mapper to handle, you can fall back into raw-SQL statement -mode. - -The preferred way to do this is by giving your model custom methods or custom -manager methods that execute queries. Although there's nothing in Django that -*requires* database queries to live in the model layer, this approach keeps all -your data-access logic in one place, which is smart from an code-organization -standpoint. For instructions, see :ref:`topics-db-sql`. +Django's database-mapper to handle, you can fall back on writing SQL by hand. +Django has a couple of options for writing raw SQL queries; see +:ref:`topics-db-sql`. Finally, it's important to note that the Django database layer is merely an interface to your database. You can access your database via other tools, diff --git a/docs/topics/db/sql.txt b/docs/topics/db/sql.txt index 9c534709ca..57834eb98a 100644 --- a/docs/topics/db/sql.txt +++ b/docs/topics/db/sql.txt @@ -1,10 +1,183 @@ .. _topics-db-sql: +========================== Performing raw SQL queries ========================== -Feel free to write custom SQL statements in custom model methods and -module-level methods. The object ``django.db.connection`` represents the +.. currentmodule:: django.db.models + +When the :ref:`model query APIs ` don't go far enough, you +can fall back to writing raw SQL. 
Django gives you two ways of performing raw +SQL queries: you can use :meth:`Manager.raw()` to `perform raw queries and +return model instances`__, or you can avoid the model layer entirely and +`execute custom SQL directly`__. + +__ `performing raw queries`_ +__ `executing custom SQL directly`_ + +Performing raw queries +====================== + +.. versionadded:: 1.2 + +The ``raw()`` manager method can be used to perform raw SQL queries that +return model instances: + +.. method:: Manager.raw(query, params=None, translations=None) + +This method takes a raw SQL query, executes it, and returns model +instances. + +This is best illustrated with an example. Suppose you've got the following model:: + + class Person(models.Model): + first_name = models.CharField(...) + last_name = models.CharField(...) + birth_date = models.DateField(...) + +You could then execute custom SQL like so:: + + >>> Person.objects.raw('SELECT * from myapp_person') + [<Person: John Smith>, <Person: Jane Jones>, ...] + +.. admonition:: Model table names + + Where'd the name of the ``Person`` table come from in that example? + + By default, Django figures out a database table name by joining the + model's "app label" -- the name you used in ``manage.py startapp`` -- to + the model's class name, with an underscore between them. In the example + we've assumed that the ``Person`` model lives in an app named ``myapp``, + so its table would be ``myapp_person``. + + For more details check out the documentation for the + :attr:`~Options.db_table` option, which also lets you manually set the + database table name. + +Of course, this example isn't very exciting -- it's exactly the same as +running ``Person.objects.all()``. However, ``raw()`` has a bunch of other +options that make it very powerful. + +Mapping query fields to model fields +------------------------------------ + +``raw()`` automatically maps fields in the query to fields on the model. + +The order of fields in your query doesn't matter.
In other words, both +of the following queries work identically:: + + >>> Person.objects.raw('SELECT id, first_name, last_name, birth_date FROM myapp_person') + ... + >>> Person.objects.raw('SELECT last_name, birth_date, first_name, id FROM myapp_person') + ... + +Matching is done by name. This means that you can use SQL's ``AS`` clauses to +map fields in the query to model fields. So if you had some other table that +had ``Person`` data in it, you could easily map it into ``Person`` instances:: + + >>> Person.objects.raw('''SELECT first AS first_name, + ... last AS last_name, + ... bd AS birth_date, + ... pk AS id + ... FROM some_other_table''') + +As long as the names match, the model instances will be created correctly. + +Alternatively, you can map fields in the query to model fields using the +``translations`` argument to ``raw()``. This is a dictionary mapping names of +fields in the query to names of fields on the model. For example, the above +query could also be written:: + + >>> name_map = {'first': 'first_name', 'last': 'last_name', 'bd': 'birth_date', 'pk': 'id'} + >>> Person.objects.raw('SELECT * FROM some_other_table', translations=name_map) + +Deferring model fields +---------------------- + +Fields may also be left out:: + + >>> people = Person.objects.raw('SELECT id, first_name FROM myapp_person') + +The ``Person`` objects returned by this query will be :ref:`deferred +<queryset-defer>` model instances. This means that the fields that are omitted +from the query will be loaded on demand. For example:: + + >>> for p in Person.objects.raw('SELECT id, first_name FROM myapp_person'): + ... print p.first_name, # This will be retrieved by the original query + ... print p.last_name # This will be retrieved on demand + ... + John Smith + Jane Jones + +From outward appearances, this looks like the query has retrieved both +the first name and last name. However, this example actually issued 3 +queries.
Only the first names were retrieved by the ``raw()`` query -- the +last names were both retrieved on demand when they were printed. + +There is only one field that you can't leave out -- the primary key +field. Django uses the primary key to identify model instances, so it +must always be included in a raw query. An ``InvalidQuery`` exception +will be raised if you forget to include the primary key. + +Adding annotations +------------------ + +You can also execute queries containing fields that aren't defined on the +model. For example, we could use `PostgreSQL's age() function`__ to get a list +of people with their ages calculated by the database:: + + >>> people = Person.objects.raw('SELECT *, age(birth_date) AS age FROM myapp_person') + >>> for p in people: + ... print "%s is %s." % (p.first_name, p.age) + John is 37. + Jane is 42. + ... + +__ http://www.postgresql.org/docs/8.4/static/functions-datetime.html + +Passing parameters into ``raw()`` +--------------------------------- + +If you need to perform parameterized queries, you can use the ``params`` +argument to ``raw()``:: + + >>> lname = 'Doe' + >>> Person.objects.raw('SELECT * FROM myapp_person WHERE last_name = %s', [lname]) + +``params`` is a list of parameters. You'll use ``%s`` placeholders in the +query string (regardless of your database engine); they'll be replaced with +parameters from the ``params`` list. + +.. warning:: + + **Do not use string formatting on raw queries!** + + It's tempting to write the above query as:: + + >>> query = 'SELECT * FROM myapp_person WHERE last_name = %s' % lname + >>> Person.objects.raw(query) + + **Don't.** + + Using the ``params`` list completely protects you from `SQL injection + attacks`__, a common exploit where attackers inject arbitrary SQL into + your database. If you use string interpolation, sooner or later you'll + fall victim to SQL injection. As long as you remember to always use the + ``params`` list you'll be protected.
+ +__ http://en.wikipedia.org/wiki/SQL_injection + +Executing custom SQL directly +============================= + +Sometimes even :meth:`Manager.raw` isn't quite enough: you might need to +perform queries that don't map cleanly to models, or directly execute +``UPDATE``, ``INSERT``, or ``DELETE`` queries. + +In these cases, you can always access the database directly, routing around +the model layer entirely. + +The object ``django.db.connection`` represents the current database connection, and ``django.db.transaction`` represents the current database transaction. To use the database connection, call ``connection.cursor()`` to get a cursor object. Then, call @@ -15,7 +188,7 @@ changing operation, you should then call to the database. If your query is purely a data retrieval operation, no commit is required. For example:: - def my_custom_sql(self): + def my_custom_sql(): from django.db import connection, transaction cursor = connection.cursor() @@ -78,12 +251,5 @@ necessary. (Also note that Django expects the ``"%s"`` placeholder, *not* the ``"?"`` placeholder, which is used by the SQLite Python bindings. This is for the sake of consistency and sanity.) -An easier option? ------------------ - -A final note: If all you want to do is a custom ``WHERE`` clause, you can just -use the ``where``, ``tables`` and ``params`` arguments to the -:ref:`extra clause ` in the standard queryset API. - .. 
_Python DB-API: http://www.python.org/peps/pep-0249.html diff --git a/tests/modeltests/raw_query/fixtures/initial_data.json b/tests/modeltests/raw_query/fixtures/initial_data.json new file mode 100644 index 0000000000..3ff9810b45 --- /dev/null +++ b/tests/modeltests/raw_query/fixtures/initial_data.json @@ -0,0 +1,102 @@ +[ + { + "pk": 1, + "model": "raw_query.author", + "fields": { + "dob": "1950-09-20", + "first_name": "Joe", + "last_name": "Smith" + } + }, + { + "pk": 2, + "model": "raw_query.author", + "fields": { + "dob": "1920-04-02", + "first_name": "Jill", + "last_name": "Doe" + } + }, + { + "pk": 3, + "model": "raw_query.author", + "fields": { + "dob": "1986-01-25", + "first_name": "Bob", + "last_name": "Smith" + } + }, + { + "pk": 4, + "model": "raw_query.author", + "fields": { + "dob": "1932-05-10", + "first_name": "Bill", + "last_name": "Jones" + } + }, + { + "pk": 1, + "model": "raw_query.book", + "fields": { + "author": 1, + "title": "The awesome book" + } + }, + { + "pk": 2, + "model": "raw_query.book", + "fields": { + "author": 1, + "title": "The horrible book" + } + }, + { + "pk": 3, + "model": "raw_query.book", + "fields": { + "author": 1, + "title": "Another awesome book" + } + }, + { + "pk": 4, + "model": "raw_query.book", + "fields": { + "author": 3, + "title": "Some other book" + } + }, + { + "pk": 1, + "model": "raw_query.coffee", + "fields": { + "brand": "dunkin doughnuts" + } + }, + { + "pk": 2, + "model": "raw_query.coffee", + "fields": { + "brand": "starbucks" + } + }, + { + "pk": 1, + "model": "raw_query.reviewer", + "fields": { + "reviewed": [ + 2, + 3, + 4 + ] + } + }, + { + "pk": 2, + "model": "raw_query.reviewer", + "fields": { + "reviewed": [] + } + } +] diff --git a/tests/modeltests/raw_query/models.py b/tests/modeltests/raw_query/models.py new file mode 100644 index 0000000000..fb5503df3d --- /dev/null +++ b/tests/modeltests/raw_query/models.py @@ -0,0 +1,25 @@ +from django.db import models + +class Author(models.Model): + 
first_name = models.CharField(max_length=255) + last_name = models.CharField(max_length=255) + dob = models.DateField() + + def __init__(self, *args, **kwargs): + super(Author, self).__init__(*args, **kwargs) + # Protect against annotations being passed to __init__ -- + # this'll make the test suite get angry if annotations aren't + # treated differently than fields. + for k in kwargs: + assert k in [f.attname for f in self._meta.fields], \ + "Author.__init__ got an unexpected paramater: %s" % k + +class Book(models.Model): + title = models.CharField(max_length=255) + author = models.ForeignKey(Author) + +class Coffee(models.Model): + brand = models.CharField(max_length=255, db_column="name") + +class Reviewer(models.Model): + reviewed = models.ManyToManyField(Book) \ No newline at end of file diff --git a/tests/modeltests/raw_query/tests.py b/tests/modeltests/raw_query/tests.py new file mode 100644 index 0000000000..b132605da5 --- /dev/null +++ b/tests/modeltests/raw_query/tests.py @@ -0,0 +1,188 @@ +from django.test import TestCase +from datetime import datetime +from models import Author, Book, Coffee, Reviewer +from django.db.models.sql.query import InvalidQuery + +class RawQueryTests(TestCase): + + def assertSuccessfulRawQuery(self, model, query, expected_results, + expected_annotations=(), params=[], translations=None): + """ + Execute the passed query against the passed model and check the output + """ + results = list(model.objects.raw(query=query, params=params, translations=translations)) + self.assertProcessed(results, expected_results, expected_annotations) + self.assertAnnotations(results, expected_annotations) + + def assertProcessed(self, results, orig, expected_annotations=()): + """ + Compare the results of a raw query against expected results + """ + self.assertEqual(len(results), len(orig)) + for index, item in enumerate(results): + orig_item = orig[index] + for annotation in expected_annotations: + setattr(orig_item, *annotation) + + 
self.assertEqual(item.id, orig_item.id) + + def assertNoAnnotations(self, results): + """ + Check that the results of a raw query contain no annotations + """ + self.assertAnnotations(results, ()) + + def assertAnnotations(self, results, expected_annotations): + """ + Check that the passed raw query results contain the expected + annotations + """ + if expected_annotations: + for index, result in enumerate(results): + annotation, value = expected_annotations[index] + self.assertTrue(hasattr(result, annotation)) + self.assertEqual(getattr(result, annotation), value) + + def testSimpleRawQuery(self): + """ + Basic test of raw query with a simple database query + """ + query = "SELECT * FROM raw_query_author" + authors = Author.objects.all() + self.assertSuccessfulRawQuery(Author, query, authors) + + def testRawQueryLazy(self): + """ + Raw queries are lazy: they aren't actually executed until they're + iterated over. + """ + q = Author.objects.raw('SELECT * FROM raw_query_author') + self.assert_(q.query.cursor is None) + list(q) + self.assert_(q.query.cursor is not None) + + def testFkeyRawQuery(self): + """ + Test of a simple raw query against a model containing a foreign key + """ + query = "SELECT * FROM raw_query_book" + books = Book.objects.all() + self.assertSuccessfulRawQuery(Book, query, books) + + def testDBColumnHandler(self): + """ + Test of a simple raw query against a model containing a field with + db_column defined. 
+ """ + query = "SELECT * FROM raw_query_coffee" + coffees = Coffee.objects.all() + self.assertSuccessfulRawQuery(Coffee, query, coffees) + + def testOrderHandler(self): + """ + Test of raw raw query's tolerance for columns being returned in any + order + """ + selects = ( + ('dob, last_name, first_name, id'), + ('last_name, dob, first_name, id'), + ('first_name, last_name, dob, id'), + ) + + for select in selects: + query = "SELECT %s FROM raw_query_author" % select + authors = Author.objects.all() + self.assertSuccessfulRawQuery(Author, query, authors) + + def testTranslations(self): + """ + Test of raw query's optional ability to translate unexpected result + column names to specific model fields + """ + query = "SELECT first_name AS first, last_name AS last, dob, id FROM raw_query_author" + translations = {'first': 'first_name', 'last': 'last_name'} + authors = Author.objects.all() + self.assertSuccessfulRawQuery(Author, query, authors, translations=translations) + + def testParams(self): + """ + Test passing optional query parameters + """ + query = "SELECT * FROM raw_query_author WHERE first_name = %s" + author = Author.objects.all()[2] + params = [author.first_name] + results = list(Author.objects.raw(query=query, params=params)) + self.assertProcessed(results, [author]) + self.assertNoAnnotations(results) + self.assertEqual(len(results), 1) + + def testManyToMany(self): + """ + Test of a simple raw query against a model containing a m2m field + """ + query = "SELECT * FROM raw_query_reviewer" + reviewers = Reviewer.objects.all() + self.assertSuccessfulRawQuery(Reviewer, query, reviewers) + + def testExtraConversions(self): + """ + Test to insure that extra translations are ignored. 
+ """ + query = "SELECT * FROM raw_query_author" + translations = {'something': 'else'} + authors = Author.objects.all() + self.assertSuccessfulRawQuery(Author, query, authors, translations=translations) + + def testMissingFields(self): + query = "SELECT id, first_name, dob FROM raw_query_author" + for author in Author.objects.raw(query): + self.assertNotEqual(author.first_name, None) + # last_name isn't given, but it will be retrieved on demand + self.assertNotEqual(author.last_name, None) + + def testMissingFieldsWithoutPK(self): + query = "SELECT first_name, dob FROM raw_query_author" + try: + list(Author.objects.raw(query)) + self.fail('Query without primary key should fail') + except InvalidQuery: + pass + + def testAnnotations(self): + query = "SELECT a.*, count(b.id) as book_count FROM raw_query_author a LEFT JOIN raw_query_book b ON a.id = b.author_id GROUP BY a.id, a.first_name, a.last_name, a.dob ORDER BY a.id" + expected_annotations = ( + ('book_count', 3), + ('book_count', 0), + ('book_count', 1), + ('book_count', 0), + ) + authors = Author.objects.all() + self.assertSuccessfulRawQuery(Author, query, authors, expected_annotations) + + def testInvalidQuery(self): + query = "UPDATE raw_query_author SET first_name='thing' WHERE first_name='Joe'" + self.assertRaises(InvalidQuery, Author.objects.raw, query) + + def testWhiteSpaceQuery(self): + query = " SELECT * FROM raw_query_author" + authors = Author.objects.all() + self.assertSuccessfulRawQuery(Author, query, authors) + + def testMultipleIterations(self): + query = "SELECT * FROM raw_query_author" + normal_authors = Author.objects.all() + raw_authors = Author.objects.raw(query) + + # First Iteration + first_iterations = 0 + for index, raw_author in enumerate(raw_authors): + self.assertEqual(normal_authors[index], raw_author) + first_iterations += 1 + + # Second Iteration + second_iterations = 0 + for index, raw_author in enumerate(raw_authors): + self.assertEqual(normal_authors[index], raw_author) + 
second_iterations += 1 + + self.assertEqual(first_iterations, second_iterations) \ No newline at end of file