Fixed #16649 -- Refactored save_base logic

Model.save() will use UPDATE - if not updated - INSERT instead of
SELECT - if found UPDATE else INSERT. This should save a query when
updating, but will cost a little when inserting model with PK set.

Also fixed #17341 -- made sure .save() commits transactions only after
the whole model has been saved. This wasn't the case in model
inheritance situations.

The save_base implementation was refactored into multiple methods.
A typical chain for inherited save is:
save_base()
    _save_parents(self)
        for each parent:
            _save_parents(parent)
            _save_table(parent)
    _save_table(self)
This commit is contained in:
Anssi Kääriäinen 2012-11-29 12:10:31 +02:00
parent 8a2f5300b2
commit 6b4834952d
7 changed files with 178 additions and 112 deletions

View File

@ -545,125 +545,139 @@ class Model(six.with_metaclass(ModelBase)):
force_update=force_update, update_fields=update_fields) force_update=force_update, update_fields=update_fields)
save.alters_data = True save.alters_data = True
def save_base(self, raw=False, cls=None, origin=None, force_insert=False, def save_base(self, raw=False, force_insert=False,
force_update=False, using=None, update_fields=None): force_update=False, using=None, update_fields=None):
""" """
Does the heavy-lifting involved in saving. Subclasses shouldn't need to Handles the parts of saving which should be done only once per save,
override this method. It's separate from save() in order to hide the yet need to be done in raw saves, too. This includes some sanity
need for overrides of save() to pass around internal-only parameters checks and signal sending.
('raw', 'cls', and 'origin').
The 'raw' argument is telling save_base not to save any parent
models and not to do any changes to the values before save. This
is used by fixture loading.
""" """
using = using or router.db_for_write(self.__class__, instance=self) using = using or router.db_for_write(self.__class__, instance=self)
assert not (force_insert and (force_update or update_fields)) assert not (force_insert and (force_update or update_fields))
assert update_fields is None or len(update_fields) > 0 assert update_fields is None or len(update_fields) > 0
if cls is None: cls = origin = self.__class__
cls = self.__class__ # Skip proxies, but keep the origin as the proxy model.
meta = cls._meta if cls._meta.proxy:
if not meta.proxy: cls = cls._meta.concrete_model
origin = cls meta = cls._meta
else: if not meta.auto_created:
meta = cls._meta
if origin and not meta.auto_created:
signals.pre_save.send(sender=origin, instance=self, raw=raw, using=using, signals.pre_save.send(sender=origin, instance=self, raw=raw, using=using,
update_fields=update_fields) update_fields=update_fields)
with transaction.commit_on_success_unless_managed(using=using, savepoint=False):
# If we are in a raw save, save the object exactly as presented. if not raw:
# That means that we don't try to be smart about saving attributes self._save_parents(cls, using, update_fields)
# that might have come from the parent class - we just save the updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
# attributes we have been given to the class we have been given.
# We also go through this process to defer the save of proxy objects
# to their actual underlying model.
if not raw or meta.proxy:
if meta.proxy:
org = cls
else:
org = None
for parent, field in meta.parents.items():
# At this point, parent's primary key field may be unknown
# (for example, from administration form which doesn't fill
# this field). If so, fill it.
if field and getattr(self, parent._meta.pk.attname) is None and getattr(self, field.attname) is not None:
setattr(self, parent._meta.pk.attname, getattr(self, field.attname))
self.save_base(cls=parent, origin=org, using=using,
update_fields=update_fields)
if field:
setattr(self, field.attname, self._get_pk_val(parent._meta))
# Since we didn't have an instance of the parent handy, we
# set attname directly, bypassing the descriptor.
# Invalidate the related object cache, in case it's been
# accidentally populated. A fresh instance will be
# re-built from the database if necessary.
cache_name = field.get_cache_name()
if hasattr(self, cache_name):
delattr(self, cache_name)
if meta.proxy:
return
if not meta.proxy:
non_pks = [f for f in meta.local_fields if not f.primary_key]
if update_fields:
non_pks = [f for f in non_pks if f.name in update_fields or f.attname in update_fields]
with transaction.commit_on_success_unless_managed(using=using):
# First, try an UPDATE. If that doesn't update anything, do an INSERT.
pk_val = self._get_pk_val(meta)
pk_set = pk_val is not None
record_exists = True
manager = cls._base_manager
if pk_set:
# Determine if we should do an update (pk already exists, forced update,
# no force_insert)
if ((force_update or update_fields) or (not force_insert and
manager.using(using).filter(pk=pk_val).exists())):
if force_update or non_pks:
values = [(f, None, (raw and getattr(self, f.attname) or f.pre_save(self, False))) for f in non_pks]
if values:
rows = manager.using(using).filter(pk=pk_val)._update(values)
if force_update and not rows:
raise DatabaseError("Forced update did not affect any rows.")
if update_fields and not rows:
raise DatabaseError("Save with update_fields did not affect any rows.")
else:
record_exists = False
if not pk_set or not record_exists:
if meta.order_with_respect_to:
# If this is a model with an order_with_respect_to
# autopopulate the _order field
field = meta.order_with_respect_to
order_value = manager.using(using).filter(**{field.name: getattr(self, field.attname)}).count()
self._order = order_value
fields = meta.local_fields
if not pk_set:
if force_update or update_fields:
raise ValueError("Cannot force an update in save() with no primary key.")
fields = [f for f in fields if not isinstance(f, AutoField)]
record_exists = False
update_pk = bool(meta.has_auto_field and not pk_set)
result = manager._insert([self], fields=fields, return_id=update_pk, using=using, raw=raw)
if update_pk:
setattr(self, meta.pk.attname, result)
# Store the database on which the object was saved # Store the database on which the object was saved
self._state.db = using self._state.db = using
# Once saved, this is no longer a to-be-added instance. # Once saved, this is no longer a to-be-added instance.
self._state.adding = False self._state.adding = False
# Signal that the save is complete # Signal that the save is complete
if origin and not meta.auto_created: if not meta.auto_created:
signals.post_save.send(sender=origin, instance=self, created=(not record_exists), signals.post_save.send(sender=origin, instance=self, created=(not updated),
update_fields=update_fields, raw=raw, using=using) update_fields=update_fields, raw=raw, using=using)
save_base.alters_data = True save_base.alters_data = True
def _save_parents(self, cls, using, update_fields):
"""
Saves all the parents of cls using values from self.
"""
meta = cls._meta
for parent, field in meta.parents.items():
# Make sure the link fields are synced between parent and self.
if (field and getattr(self, parent._meta.pk.attname) is None
and getattr(self, field.attname) is not None):
setattr(self, parent._meta.pk.attname, getattr(self, field.attname))
self._save_parents(cls=parent, using=using, update_fields=update_fields)
self._save_table(cls=parent, using=using, update_fields=update_fields)
# Set the parent's PK value to self.
if field:
setattr(self, field.attname, self._get_pk_val(parent._meta))
# Since we didn't have an instance of the parent handy set
# attname directly, bypassing the descriptor. Invalidate
# the related object cache, in case it's been accidentally
# populated. A fresh instance will be re-built from the
# database if necessary.
cache_name = field.get_cache_name()
if hasattr(self, cache_name):
delattr(self, cache_name)
def _save_table(self, raw=False, cls=None, force_insert=False,
force_update=False, using=None, update_fields=None):
"""
Does the heavy-lifting involved in saving. Updates or inserts the data
for a single table.
"""
meta = cls._meta
non_pks = [f for f in meta.local_fields if not f.primary_key]
if update_fields:
non_pks = [f for f in non_pks
if f.name in update_fields or f.attname in update_fields]
pk_val = self._get_pk_val(meta)
pk_set = pk_val is not None
if not pk_set and (force_update or update_fields):
raise ValueError("Cannot force an update in save() with no primary key.")
updated = False
# If possible, try an UPDATE. If that doesn't update anything, do an INSERT.
if pk_set and not force_insert:
base_qs = cls._base_manager.using(using)
values = [(f, None, (raw and getattr(self, f.attname) or f.pre_save(self, False)))
for f in non_pks]
if not values:
# We can end up here when saving a model in inheritance chain where
# update_fields doesn't target any field in current model. In that
# case we just say the update succeeded. Another case ending up here
# is a model with just PK - in that case check that the PK still
# exists.
updated = update_fields is not None or base_qs.filter(pk=pk_val).exists()
else:
updated = self._do_update(base_qs, using, pk_val, values)
if force_update and not updated:
raise DatabaseError("Forced update did not affect any rows.")
if update_fields and not updated:
raise DatabaseError("Save with update_fields did not affect any rows.")
if not updated:
if meta.order_with_respect_to:
# If this is a model with an order_with_respect_to
# autopopulate the _order field
field = meta.order_with_respect_to
order_value = cls._base_manager.using(using).filter(
**{field.name: getattr(self, field.attname)}).count()
self._order = order_value
fields = meta.local_fields
if not pk_set:
fields = [f for f in fields if not isinstance(f, AutoField)]
update_pk = bool(meta.has_auto_field and not pk_set)
result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
if update_pk:
setattr(self, meta.pk.attname, result)
return updated
def _do_update(self, base_qs, using, pk_val, values):
"""
This method will try to update the model. If the model was updated (in
the sense that an update query was done and a matching row was found
from the DB) the method will return True.
"""
return base_qs.filter(pk=pk_val)._update(values) > 0
def _do_insert(self, manager, using, fields, update_pk, raw):
"""
Do an INSERT. If update_pk is defined then this method should return
the new pk for the model.
"""
return manager._insert([self], fields=fields, return_id=update_pk,
using=using, raw=raw)
def delete(self, using=None): def delete(self, using=None):
using = using or router.db_for_write(self.__class__, instance=self) using = using or router.db_for_write(self.__class__, instance=self)
assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname) assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)

View File

@ -292,12 +292,13 @@ follows this algorithm:
* If the object's primary key attribute is set to a value that evaluates to * If the object's primary key attribute is set to a value that evaluates to
``True`` (i.e., a value other than ``None`` or the empty string), Django ``True`` (i.e., a value other than ``None`` or the empty string), Django
executes a ``SELECT`` query to determine whether a record with the given executes an ``UPDATE``.
primary key already exists. * If the object's primary key attribute is *not* set or if the ``UPDATE``
* If the record with the given primary key does already exist, Django didn't update anything, Django executes an ``INSERT``.
executes an ``UPDATE`` query.
* If the object's primary key attribute is *not* set, or if it's set but a .. versionchanged:: 1.6
record doesn't exist, Django executes an ``INSERT``. Previously Django used ``SELECT`` - if not found ``INSERT`` else ``UPDATE``
algorithm. The old algorithm resulted in one more query in ``UPDATE`` case.
The one gotcha here is that you should be careful not to specify a primary-key The one gotcha here is that you should be careful not to specify a primary-key
value explicitly when saving new objects, if you cannot guarantee the value explicitly when saving new objects, if you cannot guarantee the

View File

@ -150,6 +150,10 @@ Minor features
* Generic :class:`~django.contrib.gis.db.models.GeometryField` is now editable * Generic :class:`~django.contrib.gis.db.models.GeometryField` is now editable
with the OpenLayers widget in the admin. with the OpenLayers widget in the admin.
* The :meth:`Model.save() <django.db.models.Model.save()>` will do
``UPDATE`` - if not updated - ``INSERT`` instead of ``SELECT`` - if not
found ``INSERT`` else ``UPDATE`` in case the model's primary key is set.
Backwards incompatible changes in 1.6 Backwards incompatible changes in 1.6
===================================== =====================================

View File

@ -1,11 +1,13 @@
from __future__ import absolute_import, unicode_literals from __future__ import absolute_import, unicode_literals
from datetime import datetime from datetime import datetime
import threading
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
from django.db import connections, DEFAULT_DB_ALIAS
from django.db.models.fields import Field, FieldDoesNotExist from django.db.models.fields import Field, FieldDoesNotExist
from django.db.models.query import QuerySet, EmptyQuerySet, ValuesListQuerySet from django.db.models.query import QuerySet, EmptyQuerySet, ValuesListQuerySet
from django.test import TestCase, skipIfDBFeature, skipUnlessDBFeature from django.test import TestCase, TransactionTestCase, skipIfDBFeature, skipUnlessDBFeature
from django.utils import six from django.utils import six
from django.utils.translation import ugettext_lazy from django.utils.translation import ugettext_lazy
@ -690,4 +692,29 @@ class ModelTest(TestCase):
def test_invalid_qs_list(self): def test_invalid_qs_list(self):
qs = Article.objects.order_by('invalid_column') qs = Article.objects.order_by('invalid_column')
self.assertRaises(FieldError, list, qs) self.assertRaises(FieldError, list, qs)
self.assertRaises(FieldError, list, qs) self.assertRaises(FieldError, list, qs)
class ConcurrentSaveTests(TransactionTestCase):
@skipUnlessDBFeature('test_db_allows_multiple_connections')
def test_concurrent_delete_with_save(self):
"""
Test fetching, deleting and finally saving an object - we should get
an insert in this case.
"""
a = Article.objects.create(headline='foo', pub_date=datetime.now())
exceptions = []
def deleter():
try:
# Do not delete a directly - doing so alters its state.
Article.objects.filter(pk=a.pk).delete()
connections[DEFAULT_DB_ALIAS].commit_unless_managed()
except Exception as e:
exceptions.append(e)
finally:
connections[DEFAULT_DB_ALIAS].close()
self.assertEqual(len(exceptions), 0)
t = threading.Thread(target=deleter)
t.start()
t.join()
a.save()
self.assertEqual(Article.objects.get(pk=a.pk).headline, 'foo')

View File

@ -294,7 +294,7 @@ class ModelInheritanceTests(TestCase):
rating=4, rating=4,
chef=c chef=c
) )
with self.assertNumQueries(6): with self.assertNumQueries(3):
ir.save() ir.save()
def test_update_parent_filtering(self): def test_update_parent_filtering(self):

View File

@ -4,6 +4,8 @@ from django.db import models
class Mod(models.Model): class Mod(models.Model):
fld = models.IntegerField() fld = models.IntegerField()
class SubMod(Mod):
cnt = models.IntegerField(unique=True)
class M2mA(models.Model): class M2mA(models.Model):
others = models.ManyToManyField('M2mB') others = models.ManyToManyField('M2mB')

View File

@ -1,6 +1,7 @@
from __future__ import absolute_import from __future__ import absolute_import
from django.db import connection, connections, transaction, DEFAULT_DB_ALIAS, DatabaseError from django.db import (connection, connections, transaction, DEFAULT_DB_ALIAS, DatabaseError,
IntegrityError)
from django.db.transaction import commit_on_success, commit_manually, TransactionManagementError from django.db.transaction import commit_on_success, commit_manually, TransactionManagementError
from django.test import TransactionTestCase, skipUnlessDBFeature from django.test import TransactionTestCase, skipUnlessDBFeature
from django.test.utils import override_settings from django.test.utils import override_settings
@ -8,8 +9,25 @@ from django.utils.unittest import skipIf, skipUnless
from transactions.tests import IgnorePendingDeprecationWarningsMixin from transactions.tests import IgnorePendingDeprecationWarningsMixin
from .models import Mod, M2mA, M2mB from .models import Mod, M2mA, M2mB, SubMod
class ModelInheritanceTests(TransactionTestCase):
def test_save(self):
# First, create a SubMod, then try to save another with conflicting
# cnt field. The problem was that transactions were committed after
# every parent save when not in managed transaction. As the cnt
# conflict is in the second model, we can check if the first save
# was committed or not.
SubMod(fld=1, cnt=1).save()
# We should have committed the transaction for the above - assert this.
connection.rollback()
self.assertEqual(SubMod.objects.count(), 1)
try:
SubMod(fld=2, cnt=1).save()
except IntegrityError:
connection.rollback()
self.assertEqual(SubMod.objects.count(), 1)
self.assertEqual(Mod.objects.count(), 1)
class TestTransactionClosing(IgnorePendingDeprecationWarningsMixin, TransactionTestCase): class TestTransactionClosing(IgnorePendingDeprecationWarningsMixin, TransactionTestCase):
""" """