Refs #16043 -- Refactored internal fields value cache.

* Removed all hardcoded logic for _{fieldname}_cache.
* Added an internal API for interacting with the field values cache.

Thanks carljm and MarkusH for support.
This commit is contained in:
Paulo 2016-06-04 16:10:37 -07:00 committed by Tim Graham
parent 22ff86ec52
commit bfb746f983
11 changed files with 156 additions and 107 deletions

View File

@ -7,6 +7,7 @@ from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist
from django.db import DEFAULT_DB_ALIAS, models, router, transaction
from django.db.models import DO_NOTHING
from django.db.models.base import ModelBase, make_foreign_order_accessors
from django.db.models.fields.mixins import FieldCacheMixin
from django.db.models.fields.related import (
ForeignObject, ForeignObjectRel, ReverseManyToOneDescriptor,
lazy_related_operation,
@ -15,7 +16,7 @@ from django.db.models.query_utils import PathInfo
from django.utils.functional import cached_property
class GenericForeignKey:
class GenericForeignKey(FieldCacheMixin):
"""
Provide a generic many-to-one relation through the ``content_type`` and
``object_id`` fields.
@ -49,7 +50,6 @@ class GenericForeignKey:
def contribute_to_class(self, cls, name, **kwargs):
self.name = name
self.model = cls
self.cache_attr = "_%s_cache" % name
cls._meta.add_field(self, private=True)
setattr(cls, name, self)
@ -156,6 +156,9 @@ class GenericForeignKey:
else:
return []
def get_cache_name(self):
return self.name
def get_content_type(self, obj=None, id=None, using=None):
if obj is not None:
return ContentType.objects.db_manager(obj._state.db).get_for_model(
@ -203,14 +206,14 @@ class GenericForeignKey:
return (model._meta.pk.get_prep_value(getattr(obj, self.fk_field)),
model)
return (ret_val,
return (
ret_val,
lambda obj: (obj.pk, obj.__class__),
gfk_key,
True,
self.name)
def is_cached(self, instance):
return hasattr(instance, self.cache_attr)
self.name,
True,
)
def __get__(self, instance, cls=None):
if instance is None:
@ -224,23 +227,19 @@ class GenericForeignKey:
ct_id = getattr(instance, f.get_attname(), None)
pk_val = getattr(instance, self.fk_field)
try:
rel_obj = getattr(instance, self.cache_attr)
except AttributeError:
rel_obj = None
else:
if rel_obj and (ct_id != self.get_content_type(obj=rel_obj, using=instance._state.db).id or
rel_obj._meta.pk.to_python(pk_val) != rel_obj.pk):
rel_obj = None
rel_obj = self.get_cached_value(instance, default=None)
if rel_obj is not None:
ct_match = ct_id == self.get_content_type(obj=rel_obj, using=instance._state.db).id
pk_match = rel_obj._meta.pk.to_python(pk_val) == rel_obj.pk
if ct_match and pk_match:
return rel_obj
else:
rel_obj = None
if ct_id is not None:
ct = self.get_content_type(id=ct_id, using=instance._state.db)
with suppress(ObjectDoesNotExist):
rel_obj = ct.get_object_for_this_type(pk=pk_val)
setattr(instance, self.cache_attr, rel_obj)
self.set_cached_value(instance, rel_obj)
return rel_obj
def __set__(self, instance, value):
@ -252,7 +251,7 @@ class GenericForeignKey:
setattr(instance, self.ct_field, ct)
setattr(instance, self.fk_field, fk)
setattr(instance, self.cache_attr, value)
self.set_cached_value(instance, value)
class GenericRel(ForeignObjectRel):
@ -534,11 +533,14 @@ def create_generic_related_manager(superclass, rel):
# We (possibly) need to convert object IDs to the type of the
# instances' PK in order to match up instances:
object_id_converter = instances[0]._meta.pk.to_python
return (queryset.filter(**query),
return (
queryset.filter(**query),
lambda relobj: object_id_converter(getattr(relobj, self.object_id_field_name)),
lambda obj: obj.pk,
False,
self.prefetch_cache_name)
self.prefetch_cache_name,
False,
)
def add(self, *objs, bulk=True):
db = router.db_for_write(self.model, instance=self.instance)

View File

@ -378,6 +378,7 @@ class ModelState:
# Necessary for correct validation of new instances of objects with explicit (non-auto) PKs.
# This impacts validation only; it has no effect on the actual save.
self.adding = True
self.fields_cache = {}
class Model(metaclass=ModelBase):
@ -607,12 +608,12 @@ class Model(metaclass=ModelBase):
continue
setattr(self, field.attname, getattr(db_instance, field.attname))
# Throw away stale foreign key references.
if field.is_relation and field.get_cache_name() in self.__dict__:
rel_instance = getattr(self, field.get_cache_name())
if field.is_relation and field.is_cached(self):
rel_instance = field.get_cached_value(self)
local_val = getattr(db_instance, field.attname)
related_val = None if rel_instance is None else getattr(rel_instance, field.target_field.attname)
if local_val != related_val or (local_val is None and related_val is None):
del self.__dict__[field.get_cache_name()]
field.delete_cached_value(self)
self._state.db = db_instance._state.db
def serializable_value(self, field_name):
@ -646,13 +647,9 @@ class Model(metaclass=ModelBase):
# a ForeignKey or OneToOneField on this model. If the field is
# nullable, allowing the save() would result in silent data loss.
for field in self._meta.concrete_fields:
if field.is_relation:
# If the related field isn't cached, then an instance hasn't
# been assigned and there's no need to worry about this check.
try:
getattr(self, field.get_cache_name())
except AttributeError:
continue
if field.is_relation and field.is_cached(self):
obj = getattr(self, field.name, None)
# A pk may have been assigned manually to a model instance not
# saved to the database (or auto-generated in a case like
@ -663,7 +660,7 @@ class Model(metaclass=ModelBase):
if obj and obj.pk is None:
# Remove the object from a related instance cache.
if not field.remote_field.multiple:
delattr(obj, field.remote_field.get_cache_name())
field.remote_field.delete_cached_value(obj)
raise ValueError(
"save() prohibited to prevent data loss due to "
"unsaved related object '%s'." % field.name
@ -773,9 +770,8 @@ class Model(metaclass=ModelBase):
# the related object cache, in case it's been accidentally
# populated. A fresh instance will be re-built from the
# database if necessary.
cache_name = field.get_cache_name()
if hasattr(self, cache_name):
delattr(self, cache_name)
if field.is_cached(self):
field.delete_cached_value(self)
def _save_table(self, raw=False, cls=None, force_insert=False,
force_update=False, using=None, update_fields=None):

View File

@ -734,9 +734,6 @@ class Field(RegisterLookupMixin):
column = self.db_column or attname
return attname, column
def get_cache_name(self):
return '_%s_cache' % self.name
def get_internal_type(self):
return self.__class__.__name__

View File

@ -0,0 +1,26 @@
# Sentinel meaning "the caller supplied no default". A dedicated object is
# needed because None is itself a value that gets stored in the cache.
NOT_PROVIDED = object()


class FieldCacheMixin:
    """
    Provide an API for working with the model's fields value cache.

    Values live in the ``instance._state.fields_cache`` dict, keyed by the
    name returned from ``get_cache_name()``.
    """

    def get_cache_name(self):
        """
        Return the key under which this field's value is cached.

        Subclasses must override this method.
        """
        raise NotImplementedError(
            'subclasses of FieldCacheMixin must provide a get_cache_name() method'
        )

    def get_cached_value(self, instance, default=NOT_PROVIDED):
        """
        Return the value cached for this field on ``instance``.

        If nothing is cached, return ``default`` when one was supplied;
        otherwise let the ``KeyError`` propagate to the caller.
        """
        cache_name = self.get_cache_name()
        try:
            return instance._state.fields_cache[cache_name]
        except KeyError:
            if default is NOT_PROVIDED:
                raise
            return default

    def is_cached(self, instance):
        """Return True if ``instance`` holds a cached value for this field."""
        return self.get_cache_name() in instance._state.fields_cache

    def set_cached_value(self, instance, value):
        """Store ``value`` as this field's cached value on ``instance``."""
        instance._state.fields_cache[self.get_cache_name()] = value

    def delete_cached_value(self, instance):
        """
        Drop this field's cached value from ``instance``.

        Raise ``KeyError`` if no value is cached; check ``is_cached()`` first
        when that is a possibility.
        """
        del instance._state.fields_cache[self.get_cache_name()]

View File

@ -16,6 +16,7 @@ from django.utils.functional import cached_property, curry
from django.utils.translation import gettext_lazy as _
from . import Field
from .mixins import FieldCacheMixin
from .related_descriptors import (
ForwardManyToOneDescriptor, ForwardOneToOneDescriptor,
ManyToManyDescriptor, ReverseManyToOneDescriptor,
@ -78,7 +79,7 @@ def lazy_related_operation(function, model, *related_models, **kwargs):
return apps.lazy_model_operation(partial(function, **kwargs), *model_keys)
class RelatedField(Field):
class RelatedField(FieldCacheMixin, Field):
"""Base class that all relational fields inherit from."""
# Field flags
@ -438,6 +439,9 @@ class RelatedField(Field):
"The relation has multiple target fields, but only single target field was asked for")
return target_fields[0]
def get_cache_name(self):
return self.name
class ForeignObject(RelatedField):
"""

View File

@ -86,7 +86,6 @@ class ForwardManyToOneDescriptor:
def __init__(self, field_with_rel):
self.field = field_with_rel
self.cache_name = self.field.get_cache_name()
@cached_property
def RelatedObjectDoesNotExist(self):
@ -100,7 +99,7 @@ class ForwardManyToOneDescriptor:
)
def is_cached(self, instance):
return hasattr(instance, self.cache_name)
return self.field.is_cached(instance)
def get_queryset(self, **hints):
return self.field.remote_field.model._base_manager.db_manager(hints=hints).all()
@ -114,6 +113,7 @@ class ForwardManyToOneDescriptor:
instance_attr = self.field.get_local_related_value
instances_dict = {instance_attr(inst): inst for inst in instances}
related_field = self.field.foreign_related_fields[0]
remote_field = self.field.remote_field
# FIXME: This will need to be revisited when we introduce support for
# composite fields. In the meantime we take this practical approach to
@ -121,7 +121,7 @@ class ForwardManyToOneDescriptor:
# (related_name ends with a '+'). Refs #21410.
# The check for len(...) == 1 is a special case that allows the query
# to be join-less and smaller. Refs #21760.
if self.field.remote_field.is_hidden() or len(self.field.foreign_related_fields) == 1:
if remote_field.is_hidden() or len(self.field.foreign_related_fields) == 1:
query = {'%s__in' % related_field.name: {instance_attr(inst)[0] for inst in instances}}
else:
query = {'%s__in' % self.field.related_query_name(): instances}
@ -129,12 +129,11 @@ class ForwardManyToOneDescriptor:
# Since we're going to assign directly in the cache,
# we must manage the reverse relation cache manually.
if not self.field.remote_field.multiple:
rel_obj_cache_name = self.field.remote_field.get_cache_name()
if not remote_field.multiple:
for rel_obj in queryset:
instance = instances_dict[rel_obj_attr(rel_obj)]
setattr(rel_obj, rel_obj_cache_name, instance)
return queryset, rel_obj_attr, instance_attr, True, self.cache_name
remote_field.set_cached_value(rel_obj, instance)
return queryset, rel_obj_attr, instance_attr, True, self.field.get_cache_name(), False
def get_object(self, instance):
qs = self.get_queryset(instance=instance)
@ -154,23 +153,24 @@ class ForwardManyToOneDescriptor:
if instance is None:
return self
# The related instance is loaded from the database and then cached in
# the attribute defined in self.cache_name. It can also be pre-cached
# The related instance is loaded from the database and then cached
# by the field on the model instance state. It can also be pre-cached
# by the reverse accessor (ReverseOneToOneDescriptor).
try:
rel_obj = getattr(instance, self.cache_name)
except AttributeError:
rel_obj = self.field.get_cached_value(instance)
except KeyError:
val = self.field.get_local_related_value(instance)
if None in val:
rel_obj = None
else:
rel_obj = self.get_object(instance)
remote_field = self.field.remote_field
# If this is a one-to-one relation, set the reverse accessor
# cache on the related object to the current instance to avoid
# an extra SQL query if it's accessed later on.
if not self.field.remote_field.multiple:
setattr(rel_obj, self.field.remote_field.get_cache_name(), instance)
setattr(instance, self.cache_name, rel_obj)
if not remote_field.multiple:
remote_field.set_cached_value(rel_obj, instance)
self.field.set_cached_value(instance, rel_obj)
if rel_obj is None and not self.field.null:
raise self.RelatedObjectDoesNotExist(
@ -208,6 +208,7 @@ class ForwardManyToOneDescriptor:
if not router.allow_relation(value, instance):
raise ValueError('Cannot assign "%r": the current database router prevents this relation.' % value)
remote_field = self.field.remote_field
# If we're setting the value of a OneToOneField to None, we need to clear
# out the cache on any old related object. Otherwise, deleting the
# previously-related object will also cause this object to be deleted,
@ -219,13 +220,13 @@ class ForwardManyToOneDescriptor:
# populated the cache, then we don't care - we're only accessing
# the object to invalidate the accessor cache, so there's no
# need to populate the cache just to expire it again.
related = getattr(instance, self.cache_name, None)
related = self.field.get_cached_value(instance, default=None)
# If we've got an old related object, we need to clear out its
# cache. This cache also might not exist if the related object
# hasn't been accessed yet.
if related is not None:
setattr(related, self.field.remote_field.get_cache_name(), None)
remote_field.set_cached_value(related, None)
for lh_field, rh_field in self.field.related_fields:
setattr(instance, lh_field.attname, None)
@ -237,13 +238,13 @@ class ForwardManyToOneDescriptor:
# Set the related instance cache used by __get__ to avoid an SQL query
# when accessing the attribute we just set.
setattr(instance, self.cache_name, value)
self.field.set_cached_value(instance, value)
# If this is a one-to-one relation, set the reverse accessor cache on
# the related object to the current instance to avoid an extra SQL
# query if it's accessed later on.
if value is not None and not self.field.remote_field.multiple:
setattr(value, self.field.remote_field.get_cache_name(), instance)
if value is not None and not remote_field.multiple:
remote_field.set_cached_value(value, instance)
class ForwardOneToOneDescriptor(ForwardManyToOneDescriptor):
@ -308,8 +309,9 @@ class ReverseOneToOneDescriptor:
"""
def __init__(self, related):
# Following the example above, `related` is an instance of OneToOneRel
# which represents the reverse restaurant field (place.restaurant).
self.related = related
self.cache_name = related.get_cache_name()
@cached_property
def RelatedObjectDoesNotExist(self):
@ -322,7 +324,7 @@ class ReverseOneToOneDescriptor:
)
def is_cached(self, instance):
return hasattr(instance, self.cache_name)
return self.related.is_cached(instance)
def get_queryset(self, **hints):
return self.related.related_model._base_manager.db_manager(hints=hints).all()
@ -343,11 +345,10 @@ class ReverseOneToOneDescriptor:
# Since we're going to assign directly in the cache,
# we must manage the reverse relation cache manually.
rel_obj_cache_name = self.related.field.get_cache_name()
for rel_obj in queryset:
instance = instances_dict[rel_obj_attr(rel_obj)]
setattr(rel_obj, rel_obj_cache_name, instance)
return queryset, rel_obj_attr, instance_attr, True, self.cache_name
self.related.field.set_cached_value(rel_obj, instance)
return queryset, rel_obj_attr, instance_attr, True, self.related.get_cache_name(), False
def __get__(self, instance, cls=None):
"""
@ -364,12 +365,12 @@ class ReverseOneToOneDescriptor:
if instance is None:
return self
# The related instance is loaded from the database and then cached in
# the attribute defined in self.cache_name. It can also be pre-cached
# The related instance is loaded from the database and then cached
# by the field on the model instance state. It can also be pre-cached
# by the forward accessor (ForwardManyToOneDescriptor).
try:
rel_obj = getattr(instance, self.cache_name)
except AttributeError:
rel_obj = self.related.get_cached_value(instance)
except KeyError:
related_pk = instance.pk
if related_pk is None:
rel_obj = None
@ -383,8 +384,8 @@ class ReverseOneToOneDescriptor:
# Set the forward accessor cache on the related object to
# the current instance to avoid an extra SQL query if it's
# accessed later on.
setattr(rel_obj, self.related.field.get_cache_name(), instance)
setattr(instance, self.cache_name, rel_obj)
self.related.field.set_cached_value(rel_obj, instance)
self.related.set_cached_value(instance, rel_obj)
if rel_obj is None:
raise self.RelatedObjectDoesNotExist(
@ -415,11 +416,17 @@ class ReverseOneToOneDescriptor:
if value is None:
# Update the cached related instance (if any) & clear the cache.
try:
rel_obj = getattr(instance, self.cache_name)
except AttributeError:
# Following the example above, this would be the cached
# ``restaurant`` instance (if any).
rel_obj = self.related.get_cached_value(instance)
except KeyError:
pass
else:
delattr(instance, self.cache_name)
# Remove the ``restaurant`` instance from the ``place``
# instance cache.
self.related.delete_cached_value(instance)
# Set the ``place`` field on the ``restaurant``
# instance to None.
setattr(rel_obj, self.related.field.name, None)
elif not isinstance(value, self.related.related_model):
# An object must be an instance of the related class.
@ -447,11 +454,11 @@ class ReverseOneToOneDescriptor:
# Set the related instance cache used by __get__ to avoid an SQL query
# when accessing the attribute we just set.
setattr(instance, self.cache_name, value)
self.related.set_cached_value(instance, value)
# Set the forward accessor cache on the related object to the current
# instance to avoid an extra SQL query if it's accessed later on.
setattr(value, self.related.field.get_cache_name(), instance)
self.related.field.set_cached_value(value, instance)
class ReverseManyToOneDescriptor:
@ -584,7 +591,7 @@ def create_reverse_many_to_one_manager(superclass, rel):
instance = instances_dict[rel_obj_attr(rel_obj)]
setattr(rel_obj, self.field.name, instance)
cache_name = self.field.related_query_name()
return queryset, rel_obj_attr, instance_attr, False, cache_name
return queryset, rel_obj_attr, instance_attr, False, cache_name, False
def add(self, *objs, bulk=True):
self._remove_prefetched_objects()
@ -882,6 +889,7 @@ def create_forward_many_to_many_manager(superclass, rel, reverse):
),
False,
self.prefetch_cache_name,
False,
)
def add(self, *objs):

View File

@ -13,9 +13,10 @@ from django.core import exceptions
from django.utils.functional import cached_property
from . import BLANK_CHOICE_DASH
from .mixins import FieldCacheMixin
class ForeignObjectRel:
class ForeignObjectRel(FieldCacheMixin):
"""
Used by ForeignObject to store information about the relation.
@ -162,12 +163,16 @@ class ForeignObjectRel:
return self.related_name
return opts.model_name + ('_set' if self.multiple else '')
def get_cache_name(self):
return "_%s_cache" % self.get_accessor_name()
def get_path_info(self):
return self.field.get_reverse_path_info()
def get_cache_name(self):
"""
Return the name of the cache key to use for storing an instance of the
forward model on the reverse model.
"""
return self.get_accessor_name()
class ManyToOneRel(ForeignObjectRel):
"""

View File

@ -72,7 +72,7 @@ class ModelIterable(BaseIterable):
if queryset._known_related_objects:
for field, rel_objs in queryset._known_related_objects.items():
# Avoid overwriting objects loaded e.g. by select_related
if hasattr(obj, field.get_cache_name()):
if field.is_cached(obj):
continue
pk = getattr(obj, field.get_attname())
try:
@ -1544,12 +1544,13 @@ def prefetch_one_level(instances, prefetcher, lookup, level):
# callable that gets value to be matched for returned instances,
# callable that gets value to be matched for passed in instances,
# boolean that is True for singly related objects,
# cache name to assign to).
# cache or field name to assign to,
# boolean that is True when the previous argument is a field name backed by
# a descriptor (assigned with setattr) rather than a fields cache key).
# The 'values to be matched' must be hashable as they will be used
# in a dictionary.
rel_qs, rel_obj_attr, instance_attr, single, cache_name = (
rel_qs, rel_obj_attr, instance_attr, single, cache_name, is_descriptor = (
prefetcher.get_prefetch_queryset(instances, lookup.get_current_queryset(level)))
# We have to handle the possibility that the QuerySet we just got back
# contains some prefetch_related lookups. We don't want to trigger the
@ -1597,8 +1598,18 @@ def prefetch_one_level(instances, prefetcher, lookup, level):
if single:
val = vals[0] if vals else None
to_attr = to_attr if as_attr else cache_name
if as_attr:
# A to_attr has been given for the prefetch.
setattr(obj, to_attr, val)
elif is_descriptor:
# cache_name points to a field name in obj.
# This field is a descriptor for a related object.
setattr(obj, cache_name, val)
else:
# No to_attr has been given for this prefetch operation and the
# cache_name does not point to a descriptor. Store the value of
# the field in the object's field cache.
obj._state.fields_cache[cache_name] = val
else:
if as_attr:
setattr(obj, to_attr, vals)
@ -1653,9 +1664,9 @@ class RelatedPopulator:
# model's fields.
# - related_populators: a list of RelatedPopulator instances if
# select_related() descends to related models from this model.
# - cache_name, reverse_cache_name: the names to use for setattr
# when assigning the fetched object to the from_obj. If the
# reverse_cache_name is set, then we also set the reverse link.
# - field, remote_field: the fields to use for populating the
# internal fields cache. If remote_field is set then we also
# set the reverse link.
select_fields = klass_info['select_fields']
from_parent = klass_info['from_parent']
if not from_parent:
@ -1674,16 +1685,16 @@ class RelatedPopulator:
self.model_cls = klass_info['model']
self.pk_idx = self.init_list.index(self.model_cls._meta.pk.attname)
self.related_populators = get_related_populators(klass_info, select, self.db)
field = klass_info['field']
reverse = klass_info['reverse']
self.reverse_cache_name = None
field = klass_info['field']
self.remote_field = None
if reverse:
self.cache_name = field.remote_field.get_cache_name()
self.reverse_cache_name = field.get_cache_name()
self.field = field.remote_field
self.remote_field = field
else:
self.cache_name = field.get_cache_name()
self.field = field
if field.unique:
self.reverse_cache_name = field.remote_field.get_cache_name()
self.remote_field = field.remote_field
def populate(self, row, from_obj):
if self.reorder_for_init:
@ -1697,9 +1708,9 @@ class RelatedPopulator:
if obj and self.related_populators:
for rel_iter in self.related_populators:
rel_iter.populate(row, obj)
setattr(from_obj, self.cache_name, obj)
if obj and self.reverse_cache_name:
setattr(obj, self.reverse_cache_name, from_obj)
self.field.set_cached_value(from_obj, obj)
if obj and self.remote_field:
self.remote_field.set_cached_value(obj, from_obj)
def get_related_populators(klass_info, select, db):

View File

@ -10,9 +10,9 @@ class ArticleTranslationDescriptor(ForwardManyToOneDescriptor):
def __set__(self, instance, value):
if instance is None:
raise AttributeError("%s must be accessed via instance" % self.field.name)
setattr(instance, self.cache_name, value)
self.field.set_cached_value(instance, value)
if value is not None and not self.field.remote_field.multiple:
setattr(value, self.field.related.get_cache_name(), instance)
self.field.remote_field.set_cached_value(value, instance)
class ColConstraint:

View File

@ -461,7 +461,7 @@ class ManyToOneTests(TestCase):
self.assertIs(c.parent, p)
# But if we kill the cache, we get a new object.
del c._parent_cache
del c._state.fields_cache['parent']
self.assertIsNot(c.parent, p)
# Assigning a new object results in that object getting cached immediately.

View File

@ -207,7 +207,7 @@ class OneToOneTests(TestCase):
self.assertIs(p.restaurant, r)
# But if we kill the cache, we get a new object
del p._restaurant_cache
del p._state.fields_cache['restaurant']
self.assertIsNot(p.restaurant, r)
# Reassigning the Restaurant object results in an immediate cache update