Fixed #24743, #24745 -- Optimized migration plan handling

The change partly goes back to the old behavior for forwards migrations
which should reduce the amount of memory consumption (#24745). However,
by the way the current state computation is done (there is no
`state_backwards` on a migration class) this change cannot be applied to
backwards migrations. Hence rolling back migrations still requires the
precomputation and storage of the intermediate migration states.

This improvement also implies that Django does not handle mixed
migration plans anymore. Mixed plans consist of a list of migrations
where some are being applied and others are being unapplied.

Thanks Andrew Godwin, Josh Smeaton and Tim Graham for the review as well
as everybody involved on the ticket that kept me looking into the issue.
This commit is contained in:
Markus Holtermann 2015-08-27 09:49:35 +10:00
parent 186eb21dc1
commit 5aa55038ca
7 changed files with 184 additions and 27 deletions

View File

@ -58,3 +58,7 @@ class NodeNotFoundError(LookupError):
class MigrationSchemaMissing(DatabaseError): class MigrationSchemaMissing(DatabaseError):
pass pass
class InvalidMigrationPlan(ValueError):
pass

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from django.apps.registry import apps as global_apps from django.apps.registry import apps as global_apps
from django.db import migrations from django.db import migrations
from .exceptions import InvalidMigrationPlan
from .loader import MigrationLoader from .loader import MigrationLoader
from .recorder import MigrationRecorder from .recorder import MigrationRecorder
from .state import ProjectState from .state import ProjectState
@ -71,46 +72,95 @@ class MigrationExecutor(object):
""" """
if plan is None: if plan is None:
plan = self.migration_plan(targets) plan = self.migration_plan(targets)
migrations_to_run = {m[0] for m in plan}
# Create the forwards plan Django would follow on an empty database # Create the forwards plan Django would follow on an empty database
full_plan = self.migration_plan(self.loader.graph.leaf_nodes(), clean_start=True) full_plan = self.migration_plan(self.loader.graph.leaf_nodes(), clean_start=True)
# Holds all states right before a migration is applied
# if the migration is being run. all_forwards = all(not backwards for mig, backwards in plan)
all_backwards = all(backwards for mig, backwards in plan)
if not plan:
pass # Nothing to do for an empty plan
elif all_forwards == all_backwards:
# This should only happen if there's a mixed plan
raise InvalidMigrationPlan(
"Migration plans with both forwards and backwards migrations "
"are not supported. Please split your migration process into "
"separate plans of only forwards OR backwards migrations.",
plan
)
elif all_forwards:
self._migrate_all_forwards(plan, full_plan, fake=fake, fake_initial=fake_initial)
else:
# No need to check for `elif all_backwards` here, as that condition
# would always evaluate to true.
self._migrate_all_backwards(plan, full_plan, fake=fake)
self.check_replacements()
def _migrate_all_forwards(self, plan, full_plan, fake, fake_initial):
"""
Take a list of 2-tuples of the form (migration instance, False) and
apply them in the order they occur in the full_plan.
"""
migrations_to_run = {m[0] for m in plan}
state = ProjectState(real_apps=list(self.loader.unmigrated_apps))
for migration, _ in full_plan:
if not migrations_to_run:
# We remove every migration that we applied from this set so
# that we can bail out once the last migration has been applied
# and don't always run until the very end of the migration
# process.
break
if migration in migrations_to_run:
if 'apps' not in state.__dict__:
if self.progress_callback:
self.progress_callback("render_start")
state.apps # Render all -- performance critical
if self.progress_callback:
self.progress_callback("render_success")
state = self.apply_migration(state, migration, fake=fake, fake_initial=fake_initial)
migrations_to_run.remove(migration)
else:
migration.mutate_state(state, preserve=False)
def _migrate_all_backwards(self, plan, full_plan, fake):
"""
Take a list of 2-tuples of the form (migration instance, True) and
unapply them in reverse order they occur in the full_plan.
Since unapplying a migration requires the project state prior to that
migration, Django will compute the migration states before each of them
in a first run over the plan and then unapply them in a second run over
the plan.
"""
migrations_to_run = {m[0] for m in plan}
# Holds all migration states prior to the migrations being unapplied
states = {} states = {}
state = ProjectState(real_apps=list(self.loader.unmigrated_apps)) state = ProjectState(real_apps=list(self.loader.unmigrated_apps))
if self.progress_callback: if self.progress_callback:
self.progress_callback("render_start") self.progress_callback("render_start")
# Phase 1 -- Store all project states of migrations right before they
# are applied. The first migration that will be applied in phase 2 will
# trigger the rendering of the initial project state. From this time on
# models will be recursively reloaded as explained in
# `django.db.migrations.state.get_related_models_recursive()`.
for migration, _ in full_plan: for migration, _ in full_plan:
if not migrations_to_run: if not migrations_to_run:
# We remove every migration whose state was already computed # We remove every migration that we applied from this set so
# from the set below (`migrations_to_run.remove(migration)`). # that we can bail out once the last migration has been applied
# If no states for migrations must be computed, we can exit # and don't always run until the very end of the migration
# this loop. Migrations that occur after the latest migration # process.
# that is about to be applied would only trigger unneeded
# mutate_state() calls.
break break
do_run = migration in migrations_to_run if migration in migrations_to_run:
if do_run:
if 'apps' not in state.__dict__: if 'apps' not in state.__dict__:
state.apps # Render all real_apps -- performance critical state.apps # Render all -- performance critical
states[migration] = state.clone() # The state before this migration
states[migration] = state
# The old state keeps as-is, we continue with the new state
state = migration.mutate_state(state, preserve=True)
migrations_to_run.remove(migration) migrations_to_run.remove(migration)
# Only preserve the state if the migration is being run later else:
state = migration.mutate_state(state, preserve=do_run) migration.mutate_state(state, preserve=False)
if self.progress_callback: if self.progress_callback:
self.progress_callback("render_success") self.progress_callback("render_success")
# Phase 2 -- Run the migrations
for migration, backwards in plan: for migration, _ in plan:
if not backwards:
self.apply_migration(states[migration], migration, fake=fake, fake_initial=fake_initial)
else:
self.unapply_migration(states[migration], migration, fake=fake) self.unapply_migration(states[migration], migration, fake=fake)
self.check_replacements()
def collect_sql(self, plan): def collect_sql(self, plan):
""" """

View File

@ -458,6 +458,21 @@ Migrations
* When supplying ``None`` as a value in :setting:`MIGRATION_MODULES`, Django * When supplying ``None`` as a value in :setting:`MIGRATION_MODULES`, Django
will consider the app an app without migrations. will consider the app an app without migrations.
* When applying migrations, the "Rendering model states" step that's displayed
when running migrate with verbosity 2 or higher now computes only the states
for the migrations that have already been applied. The model states for
migrations being applied are generated on demand, drastically reducing the
amount of required memory.
However, this improvement is not available when unapplying migrations and
therefore still requires the precomputation and storage of the intermediate
migration states.
This improvement also requires that Django no longer supports mixed migration
plans. Mixed plans consist of a list of migrations where some are being
applied and others are being unapplied. This was never officially supported
and never had a public API that supports this behavior.
Models Models
^^^^^^ ^^^^^^
@ -1094,6 +1109,10 @@ Miscellaneous
* The system checks for :class:`~django.contrib.admin.ModelAdmin` now check * The system checks for :class:`~django.contrib.admin.ModelAdmin` now check
instances rather than classes. instances rather than classes.
* The private API to apply mixed migration plans has been dropped for
performance reasons. Mixed plans consist of a list of migrations where some
are being applied and others are being unapplied.
.. _deprecated-features-1.9: .. _deprecated-features-1.9:
Features deprecated in 1.9 Features deprecated in 1.9

View File

@ -580,6 +580,7 @@ postgresql
pq pq
pre pre
precisions precisions
precomputation
preconfigured preconfigured
prefetch prefetch
prefetched prefetched

View File

@ -1,5 +1,6 @@
from django.apps.registry import apps as global_apps from django.apps.registry import apps as global_apps
from django.db import connection from django.db import connection
from django.db.migrations.exceptions import InvalidMigrationPlan
from django.db.migrations.executor import MigrationExecutor from django.db.migrations.executor import MigrationExecutor
from django.db.migrations.graph import MigrationGraph from django.db.migrations.graph import MigrationGraph
from django.db.migrations.recorder import MigrationRecorder from django.db.migrations.recorder import MigrationRecorder
@ -145,6 +146,64 @@ class ExecutorTests(MigrationTestBase):
executor.recorder.record_unapplied("migrations", "0002_second") executor.recorder.record_unapplied("migrations", "0002_second")
executor.recorder.record_unapplied("migrations", "0001_initial") executor.recorder.record_unapplied("migrations", "0001_initial")
@override_settings(MIGRATION_MODULES={
"migrations": "migrations.test_migrations",
"migrations2": "migrations2.test_migrations_2_no_deps",
})
def test_mixed_plan_not_supported(self):
"""
Although the MigrationExecutor interfaces allows for mixed migration
plans (combined forwards and backwards migrations) this is not
supported.
"""
# Prepare for mixed plan
executor = MigrationExecutor(connection)
plan = executor.migration_plan([("migrations", "0002_second")])
self.assertEqual(
plan,
[
(executor.loader.graph.nodes["migrations", "0001_initial"], False),
(executor.loader.graph.nodes["migrations", "0002_second"], False),
],
)
executor.migrate(None, plan)
# Rebuild the graph to reflect the new DB state
executor.loader.build_graph()
self.assertIn(('migrations', '0001_initial'), executor.loader.applied_migrations)
self.assertIn(('migrations', '0002_second'), executor.loader.applied_migrations)
self.assertNotIn(('migrations2', '0001_initial'), executor.loader.applied_migrations)
# Generate mixed plan
plan = executor.migration_plan([
("migrations", None),
("migrations2", "0001_initial"),
])
msg = (
'Migration plans with both forwards and backwards migrations are '
'not supported. Please split your migration process into separate '
'plans of only forwards OR backwards migrations.'
)
with self.assertRaisesMessage(InvalidMigrationPlan, msg) as cm:
executor.migrate(None, plan)
self.assertEqual(
cm.exception.args[1],
[
(executor.loader.graph.nodes["migrations", "0002_second"], True),
(executor.loader.graph.nodes["migrations", "0001_initial"], True),
(executor.loader.graph.nodes["migrations2", "0001_initial"], False),
],
)
# Rebuild the graph to reflect the new DB state
executor.loader.build_graph()
executor.migrate([
("migrations", None),
("migrations2", None),
])
# Are the tables gone?
self.assertTableNotExists("migrations_author")
self.assertTableNotExists("migrations_book")
self.assertTableNotExists("migrations2_otherauthor")
@override_settings(MIGRATION_MODULES={"migrations": "migrations.test_migrations"}) @override_settings(MIGRATION_MODULES={"migrations": "migrations.test_migrations"})
def test_soft_apply(self): def test_soft_apply(self):
""" """

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = []
operations = [
migrations.CreateModel(
"OtherAuthor",
[
("id", models.AutoField(primary_key=True)),
("name", models.CharField(max_length=255)),
("slug", models.SlugField(null=True)),
("age", models.IntegerField(default=0)),
("silly_field", models.BooleanField(default=False)),
],
),
]