Fixed #24366 -- Optimized traversal of large migration dependency graphs.
Switched from an adjacency list and uncached, iterative depth-first search to a Node-based design with direct parent/child links and a cached, recursive depth-first search. With this change, calculating a migration plan for a large graph takes several seconds instead of several hours. Marked test `migrations.test_graph.GraphTests.test_dfs` (renamed to `test_recursion_depth` in this commit) as an expected failure due to reaching the maximum recursion depth.
This commit is contained in:
parent
7fa7dd48c4
commit
78d43a5e10
|
@ -47,7 +47,7 @@ class MigrationExecutor(object):
|
||||||
# child(ren) in the same app, and no further.
|
# child(ren) in the same app, and no further.
|
||||||
next_in_app = sorted(
|
next_in_app = sorted(
|
||||||
n for n in
|
n for n in
|
||||||
self.loader.graph.dependents.get(target, set())
|
self.loader.graph.node_map[target].children
|
||||||
if n[0] == target[0]
|
if n[0] == target[0]
|
||||||
)
|
)
|
||||||
for node in next_in_app:
|
for node in next_in_app:
|
||||||
|
|
|
@ -5,6 +5,68 @@ from collections import deque
|
||||||
from django.db.migrations.state import ProjectState
|
from django.db.migrations.state import ProjectState
|
||||||
from django.utils.datastructures import OrderedSet
|
from django.utils.datastructures import OrderedSet
|
||||||
from django.utils.encoding import python_2_unicode_compatible
|
from django.utils.encoding import python_2_unicode_compatible
|
||||||
|
from django.utils.functional import total_ordering
|
||||||
|
|
||||||
|
|
||||||
|
@python_2_unicode_compatible
|
||||||
|
@total_ordering
|
||||||
|
class Node(object):
|
||||||
|
"""
|
||||||
|
A single node in the migration graph. Contains direct links to adjacent
|
||||||
|
nodes in either direction.
|
||||||
|
"""
|
||||||
|
def __init__(self, key):
|
||||||
|
self.key = key
|
||||||
|
self.children = set()
|
||||||
|
self.parents = set()
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return self.key == other
|
||||||
|
|
||||||
|
def __lt__(self, other):
|
||||||
|
return self.key < other
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.key)
|
||||||
|
|
||||||
|
def __getitem__(self, item):
|
||||||
|
return self.key[item]
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return str(self.key)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<Node: (%r, %r)>' % self.key
|
||||||
|
|
||||||
|
def add_child(self, child):
|
||||||
|
self.children.add(child)
|
||||||
|
|
||||||
|
def add_parent(self, parent):
|
||||||
|
self.parents.add(parent)
|
||||||
|
|
||||||
|
# Use manual caching, @cached_property effectively doubles the
|
||||||
|
# recursion depth for each recursion.
|
||||||
|
def ancestors(self):
|
||||||
|
# Use self.key instead of self to speed up the frequent hashing
|
||||||
|
# when constructing an OrderedSet.
|
||||||
|
if '_ancestors' not in self.__dict__:
|
||||||
|
ancestors = deque([self.key])
|
||||||
|
for parent in sorted(self.parents):
|
||||||
|
ancestors.extendleft(reversed(parent.ancestors()))
|
||||||
|
self.__dict__['_ancestors'] = list(OrderedSet(ancestors))
|
||||||
|
return self.__dict__['_ancestors']
|
||||||
|
|
||||||
|
# Use manual caching, @cached_property effectively doubles the
|
||||||
|
# recursion depth for each recursion.
|
||||||
|
def descendants(self):
|
||||||
|
# Use self.key instead of self to speed up the frequent hashing
|
||||||
|
# when constructing an OrderedSet.
|
||||||
|
if '_descendants' not in self.__dict__:
|
||||||
|
descendants = deque([self.key])
|
||||||
|
for child in sorted(self.children):
|
||||||
|
descendants.extendleft(reversed(child.descendants()))
|
||||||
|
self.__dict__['_descendants'] = list(OrderedSet(descendants))
|
||||||
|
return self.__dict__['_descendants']
|
||||||
|
|
||||||
|
|
||||||
@python_2_unicode_compatible
|
@python_2_unicode_compatible
|
||||||
|
@ -32,12 +94,15 @@ class MigrationGraph(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.node_map = {}
|
||||||
self.nodes = {}
|
self.nodes = {}
|
||||||
self.dependencies = {}
|
self.cached = False
|
||||||
self.dependents = {}
|
|
||||||
|
|
||||||
def add_node(self, node, implementation):
|
def add_node(self, key, implementation):
|
||||||
self.nodes[node] = implementation
|
node = Node(key)
|
||||||
|
self.node_map[key] = node
|
||||||
|
self.nodes[key] = implementation
|
||||||
|
self.clear_cache()
|
||||||
|
|
||||||
def add_dependency(self, migration, child, parent):
|
def add_dependency(self, migration, child, parent):
|
||||||
if child not in self.nodes:
|
if child not in self.nodes:
|
||||||
|
@ -50,8 +115,16 @@ class MigrationGraph(object):
|
||||||
"Migration %s dependencies reference nonexistent parent node %r" % (migration, parent),
|
"Migration %s dependencies reference nonexistent parent node %r" % (migration, parent),
|
||||||
parent
|
parent
|
||||||
)
|
)
|
||||||
self.dependencies.setdefault(child, set()).add(parent)
|
self.node_map[child].add_parent(self.node_map[parent])
|
||||||
self.dependents.setdefault(parent, set()).add(child)
|
self.node_map[parent].add_child(self.node_map[child])
|
||||||
|
self.clear_cache()
|
||||||
|
|
||||||
|
def clear_cache(self):
|
||||||
|
if self.cached:
|
||||||
|
for node in self.nodes:
|
||||||
|
self.node_map[node].__dict__.pop('_ancestors', None)
|
||||||
|
self.node_map[node].__dict__.pop('_descendants', None)
|
||||||
|
self.cached = False
|
||||||
|
|
||||||
def forwards_plan(self, node):
|
def forwards_plan(self, node):
|
||||||
"""
|
"""
|
||||||
|
@ -62,7 +135,10 @@ class MigrationGraph(object):
|
||||||
"""
|
"""
|
||||||
if node not in self.nodes:
|
if node not in self.nodes:
|
||||||
raise NodeNotFoundError("Node %r not a valid node" % (node, ), node)
|
raise NodeNotFoundError("Node %r not a valid node" % (node, ), node)
|
||||||
return self.dfs(node, lambda x: self.dependencies.get(x, set()))
|
# Use parent.key instead of parent to speed up the frequent hashing in ensure_not_cyclic
|
||||||
|
self.ensure_not_cyclic(node, lambda x: (parent.key for parent in self.node_map[x].parents))
|
||||||
|
self.cached = True
|
||||||
|
return self.node_map[node].ancestors()
|
||||||
|
|
||||||
def backwards_plan(self, node):
|
def backwards_plan(self, node):
|
||||||
"""
|
"""
|
||||||
|
@ -73,7 +149,10 @@ class MigrationGraph(object):
|
||||||
"""
|
"""
|
||||||
if node not in self.nodes:
|
if node not in self.nodes:
|
||||||
raise NodeNotFoundError("Node %r not a valid node" % (node, ), node)
|
raise NodeNotFoundError("Node %r not a valid node" % (node, ), node)
|
||||||
return self.dfs(node, lambda x: self.dependents.get(x, set()))
|
# Use child.key instead of child to speed up the frequent hashing in ensure_not_cyclic
|
||||||
|
self.ensure_not_cyclic(node, lambda x: (child.key for child in self.node_map[x].children))
|
||||||
|
self.cached = True
|
||||||
|
return self.node_map[node].descendants()
|
||||||
|
|
||||||
def root_nodes(self, app=None):
|
def root_nodes(self, app=None):
|
||||||
"""
|
"""
|
||||||
|
@ -82,7 +161,7 @@ class MigrationGraph(object):
|
||||||
"""
|
"""
|
||||||
roots = set()
|
roots = set()
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
if (not any(key[0] == node[0] for key in self.dependencies.get(node, set()))
|
if (not any(key[0] == node[0] for key in self.node_map[node].parents)
|
||||||
and (not app or app == node[0])):
|
and (not app or app == node[0])):
|
||||||
roots.add(node)
|
roots.add(node)
|
||||||
return sorted(roots)
|
return sorted(roots)
|
||||||
|
@ -97,7 +176,7 @@ class MigrationGraph(object):
|
||||||
"""
|
"""
|
||||||
leaves = set()
|
leaves = set()
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
if (not any(key[0] == node[0] for key in self.dependents.get(node, set()))
|
if (not any(key[0] == node[0] for key in self.node_map[node].children)
|
||||||
and (not app or app == node[0])):
|
and (not app or app == node[0])):
|
||||||
leaves.add(node)
|
leaves.add(node)
|
||||||
return sorted(leaves)
|
return sorted(leaves)
|
||||||
|
@ -105,7 +184,7 @@ class MigrationGraph(object):
|
||||||
def ensure_not_cyclic(self, start, get_children):
|
def ensure_not_cyclic(self, start, get_children):
|
||||||
# Algo from GvR:
|
# Algo from GvR:
|
||||||
# http://neopythonic.blogspot.co.uk/2009/01/detecting-cycles-in-directed-graph.html
|
# http://neopythonic.blogspot.co.uk/2009/01/detecting-cycles-in-directed-graph.html
|
||||||
todo = set(self.nodes.keys())
|
todo = set(self.nodes)
|
||||||
while todo:
|
while todo:
|
||||||
node = todo.pop()
|
node = todo.pop()
|
||||||
stack = [node]
|
stack = [node]
|
||||||
|
@ -122,28 +201,10 @@ class MigrationGraph(object):
|
||||||
else:
|
else:
|
||||||
node = stack.pop()
|
node = stack.pop()
|
||||||
|
|
||||||
def dfs(self, start, get_children):
|
|
||||||
"""
|
|
||||||
Iterative depth first search, for finding dependencies.
|
|
||||||
"""
|
|
||||||
self.ensure_not_cyclic(start, get_children)
|
|
||||||
visited = deque()
|
|
||||||
visited.append(start)
|
|
||||||
stack = deque(sorted(get_children(start)))
|
|
||||||
while stack:
|
|
||||||
node = stack.popleft()
|
|
||||||
visited.appendleft(node)
|
|
||||||
children = sorted(get_children(node), reverse=True)
|
|
||||||
# reverse sorting is needed because prepending using deque.extendleft
|
|
||||||
# also effectively reverses values
|
|
||||||
stack.extendleft(children)
|
|
||||||
|
|
||||||
return list(OrderedSet(visited))
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Graph: %s nodes, %s edges" % (
|
return "Graph: %s nodes, %s edges" % (
|
||||||
len(self.nodes),
|
len(self.nodes),
|
||||||
sum(len(x) for x in self.dependencies.values()),
|
sum(len(node.parents) for node in self.node_map.values()),
|
||||||
)
|
)
|
||||||
|
|
||||||
def make_state(self, nodes=None, at_end=True, real_apps=None):
|
def make_state(self, nodes=None, at_end=True, real_apps=None):
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
from unittest import expectedFailure
|
||||||
|
|
||||||
from django.db.migrations.graph import (
|
from django.db.migrations.graph import (
|
||||||
CircularDependencyError, MigrationGraph, NodeNotFoundError,
|
CircularDependencyError, MigrationGraph, NodeNotFoundError,
|
||||||
)
|
)
|
||||||
|
@ -151,7 +153,23 @@ class GraphTests(TestCase):
|
||||||
graph.forwards_plan, ('C', '0001')
|
graph.forwards_plan, ('C', '0001')
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_dfs(self):
|
def test_deep_graph(self):
|
||||||
|
graph = MigrationGraph()
|
||||||
|
root = ("app_a", "1")
|
||||||
|
graph.add_node(root, None)
|
||||||
|
expected = [root]
|
||||||
|
for i in range(2, 750):
|
||||||
|
parent = ("app_a", str(i - 1))
|
||||||
|
child = ("app_a", str(i))
|
||||||
|
graph.add_node(child, None)
|
||||||
|
graph.add_dependency(str(i), child, parent)
|
||||||
|
expected.append(child)
|
||||||
|
|
||||||
|
actual = graph.node_map[root].descendants()
|
||||||
|
self.assertEqual(expected[::-1], actual)
|
||||||
|
|
||||||
|
@expectedFailure
|
||||||
|
def test_recursion_depth(self):
|
||||||
graph = MigrationGraph()
|
graph = MigrationGraph()
|
||||||
root = ("app_a", "1")
|
root = ("app_a", "1")
|
||||||
graph.add_node(root, None)
|
graph.add_node(root, None)
|
||||||
|
@ -163,7 +181,7 @@ class GraphTests(TestCase):
|
||||||
graph.add_dependency(str(i), child, parent)
|
graph.add_dependency(str(i), child, parent)
|
||||||
expected.append(child)
|
expected.append(child)
|
||||||
|
|
||||||
actual = graph.dfs(root, lambda x: graph.dependents.get(x, set()))
|
actual = graph.node_map[root].descendants()
|
||||||
self.assertEqual(expected[::-1], actual)
|
self.assertEqual(expected[::-1], actual)
|
||||||
|
|
||||||
def test_plan_invalid_node(self):
|
def test_plan_invalid_node(self):
|
||||||
|
|
Loading…
Reference in New Issue