From 117c92745bd098bf05a69489b7b78cac6364e1d0 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 29 Jun 2017 01:20:23 +1000 Subject: [PATCH] rootfs: switch ms_private remount of oldroot to ms_slave Using MS_PRIVATE meant that there was a race between the mount(2) and the umount2(2) calls where runc inadvertently had a live reference to a mountpoint that existed on the host (which the host cannot kill implicitly through an unmount and peer sharing). In particular, this means that if we have a devicemapper mountpoint and the host is trying to delete the underlying device, the delete will fail because it is "in use" during the race. While the race is _very_ small (and libdm actually retries to avoid these sorts of cases) this appears to manifest in various cases. Signed-off-by: Aleksa Sarai --- libcontainer/rootfs_linux.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index e2e734a8..c4dbe3d5 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -668,9 +668,12 @@ func pivotRoot(rootfs string) error { return err } - // Make oldroot rprivate to make sure our unmounts don't propagate to the - // host (and thus bork the machine). - if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil { + // Make oldroot rslave to make sure our unmounts don't propagate to the + // host (and thus bork the machine). We don't use rprivate because this is + // known to cause issues due to races where we still have a reference to a + // mount while a process in the host namespace is trying to operate on + // something it thinks has no mounts (devicemapper in particular). + if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { return err } // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.