diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 4b298e1e..7fd311db 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -92,8 +92,8 @@ type Config struct { // bind mounts are writtable. Readonlyfs bool `json:"readonlyfs"` - // Privatefs will mount the container's rootfs as private where mount points from the parent will not propogate - Privatefs bool `json:"privatefs"` + // Specifies the mount propagation flags to be applied to /. + RootPropagation int `json:"rootPropagation"` // Mounts specify additional source and destination paths that will be mounted inside the container's // rootfs and mount namespace if specified diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index 04fbe651..267c0731 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -4,6 +4,7 @@ import ( "bytes" "io/ioutil" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -1014,3 +1015,238 @@ func TestSTDIOPermissions(t *testing.T) { t.Fatalf("stderr should equal be equal %q %q", actual, "hi") } } + +func unmountOp(path string) error { + if err := syscall.Unmount(path, syscall.MNT_DETACH); err != nil { + return err + } + return nil +} + +// Launch container with rootfsPropagation in rslave mode. Also +// bind mount a volume /mnt1host at /mnt1cont at the time of launch. Now do +// another mount on host (/mnt1host/mnt2host) and this new mount should +// propagate to container (/mnt1cont/mnt2host) +func TestRootfsPropagationSlaveMount(t *testing.T) { + var mountPropagated bool + var dir1cont string + var dir2cont string + + dir1cont = "/root/mnt1cont" + + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + + config.RootPropagation = syscall.MS_SLAVE | syscall.MS_REC + + // Bind mount a volume + dir1host, err := ioutil.TempDir("", "mnt1host") + ok(t, err) + defer os.RemoveAll(dir1host) + + // Make this dir a "shared" mount point. This will make sure a + // slave relationship can be established in container. + err = syscall.Mount(dir1host, dir1host, "bind", syscall.MS_BIND|syscall.MS_REC, "") + ok(t, err) + err = syscall.Mount("", dir1host, "", syscall.MS_SHARED|syscall.MS_REC, "") + ok(t, err) + defer unmountOp(dir1host) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Source: dir1host, + Destination: dir1cont, + Device: "bind", + Flags: syscall.MS_BIND | syscall.MS_REC}) + + // TODO: systemd specific processing + f := factory + + container, err := f.Create("testSlaveMount", config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + pconfig := &libcontainer.Process{ + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + } + + err = container.Start(pconfig) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + // Create mnt1host/mnt2host and bind mount itself on top of it. This + // should be visible in container. + dir2host, err := ioutil.TempDir(dir1host, "mnt2host") + ok(t, err) + defer os.RemoveAll(dir2host) + + err = syscall.Mount(dir2host, dir2host, "bind", syscall.MS_BIND, "") + defer unmountOp(dir2host) + ok(t, err) + + // Run "cat /proc/self/mountinfo" in container and look at mount points. + var stdout2 bytes.Buffer + + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + + pconfig2 := &libcontainer.Process{ + Args: []string{"cat", "/proc/self/mountinfo"}, + Env: standardEnvironment, + Stdin: stdinR2, + Stdout: &stdout2, + } + + err = container.Start(pconfig2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + + // Wait for process + stdinW2.Close() + waitProcess(pconfig2, t) + stdinW.Close() + waitProcess(pconfig, t) + + mountPropagated = false + dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host)) + + propagationInfo := string(stdout2.Bytes()) + lines := strings.Split(propagationInfo, "\n") + for _, l := range lines { + linefields := strings.Split(l, " ") + if len(linefields) < 5 { + continue + } + + if linefields[4] == dir2cont { + mountPropagated = true + break + } + } + + if mountPropagated != true { + t.Fatalf("Mount on host %s did not propagate in container at %s\n", dir2host, dir2cont) + } +} + +// Launch container with rootfsPropagation 0 so no propagation flags are +// applied. Also bind mount a volume /mnt1host at /mnt1cont at the time of +// launch. Now do a mount in container (/mnt1cont/mnt2cont) and this new +// mount should propagate to host (/mnt1host/mnt2cont) + +func TestRootfsPropagationSharedMount(t *testing.T) { + var dir1cont string + var dir2cont string + + dir1cont = "/root/mnt1cont" + + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + config.RootPropagation = syscall.MS_PRIVATE + + // Bind mount a volume + dir1host, err := ioutil.TempDir("", "mnt1host") + ok(t, err) + defer os.RemoveAll(dir1host) + + // Make this dir a "shared" mount point. This will make sure a + // shared relationship can be established in container. + err = syscall.Mount(dir1host, dir1host, "bind", syscall.MS_BIND|syscall.MS_REC, "") + ok(t, err) + err = syscall.Mount("", dir1host, "", syscall.MS_SHARED|syscall.MS_REC, "") + ok(t, err) + defer unmountOp(dir1host) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Source: dir1host, + Destination: dir1cont, + Device: "bind", + Flags: syscall.MS_BIND | syscall.MS_REC}) + + // TODO: systemd specific processing + f := factory + + container, err := f.Create("testSharedMount", config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + pconfig := &libcontainer.Process{ + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + } + + err = container.Start(pconfig) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + // Create mnt1host/mnt2cont. This will become visible inside container + // at mnt1cont/mnt2cont. Bind mount itself on top of it. This + // should be visible on host now. + dir2host, err := ioutil.TempDir(dir1host, "mnt2cont") + ok(t, err) + defer os.RemoveAll(dir2host) + + dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host)) + + // Mount something in container and see if it is visible on host. + var stdout2 bytes.Buffer + + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + + // Provide CAP_SYS_ADMIN + processCaps := append(config.Capabilities, "CAP_SYS_ADMIN") + + pconfig2 := &libcontainer.Process{ + Args: []string{"mount", "--bind", dir2cont, dir2cont}, + Env: standardEnvironment, + Stdin: stdinR2, + Stdout: &stdout2, + Capabilities: processCaps, + } + + err = container.Start(pconfig2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + + // Wait for process + stdinW2.Close() + waitProcess(pconfig2, t) + stdinW.Close() + waitProcess(pconfig, t) + + defer unmountOp(dir2host) + + // Check if mount is visible on host or not. + out, err := exec.Command("findmnt", "-n", "-f", "-oTARGET", dir2host).CombinedOutput() + outtrim := strings.TrimSpace(string(out)) + if err != nil { + t.Logf("findmnt error %q: %q", err, outtrim) + } + + if string(outtrim) != dir2host { + t.Fatalf("Mount in container on %s did not propagate to host on %s. finmnt output=%s", dir2cont, dir2host, outtrim) + } +} diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index f9566975..147c93d6 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -13,6 +13,7 @@ import ( "syscall" "time" + "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/symlink" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" @@ -420,14 +421,89 @@ func mknodDevice(dest string, node *configs.Device) error { return syscall.Chown(dest, int(node.Uid), int(node.Gid)) } +func getMountInfo(mountinfo []*mount.MountInfo, dir string) *mount.MountInfo { + for _, m := range mountinfo { + if m.Mountpoint == dir { + return m + } + } + return nil +} + +// Get the parent mount point of directory passed in as argument. Also return +// optional fields. +func getParentMount(rootfs string) (string, string, error) { + var path string + + mountinfos, err := mount.GetMounts() + if err != nil { + return "", "", err + } + + mountinfo := getMountInfo(mountinfos, rootfs) + if mountinfo != nil { + return rootfs, mountinfo.Optional, nil + } + + path = rootfs + for { + path = filepath.Dir(path) + + mountinfo = getMountInfo(mountinfos, path) + if mountinfo != nil { + return path, mountinfo.Optional, nil + } + + if path == "/" { + break + } + } + + // If we are here, we did not find parent mount. Something is wrong. + return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs) +} + +// Make parent mount private if it was shared +func rootfsParentMountPrivate(config *configs.Config) error { + sharedMount := false + + parentMount, optionalOpts, err := getParentMount(config.Rootfs) + if err != nil { + return err + } + + optsSplit := strings.Split(optionalOpts, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + sharedMount = true + break + } + } + + // Make parent mount PRIVATE if it was shared. It is needed for two + // reasons. First of all pivot_root() will fail if parent mount is + // shared. Secondly when we bind mount rootfs it will propagate to + // parent namespace and we don't want that to happen. + if sharedMount { + return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "") + } + + return nil +} + func prepareRoot(config *configs.Config) error { flag := syscall.MS_SLAVE | syscall.MS_REC - if config.Privatefs { - flag = syscall.MS_PRIVATE | syscall.MS_REC + if config.RootPropagation != 0 { + flag = config.RootPropagation } if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { return err } + + if err := rootfsParentMountPrivate(config); err != nil { + return err + } + return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") } @@ -469,6 +545,13 @@ func pivotRoot(rootfs, pivotBaseDir string) error { } // path to pivot dir now changed, update pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) + + // Make pivotDir rprivate to make sure any of the unmounts don't + // propagate to parent. + if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil { + return err + } + if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } diff --git a/spec.go b/spec.go index 8604121b..6d5a6ca6 100644 --- a/spec.go +++ b/spec.go @@ -280,6 +280,16 @@ var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{ specs.UTSNamespace: configs.NEWUTS, } +var mountPropagationMapping = map[string]int{ + "rprivate": syscall.MS_PRIVATE | syscall.MS_REC, + "private": syscall.MS_PRIVATE, + "rslave": syscall.MS_SLAVE | syscall.MS_REC, + "slave": syscall.MS_SLAVE, + "rshared": syscall.MS_SHARED | syscall.MS_REC, + "shared": syscall.MS_SHARED, + "": syscall.MS_PRIVATE | syscall.MS_REC, +} + // loadSpec loads the specification from the provided path. // If the path is empty then the default path will be "config.json" func loadSpec(cPath, rPath string) (spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, err error) { @@ -333,8 +343,13 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *s Capabilities: spec.Linux.Capabilities, Readonlyfs: spec.Root.Readonly, Hostname: spec.Hostname, - Privatefs: true, } + + exists := false + if config.RootPropagation, exists = mountPropagationMapping[rspec.Linux.RootfsPropagation]; !exists { + return nil, fmt.Errorf("rootfsPropagation=%v is not supported", rspec.Linux.RootfsPropagation) + } + for _, ns := range rspec.Linux.Namespaces { t, exists := namespaceMapping[ns.Type] if !exists {