From 9e71710125a380b2692f2bd1e0d352d2ab21edab Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Mon, 3 Mar 2014 16:15:29 +0100 Subject: [PATCH] Move all bind-mounts in the container inside the namespace This moves the bind mounts like /.dockerinit, /etc/hostname, volumes, etc into the container namespace, by setting them up using lxc. This is useful to avoid littering the global namespace with a lot of mounts that are internal to each container and are not generally needed on the outside. In particular, it seems that having a lot of mounts is problematic wrt scaling to a lot of containers on systems where the root filesystem is mounted --rshared. Note that the "private" option is only supported by the native driver, as lxc doesn't support setting this. This is not a huge problem, but it does mean that some mounts are unnecessarily shared inside the container if you're using the lxc driver. Docker-DCO-1.1-Signed-off-by: Alexander Larsson (github: alexlarsson) --- container.go | 10 ++++++++++ nsinit/init.go | 2 +- nsinit/mount.go | 20 +++++++++++++++++++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/container.go b/container.go index a777da58..14b4b65d 100644 --- a/container.go +++ b/container.go @@ -23,6 +23,7 @@ type Container struct { Networks []*Network `json:"networks,omitempty"` // nil for host's network stack Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) + Mounts []Mount `json:"mounts,omitempty"` } // Network defines configuration for a container's networking stack @@ -36,3 +37,12 @@ type Network struct { Gateway string `json:"gateway,omitempty"` Mtu int `json:"mtu,omitempty"` } + +// Bind mounts from the host system to the container +// +type Mount struct { + Source string `json:"source"` // Source path, in the host namespace + Destination string `json:"destination"` // Destination path, in the container + Writable bool `json:"writable"` + Private bool `json:"private"` +} diff --git a/nsinit/init.go b/nsinit/init.go index 336fc1ea..5d47b950 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -51,7 +51,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol if err := system.ParentDeathSignal(); err != nil { return fmt.Errorf("parent death signal %s", err) } - if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs, container.NoPivotRoot); err != nil { + if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if err := setupNetwork(container, context); err != nil { diff --git a/nsinit/mount.go b/nsinit/mount.go index 83577cfa..562ae25a 100644 --- a/nsinit/mount.go +++ b/nsinit/mount.go @@ -4,6 +4,7 @@ package nsinit import ( "fmt" + "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/system" "io/ioutil" "os" @@ -19,7 +20,7 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD // // There is no need to unmount the new mounts because as soon as the mount namespace // is no longer in use, the mounts will be removed automatically -func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool) error { +func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool) error { flag := syscall.MS_PRIVATE if noPivotRoot { flag = syscall.MS_SLAVE @@ -38,6 +39,23 @@ func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool) if err := mountSystem(rootfs); err != nil { return fmt.Errorf("mount system %s", err) } + + for _, m := range bindMounts { + flags := syscall.MS_BIND | syscall.MS_REC + if !m.Writable { + flags = flags | syscall.MS_RDONLY + } + dest := filepath.Join(rootfs, m.Destination) + if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil { + return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err) + } + if m.Private { + if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { + return fmt.Errorf("mounting %s private %s", dest, err) + } + } + } + if err := copyDevNodes(rootfs); err != nil { return fmt.Errorf("copy dev nodes %s", err) }