Merge pull request #208 from rhvgoyal/config-rootfsPropagation
Create container_private, container_slave and container_shared modes for rootfsPropagation
This commit is contained in:
commit
c573ffbd05
|
@ -92,8 +92,8 @@ type Config struct {
|
|||
// bind mounts are writable.
|
||||
Readonlyfs bool `json:"readonlyfs"`
|
||||
|
||||
// Privatefs will mount the container's rootfs as private where mount points from the parent will not propagate
|
||||
Privatefs bool `json:"privatefs"`
|
||||
// Specifies the mount propagation flags to be applied to /.
|
||||
RootPropagation int `json:"rootPropagation"`
|
||||
|
||||
// Mounts specify additional source and destination paths that will be mounted inside the container's
|
||||
// rootfs and mount namespace if specified
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
@ -1014,3 +1015,238 @@ func TestSTDIOPermissions(t *testing.T) {
|
|||
t.Fatalf("stderr should equal be equal %q %q", actual, "hi")
|
||||
}
|
||||
}
|
||||
|
||||
// unmountOp lazily detaches (MNT_DETACH) whatever is mounted at path and
// returns the error reported by the unmount syscall, if any.
func unmountOp(path string) error {
	return syscall.Unmount(path, syscall.MNT_DETACH)
}
|
||||
|
||||
// Launch container with rootfsPropagation in rslave mode. Also
|
||||
// bind mount a volume /mnt1host at /mnt1cont at the time of launch. Now do
|
||||
// another mount on host (/mnt1host/mnt2host) and this new mount should
|
||||
// propagate to container (/mnt1cont/mnt2host)
|
||||
func TestRootfsPropagationSlaveMount(t *testing.T) {
|
||||
var mountPropagated bool
|
||||
var dir1cont string
|
||||
var dir2cont string
|
||||
|
||||
dir1cont = "/root/mnt1cont"
|
||||
|
||||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
rootfs, err := newRootfs()
|
||||
ok(t, err)
|
||||
defer remove(rootfs)
|
||||
config := newTemplateConfig(rootfs)
|
||||
|
||||
config.RootPropagation = syscall.MS_SLAVE | syscall.MS_REC
|
||||
|
||||
// Bind mount a volume
|
||||
dir1host, err := ioutil.TempDir("", "mnt1host")
|
||||
ok(t, err)
|
||||
defer os.RemoveAll(dir1host)
|
||||
|
||||
// Make this dir a "shared" mount point. This will make sure a
|
||||
// slave relationship can be established in container.
|
||||
err = syscall.Mount(dir1host, dir1host, "bind", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
ok(t, err)
|
||||
err = syscall.Mount("", dir1host, "", syscall.MS_SHARED|syscall.MS_REC, "")
|
||||
ok(t, err)
|
||||
defer unmountOp(dir1host)
|
||||
|
||||
config.Mounts = append(config.Mounts, &configs.Mount{
|
||||
Source: dir1host,
|
||||
Destination: dir1cont,
|
||||
Device: "bind",
|
||||
Flags: syscall.MS_BIND | syscall.MS_REC})
|
||||
|
||||
// TODO: systemd specific processing
|
||||
f := factory
|
||||
|
||||
container, err := f.Create("testSlaveMount", config)
|
||||
ok(t, err)
|
||||
defer container.Destroy()
|
||||
|
||||
stdinR, stdinW, err := os.Pipe()
|
||||
ok(t, err)
|
||||
|
||||
pconfig := &libcontainer.Process{
|
||||
Args: []string{"cat"},
|
||||
Env: standardEnvironment,
|
||||
Stdin: stdinR,
|
||||
}
|
||||
|
||||
err = container.Start(pconfig)
|
||||
stdinR.Close()
|
||||
defer stdinW.Close()
|
||||
ok(t, err)
|
||||
|
||||
// Create mnt1host/mnt2host and bind mount itself on top of it. This
|
||||
// should be visible in container.
|
||||
dir2host, err := ioutil.TempDir(dir1host, "mnt2host")
|
||||
ok(t, err)
|
||||
defer os.RemoveAll(dir2host)
|
||||
|
||||
err = syscall.Mount(dir2host, dir2host, "bind", syscall.MS_BIND, "")
|
||||
defer unmountOp(dir2host)
|
||||
ok(t, err)
|
||||
|
||||
// Run "cat /proc/self/mountinfo" in container and look at mount points.
|
||||
var stdout2 bytes.Buffer
|
||||
|
||||
stdinR2, stdinW2, err := os.Pipe()
|
||||
ok(t, err)
|
||||
|
||||
pconfig2 := &libcontainer.Process{
|
||||
Args: []string{"cat", "/proc/self/mountinfo"},
|
||||
Env: standardEnvironment,
|
||||
Stdin: stdinR2,
|
||||
Stdout: &stdout2,
|
||||
}
|
||||
|
||||
err = container.Start(pconfig2)
|
||||
stdinR2.Close()
|
||||
defer stdinW2.Close()
|
||||
ok(t, err)
|
||||
|
||||
// Wait for process
|
||||
stdinW2.Close()
|
||||
waitProcess(pconfig2, t)
|
||||
stdinW.Close()
|
||||
waitProcess(pconfig, t)
|
||||
|
||||
mountPropagated = false
|
||||
dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host))
|
||||
|
||||
propagationInfo := string(stdout2.Bytes())
|
||||
lines := strings.Split(propagationInfo, "\n")
|
||||
for _, l := range lines {
|
||||
linefields := strings.Split(l, " ")
|
||||
if len(linefields) < 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
if linefields[4] == dir2cont {
|
||||
mountPropagated = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if mountPropagated != true {
|
||||
t.Fatalf("Mount on host %s did not propagate in container at %s\n", dir2host, dir2cont)
|
||||
}
|
||||
}
|
||||
|
||||
// Launch container with rootfsPropagation 0 so no propagation flags are
|
||||
// applied. Also bind mount a volume /mnt1host at /mnt1cont at the time of
|
||||
// launch. Now do a mount in container (/mnt1cont/mnt2cont) and this new
|
||||
// mount should propagate to host (/mnt1host/mnt2cont)
|
||||
|
||||
func TestRootfsPropagationSharedMount(t *testing.T) {
|
||||
var dir1cont string
|
||||
var dir2cont string
|
||||
|
||||
dir1cont = "/root/mnt1cont"
|
||||
|
||||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
rootfs, err := newRootfs()
|
||||
ok(t, err)
|
||||
defer remove(rootfs)
|
||||
config := newTemplateConfig(rootfs)
|
||||
config.RootPropagation = syscall.MS_PRIVATE
|
||||
|
||||
// Bind mount a volume
|
||||
dir1host, err := ioutil.TempDir("", "mnt1host")
|
||||
ok(t, err)
|
||||
defer os.RemoveAll(dir1host)
|
||||
|
||||
// Make this dir a "shared" mount point. This will make sure a
|
||||
// shared relationship can be established in container.
|
||||
err = syscall.Mount(dir1host, dir1host, "bind", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
ok(t, err)
|
||||
err = syscall.Mount("", dir1host, "", syscall.MS_SHARED|syscall.MS_REC, "")
|
||||
ok(t, err)
|
||||
defer unmountOp(dir1host)
|
||||
|
||||
config.Mounts = append(config.Mounts, &configs.Mount{
|
||||
Source: dir1host,
|
||||
Destination: dir1cont,
|
||||
Device: "bind",
|
||||
Flags: syscall.MS_BIND | syscall.MS_REC})
|
||||
|
||||
// TODO: systemd specific processing
|
||||
f := factory
|
||||
|
||||
container, err := f.Create("testSharedMount", config)
|
||||
ok(t, err)
|
||||
defer container.Destroy()
|
||||
|
||||
stdinR, stdinW, err := os.Pipe()
|
||||
ok(t, err)
|
||||
|
||||
pconfig := &libcontainer.Process{
|
||||
Args: []string{"cat"},
|
||||
Env: standardEnvironment,
|
||||
Stdin: stdinR,
|
||||
}
|
||||
|
||||
err = container.Start(pconfig)
|
||||
stdinR.Close()
|
||||
defer stdinW.Close()
|
||||
ok(t, err)
|
||||
|
||||
// Create mnt1host/mnt2cont. This will become visible inside container
|
||||
// at mnt1cont/mnt2cont. Bind mount itself on top of it. This
|
||||
// should be visible on host now.
|
||||
dir2host, err := ioutil.TempDir(dir1host, "mnt2cont")
|
||||
ok(t, err)
|
||||
defer os.RemoveAll(dir2host)
|
||||
|
||||
dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host))
|
||||
|
||||
// Mount something in container and see if it is visible on host.
|
||||
var stdout2 bytes.Buffer
|
||||
|
||||
stdinR2, stdinW2, err := os.Pipe()
|
||||
ok(t, err)
|
||||
|
||||
// Provide CAP_SYS_ADMIN
|
||||
processCaps := append(config.Capabilities, "CAP_SYS_ADMIN")
|
||||
|
||||
pconfig2 := &libcontainer.Process{
|
||||
Args: []string{"mount", "--bind", dir2cont, dir2cont},
|
||||
Env: standardEnvironment,
|
||||
Stdin: stdinR2,
|
||||
Stdout: &stdout2,
|
||||
Capabilities: processCaps,
|
||||
}
|
||||
|
||||
err = container.Start(pconfig2)
|
||||
stdinR2.Close()
|
||||
defer stdinW2.Close()
|
||||
ok(t, err)
|
||||
|
||||
// Wait for process
|
||||
stdinW2.Close()
|
||||
waitProcess(pconfig2, t)
|
||||
stdinW.Close()
|
||||
waitProcess(pconfig, t)
|
||||
|
||||
defer unmountOp(dir2host)
|
||||
|
||||
// Check if mount is visible on host or not.
|
||||
out, err := exec.Command("findmnt", "-n", "-f", "-oTARGET", dir2host).CombinedOutput()
|
||||
outtrim := strings.TrimSpace(string(out))
|
||||
if err != nil {
|
||||
t.Logf("findmnt error %q: %q", err, outtrim)
|
||||
}
|
||||
|
||||
if string(outtrim) != dir2host {
|
||||
t.Fatalf("Mount in container on %s did not propagate to host on %s. finmnt output=%s", dir2cont, dir2host, outtrim)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/symlink"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
@ -420,14 +421,89 @@ func mknodDevice(dest string, node *configs.Device) error {
|
|||
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
}
|
||||
|
||||
func getMountInfo(mountinfo []*mount.MountInfo, dir string) *mount.MountInfo {
|
||||
for _, m := range mountinfo {
|
||||
if m.Mountpoint == dir {
|
||||
return m
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the parent mount point of directory passed in as argument. Also return
|
||||
// optional fields.
|
||||
func getParentMount(rootfs string) (string, string, error) {
|
||||
var path string
|
||||
|
||||
mountinfos, err := mount.GetMounts()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
mountinfo := getMountInfo(mountinfos, rootfs)
|
||||
if mountinfo != nil {
|
||||
return rootfs, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
path = rootfs
|
||||
for {
|
||||
path = filepath.Dir(path)
|
||||
|
||||
mountinfo = getMountInfo(mountinfos, path)
|
||||
if mountinfo != nil {
|
||||
return path, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
if path == "/" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If we are here, we did not find parent mount. Something is wrong.
|
||||
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
|
||||
}
|
||||
|
||||
// Make parent mount private if it was shared
|
||||
func rootfsParentMountPrivate(config *configs.Config) error {
|
||||
sharedMount := false
|
||||
|
||||
parentMount, optionalOpts, err := getParentMount(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
optsSplit := strings.Split(optionalOpts, " ")
|
||||
for _, opt := range optsSplit {
|
||||
if strings.HasPrefix(opt, "shared:") {
|
||||
sharedMount = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Make parent mount PRIVATE if it was shared. It is needed for two
|
||||
// reasons. First of all pivot_root() will fail if parent mount is
|
||||
// shared. Secondly when we bind mount rootfs it will propagate to
|
||||
// parent namespace and we don't want that to happen.
|
||||
if sharedMount {
|
||||
return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareRoot(config *configs.Config) error {
|
||||
flag := syscall.MS_SLAVE | syscall.MS_REC
|
||||
if config.Privatefs {
|
||||
flag = syscall.MS_PRIVATE | syscall.MS_REC
|
||||
if config.RootPropagation != 0 {
|
||||
flag = config.RootPropagation
|
||||
}
|
||||
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := rootfsParentMountPrivate(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
}
|
||||
|
||||
|
@ -469,6 +545,13 @@ func pivotRoot(rootfs, pivotBaseDir string) error {
|
|||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
|
||||
|
||||
// Make pivotDir rprivate to make sure any of the unmounts don't
|
||||
// propagate to parent.
|
||||
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
}
|
||||
|
|
17
spec.go
17
spec.go
|
@ -280,6 +280,16 @@ var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{
|
|||
specs.UTSNamespace: configs.NEWUTS,
|
||||
}
|
||||
|
||||
// mountPropagationMapping translates a rootfsPropagation string into the
// corresponding mount propagation flags. The "r"-prefixed names add MS_REC.
// An empty string maps to the same flags as "rprivate".
var mountPropagationMapping = map[string]int{
	// Default when nothing is specified.
	"": syscall.MS_PRIVATE | syscall.MS_REC,

	// Non-recursive modes.
	"private": syscall.MS_PRIVATE,
	"slave":   syscall.MS_SLAVE,
	"shared":  syscall.MS_SHARED,

	// Recursive modes.
	"rprivate": syscall.MS_PRIVATE | syscall.MS_REC,
	"rslave":   syscall.MS_SLAVE | syscall.MS_REC,
	"rshared":  syscall.MS_SHARED | syscall.MS_REC,
}
|
||||
|
||||
// loadSpec loads the specification from the provided path.
|
||||
// If the path is empty then the default path will be "config.json"
|
||||
func loadSpec(cPath, rPath string) (spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, err error) {
|
||||
|
@ -333,8 +343,13 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *s
|
|||
Capabilities: spec.Linux.Capabilities,
|
||||
Readonlyfs: spec.Root.Readonly,
|
||||
Hostname: spec.Hostname,
|
||||
Privatefs: true,
|
||||
}
|
||||
|
||||
exists := false
|
||||
if config.RootPropagation, exists = mountPropagationMapping[rspec.Linux.RootfsPropagation]; !exists {
|
||||
return nil, fmt.Errorf("rootfsPropagation=%v is not supported", rspec.Linux.RootfsPropagation)
|
||||
}
|
||||
|
||||
for _, ns := range rspec.Linux.Namespaces {
|
||||
t, exists := namespaceMapping[ns.Type]
|
||||
if !exists {
|
||||
|
|
Loading…
Reference in New Issue