diff --git a/linux_rootfs.go b/linux_rootfs.go index 1f0259c3..86491480 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "syscall" + "time" "github.com/docker/docker/pkg/symlink" "github.com/docker/libcontainer/configs" @@ -350,24 +351,54 @@ func tmpfsMount(m *configs.Mount, rootfs, mountLabel string) error { return syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l) } +// createIfNotExists creates a file or a directory only if it does not already exist. func createIfNotExists(path string, isDir bool) error { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { if isDir { - if err := os.MkdirAll(path, 0755); err != nil { - return err - } - } else { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return err - } - f, err := os.OpenFile(path, os.O_CREATE, 0755) - if err != nil { - return err - } - f.Close() + return os.MkdirAll(path, 0755) } + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() } } return nil } + +// remountReadonly will bind over the top of an existing path and ensure that it is read-only. +func remountReadonly(path string) error { + for i := 0; i < 5; i++ { + if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) { + switch err { + case syscall.EINVAL: + // Probably not a mountpoint, use bind-mount + if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { + return err + } + return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") + case syscall.EBUSY: + time.Sleep(100 * time.Millisecond) + continue + default: + return err + } + } + return nil + } + return fmt.Errorf("unable to mount %s as readonly max retries reached", path) +} + +// maskProckcore bind mounts /dev/null over the top of /proc/kcore inside a container to avoid security +// issues from processes reading memory information. +func maskProckcore() error { + if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) + } + return nil +} diff --git a/linux_standard_init.go b/linux_standard_init.go index c667d0c3..2cf7a9f2 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -8,7 +8,6 @@ import ( "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" ) @@ -64,7 +63,12 @@ func (l *linuxStandardInit) Init() error { return err } if l.config.Config.RestrictSys { - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} { + if err := remountReadonly(path); err != nil { + return err + } + } + if err := maskProckcore(); err != nil { return err } } diff --git a/linux_userns_init.go b/linux_userns_init.go index 2c32f274..a898f2d2 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -7,7 +7,6 @@ import ( "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" ) @@ -53,7 +52,12 @@ func (l *linuxUsernsInit) Init() error { return err } if l.config.Config.RestrictSys { - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} { + if err := remountReadonly(path); err != nil { + return err + } + } + if err := maskProckcore(); err != nil { return err } } diff --git a/security/restrict/restrict.go b/security/restrict/restrict.go deleted file mode 100644 index dd765b1f..00000000 --- a/security/restrict/restrict.go +++ /dev/null @@ -1,53 +0,0 @@ -// +build linux - -package restrict - -import ( - "fmt" - "os" - "syscall" - "time" -) - -const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV - -func mountReadonly(path string) error { - for i := 0; i < 5; i++ { - if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) { - switch err { - case syscall.EINVAL: - // Probably not a mountpoint, use bind-mount - if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { - return err - } - - return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") - case syscall.EBUSY: - time.Sleep(100 * time.Millisecond) - continue - default: - return err - } - } - - return nil - } - - return fmt.Errorf("unable to mount %s as readonly max retries reached", path) -} - -// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts). -// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes). -func Restrict(mounts ...string) error { - for _, dest := range mounts { - if err := mountReadonly(dest); err != nil { - return fmt.Errorf("unable to remount %s readonly: %s", dest, err) - } - } - - if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { - return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) - } - - return nil -} diff --git a/security/restrict/unsupported.go b/security/restrict/unsupported.go deleted file mode 100644 index 464e8d49..00000000 --- a/security/restrict/unsupported.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !linux - -package restrict - -import "fmt" - -func Restrict() error { - return fmt.Errorf("not supported") -}