rootless: cgroup: treat EROFS as a skippable error

In some cases, /sys/fs/cgroup is mounted read-only. In rootless containers we can consider this effectively identical to having cgroups that we don't have write permission to -- because the user isn't responsible for the read-only setup and cannot modify it. The rules are identical to when /sys/fs/cgroup is not writable by the unprivileged user.

An example of this is the default configuration of Docker, where cgroups are mounted as read-only as a preventative security measure.

Reported-by: Vladimir Rutsky <rutsky@google.com>
Signed-off-by: Aleksa Sarai <asarai@suse.de>
2018-03-16 11:33:04 +11:00 · 2018-03-16 11:33:04 +11:00 · 03e585985f
parent 69663f0bd4
commit 03e585985f
4 changed files with 60 additions and 16 deletions
--- a/libcontainer/cgroups/fs/apply_raw.go
+++ b/libcontainer/cgroups/fs/apply_raw.go
@ -3,7 +3,6 @@
 package fs

 import (
-	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@ -14,6 +13,8 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
 )

 var (
@ -35,7 +36,7 @@ var (
 	HugePageSizes, _ = cgroups.GetHugePageSize()
 )

-var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
+var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")

 type subsystemSet []subsystem

@ -62,9 +63,10 @@ type subsystem interface {
 }

 type Manager struct {
-	mu      sync.Mutex
-	Cgroups *configs.Cgroup
-	Paths   map[string]string
+	mu       sync.Mutex
+	Cgroups  *configs.Cgroup
+	Rootless bool
+	Paths    map[string]string
 }

 // The absolute path to the root of the cgroup hierarchies.
@ -100,6 +102,27 @@ type cgroupData struct {
 	pid       int
 }

+// isIgnorableError returns whether err is a permission error (in the loose
+// sense of the word). This includes EROFS (which for an unprivileged user is
+// basically a permission error) and EACCES (for similar reasons) as well as
+// the normal EPERM.
+func isIgnorableError(err error) bool {
+	if os.IsPermission(errors.Cause(err)) {
+		return true
+	}
+
+	var errno error
+	switch err := errors.Cause(err).(type) {
+	case *os.PathError:
+		errno = err.Err
+	case *os.LinkError:
+		errno = err.Err
+	case *os.SyscallError:
+		errno = err.Err
+	}
+	return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
+}
+
 func (m *Manager) Apply(pid int) (err error) {
 	if m.Cgroups == nil {
 		return nil
@ -145,11 +168,11 @@ func (m *Manager) Apply(pid int) (err error) {
 		m.Paths[sys.Name()] = p

 		if err := sys.Apply(d); err != nil {
-			if os.IsPermission(err) && m.Cgroups.Path == "" {
-				// If we didn't set a cgroup path, then let's defer the error here
-				// until we know whether we have set limits or not.
-				// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
-				// it will have the same limits as its parent.
+			// In the case of rootless, where an explicit cgroup path hasn't
+			// been set, we don't bail on error in case of permission problems.
+			// Cases where limits have been set (and we couldn't create our own
+			// cgroup) are handled by Set.
+			if m.Rootless && isIgnorableError(err) && m.Cgroups.Path == "" {
 				delete(m.Paths, sys.Name())
 				continue
 			}
@ -208,8 +231,9 @@ func (m *Manager) Set(container *configs.Config) error {
 		path := paths[sys.Name()]
 		if err := sys.Set(path, container.Cgroups); err != nil {
 			if path == "" {
-				// cgroup never applied
-				return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
+				// We never created a path for this cgroup, so we cannot set
+				// limits for it (though we have already tried at this point).
+				return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
 			}
 			return err
 		}
--- a/libcontainer/factory_linux.go
+++ b/libcontainer/factory_linux.go
@ -59,9 +59,9 @@ func SystemdCgroups(l *LinuxFactory) error {
 	return nil
 }

-// Cgroupfs is an options func to configure a LinuxFactory to return
-// containers that use the native cgroups filesystem implementation to
-// create and manage cgroups.
+// Cgroupfs is an options func to configure a LinuxFactory to return containers
+// that use the native cgroups filesystem implementation to create and manage
+// cgroups.
 func Cgroupfs(l *LinuxFactory) error {
 	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
 		return &fs.Manager{
@ -72,6 +72,23 @@ func Cgroupfs(l *LinuxFactory) error {
 	return nil
 }

+// RootlessCgroupfs is an options func to configure a LinuxFactory to return
+// containers that use the native cgroups filesystem implementation to create
+// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
+// that RootlessCgroupfs can transparently handle permission errors that occur
+// during rootless container setup (while still allowing cgroup usage if
+// they've been set up properly).
+func RootlessCgroupfs(l *LinuxFactory) error {
+	l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
+		return &fs.Manager{
+			Cgroups:  config,
+			Rootless: true,
+			Paths:    paths,
+		}
+	}
+	return nil
+}
+
 // IntelRdtfs is an options func to configure a LinuxFactory to return
 // containers that use the Intel RDT "resource control" filesystem to
 // create and manage Intel Xeon platform shared resources (e.g., L3 cache).
--- a/tests/integration/cgroups.bats
+++ b/tests/integration/cgroups.bats
@ -99,7 +99,7 @@ EOF

    runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
    [ "$status" -eq 1 ]
-    [[ ${lines[1]} == *"cannot set limits on the pids cgroup, as the container has not joined it"* ]]
+    [[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]]
 }

@test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" {
--- a/utils_linux.go
+++ b/utils_linux.go
@ -38,6 +38,9 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
 	// We default to cgroupfs, and can only use systemd if the system is a
 	// systemd box.
 	cgroupManager := libcontainer.Cgroupfs
+	if isRootless() {
+		cgroupManager = libcontainer.RootlessCgroupfs
+	}
 	if context.GlobalBool("systemd-cgroup") {
 		if systemd.UseSystemd() {
 			cgroupManager = libcontainer.SystemdCgroups