rootless: cgroup: treat EROFS as a skippable error
In some cases, /sys/fs/cgroups is mounted read-only. In rootless containers we can consider this effectively identical to having cgroups that we don't have write permission to -- because the user isn't responsible for the read-only setup and cannot modify it. The rules are identical to when /sys/fs/cgroups is not writable by the unprivileged user. An example of this is the default configuration of Docker, where cgroups are mounted as read-only as a preventative security measure. Reported-by: Vladimir Rutsky <rutsky@google.com> Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
parent
69663f0bd4
commit
03e585985f
|
@ -3,7 +3,6 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
@ -14,6 +13,8 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -35,7 +36,7 @@ var (
|
|||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
|
@ -62,9 +63,10 @@ type subsystem interface {
|
|||
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Rootless bool
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
|
@ -100,6 +102,27 @@ type cgroupData struct {
|
|||
pid int
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(err error) bool {
|
||||
if os.IsPermission(errors.Cause(err)) {
|
||||
return true
|
||||
}
|
||||
|
||||
var errno error
|
||||
switch err := errors.Cause(err).(type) {
|
||||
case *os.PathError:
|
||||
errno = err.Err
|
||||
case *os.LinkError:
|
||||
errno = err.Err
|
||||
case *os.SyscallError:
|
||||
errno = err.Err
|
||||
}
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
|
@ -145,11 +168,11 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
m.Paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(d); err != nil {
|
||||
if os.IsPermission(err) && m.Cgroups.Path == "" {
|
||||
// If we didn't set a cgroup path, then let's defer the error here
|
||||
// until we know whether we have set limits or not.
|
||||
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
|
||||
// it will have the same limits as its parent.
|
||||
// In the case of rootless, where an explicit cgroup path hasn't
|
||||
// been set, we don't bail on error in case of permission problems.
|
||||
// Cases where limits have been set (and we couldn't create our own
|
||||
// cgroup) are handled by Set.
|
||||
if m.Rootless && isIgnorableError(err) && m.Cgroups.Path == "" {
|
||||
delete(m.Paths, sys.Name())
|
||||
continue
|
||||
}
|
||||
|
@ -208,8 +231,9 @@ func (m *Manager) Set(container *configs.Config) error {
|
|||
path := paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if path == "" {
|
||||
// cgroup never applied
|
||||
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -59,9 +59,9 @@ func SystemdCgroups(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to
|
||||
// create and manage cgroups.
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return containers
|
||||
// that use the native cgroups filesystem implementation to create and manage
|
||||
// cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
|
@ -72,6 +72,23 @@ func Cgroupfs(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// RootlessCgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to create
|
||||
// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
|
||||
// that RootlessCgroupfs can transparently handle permission errors that occur
|
||||
// during rootless container setup (while still allowing cgroup usage if
|
||||
// they've been set up properly).
|
||||
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Rootless: true,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IntelRdtfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the Intel RDT "resource control" filesystem to
|
||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
||||
|
|
|
@ -99,7 +99,7 @@ EOF
|
|||
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
|
||||
[ "$status" -eq 1 ]
|
||||
[[ ${lines[1]} == *"cannot set limits on the pids cgroup, as the container has not joined it"* ]]
|
||||
[[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]]
|
||||
}
|
||||
|
||||
@test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" {
|
||||
|
|
|
@ -38,6 +38,9 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
|||
// We default to cgroupfs, and can only use systemd if the system is a
|
||||
// systemd box.
|
||||
cgroupManager := libcontainer.Cgroupfs
|
||||
if isRootless() {
|
||||
cgroupManager = libcontainer.RootlessCgroupfs
|
||||
}
|
||||
if context.GlobalBool("systemd-cgroup") {
|
||||
if systemd.UseSystemd() {
|
||||
cgroupManager = libcontainer.SystemdCgroups
|
||||
|
|
Loading…
Reference in New Issue