Merge pull request #1759 from cyphar/rootless-erofs-as-eperm
rootless: cgroup: treat EROFS as a skippable error
This commit is contained in:
commit
dd67ab10d7
|
@ -3,7 +3,6 @@
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
@ -14,6 +13,8 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -35,7 +36,7 @@ var (
|
||||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||||
)
|
)
|
||||||
|
|
||||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
||||||
|
|
||||||
type subsystemSet []subsystem
|
type subsystemSet []subsystem
|
||||||
|
|
||||||
|
@ -64,6 +65,7 @@ type subsystem interface {
|
||||||
type Manager struct {
|
type Manager struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
Cgroups *configs.Cgroup
|
Cgroups *configs.Cgroup
|
||||||
|
Rootless bool
|
||||||
Paths map[string]string
|
Paths map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,6 +102,27 @@ type cgroupData struct {
|
||||||
pid int
|
pid int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isIgnorableError returns whether err is a permission error (in the loose
|
||||||
|
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||||
|
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||||
|
// the normal EPERM.
|
||||||
|
func isIgnorableError(err error) bool {
|
||||||
|
if os.IsPermission(errors.Cause(err)) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
var errno error
|
||||||
|
switch err := errors.Cause(err).(type) {
|
||||||
|
case *os.PathError:
|
||||||
|
errno = err.Err
|
||||||
|
case *os.LinkError:
|
||||||
|
errno = err.Err
|
||||||
|
case *os.SyscallError:
|
||||||
|
errno = err.Err
|
||||||
|
}
|
||||||
|
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Manager) Apply(pid int) (err error) {
|
func (m *Manager) Apply(pid int) (err error) {
|
||||||
if m.Cgroups == nil {
|
if m.Cgroups == nil {
|
||||||
return nil
|
return nil
|
||||||
|
@ -145,11 +168,11 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||||
m.Paths[sys.Name()] = p
|
m.Paths[sys.Name()] = p
|
||||||
|
|
||||||
if err := sys.Apply(d); err != nil {
|
if err := sys.Apply(d); err != nil {
|
||||||
if os.IsPermission(err) && m.Cgroups.Path == "" {
|
// In the case of rootless, where an explicit cgroup path hasn't
|
||||||
// If we didn't set a cgroup path, then let's defer the error here
|
// been set, we don't bail on error in case of permission problems.
|
||||||
// until we know whether we have set limits or not.
|
// Cases where limits have been set (and we couldn't create our own
|
||||||
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
|
// cgroup) are handled by Set.
|
||||||
// it will have the same limits as its parent.
|
if m.Rootless && isIgnorableError(err) && m.Cgroups.Path == "" {
|
||||||
delete(m.Paths, sys.Name())
|
delete(m.Paths, sys.Name())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -208,8 +231,9 @@ func (m *Manager) Set(container *configs.Config) error {
|
||||||
path := paths[sys.Name()]
|
path := paths[sys.Name()]
|
||||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||||
if path == "" {
|
if path == "" {
|
||||||
// cgroup never applied
|
// We never created a path for this cgroup, so we cannot set
|
||||||
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
|
// limits for it (though we have already tried at this point).
|
||||||
|
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -141,9 +141,10 @@ type Config struct {
|
||||||
|
|
||||||
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||||
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
||||||
// higher scores are preferred for being killed.
|
// higher scores are preferred for being killed. If it is unset then we don't touch the current
|
||||||
|
// value.
|
||||||
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||||
OomScoreAdj int `json:"oom_score_adj"`
|
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
|
||||||
|
|
||||||
// UidMappings is an array of User ID mappings for User Namespaces
|
// UidMappings is an array of User ID mappings for User Namespaces
|
||||||
UidMappings []IDMap `json:"uid_mappings"`
|
UidMappings []IDMap `json:"uid_mappings"`
|
||||||
|
|
|
@ -1813,11 +1813,13 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.config.OomScoreAdj != nil {
|
||||||
// write oom_score_adj
|
// write oom_score_adj
|
||||||
r.AddData(&Bytemsg{
|
r.AddData(&Bytemsg{
|
||||||
Type: OomScoreAdjAttr,
|
Type: OomScoreAdjAttr,
|
||||||
Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)),
|
Value: []byte(fmt.Sprintf("%d", *c.config.OomScoreAdj)),
|
||||||
})
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// write rootless
|
// write rootless
|
||||||
r.AddData(&Boolmsg{
|
r.AddData(&Boolmsg{
|
||||||
|
|
|
@ -59,9 +59,9 @@ func SystemdCgroups(l *LinuxFactory) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cgroupfs is an options func to configure a LinuxFactory to return
|
// Cgroupfs is an options func to configure a LinuxFactory to return containers
|
||||||
// containers that use the native cgroups filesystem implementation to
|
// that use the native cgroups filesystem implementation to create and manage
|
||||||
// create and manage cgroups.
|
// cgroups.
|
||||||
func Cgroupfs(l *LinuxFactory) error {
|
func Cgroupfs(l *LinuxFactory) error {
|
||||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||||
return &fs.Manager{
|
return &fs.Manager{
|
||||||
|
@ -72,6 +72,23 @@ func Cgroupfs(l *LinuxFactory) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RootlessCgroupfs is an options func to configure a LinuxFactory to return
|
||||||
|
// containers that use the native cgroups filesystem implementation to create
|
||||||
|
// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
|
||||||
|
// that RootlessCgroupfs can transparently handle permission errors that occur
|
||||||
|
// during rootless container setup (while still allowing cgroup usage if
|
||||||
|
// they've been set up properly).
|
||||||
|
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||||
|
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||||
|
return &fs.Manager{
|
||||||
|
Cgroups: config,
|
||||||
|
Rootless: true,
|
||||||
|
Paths: paths,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// IntelRdtfs is an options func to configure a LinuxFactory to return
|
// IntelRdtfs is an options func to configure a LinuxFactory to return
|
||||||
// containers that use the Intel RDT "resource control" filesystem to
|
// containers that use the Intel RDT "resource control" filesystem to
|
||||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
||||||
|
|
|
@ -1075,7 +1075,7 @@ func TestOomScoreAdj(t *testing.T) {
|
||||||
defer remove(rootfs)
|
defer remove(rootfs)
|
||||||
|
|
||||||
config := newTemplateConfig(rootfs)
|
config := newTemplateConfig(rootfs)
|
||||||
config.OomScoreAdj = 200
|
config.OomScoreAdj = ptrInt(200)
|
||||||
|
|
||||||
factory, err := libcontainer.New(root, libcontainer.Cgroupfs)
|
factory, err := libcontainer.New(root, libcontainer.Cgroupfs)
|
||||||
ok(t, err)
|
ok(t, err)
|
||||||
|
@ -1100,8 +1100,8 @@ func TestOomScoreAdj(t *testing.T) {
|
||||||
outputOomScoreAdj := strings.TrimSpace(string(stdout.Bytes()))
|
outputOomScoreAdj := strings.TrimSpace(string(stdout.Bytes()))
|
||||||
|
|
||||||
// Check that the oom_score_adj matches the value that was set as part of config.
|
// Check that the oom_score_adj matches the value that was set as part of config.
|
||||||
if outputOomScoreAdj != strconv.Itoa(config.OomScoreAdj) {
|
if outputOomScoreAdj != strconv.Itoa(*config.OomScoreAdj) {
|
||||||
t.Fatalf("Expected oom_score_adj %d; got %q", config.OomScoreAdj, outputOomScoreAdj)
|
t.Fatalf("Expected oom_score_adj %d; got %q", *config.OomScoreAdj, outputOomScoreAdj)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -497,7 +497,7 @@ func TestExecInOomScoreAdj(t *testing.T) {
|
||||||
ok(t, err)
|
ok(t, err)
|
||||||
defer remove(rootfs)
|
defer remove(rootfs)
|
||||||
config := newTemplateConfig(rootfs)
|
config := newTemplateConfig(rootfs)
|
||||||
config.OomScoreAdj = 200
|
config.OomScoreAdj = ptrInt(200)
|
||||||
container, err := newContainer(config)
|
container, err := newContainer(config)
|
||||||
ok(t, err)
|
ok(t, err)
|
||||||
defer container.Destroy()
|
defer container.Destroy()
|
||||||
|
@ -532,8 +532,8 @@ func TestExecInOomScoreAdj(t *testing.T) {
|
||||||
waitProcess(process, t)
|
waitProcess(process, t)
|
||||||
|
|
||||||
out := buffers.Stdout.String()
|
out := buffers.Stdout.String()
|
||||||
if oomScoreAdj := strings.TrimSpace(out); oomScoreAdj != strconv.Itoa(config.OomScoreAdj) {
|
if oomScoreAdj := strings.TrimSpace(out); oomScoreAdj != strconv.Itoa(*config.OomScoreAdj) {
|
||||||
t.Fatalf("expected oomScoreAdj to be %d, got %s", config.OomScoreAdj, oomScoreAdj)
|
t.Fatalf("expected oomScoreAdj to be %d, got %s", *config.OomScoreAdj, oomScoreAdj)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,10 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ptrInt returns a pointer to a freshly allocated int holding v. It exists so
// tests can populate optional *int config fields from a literal.
func ptrInt(v int) *int {
	p := new(int)
	*p = v
	return p
}
|
||||||
|
|
||||||
func newStdBuffers() *stdBuffers {
|
func newStdBuffers() *stdBuffers {
|
||||||
return &stdBuffers{
|
return &stdBuffers{
|
||||||
Stdin: bytes.NewBuffer(nil),
|
Stdin: bytes.NewBuffer(nil),
|
||||||
|
|
|
@ -243,8 +243,8 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
||||||
if spec.Process.SelinuxLabel != "" {
|
if spec.Process.SelinuxLabel != "" {
|
||||||
config.ProcessLabel = spec.Process.SelinuxLabel
|
config.ProcessLabel = spec.Process.SelinuxLabel
|
||||||
}
|
}
|
||||||
if spec.Process != nil && spec.Process.OOMScoreAdj != nil {
|
if spec.Process != nil {
|
||||||
config.OomScoreAdj = *spec.Process.OOMScoreAdj
|
config.OomScoreAdj = spec.Process.OOMScoreAdj
|
||||||
}
|
}
|
||||||
if spec.Process.Capabilities != nil {
|
if spec.Process.Capabilities != nil {
|
||||||
config.Capabilities = &configs.Capabilities{
|
config.Capabilities = &configs.Capabilities{
|
||||||
|
|
|
@ -99,7 +99,7 @@ EOF
|
||||||
|
|
||||||
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
|
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
|
||||||
[ "$status" -eq 1 ]
|
[ "$status" -eq 1 ]
|
||||||
[[ ${lines[1]} == *"cannot set limits on the pids cgroup, as the container has not joined it"* ]]
|
[[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]]
|
||||||
}
|
}
|
||||||
|
|
||||||
@test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" {
|
@test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" {
|
||||||
|
|
|
@ -38,6 +38,9 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
||||||
// We default to cgroupfs, and can only use systemd if the system is a
|
// We default to cgroupfs, and can only use systemd if the system is a
|
||||||
// systemd box.
|
// systemd box.
|
||||||
cgroupManager := libcontainer.Cgroupfs
|
cgroupManager := libcontainer.Cgroupfs
|
||||||
|
if isRootless() {
|
||||||
|
cgroupManager = libcontainer.RootlessCgroupfs
|
||||||
|
}
|
||||||
if context.GlobalBool("systemd-cgroup") {
|
if context.GlobalBool("systemd-cgroup") {
|
||||||
if systemd.UseSystemd() {
|
if systemd.UseSystemd() {
|
||||||
cgroupManager = libcontainer.SystemdCgroups
|
cgroupManager = libcontainer.SystemdCgroups
|
||||||
|
|
Loading…
Reference in New Issue