diff --git a/libcontainer/cgroups/fs/devices.go b/libcontainer/cgroups/fs/devices.go index a70619cb..6a8f1d35 100644 --- a/libcontainer/cgroups/fs/devices.go +++ b/libcontainer/cgroups/fs/devices.go @@ -23,6 +23,9 @@ func (s *DevicesGroup) Name() string { } func (s *DevicesGroup) Apply(d *cgroupData) error { + if d.config.SkipDevices { + return nil + } _, err := d.join("devices") if err != nil { // We will return error even it's `not found` error, devices @@ -52,7 +55,7 @@ func buildEmulator(rules []*configs.DeviceRule) (*devices.Emulator, error) { } func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { - if system.RunningInUserNS() { + if system.RunningInUserNS() || cgroup.SkipDevices { return nil } diff --git a/libcontainer/cgroups/fs/fs.go b/libcontainer/cgroups/fs/fs.go index 245ce84f..f00fa11a 100644 --- a/libcontainer/cgroups/fs/fs.go +++ b/libcontainer/cgroups/fs/fs.go @@ -204,7 +204,7 @@ func (m *manager) Apply(pid int) (err error) { if err != nil { // The non-presence of the devices subsystem is // considered fatal for security reasons. - if cgroups.IsNotFound(err) && sys.Name() != "devices" { + if cgroups.IsNotFound(err) && (c.SkipDevices || sys.Name() != "devices") { continue } return err diff --git a/libcontainer/cgroups/fs2/devices.go b/libcontainer/cgroups/fs2/devices.go index ee180bda..053ec33e 100644 --- a/libcontainer/cgroups/fs2/devices.go +++ b/libcontainer/cgroups/fs2/devices.go @@ -37,6 +37,9 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool { } func setDevices(dirPath string, cgroup *configs.Cgroup) error { + if cgroup.SkipDevices { + return nil + } // XXX: This is currently a white-list (but all callers pass a blacklist of // devices). This is bad for a whole variety of reasons, but will need // to be fixed with co-ordinated effort with downstreams. diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index 9e8e2fdd..63503ad8 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -379,24 +379,27 @@ func (m *legacyManager) Set(container *configs.Config) error { return err } - // Figure out the current freezer state, so we can revert to it after we - // temporarily freeze the container. - targetFreezerState, err := m.GetFreezerState() - if err != nil { - return err - } - if targetFreezerState == configs.Undefined { - targetFreezerState = configs.Thawed - } - // We have to freeze the container while systemd sets the cgroup settings. // The reason for this is that systemd's application of DeviceAllow rules // is done disruptively, resulting in spurrious errors to common devices // (unlike our fs driver, they will happily write deny-all rules to running // containers). So we freeze the container to avoid them hitting the cgroup // error. But if the freezer cgroup isn't supported, we just warn about it. - if err := m.Freeze(configs.Frozen); err != nil { - logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + targetFreezerState := configs.Undefined + if !m.cgroups.SkipDevices { + // Figure out the current freezer state, so we can revert to it after we + // temporarily freeze the container. + targetFreezerState, err = m.GetFreezerState() + if err != nil { + return err + } + if targetFreezerState == configs.Undefined { + targetFreezerState = configs.Thawed + } + + if err := m.Freeze(configs.Frozen); err != nil { + logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + } } if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil { diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index 6e343aa9..e1a6622a 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -298,24 +298,27 @@ func (m *unifiedManager) Set(container *configs.Config) error { return err } - // Figure out the current freezer state, so we can revert to it after we - // temporarily freeze the container. - targetFreezerState, err := m.GetFreezerState() - if err != nil { - return err - } - if targetFreezerState == configs.Undefined { - targetFreezerState = configs.Thawed - } - // We have to freeze the container while systemd sets the cgroup settings. // The reason for this is that systemd's application of DeviceAllow rules // is done disruptively, resulting in spurrious errors to common devices // (unlike our fs driver, they will happily write deny-all rules to running // containers). So we freeze the container to avoid them hitting the cgroup // error. But if the freezer cgroup isn't supported, we just warn about it. - if err := m.Freeze(configs.Frozen); err != nil { - logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + targetFreezerState := configs.Undefined + if !m.cgroups.SkipDevices { + // Figure out the current freezer state, so we can revert to it after we + // temporarily freeze the container. + targetFreezerState, err = m.GetFreezerState() + if err != nil { + return err + } + if targetFreezerState == configs.Undefined { + targetFreezerState = configs.Thawed + } + + if err := m.Freeze(configs.Frozen); err != nil { + logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + } } if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil { diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index f1a5bd11..6e90ae16 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -126,4 +126,11 @@ type Resources struct { // CpuWeight sets a proportional bandwidth limit. CpuWeight uint64 `json:"cpu_weight"` + + // SkipDevices allows to skip configuring device permissions. + // Used by e.g. kubelet while creating a parent cgroup (kubepods) + // common for many containers. + // + // NOTE it is impossible to start a container which has this flag set. + SkipDevices bool `json:"skip_devices"` } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index b4d722b0..c5188b1d 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -251,6 +251,9 @@ func (c *linuxContainer) Set(config configs.Config) error { func (c *linuxContainer) Start(process *Process) error { c.m.Lock() defer c.m.Unlock() + if c.config.Cgroups.Resources.SkipDevices { + return newGenericError(errors.New("can't start container with SkipDevices set"), ConfigInvalid) + } if process.Init { if err := c.createExecFifo(); err != nil { return err