Support cgroups with limits as rootless

Signed-off-by: Ed King <eking@pivotal.io>
Signed-off-by: Gabriel Rosenhouse <grosenhouse@pivotal.io>
Signed-off-by: Konstantinos Karampogias <konstantinos.karampogias@swisscom.com>
This commit is contained in:
Will Martin 2017-09-15 10:39:35 +01:00 committed by CF Garden
parent dc1552a6f3
commit ca4f427af1
10 changed files with 145 additions and 201 deletions

View File

@ -145,8 +145,17 @@ func (m *Manager) Apply(pid int) (err error) {
m.Paths[sys.Name()] = p
if err := sys.Apply(d); err != nil {
if os.IsPermission(err) && m.Cgroups.Path == "" {
// If we didn't set a cgroup path, then let's defer the error here
// until we know whether we have set limits or not.
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
// it will have the same limits as its parent.
delete(m.Paths, sys.Name())
continue
}
return err
}
}
return nil
}
@ -198,6 +207,10 @@ func (m *Manager) Set(container *configs.Config) error {
for _, sys := range subsystems {
path := paths[sys.Name()]
if err := sys.Set(path, container.Cgroups); err != nil {
if path == "" {
// cgroup never applied
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
}
return err
}
}

View File

@ -1,128 +0,0 @@
// +build linux
package rootless
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
}
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}
// The noop cgroup manager is used for rootless containers, because we currently
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
// by factory if we are in rootless mode. We error out if any cgroup options are
// set in the config -- this may change in the future with upcoming kernel features
// like the cgroup namespace.
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func (m *Manager) Apply(pid int) error {
// If there are no cgroup settings, there's nothing to do.
if m.Cgroups == nil {
return nil
}
// We can't set paths.
// TODO(cyphar): Implement the case where the runner of a rootless container
// owns their own cgroup, which would allow us to set up a
// cgroup for each path.
if m.Cgroups.Paths != nil {
return fmt.Errorf("cannot change cgroup path in rootless container")
}
// We load the paths into the manager.
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()
path, err := cgroups.GetOwnCgroupPath(name)
if err != nil {
// Ignore paths we couldn't resolve.
continue
}
paths[name] = path
}
m.Paths = paths
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
}
func (m *Manager) Set(container *configs.Config) error {
// We have to re-do the validation here, since someone might decide to
// update a rootless container.
return validate.New().Validate(container)
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container. While this doesn't
// actually require write access to a cgroup directory, the
// statistics are not useful if they can be affected by
// non-container processes.
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container.
return fmt.Errorf("cannot use freezer cgroup in rootless container")
}
func (m *Manager) Destroy() error {
// We don't have to do anything here because we didn't do any setup.
return nil
}

View File

@ -21,13 +21,6 @@ func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMount(config); err != nil {
return err
}
// Currently, cgroups cannot effectively be used in rootless containers.
// The new cgroup namespace doesn't really help us either because it doesn't
// have nice interactions with the user namespace (we're working with upstream
// to fix this).
if err := rootlessCgroup(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this

View File

@ -157,19 +157,3 @@ func TestValidateRootlessMountGid(t *testing.T) {
t.Errorf("Expected error to occur when setting gid=11 in mount options and GidMapping[0].size is 10")
}
}
/* rootlessCgroup() */
func TestValidateRootlessCgroup(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Cgroups = &configs.Cgroup{
Resources: &configs.Resources{
PidsLimit: 1337,
},
}
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if cgroup limits set")
}
}

View File

@ -14,7 +14,6 @@ import (
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
@ -73,20 +72,6 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}
// RootlessCgroups is an options func to configure a LinuxFactory to
// return containers that use the "rootless" cgroup manager, which will
// fail to do any operations not possible to do with an unprivileged user.
// It should only be used in conjunction with rootless containers.
func RootlessCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &rootless.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// IntelRdtfs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
@ -200,9 +185,6 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
return nil, newGenericError(err, SystemError)
}
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
id: id,
root: containerRoot,
@ -235,10 +217,6 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,

View File

@ -85,8 +85,7 @@ func (p *setnsProcess) start() (err error) {
if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process")
}
// We can't join cgroups if we're in a rootless container.
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}

View File

@ -2,12 +2,10 @@
load helpers
TEST_CGROUP_NAME="runc-cgroups-integration-test"
CGROUP_MEMORY="${CGROUP_MEMORY_BASE_PATH}/${TEST_CGROUP_NAME}"
function teardown() {
rm -f $BATS_TMPDIR/runc-update-integration-test.json
rm -f $BATS_TMPDIR/runc-cgroups-integration-test.json
teardown_running_container test_cgroups_kmem
teardown_running_container test_cgroups_permissions
teardown_busybox
}
@ -28,11 +26,10 @@ function check_cgroup_value() {
}
@test "runc update --kernel-memory (initialized)" {
# XXX: currently cgroups require root containers.
requires cgroups_kmem root
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
requires cgroups_kmem
# Add cgroup path
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
set_cgroups_path "$BUSYBOX_BUNDLE"
# Set some initial known values
DATA=$(cat <<-EOF
@ -57,11 +54,10 @@ EOF
}
@test "runc update --kernel-memory (uninitialized)" {
# XXX: currently cgroups require root containers.
requires cgroups_kmem root
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
requires cgroups_kmem
# Add cgroup path
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
set_cgroups_path "$BUSYBOX_BUNDLE"
# run a detached busybox to work with
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem
@ -78,3 +74,54 @@ EOF
check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648
fi
}
@test "runc create (no limits + no cgrouppath + no permission) succeeds" {
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
[ "$status" -eq 0 ]
}
@test "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error" {
requires rootless
requires rootless_no_cgroup
set_cgroups_path "$BUSYBOX_BUNDLE"
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
[ "$status" -eq 1 ]
[[ ${lines[1]} == *"permission denied"* ]]
}
@test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" {
requires rootless
requires rootless_no_cgroup
set_resources_limit "$BUSYBOX_BUNDLE"
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
[ "$status" -eq 1 ]
[[ ${lines[1]} == *"cannot set limits on the pids cgroup, as the container has not joined it"* ]]
}
@test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" {
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
set_cgroups_path "$BUSYBOX_BUNDLE"
set_resources_limit "$BUSYBOX_BUNDLE"
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
[ "$status" -eq 0 ]
}
@test "runc exec (limits + cgrouppath + permission on the cgroup dir) succeeds" {
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
set_cgroups_path "$BUSYBOX_BUNDLE"
set_resources_limit "$BUSYBOX_BUNDLE"
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions
[ "$status" -eq 0 ]
runc exec test_cgroups_permissions echo "cgroups_exec"
[ "$status" -eq 0 ]
[[ ${lines[0]} == *"cgroups_exec"* ]]
}

View File

@ -32,9 +32,11 @@ ROOT=$(mktemp -d "$BATS_TMPDIR/runc.XXXXXX")
# Path to console socket.
CONSOLE_SOCKET="$BATS_TMPDIR/console.sock"
# Cgroup mount
# Cgroup paths
CGROUP_MEMORY_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\<MEMORY\>/ { print $5; exit }')
CGROUP_CPU_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\<CPU\>/ { print $5; exit }')
CGROUPS_PATH="/runc-cgroups-integration-test/test-cgroup"
CGROUP_MEMORY="${CGROUP_MEMORY_BASE_PATH}${CGROUPS_PATH}"
# CONFIG_MEMCG_KMEM support
KMEM="${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes"
@ -79,6 +81,11 @@ function runc_spec() {
if [[ "$ROOTLESS" -ne 0 ]] && [[ "$ROOTLESS_FEATURES" == *"idmap"* ]]; then
runc_rootless_idmap "$bundle"
fi
# Ensure config.json contains linux.resources
if [[ "$ROOTLESS" -ne 0 ]] && [[ "$ROOTLESS_FEATURES" == *"cgroup"* ]]; then
runc_rootless_cgroup "$bundle"
fi
}
# Shortcut to add additional uids and gids, based on the values set as part of
@ -95,6 +102,27 @@ function runc_rootless_idmap() {
mv "$bundle/config.json"{.tmp,}
}
# Shortcut to add empty resources as part of a rootless configuration.
function runc_rootless_cgroup() {
bundle="${1:-.}"
cat "$bundle/config.json" \
| jq '.linux.resources |= .+ {"memory":{},"cpu":{},"blockio":{},"pids":{}}' \
>"$bundle/config.json.tmp"
mv "$bundle/config.json"{.tmp,}
}
# Helper function to set cgroupsPath to the value of $CGROUPS_PATH
function set_cgroups_path() {
bundle="${1:-.}"
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test\/test-cgroup",/' "$bundle/config.json"
}
# Helper function to set a resouces limit
function set_resources_limit() {
bundle="${1:-.}"
sed -i 's/\("linux": {\)/\1\n "resources": { "pids": { "limit": 100 } },/' "$bundle/config.json"
}
# Fails the current test, providing the error given.
function fail() {
echo "$@" >&2
@ -116,11 +144,26 @@ function requires() {
skip "test requires ${var}"
fi
;;
rootless)
if [ "$ROOTLESS" -eq 0 ]; then
skip "test requires ${var}"
fi
;;
rootless_idmap)
if [[ "$ROOTLESS_FEATURES" != *"idmap"* ]]; then
skip "test requires ${var}"
fi
;;
rootless_cgroup)
if [[ "$ROOTLESS_FEATURES" != *"cgroup"* ]]; then
skip "test requires ${var}"
fi
;;
rootless_no_cgroup)
if [[ "$ROOTLESS_FEATURES" == *"cgroup"* ]]; then
skip "test requires ${var}"
fi
;;
cgroups_kmem)
if [ ! -e "$KMEM" ]; then
skip "Test requires ${var}"

View File

@ -3,7 +3,7 @@
load helpers
function teardown() {
rm -f $BATS_TMPDIR/runc-update-integration-test.json
rm -f $BATS_TMPDIR/runc-cgroups-integration-test.json
teardown_running_container test_update
teardown_running_container test_update_rt
teardown_busybox
@ -13,8 +13,7 @@ function setup() {
teardown
setup_busybox
# Add cgroup path
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-update-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
set_cgroups_path "$BUSYBOX_BUNDLE"
# Set some initial known values
DATA=$(cat <<EOF
@ -53,10 +52,10 @@ function check_cgroup_value() {
# TODO: test rt cgroup updating
@test "update" {
# XXX: currently cgroups require root containers.
# XXX: Also, this test should be split into separate sections so that we
# can skip kmem without skipping update tests overall.
requires cgroups_kmem root
# XXX: Also, this test should be split into separate sections so that we
# can skip kmem without skipping update tests overall.
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
requires cgroups_kmem
# run a few busyboxes detached
runc run -d --console-socket $CONSOLE_SOCKET test_update
@ -65,7 +64,7 @@ function check_cgroup_value() {
# get the cgroup paths
for g in MEMORY CPUSET CPU BLKIO PIDS; do
base_path=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\<'${g}'\>/ { print $5; exit }')
eval CGROUP_${g}="${base_path}/runc-update-integration-test"
eval CGROUP_${g}="${base_path}${CGROUPS_PATH}"
done
CGROUP_SYSTEM_MEMORY=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\<'MEMORY'\>/ { print $5; exit }')
@ -243,9 +242,9 @@ EOF
}
EOF
)
echo $DATA > $BATS_TMPDIR/runc-update-integration-test.json
echo $DATA > $BATS_TMPDIR/runc-cgroups-integration-test.json
runc update -r $BATS_TMPDIR/runc-update-integration-test.json test_update
runc update -r $BATS_TMPDIR/runc-cgroups-integration-test.json test_update
[ "$status" -eq 0 ]
check_cgroup_value $CGROUP_BLKIO "blkio.weight" 1000
check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 1000000
@ -260,6 +259,7 @@ EOF
}
@test "update rt period and runtime" {
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
requires cgroups_rt
# run a detached busybox
@ -267,7 +267,7 @@ EOF
[ "$status" -eq 0 ]
# get the cgroup paths
eval CGROUP_CPU="${CGROUP_CPU_BASE_PATH}/runc-update-integration-test"
eval CGROUP_CPU="${CGROUP_CPU_BASE_PATH}${CGROUPS_PATH}"
runc update -r - test_update_rt <<EOF
{

View File

@ -19,7 +19,7 @@
# a new feature, please match the existing style. Add an entry to $ALL_FEATURES,
# and add an enable_* and disable_* hook.
ALL_FEATURES=("idmap")
ALL_FEATURES=("idmap" "cgroup")
ROOT="$(readlink -f "$(dirname "${BASH_SOURCE}")/..")"
# FEATURE: Opportunistic new{uid,gid}map support, allowing a rootless container
@ -55,6 +55,21 @@ function disable_idmap() {
[ -e /usr/bin/newgidmap ] && mv /usr/bin/{,unused-}newgidmap
}
# FEATURE: Opportunistic cgroups support, allowing a rootless container to set
# resource limits on condition that cgroupsPath is set to a path the
# rootless user has permissions on.
function enable_cgroup() {
# Set up cgroups for use in rootless containers.
mkdir -p /sys/fs/cgroup/{blkio,cpu,cpuacct,cpuset,devices,freezer,hugetlb,memory,net_cls,net_prio,openrc,perf_event,pids,systemd}/runc-cgroups-integration-test
chown rootless:rootless -R /sys/fs/cgroup/*/runc-cgroups-integration-test
}
function disable_cgroup() {
# Remove cgroups used in rootless containers.
[ -d /sys/fs/cgroup/devices/runc-cgroups-integration-test ] && rmdir /sys/fs/cgroup/*/runc-cgroups-integration-test
}
# Create a powerset of $ALL_FEATURES (the set of all subsets of $ALL_FEATURES).
# We test all of the possible combinations (as long as we don't add too many
# feature knobs this shouldn't take too long -- but the number of tested
@ -83,6 +98,6 @@ do
set -e
echo path: $PATH
export ROOTLESS_FEATURES="$enabled_features"
sudo -HE -u rootless PATH="$PATH" bats -t "$ROOT/tests/integration"
sudo -HE -u rootless PATH="$PATH" bats -t "$ROOT/tests/integration$TESTFLAGS"
set +e
done