diff --git a/.travis.yml b/.travis.yml index 202335ef..1b78ef0d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,8 @@ matrix: - sudo ssh default -t 'cd /vagrant && sudo make localintegration RUNC_USE_SYSTEMD=yes' # same setup but with fs2 driver instead of systemd - sudo ssh default -t 'cd /vagrant && sudo make localintegration' + # rootless + - sudo ssh default -t 'cd /vagrant && sudo make localrootlessintegration' allow_failures: - go: tip diff --git a/libcontainer/cgroups/fs2/create.go b/libcontainer/cgroups/fs2/create.go index 0cc2c1f6..66afdec7 100644 --- a/libcontainer/cgroups/fs2/create.go +++ b/libcontainer/cgroups/fs2/create.go @@ -95,15 +95,22 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) { } }() } + // Write cgroup.type explicitly. + // Otherwise ENOTSUP may happen. + cgType := filepath.Join(current, "cgroup.type") + _ = ioutil.WriteFile(cgType, []byte("threaded"), 0644) } // enable needed controllers if i < len(elements)-1 { file := filepath.Join(current, "cgroup.subtree_control") - if err := ioutil.WriteFile(file, []byte(allCtrs), 0755); err != nil { - // XXX: we can enable _some_ controllers doing it one-by one - // instead of erroring out -- does it makes sense to do so? - return err + if err := ioutil.WriteFile(file, []byte(allCtrs), 0644); err != nil { + // try write one by one + for _, ctr := range ctrs { + _ = ioutil.WriteFile(file, []byte(ctr), 0644) + } } + // Some controllers might not be enabled when rootless or containerized, + // but we don't catch the error here. (Caught in setXXX() functions.) 
} } diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 93a2aa59..fc469e10 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -53,7 +53,10 @@ func (m *manager) getControllers() error { file := filepath.Join(m.dirPath, "cgroup.controllers") data, err := ioutil.ReadFile(file) - if err != nil && !m.rootless { + if err != nil { + if m.rootless && m.config.Path == "" { + return nil + } return err } fields := strings.Fields(string(data)) @@ -67,9 +70,22 @@ func (m *manager) getControllers() error { func (m *manager) Apply(pid int) error { if err := CreateCgroupPath(m.dirPath, m.config); err != nil { + // Related tests: + // - "runc create (no limits + no cgrouppath + no permission) succeeds" + // - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error" + // - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if m.rootless { + if m.config.Path == "" { + cl, clErr := neededControllers(m.config) + if clErr == nil && len(cl) == 0 { + return nil + } + return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups") + } + } return err } - if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless { + if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil { return err } return nil @@ -199,7 +215,11 @@ func (m *manager) Set(container *configs.Config) error { } } // devices (since kernel 4.15, pseudo-controller) - if err := setDevices(m.dirPath, container.Cgroups); err != nil { + // + // When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work. + // However, errors from other subsystems are not ignored. 
+ // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if err := setDevices(m.dirPath, container.Cgroups); err != nil && !m.rootless { errs = append(errs, err) } // cpuset (since kernel 5.0) @@ -218,7 +238,7 @@ func (m *manager) Set(container *configs.Config) error { if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil { errs = append(errs, err) } - if len(errs) > 0 && !m.rootless { + if len(errs) > 0 { return errors.Errorf("error while setting cgroup v2: %+v", errs) } m.config = container.Cgroups diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 6abb2ed6..31a82336 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -95,7 +95,7 @@ EOF runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions [ "$status" -eq 1 ] - [[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]] + [[ ${lines[1]} == *"rootless needs no limits + no cgrouppath when no permission is granted for cgroups"* ]] || [[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]] } @test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" { diff --git a/tests/integration/update.bats b/tests/integration/update.bats index a097549c..64d58dde 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -68,6 +68,10 @@ EOF # check that initial values were properly set check_cgroup_value "cpuset.cpus" 0 + if [[ "$CGROUP_UNIFIED" = "yes" ]] && ! 
grep -qw memory "$CGROUP_PATH/cgroup.controllers"; then + # This happens in containerized environments because "echo +memory > /sys/fs/cgroup/cgroup.subtree_control" fails with EINVAL + skip "memory controller not available" + fi check_cgroup_value $MEM_LIMIT 33554432 check_cgroup_value $MEM_RESERVE 25165824 check_cgroup_value "pids.max" 20 diff --git a/tests/rootless.sh b/tests/rootless.sh index d6092d04..0bee3855 100755 --- a/tests/rootless.sh +++ b/tests/rootless.sh @@ -82,6 +82,27 @@ function enable_cgroup() { # handling. [[ "$cg" == "cpuset" ]] && chown rootless:rootless "$CGROUP_MOUNT/$cg$CGROUP_PATH/cpuset."{cpus,mems} done + # cgroup v2 + if [[ -e "$CGROUP_MOUNT/cgroup.controllers" ]]; then + # Enable controllers. Some controllers (e.g. memory) may fail in containerized environments. + set -x + for f in $(cat "$CGROUP_MOUNT/cgroup.controllers"); do echo +$f > "$CGROUP_MOUNT/cgroup.subtree_control"; done + set +x + # Create the cgroup. + mkdir -p "$CGROUP_MOUNT/$CGROUP_PATH" + # chown/chmod dir + cgroup.subtree_control + cgroup.procs + parent's cgroup.procs. + # See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#delegation-containment + chown root:rootless "$CGROUP_MOUNT/$CGROUP_PATH" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.subtree_control" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.procs" "$CGROUP_MOUNT/cgroup.procs" + chmod g+rwx "$CGROUP_MOUNT/$CGROUP_PATH" + chmod g+rw "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.subtree_control" "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.procs" "$CGROUP_MOUNT/cgroup.procs" + # Fix up cgroup.type. + echo threaded > "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.type" + # Make sure cgroup.type doesn't contain "invalid". Otherwise write ops will fail with ENOTSUP. 
+ # See http://man7.org/linux/man-pages/man7/cgroups.7.html + if grep -qw invalid "$CGROUP_MOUNT/$CGROUP_PATH/cgroup.type"; then + exit 1 + fi + fi } function disable_cgroup() { @@ -90,6 +111,8 @@ function disable_cgroup() { do [ -d "$CGROUP_MOUNT/$cg$CGROUP_PATH" ] && rmdir "$CGROUP_MOUNT/$cg$CGROUP_PATH" done + # cgroup v2 + [ -d "$CGROUP_MOUNT/$CGROUP_PATH" ] && rmdir "$CGROUP_MOUNT/$CGROUP_PATH" } # Create a powerset of $ALL_FEATURES (the set of all subsets of $ALL_FEATURES).