cgroup2: exec: join the cgroup of the init process on EBUSY
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
parent
e664e732d5
commit
c91fe9aeba
|
@ -566,6 +566,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
|||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
initProcessPid: state.InitProcessPid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
@ -12,12 +13,14 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/logs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
|
@ -66,6 +69,7 @@ type setnsProcess struct {
|
|||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
initProcessPid int
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (uint64, error) {
|
||||
|
@ -100,7 +104,25 @@ func (p *setnsProcess) start() (err error) {
|
|||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
|
||||
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
|
||||
// Try to join the cgroup of InitProcessPid.
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
|
||||
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
|
||||
if initCgErr == nil {
|
||||
if initCgPath, ok := initCg[""]; ok {
|
||||
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
|
||||
logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
|
||||
p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
|
||||
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
|
||||
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
|
||||
}
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
}
|
||||
}
|
||||
}
|
||||
if p.intelRdtPath != "" {
|
||||
|
|
|
@ -136,3 +136,49 @@ EOF
|
|||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[0]} == *"cgroups_exec"* ]]
|
||||
}
|
||||
|
||||
@test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds" {
|
||||
requires root cgroups_v2
|
||||
|
||||
set_cgroups_path "$BUSYBOX_BUNDLE"
|
||||
set_cgroup_mount_writable "$BUSYBOX_BUNDLE"
|
||||
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_group
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
runc exec test_cgroups_group cat /sys/fs/cgroup/cgroup.controllers
|
||||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[0]} == *"memory"* ]]
|
||||
|
||||
runc exec test_cgroups_group cat /proc/self/cgroup
|
||||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[0]} == "0::/" ]]
|
||||
|
||||
runc exec test_cgroups_group mkdir /sys/fs/cgroup/foo
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
runc exec test_cgroups_group sh -c "echo 1 > /sys/fs/cgroup/foo/cgroup.procs"
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# the init process is now in "/foo", but an exec process can still join "/"
|
||||
# because we haven't enabled any domain controller.
|
||||
runc exec test_cgroups_group cat /proc/self/cgroup
|
||||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[0]} == "0::/" ]]
|
||||
|
||||
# turn on a domain controller (memory)
|
||||
runc exec test_cgroups_group sh -euxc 'echo $$ > /sys/fs/cgroup/foo/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# an exec process can no longer join "/" after turning on a domain controller.
|
||||
# falls back to "/foo".
|
||||
runc exec test_cgroups_group cat /proc/self/cgroup
|
||||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[0]} == "0::/foo" ]]
|
||||
|
||||
# teardown: remove "/foo"
|
||||
runc exec test_cgroups_group sh -uxc 'echo -memory > /sys/fs/cgroup/cgroup.subtree_control; for f in $(cat /sys/fs/cgroup/foo/cgroup.procs); do echo $f > /sys/fs/cgroup/cgroup.procs; done; rmdir /sys/fs/cgroup/foo'
|
||||
runc exec test_cgroups_group test ! -d /sys/fs/cgroup/foo
|
||||
[ "$status" -eq 0 ]
|
||||
#
|
||||
}
|
||||
|
|
|
@ -55,12 +55,7 @@ function teardown() {
|
|||
@test "runc delete --force in cgroupv2 with subcgroups" {
|
||||
requires cgroups_v2 root
|
||||
set_cgroups_path "$BUSYBOX_BUNDLE"
|
||||
|
||||
# grant `rw` priviledge to `/sys/fs/cgroup`
|
||||
cat "${BUSYBOX_BUNDLE}/config.json"\
|
||||
| jq '.mounts |= map((select(.type=="cgroup") | .options -= ["ro"]) // .)'\
|
||||
> "${BUSYBOX_BUNDLE}/config.json.tmp"
|
||||
mv "${BUSYBOX_BUNDLE}/config.json"{.tmp,}
|
||||
set_cgroup_mount_writable "$BUSYBOX_BUNDLE"
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
|
|
|
@ -197,6 +197,15 @@ function set_resources_limit() {
|
|||
sed -i 's/\("linux": {\)/\1\n "resources": { "pids": { "limit": 100 } },/' "$bundle/config.json"
|
||||
}
|
||||
|
||||
# Helper function to make /sys/fs/cgroup writable
|
||||
function set_cgroup_mount_writable() {
|
||||
bundle="${1:-.}"
|
||||
cat "$bundle/config.json" \
|
||||
| jq '.mounts |= map((select(.type == "cgroup") | .options -= ["ro"]) // .)' \
|
||||
>"$bundle/config.json.tmp"
|
||||
mv "$bundle/config.json"{.tmp,}
|
||||
}
|
||||
|
||||
# Fails the current test, providing the error given.
|
||||
function fail() {
|
||||
echo "$@" >&2
|
||||
|
|
Loading…
Reference in New Issue