From e91b2b8acae0243c378a1f109354d83ce99db784 Mon Sep 17 00:00:00 2001 From: Julian Friedman Date: Fri, 25 Mar 2016 15:03:30 +0000 Subject: [PATCH] Set rlimits using prlimit in parent Fixes #680 This changes setupRlimit to use the Prlimit syscall (rather than Setrlimit) and moves the call to the parent process. This is necessary because Setrlimit would affect the libcontainer consumer if called in the parent, and would fail if called from the child if the child process is in a user namespace and the requested rlimit is higher than that in the parent. Signed-off-by: Julian Friedman --- libcontainer/init_linux.go | 5 ++--- libcontainer/integration/exec_test.go | 25 +++++++++++++++++++++ libcontainer/integration/execin_test.go | 29 +++++++++++++++++++++++-- libcontainer/process_linux.go | 10 +++++++++ libcontainer/setns_init_linux.go | 3 --- libcontainer/standard_init_linux.go | 3 --- libcontainer/system/linux.go | 8 +++++++ 7 files changed, 72 insertions(+), 11 deletions(-) diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index eb8ad83a..0bde656e 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -316,10 +316,9 @@ func setupRoute(config *configs.Config) error { return nil } -func setupRlimits(limits []configs.Rlimit) error { +func setupRlimits(limits []configs.Rlimit, pid int) error { for _, rlimit := range limits { - l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} - if err := syscall.Setrlimit(rlimit.Type, l); err != nil { + if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) } } diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index 453843de..33241d3e 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -158,6 +158,18 @@ func TestIPCBadPath(t *testing.T) { } func TestRlimit(t *testing.T) { + testRlimit(t, false) +} + +func TestUsernsRlimit(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + testRlimit(t, true) +} + +func testRlimit(t *testing.T, userns bool) { if testing.Short() { return } @@ -167,6 +179,19 @@ func TestRlimit(t *testing.T) { defer remove(rootfs) config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{0, 0, 1000}} + config.GidMappings = []configs.IDMap{{0, 0, 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + + // ensure limit is lower than what the config requests to test that in a user namespace + // the Setrlimit call happens early enough that we still have permissions to raise the limit. + ok(t, syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{ + Max: 1024, + Cur: 1024, + })) + out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n") ok(t, err) if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" { diff --git a/libcontainer/integration/execin_test.go b/libcontainer/integration/execin_test.go index 378063e8..da920ddb 100644 --- a/libcontainer/integration/execin_test.go +++ b/libcontainer/integration/execin_test.go @@ -7,6 +7,7 @@ import ( "os" "strconv" "strings" + "syscall" "testing" "time" @@ -62,14 +63,34 @@ func TestExecIn(t *testing.T) { } } +func TestExecInUsernsRlimit(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + testExecInRlimit(t, true) +} + func TestExecInRlimit(t *testing.T) { + testExecInRlimit(t, false) +} + +func testExecInRlimit(t *testing.T, userns bool) { if testing.Short() { return } + rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) + config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{0, 0, 1000}} + config.GidMappings = []configs.IDMap{{0, 0, 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + container, err := newContainer(config) ok(t, err) defer container.Destroy() @@ -95,6 +116,10 @@ func TestExecInRlimit(t *testing.T) { Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, + Rlimits: []configs.Rlimit{ + // increase process rlimit higher than container rlimit to test per-process limit + {Type: syscall.RLIMIT_NOFILE, Hard: 1026, Soft: 1026}, + }, } err = container.Start(ps) ok(t, err) @@ -104,8 +129,8 @@ func TestExecInRlimit(t *testing.T) { waitProcess(process, t) out := buffers.Stdout.String() - if limit := strings.TrimSpace(out); limit != "1025" { - t.Fatalf("expected rlimit to be 1025, got %s", limit) + if limit := strings.TrimSpace(out); limit != "1026" { + t.Fatalf("expected rlimit to be 1026, got %s", limit) } } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 6c8ac0f4..e2ae1b1a 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -89,6 +89,11 @@ func (p *setnsProcess) start() (err error) { if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemError(err) } + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemError(err) + } if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return newSystemError(err) } @@ -284,6 +289,11 @@ loop: if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemError(err) } + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemError(err) + } // call prestart hooks if !p.config.Config.Namespaces.Contains(configs.NEWNS) { if p.config.Config.Hooks != nil { diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 33ef68e5..b1a198fd 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -28,9 +28,6 @@ func (l *linuxSetnsInit) Init() error { if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil { return err } - if err := setupRlimits(l.config.Rlimits); err != nil { - return err - } if l.config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 2e101505..59bd3700 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -73,9 +73,6 @@ func (l *linuxStandardInit) Init() error { if err := setupRoute(l.config.Config); err != nil { return err } - if err := setupRlimits(l.config.Rlimits); err != nil { - return err - } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index d109d7f8..8b199d92 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -53,6 +53,14 @@ func Execv(cmd string, args []string, env []string) error { return syscall.Exec(name, args, env) } +func Prlimit(pid, resource int, limit syscall.Rlimit) error { + _, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) + if err != 0 { + return err + } + return nil +} + func SetParentDeathSignal(sig uintptr) error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { return err