diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index eb8ad83a..0bde656e 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -316,10 +316,9 @@ func setupRoute(config *configs.Config) error { return nil } -func setupRlimits(limits []configs.Rlimit) error { +func setupRlimits(limits []configs.Rlimit, pid int) error { for _, rlimit := range limits { - l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} - if err := syscall.Setrlimit(rlimit.Type, l); err != nil { + if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) } } diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index 453843de..33241d3e 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -158,6 +158,18 @@ func TestIPCBadPath(t *testing.T) { } func TestRlimit(t *testing.T) { + testRlimit(t, false) +} + +func TestUsernsRlimit(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + testRlimit(t, true) +} + +func testRlimit(t *testing.T, userns bool) { if testing.Short() { return } @@ -167,6 +179,19 @@ func TestRlimit(t *testing.T) { defer remove(rootfs) config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{0, 0, 1000}} + config.GidMappings = []configs.IDMap{{0, 0, 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + + // ensure limit is lower than what the config requests to test that in a user namespace + // the Setrlimit call happens early enough that we still have permissions to raise the limit. + ok(t, syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{ + Max: 1024, + Cur: 1024, + })) + out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n") ok(t, err) if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" { diff --git a/libcontainer/integration/execin_test.go b/libcontainer/integration/execin_test.go index 378063e8..da920ddb 100644 --- a/libcontainer/integration/execin_test.go +++ b/libcontainer/integration/execin_test.go @@ -7,6 +7,7 @@ import ( "os" "strconv" "strings" + "syscall" "testing" "time" @@ -62,14 +63,34 @@ func TestExecIn(t *testing.T) { } } +func TestExecInUsernsRlimit(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + testExecInRlimit(t, true) +} + func TestExecInRlimit(t *testing.T) { + testExecInRlimit(t, false) +} + +func testExecInRlimit(t *testing.T, userns bool) { if testing.Short() { return } + rootfs, err := newRootfs() ok(t, err) defer remove(rootfs) + config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{0, 0, 1000}} + config.GidMappings = []configs.IDMap{{0, 0, 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + container, err := newContainer(config) ok(t, err) defer container.Destroy() @@ -95,6 +116,10 @@ func TestExecInRlimit(t *testing.T) { Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, + Rlimits: []configs.Rlimit{ + // increase process rlimit higher than container rlimit to test per-process limit + {Type: syscall.RLIMIT_NOFILE, Hard: 1026, Soft: 1026}, + }, } err = container.Start(ps) ok(t, err) @@ -104,8 +129,8 @@ func TestExecInRlimit(t *testing.T) { waitProcess(process, t) out := buffers.Stdout.String() - if limit := strings.TrimSpace(out); limit != "1025" { - t.Fatalf("expected rlimit to be 1025, got %s", limit) + if limit := strings.TrimSpace(out); limit != "1026" { + t.Fatalf("expected rlimit to be 1026, got %s", limit) } } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 6c8ac0f4..e2ae1b1a 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -89,6 +89,11 @@ func (p *setnsProcess) start() (err error) { if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemError(err) } + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemError(err) + } if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return newSystemError(err) } @@ -284,6 +289,11 @@ loop: if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemError(err) } + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemError(err) + } // call prestart hooks if !p.config.Config.Namespaces.Contains(configs.NEWNS) { if p.config.Config.Hooks != nil { diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 33ef68e5..b1a198fd 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -28,9 +28,6 @@ func (l *linuxSetnsInit) Init() error { if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil { return err } - if err := setupRlimits(l.config.Rlimits); err != nil { - return err - } if l.config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 2e101505..59bd3700 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -73,9 +73,6 @@ func (l *linuxStandardInit) Init() error { if err := setupRoute(l.config.Config); err != nil { return err } - if err := setupRlimits(l.config.Rlimits); err != nil { - return err - } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index d109d7f8..8b199d92 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -53,6 +53,14 @@ func Execv(cmd string, args []string, env []string) error { return syscall.Exec(name, args, env) } +func Prlimit(pid, resource int, limit syscall.Rlimit) error { + _, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) + if err != 0 { + return err + } + return nil +} + func SetParentDeathSignal(sig uintptr) error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { return err