Set rlimits using prlimit in parent

Fixes #680

This changes setupRlimit to use the Prlimit syscall (rather than
Setrlimit) and moves the call to the parent process. This is necessary
because Setrlimit would affect the libcontainer consumer if called in
the parent, and would fail if called from the child if the
child process is in a user namespace and the requested rlimit is higher
than that in the parent.

Signed-off-by: Julian Friedman <julz.friedman@uk.ibm.com>
This commit is contained in:
Julian Friedman 2016-03-25 15:03:30 +00:00
parent 344b0ccaa6
commit e91b2b8aca
7 changed files with 72 additions and 11 deletions

View File

@ -316,10 +316,9 @@ func setupRoute(config *configs.Config) error {
return nil
}
func setupRlimits(limits []configs.Rlimit) error {
func setupRlimits(limits []configs.Rlimit, pid int) error {
for _, rlimit := range limits {
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}

View File

@ -158,6 +158,18 @@ func TestIPCBadPath(t *testing.T) {
}
func TestRlimit(t *testing.T) {
testRlimit(t, false)
}
func TestUsernsRlimit(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
testRlimit(t, true)
}
func testRlimit(t *testing.T, userns bool) {
if testing.Short() {
return
}
@ -167,6 +179,19 @@ func TestRlimit(t *testing.T) {
defer remove(rootfs)
config := newTemplateConfig(rootfs)
if userns {
config.UidMappings = []configs.IDMap{{0, 0, 1000}}
config.GidMappings = []configs.IDMap{{0, 0, 1000}}
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
}
// ensure limit is lower than what the config requests to test that in a user namespace
// the Setrlimit call happens early enough that we still have permissions to raise the limit.
ok(t, syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{
Max: 1024,
Cur: 1024,
}))
out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n")
ok(t, err)
if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" {

View File

@ -7,6 +7,7 @@ import (
"os"
"strconv"
"strings"
"syscall"
"testing"
"time"
@ -62,14 +63,34 @@ func TestExecIn(t *testing.T) {
}
}
func TestExecInUsernsRlimit(t *testing.T) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
testExecInRlimit(t, true)
}
func TestExecInRlimit(t *testing.T) {
testExecInRlimit(t, false)
}
func testExecInRlimit(t *testing.T, userns bool) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)
config := newTemplateConfig(rootfs)
if userns {
config.UidMappings = []configs.IDMap{{0, 0, 1000}}
config.GidMappings = []configs.IDMap{{0, 0, 1000}}
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
}
container, err := newContainer(config)
ok(t, err)
defer container.Destroy()
@ -95,6 +116,10 @@ func TestExecInRlimit(t *testing.T) {
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
Rlimits: []configs.Rlimit{
// increase process rlimit higher than container rlimit to test per-process limit
{Type: syscall.RLIMIT_NOFILE, Hard: 1026, Soft: 1026},
},
}
err = container.Start(ps)
ok(t, err)
@ -104,8 +129,8 @@ func TestExecInRlimit(t *testing.T) {
waitProcess(process, t)
out := buffers.Stdout.String()
if limit := strings.TrimSpace(out); limit != "1025" {
t.Fatalf("expected rlimit to be 1025, got %s", limit)
if limit := strings.TrimSpace(out); limit != "1026" {
t.Fatalf("expected rlimit to be 1026, got %s", limit)
}
}

View File

@ -89,6 +89,11 @@ func (p *setnsProcess) start() (err error) {
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemError(err)
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemError(err)
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemError(err)
}
@ -284,6 +289,11 @@ loop:
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemError(err)
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemError(err)
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
if p.config.Config.Hooks != nil {

View File

@ -28,9 +28,6 @@ func (l *linuxSetnsInit) Init() error {
if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
}
if err := setupRlimits(l.config.Rlimits); err != nil {
return err
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err

View File

@ -73,9 +73,6 @@ func (l *linuxStandardInit) Init() error {
if err := setupRoute(l.config.Config); err != nil {
return err
}
if err := setupRlimits(l.config.Rlimits); err != nil {
return err
}
label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace

View File

@ -53,6 +53,14 @@ func Execv(cmd string, args []string, env []string) error {
return syscall.Exec(name, args, env)
}
func Prlimit(pid, resource int, limit syscall.Rlimit) error {
_, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
if err != 0 {
return err
}
return nil
}
func SetParentDeathSignal(sig uintptr) error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
return err