runc: add support for rootless containers

This enables the support for the rootless container mode. There are many
restrictions on what rootless containers can do, so many different runC
commands have been disabled:

* runc checkpoint
* runc events
* runc pause
* runc ps
* runc restore
* runc resume
* runc update

The following commands work:

* runc create
* runc delete
* runc exec
* runc kill
* runc list
* runc run
* runc spec
* runc state

In addition, any specification options that imply joining cgroups have
also been disabled. This is due to support for unprivileged subtree
management not being available from Linux upstream.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
Aleksa Sarai 2016-04-23 23:39:42 +10:00
parent 6bd4bd9030
commit d2f49696b0
No known key found for this signature in database
GPG Key ID: 9E18AA267DDB8DB4
21 changed files with 742 additions and 193 deletions

View File

@ -4,7 +4,7 @@
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
PREFIX := $(DESTDIR)/usr/local
BINDIR := $(PREFIX)/sbin
BINDIR := $(PREFIX)/bin
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))

View File

@ -39,6 +39,11 @@ checkpointed.`,
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc checkpoint requires root")
}
container, err := getContainer(context)
if err != nil {
return err

View File

@ -90,9 +90,6 @@ following will output a list of processes running in the container:
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
if os.Geteuid() != 0 {
return fmt.Errorf("runc should be run as root")
}
if err := revisePidFile(context); err != nil {
return err
}

View File

@ -183,6 +183,9 @@ type Config struct {
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
}
type Hooks struct {

View File

@ -0,0 +1,117 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
return err
}
// Currently, cgroups cannot effectively be used in rootless containers.
// The new cgroup namespace doesn't really help us either because it doesn't
// have nice interactions with the user namespace (we're working with upstream
// to fix this).
if err := rootlessCgroup(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this
// has to be verified by setupUser() in init_linux.go.
return nil
}
func rootlessMappings(config *configs.Config) error {
rootuid, err := config.HostUID()
if err != nil {
return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
}
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
if rootuid != euid {
return fmt.Errorf("rootless containers cannot map container root to a different host user")
}
}
rootgid, err := config.HostGID()
if err != nil {
return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
}
// Similar to the above test, we need to make sure that we aren't trying to
// map to a group ID that we don't have the right to be.
if rootgid != getegid() {
return fmt.Errorf("rootless containers cannot map container root to a different host group")
}
// We can only map one user and group inside a container (our own).
if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one user")
}
if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one group")
}
return nil
}
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
}
// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())
// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
}
return nil
}
// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
for _, mount := range config.Mounts {
// Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
}
if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
}
}
}
return nil
}

View File

@ -0,0 +1,195 @@
package validate
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func init() {
geteuid = func() int { return 1337 }
getegid = func() int { return 7331 }
}
func rootlessConfig() *configs.Config {
return &configs.Config{
Rootfs: "/var",
Rootless: true,
Namespaces: configs.Namespaces(
[]configs.Namespace{
{Type: configs.NEWUSER},
},
),
UidMappings: []configs.IDMap{
{
HostID: geteuid(),
ContainerID: 0,
Size: 1,
},
},
GidMappings: []configs.IDMap{
{
HostID: getegid(),
ContainerID: 0,
Size: 1,
},
},
}
}
func TestValidateRootless(t *testing.T) {
validator := New()
config := rootlessConfig()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur: %+v", err)
}
}
/* rootlessMappings() */
func TestValidateRootlessUserns(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Namespaces = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if user namespaces not set")
}
}
func TestValidateRootlessMappingUid(t *testing.T) {
validator := New()
config := rootlessConfig()
config.UidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no uid mappings provided")
}
config = rootlessConfig()
config.UidMappings[0].HostID = geteuid() + 1
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if geteuid() != mapped uid")
}
config = rootlessConfig()
config.UidMappings[0].Size = 1024
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if more than one uid mapped")
}
config = rootlessConfig()
config.UidMappings = append(config.UidMappings, configs.IDMap{
HostID: geteuid() + 1,
ContainerID: 0,
Size: 1,
})
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if more than one uid extent mapped")
}
}
func TestValidateRootlessMappingGid(t *testing.T) {
validator := New()
config := rootlessConfig()
config.GidMappings = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if no gid mappings provided")
}
config = rootlessConfig()
config.GidMappings[0].HostID = getegid() + 1
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if getegid() != mapped gid")
}
config = rootlessConfig()
config.GidMappings[0].Size = 1024
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if more than one gid mapped")
}
config = rootlessConfig()
config.GidMappings = append(config.GidMappings, configs.IDMap{
HostID: getegid() + 1,
ContainerID: 0,
Size: 1,
})
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if more than one gid extent mapped")
}
}
/* rootlessMount() */
func TestValidateRootlessMountUid(t *testing.T) {
config := rootlessConfig()
validator := New()
config.Mounts = []*configs.Mount{
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
},
}
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err)
}
config.Mounts[0].Data = "uid=5"
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting uid=5 in mount options")
}
config.Mounts[0].Data = "uid=0"
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err)
}
}
func TestValidateRootlessMountGid(t *testing.T) {
config := rootlessConfig()
validator := New()
config.Mounts = []*configs.Mount{
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
},
}
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err)
}
config.Mounts[0].Data = "gid=5"
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur when setting gid=5 in mount options")
}
config.Mounts[0].Data = "gid=0"
if err := validator.Validate(config); err != nil {
t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err)
}
}
/* rootlessCgroup() */
func TestValidateRootlessCgroup(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Cgroups = &configs.Cgroup{
Resources: &configs.Resources{
PidsLimit: 1337,
},
}
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if cgroup limits set")
}
}

View File

@ -40,6 +40,11 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil
}

View File

@ -51,6 +51,9 @@ type State struct {
// Platform specific fields below here
// Specifies if the container was started under the rootless mode.
Rootless bool `json:"rootless"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path.
CgroupPaths map[string]string `json:"cgroup_paths"`
@ -452,6 +455,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
PassedFilesCount: len(process.ExtraFiles),
ContainerId: c.ID(),
NoNewPrivileges: c.config.NoNewPrivileges,
Rootless: c.config.Rootless,
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
@ -622,6 +626,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
// TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
// support for doing unprivileged dumps, but the setup of
// rootless containers might make this complicated.
if c.config.Rootless {
return fmt.Errorf("cannot checkpoint a rootless container")
}
if err := c.checkCriuVersion("1.5.2"); err != nil {
return err
}
@ -791,6 +802,13 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
// support for unprivileged restore at the moment.
if c.config.Rootless {
return fmt.Errorf("cannot restore a rootless container")
}
if err := c.checkCriuVersion("1.5.2"); err != nil {
return err
}
@ -918,6 +936,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
if err := c.cgroupManager.Apply(pid); err != nil {
return err
}
@ -1314,6 +1333,7 @@ func (c *linuxContainer) currentState() (*State, error) {
InitProcessStartTime: startTime,
Created: c.created,
},
Rootless: c.config.Rootless,
CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: externalDescriptors,
@ -1441,6 +1461,8 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Type: GidmapAttr,
Value: b,
})
// The following only applies if we are root.
if !c.config.Rootless {
// check if we have CAP_SETGID to setgroup properly
pid, err := capability.NewPid(os.Getpid())
if err != nil {
@ -1454,6 +1476,7 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
}
}
}
}
// write oom_score_adj
r.AddData(&Bytemsg{
@ -1461,5 +1484,11 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)),
})
// write rootless
r.AddData(&Boolmsg{
Type: RootlessAttr,
Value: c.config.Rootless,
})
return bytes.NewReader(r.Serialize()), nil
}

View File

@ -58,6 +58,7 @@ type initConfig struct {
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"`
Rootless bool `json:"rootless"`
}
type initer interface {
@ -229,18 +230,21 @@ func syncParentHooks(pipe io.ReadWriter) error {
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Uid: 0,
Gid: 0,
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
@ -253,22 +257,49 @@ func setupUser(config *initConfig) error {
return err
}
}
if config.Rootless {
if execUser.Uid != 0 {
return fmt.Errorf("cannot run as a non-root user in a rootless container")
}
if execUser.Gid != 0 {
return fmt.Errorf("cannot run as a non-root group in a rootless container")
}
// We cannot set any additional groups in a rootless container and thus we
// bail if the user asked us to do so. TODO: We currently can't do this
// earlier, but if libcontainer.Process.User was typesafe this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
return err
}
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
// There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {

View File

@ -18,6 +18,7 @@ const (
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287
// When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)

View File

@ -72,6 +72,7 @@ struct nlconfig_t {
char *namespaces;
size_t namespaces_len;
uint8_t is_setgroup;
uint8_t is_rootless;
char *oom_score_adj;
size_t oom_score_adj_len;
};
@ -87,6 +88,7 @@ struct nlconfig_t {
#define GIDMAP_ATTR 27284
#define SETGROUP_ATTR 27285
#define OOM_SCORE_ADJ_ATTR 27286
#define ROOTLESS_ATTR 27287
/*
* Use the raw syscall for versions of glibc which don't include a function for
@ -175,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
policy = "deny";
break;
case SETGROUPS_DEFAULT:
default:
/* Nothing to do. */
return;
}
@ -329,6 +332,9 @@ static void nl_parse(int fd, struct nlconfig_t *config)
case CLONE_FLAGS_ATTR:
config->cloneflags = readint32(current);
break;
case ROOTLESS_ATTR:
config->is_rootless = readint8(current);
break;
case OOM_SCORE_ADJ_ATTR:
config->oom_score_adj = current;
config->oom_score_adj_len = payload_len;
@ -574,9 +580,21 @@ void nsexec(void)
exit(ret);
case SYNC_USERMAP_PLS:
/* Enable setgroups(2) if we've been asked to. */
/*
* Enable setgroups(2) if we've been asked to. But we also
* have to explicitly disable setgroups(2) if we're
* creating a rootless container (this is required since
* Linux 3.19).
*/
if (config.is_rootless && config.is_setgroup) {
kill(child, SIGKILL);
bail("cannot allow setgroup in an unprivileged user namespace setup");
}
if (config.is_setgroup)
update_setgroups(child, SETGROUPS_ALLOW);
if (config.is_rootless)
update_setgroups(child, SETGROUPS_DENY);
/* Set up mappings. */
update_uidmap(child, config.uidmap, config.uidmap_len);
@ -818,8 +836,10 @@ void nsexec(void)
if (setgid(0) < 0)
bail("setgid failed");
if (!config.is_rootless && config.is_setgroup) {
if (setgroups(0, NULL) < 0)
bail("setgroups failed");
}
s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s))

View File

@ -80,7 +80,8 @@ func (p *setnsProcess) start() (err error) {
if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process")
}
if len(p.cgroupPaths) > 0 {
// We can't join cgroups if we're in a rootless container.
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
@ -253,13 +254,15 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if !p.container.config.Rootless {
// Do this before syncing with child so that no children can escape the
// cgroup. We can't do this if we're not running as root.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
}
defer func() {
if err != nil {
if err != nil && !p.container.config.Rootless {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
@ -278,9 +281,12 @@ func (p *initProcess) start() error {
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
// We can't set cgroups if we're in a rootless container.
if !p.container.config.Rootless {
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@ -424,6 +430,12 @@ func getPipeFds(pid int) ([]string, error) {
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
// Ignore permission errors, for rootless containers and other
// non-dumpable processes. if we can't get the fd for a particular
// file, there's not much we can do.
if os.IsPermission(err) {
continue
}
return fds, err
}
fds[i] = target

View File

@ -0,0 +1,160 @@
package specconv
import (
"runtime"
"github.com/opencontainers/runtime-spec/specs-go"
)
func sPtr(s string) *string { return &s }
// ExampleSpec returns an example spec file, with many options set so a user
// can see what a standard spec file looks like.
func ExampleSpec() *specs.Spec {
return &specs.Spec{
Version: specs.Version,
Platform: specs.Platform{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
},
Root: specs.Root{
Path: "rootfs",
Readonly: true,
},
Process: specs.Process{
Terminal: true,
User: specs.User{},
Args: []string{
"sh",
},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
},
Cwd: "/",
NoNewPrivileges: true,
Capabilities: &specs.LinuxCapabilities{
Bounding: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Permitted: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Inheritable: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Ambient: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Effective: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
},
Rlimits: []specs.LinuxRlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
Soft: uint64(1024),
},
},
},
Hostname: "runc",
Mounts: []specs.Mount{
{
Destination: "/proc",
Type: "proc",
Source: "proc",
Options: nil,
},
{
Destination: "/dev",
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
{
Destination: "/dev/pts",
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
},
{
Destination: "/dev/shm",
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
},
{
Destination: "/dev/mqueue",
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/sys",
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev", "ro"},
},
{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
},
},
Linux: &specs.Linux{
MaskedPaths: []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
Resources: &specs.LinuxResources{
Devices: []specs.LinuxDeviceCgroup{
{
Allow: false,
Access: "rwm",
},
},
},
Namespaces: []specs.LinuxNamespace{
{
Type: "pid",
},
{
Type: "network",
},
{
Type: "ipc",
},
{
Type: "uts",
},
{
Type: "mount",
},
},
},
}
}

View File

@ -145,6 +145,7 @@ type CreateOpts struct {
NoPivotRoot bool
NoNewKeyring bool
Spec *specs.Spec
Rootless bool
}
// CreateLibcontainerConfig creates a new libcontainer configuration from a
@ -175,6 +176,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Hostname: spec.Hostname,
Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)),
NoNewKeyring: opts.NoNewKeyring,
Rootless: opts.Rootless,
}
exists := false
@ -208,7 +210,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
if err := setupUserNamespace(spec, config); err != nil {
return nil, err
}
c, err := createCgroupConfig(opts.CgroupName, opts.UseSystemdCgroup, spec)
c, err := createCgroupConfig(opts)
if err != nil {
return nil, err
}
@ -264,8 +266,14 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
}
}
func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) {
var myCgroupPath string
func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
var (
myCgroupPath string
spec = opts.Spec
useSystemdCgroup = opts.UseSystemdCgroup
name = opts.CgroupName
)
c := &configs.Cgroup{
Resources: &configs.Resources{},
@ -301,10 +309,15 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
c.Path = myCgroupPath
}
// In rootless containers, any attempt to make cgroup changes will fail.
// libcontainer will validate this and we shouldn't add any cgroup options
// the user didn't specify.
if !opts.Rootless {
c.Resources.AllowedDevices = allowedDevices
if spec.Linux == nil {
return c, nil
}
}
r := spec.Linux.Resources
if r == nil {
return c, nil
@ -340,8 +353,10 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
}
c.Resources.Devices = append(c.Resources.Devices, dd)
}
if !opts.Rootless {
// append the default allowed devices to the end of the list
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
}
if r.Memory != nil {
if r.Memory.Limit != nil {
c.Resources.Memory = *r.Memory.Limit

View File

@ -3,8 +3,10 @@
package specconv
import (
"os"
"testing"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runtime-spec/specs-go"
)
@ -16,7 +18,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) {
CgroupsPath: cgroupsPath,
}
cgroup, err := createCgroupConfig("ContainerID", false, spec)
opts := &CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
}
cgroup, err := createCgroupConfig(opts)
if err != nil {
t.Errorf("Couldn't create Cgroup config: %v", err)
}
@ -28,8 +36,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) {
func TestLinuxCgroupsPathNotSpecified(t *testing.T) {
spec := &specs.Spec{}
opts := &CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
}
cgroup, err := createCgroupConfig("ContainerID", false, spec)
cgroup, err := createCgroupConfig(opts)
if err != nil {
t.Errorf("Couldn't create Cgroup config: %v", err)
}
@ -39,6 +52,26 @@ func TestLinuxCgroupsPathNotSpecified(t *testing.T) {
}
}
func TestSpecconvExampleValidate(t *testing.T) {
spec := ExampleSpec()
spec.Root.Path = "/"
opts := &CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
}
config, err := CreateLibcontainerConfig(opts)
if err != nil {
t.Errorf("Couldn't create libcontainer config: %v", err)
}
validator := validate.New()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected specconv to produce valid container config: %v", err)
}
}
func TestDupNamespaces(t *testing.T) {
spec := &specs.Spec{
Linux: &specs.Linux{
@ -62,3 +95,46 @@ func TestDupNamespaces(t *testing.T) {
t.Errorf("Duplicated namespaces should be forbidden")
}
}
func TestRootlessSpecconvValidate(t *testing.T) {
spec := &specs.Spec{
Linux: specs.Linux{
Namespaces: []specs.Namespace{
{
Type: specs.UserNamespace,
},
},
UIDMappings: []specs.IDMapping{
{
HostID: uint32(os.Geteuid()),
ContainerID: 0,
Size: 1,
},
},
GIDMappings: []specs.IDMapping{
{
HostID: uint32(os.Getegid()),
ContainerID: 0,
Size: 1,
},
},
},
}
opts := &CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
Rootless: true,
}
config, err := CreateLibcontainerConfig(opts)
if err != nil {
t.Errorf("Couldn't create libcontainer config: %v", err)
}
validator := validate.New()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected specconv to produce valid rootless container config: %v", err)
}
}

19
list.go
View File

@ -7,12 +7,14 @@ import (
"io/ioutil"
"os"
"path/filepath"
"syscall"
"text/tabwriter"
"time"
"encoding/json"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/urfave/cli"
)
@ -38,6 +40,8 @@ type containerState struct {
Created time.Time `json:"created"`
// Annotations is the user defined annotations added to the config.
Annotations map[string]string `json:"annotations,omitempty"`
// The owner of the state directory (the owner of the container).
Owner string `json:"owner"`
}
var listCommand = cli.Command{
@ -85,14 +89,15 @@ To list containers created using a non-default value for "--root":
switch context.String("format") {
case "table":
w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0)
fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\n")
fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n")
for _, item := range s {
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n",
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n",
item.ID,
item.InitProcessPid,
item.Status,
item.Bundle,
item.Created.Format(time.RFC3339Nano))
item.Created.Format(time.RFC3339Nano),
item.Owner)
}
if err := w.Flush(); err != nil {
return err
@ -126,6 +131,13 @@ func getContainers(context *cli.Context) ([]containerState, error) {
var s []containerState
for _, item := range list {
if item.IsDir() {
// This cast is safe on Linux.
stat := item.Sys().(*syscall.Stat_t)
owner, err := user.LookupUid(int(stat.Uid))
if err != nil {
owner.Name = string(stat.Uid)
}
container, err := factory.Load(item.Name())
if err != nil {
fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err)
@ -155,6 +167,7 @@ func getContainers(context *cli.Context) ([]containerState, error) {
Rootfs: state.BaseState.Config.Rootfs,
Created: state.BaseState.Created,
Annotations: annotations,
Owner: owner.Name,
})
}
}

5
ps.go
View File

@ -28,6 +28,11 @@ var psCommand = cli.Command{
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
// XXX: Currently not supported with rootless containers.
if isRootless() {
return fmt.Errorf("runc ps requires root")
}
container, err := getContainer(context)
if err != nil {
return err

View File

@ -3,6 +3,7 @@
package main
import (
"fmt"
"os"
"syscall"
@ -86,6 +87,11 @@ using the runc checkpoint command.`,
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc restore requires root")
}
imagePath := context.String("image-path")
id := context.Args().First()
if id == "" {

150
spec.go
View File

@ -10,6 +10,7 @@ import (
"runtime"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
@ -68,152 +69,7 @@ container on your host.`,
if err := checkArgs(context, 0, exactArgs); err != nil {
return err
}
spec := specs.Spec{
Version: specs.Version,
Platform: specs.Platform{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
},
Root: specs.Root{
Path: "rootfs",
Readonly: true,
},
Process: specs.Process{
Terminal: true,
User: specs.User{},
Args: []string{
"sh",
},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
},
Cwd: "/",
NoNewPrivileges: true,
Capabilities: &specs.LinuxCapabilities{
Bounding: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Permitted: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Inheritable: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Ambient: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Effective: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
},
Rlimits: []specs.LinuxRlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
Soft: uint64(1024),
},
},
},
Hostname: "runc",
Mounts: []specs.Mount{
{
Destination: "/proc",
Type: "proc",
Source: "proc",
Options: nil,
},
{
Destination: "/dev",
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
{
Destination: "/dev/pts",
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
},
{
Destination: "/dev/shm",
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
},
{
Destination: "/dev/mqueue",
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/sys",
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev", "ro"},
},
{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
},
},
Linux: &specs.Linux{
MaskedPaths: []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
Resources: &specs.LinuxResources{
Devices: []specs.LinuxDeviceCgroup{
{
Allow: false,
Access: "rwm",
},
},
},
Namespaces: []specs.LinuxNamespace{
{
Type: "pid",
},
{
Type: "network",
},
{
Type: "ipc",
},
{
Type: "uts",
},
{
Type: "mount",
},
},
},
}
spec := specconv.ExampleSpec()
checkNoFile := func(name string) error {
_, err := os.Stat(name)
@ -234,7 +90,7 @@ container on your host.`,
if err := checkNoFile(specConfig); err != nil {
return err
}
data, err := json.MarshalIndent(&spec, "", "\t")
data, err := json.MarshalIndent(spec, "", "\t")
if err != nil {
return err
}

View File

@ -63,9 +63,6 @@ func setupSpec(context *cli.Context) (*specs.Spec, error) {
if err != nil {
return nil, err
}
if os.Geteuid() != 0 {
return nil, fmt.Errorf("runc should be run as root")
}
return spec, nil
}

View File

@ -186,6 +186,11 @@ func createPidFile(path string, process *libcontainer.Process) error {
return os.Rename(tmpName, path)
}
// XXX: Currently we autodetect rootless mode.
func isRootless() bool {
return os.Geteuid() != 0
}
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id,
@ -193,6 +198,7 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcont
NoPivotRoot: context.Bool("no-pivot"),
NoNewKeyring: context.Bool("no-new-keyring"),
Spec: spec,
Rootless: isRootless(),
})
if err != nil {
return nil, err