Merge pull request #774 from cyphar/rootless-containers

Rootless Containers
This commit is contained in:
Mrunal Patel 2017-03-27 11:58:03 -07:00 committed by GitHub
commit 653207bc29
46 changed files with 1257 additions and 306 deletions

View File

@ -6,6 +6,7 @@ RUN echo 'deb http://httpredir.debian.org/debian jessie-backports main' > /etc/a
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
build-essential \ build-essential \
curl \ curl \
sudo \
gawk \ gawk \
iptables \ iptables \
jq \ jq \
@ -22,6 +23,12 @@ RUN apt-get update && apt-get install -y \
--no-install-recommends \ --no-install-recommends \
&& apt-get clean && apt-get clean
# Add a dummy user for the rootless integration tests. While runC does
# not require an entry in /etc/passwd to operate, one of the tests uses
# `git clone` -- and `git clone` does not allow you to clone a
# repository if the current uid does not have an entry in /etc/passwd.
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
# install bats # install bats
RUN cd /tmp \ RUN cd /tmp \
&& git clone https://github.com/sstephenson/bats.git \ && git clone https://github.com/sstephenson/bats.git \

View File

@ -4,7 +4,7 @@
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$') SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
PREFIX := $(DESTDIR)/usr/local PREFIX := $(DESTDIR)/usr/local
BINDIR := $(PREFIX)/sbin BINDIR := $(PREFIX)/bin
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g") GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN)) RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
@ -79,10 +79,10 @@ runcimage:
docker build -t $(RUNC_IMAGE) . docker build -t $(RUNC_IMAGE) .
test: test:
make unittest integration make unittest integration rootlessintegration
localtest: localtest:
make localunittest localintegration make localunittest localintegration localrootlessintegration
unittest: runcimage unittest: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest
@ -96,6 +96,13 @@ integration: runcimage
localintegration: all localintegration: all
bats -t tests/integration${TESTFLAGS} bats -t tests/integration${TESTFLAGS}
rootlessintegration: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) --cap-drop=ALL -u rootless $(RUNC_IMAGE) make localintegration
# FIXME: This should not be separate from rootlessintegration's method of running.
localrootlessintegration: all
sudo -u rootless -H PATH="${PATH}" bats -t tests/integration${TESTFLAGS}
shell: all shell: all
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash

View File

@ -39,6 +39,11 @@ checkpointed.`,
if err := checkArgs(context, 1, exactArgs); err != nil { if err := checkArgs(context, 1, exactArgs); err != nil {
return err return err
} }
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc checkpoint requires root")
}
container, err := getContainer(context) container, err := getContainer(context)
if err != nil { if err != nil {
return err return err

View File

@ -90,9 +90,6 @@ following will output a list of processes running in the container:
if err := checkArgs(context, 1, minArgs); err != nil { if err := checkArgs(context, 1, minArgs); err != nil {
return err return err
} }
if os.Geteuid() != 0 {
return fmt.Errorf("runc should be run as root")
}
if err := revisePidFile(context); err != nil { if err := revisePidFile(context); err != nil {
return err return err
} }

View File

@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil }, nil
} }
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *cgroupData) path(subsystem string) (string, error) { func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem) mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path. // If we didn't mount the subsystem, there is no point we make the path.
if err != nil { if err != nil {
return "", err return "", err
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
} }
parentPath, err := raw.parentPath(subsystem, mnt, root) // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could be in a container and share the pid namespace with the host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -0,0 +1,128 @@
// +build linux
package rootless
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
// subsystems lists the cgroup controllers whose paths Manager.Apply tries to
// resolve for the current process. Entries are visited in the order given.
//
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
// Named hierarchy rather than a kernel controller.
&fs.NameGroup{GroupName: "name=systemd"},
}
// subsystem is the view of a cgroup controller that this package relies on.
// It is satisfied by the controller types from libcontainer/cgroups/fs that
// are listed in subsystems.
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}
// Manager is the noop cgroup manager used for rootless containers, because we
// currently cannot manage cgroups if we are in a rootless setup. This manager
// is chosen by factory if we are in rootless mode. We error out if any cgroup
// options are set in the config -- this may change in the future with upcoming
// kernel features like the cgroup namespace.
type Manager struct {
// Cgroups is the cgroup configuration of the container. A nil value means
// no cgroup settings were given, in which case Apply does nothing.
Cgroups *configs.Cgroup
// Paths maps a subsystem name to a cgroup path. Apply overwrites it with
// the paths of the cgroups the current process is already in.
Paths map[string]string
}
// Apply records, for every known subsystem, the path of the cgroup that the
// current process already belongs to. It never creates or joins a cgroup, and
// the pid argument is not used.
//
// It returns nil without touching m.Paths when no cgroup configuration is
// present, and an error when the configuration asks for explicit cgroup paths.
func (m *Manager) Apply(pid int) error {
	switch {
	case m.Cgroups == nil:
		// No cgroup settings at all: nothing to do.
		return nil
	case m.Cgroups.Paths != nil:
		// We can't set paths.
		// TODO(cyphar): Implement the case where the runner of a rootless
		//               container owns their own cgroup, which would allow us
		//               to set up a cgroup for each path.
		return fmt.Errorf("cannot change cgroup path in rootless container")
	}

	// Collect the paths we are already in; subsystems whose path cannot be
	// resolved are deliberately left out rather than treated as an error.
	resolved := make(map[string]string)
	for _, controller := range subsystems {
		controllerName := controller.Name()
		if ownPath, err := cgroups.GetOwnCgroupPath(controllerName); err == nil {
			resolved[controllerName] = ownPath
		}
	}
	m.Paths = resolved
	return nil
}
// GetPaths returns the subsystem-name to cgroup-path map currently held by the
// manager. The map itself is returned, not a copy.
func (m *Manager) GetPaths() map[string]string {
	current := m.Paths
	return current
}
// Set does not change any cgroup setting. It only re-runs config validation
// on container, because someone might decide to update a rootless container
// after it was created, and returns the validation result.
func (m *Manager) Set(container *configs.Config) error {
	validator := validate.New()
	return validator.Validate(container)
}
// GetPids returns the result of cgroups.GetPids for the "devices" cgroup that
// the current process is in. An error resolving that cgroup path is returned
// as-is with a nil slice.
func (m *Manager) GetPids() ([]int, error) {
	devicesDir, err := cgroups.GetOwnCgroupPath("devices")
	if err == nil {
		return cgroups.GetPids(devicesDir)
	}
	return nil, err
}
// GetAllPids returns the result of cgroups.GetAllPids for the "devices" cgroup
// that the current process is in. An error resolving that cgroup path is
// returned as-is with a nil slice.
func (m *Manager) GetAllPids() ([]int, error) {
	devicesDir, err := cgroups.GetOwnCgroupPath("devices")
	if err == nil {
		return cgroups.GetAllPids(devicesDir)
	}
	return nil, err
}
// GetStats is unsupported for rootless containers and always returns a nil
// stats pointer together with an error.
//
// TODO(cyphar): We can make this work if we figure out a way to allow usage
//               of cgroups with a rootless container. While this doesn't
//               actually require write access to a cgroup directory, the
//               statistics are not useful if they can be affected by
//               non-container processes.
func (m *Manager) GetStats() (*cgroups.Stats, error) {
	unsupported := fmt.Errorf("cannot get cgroup stats in rootless container")
	return nil, unsupported
}
// Freeze is unsupported for rootless containers: it ignores state and always
// returns an error.
//
// TODO(cyphar): We can make this work if we figure out a way to allow usage
//               of cgroups with a rootless container.
func (m *Manager) Freeze(state configs.FreezerState) error {
	unsupported := fmt.Errorf("cannot use freezer cgroup in rootless container")
	return unsupported
}
// Destroy is a no-op that always succeeds: this manager never sets anything
// up, so there is nothing to tear down.
func (m *Manager) Destroy() error {
	return nil
}

View File

@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err return "", err
} }
initPath, err := cgroups.GetInitCgroupDir(subsystem) initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -109,7 +109,7 @@ type Mount struct {
Subsystems []string Subsystems []string
} }
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) { func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 { if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount") return "", fmt.Errorf("no subsystem for mount")
} }
@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil return subsystems, nil
} }
// GetThisCgroupDir returns the relative path to the cgroup docker is running in. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) { func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup") cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil { if err != nil {
return "", err return "", err
@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups) return getControllerPath(subsystem, cgroups)
} }
func GetInitCgroupDir(subsystem string) (string, error) { func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup") cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil { if err != nil {
return "", err return "", err
@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups) return getControllerPath(subsystem, cgroups)
} }
// GetInitCgroupPath looks up the cgroup of the init process for subsystem via
// GetInitCgroup and converts it to a filesystem path with
// getCgroupPathHelper. A lookup error is returned with an empty path.
func GetInitCgroupPath(subsystem string) (string, error) {
	initCgroup, err := GetInitCgroup(subsystem)
	if err == nil {
		return getCgroupPathHelper(subsystem, initCgroup)
	}
	return "", err
}
// getCgroupPathHelper converts cgroup, a cgroup name for subsystem, into a
// path under that subsystem's mountpoint. The cgroup is first made relative
// to the root of the mount and then joined onto the mountpoint.
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
	mountpoint, mountRoot, err := FindCgroupMountpointAndRoot(subsystem)
	if err != nil {
		return "", err
	}

	// This is needed for nested containers, because in /proc/self/cgroup we
	// see paths from the host, which don't exist in the container.
	relative, err := filepath.Rel(mountRoot, cgroup)
	if err != nil {
		return "", err
	}

	return filepath.Join(mountpoint, relative), nil
}
func readProcsFile(dir string) ([]int, error) { func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses)) f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil { if err != nil {

View File

@ -183,6 +183,9 @@ type Config struct {
// NoNewKeyring will not allocated a new session keyring for the container. It will use the // NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case. // callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"` NoNewKeyring bool `json:"no_new_keyring"`
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
} }
type Hooks struct { type Hooks struct {

View File

@ -4,38 +4,50 @@ package configs
import "fmt" import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero // HostUID gets the translated uid for the process on host which could be
// when user namespaces are enabled. // different when user namespaces are enabled.
func (c Config) HostUID() (int, error) { func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) { if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil { if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
} }
id, found := c.hostIDFromMapping(0, c.UidMappings) id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found { if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
} }
return id, nil return id, nil
} }
// Return default root uid 0 // Return unchanged id.
return 0, nil return containerId, nil
} }
// HostGID gets the root gid for the process on host which could be non-zero // HostRootUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled. // when user namespaces are enabled.
func (c Config) HostGID() (int, error) { func (c Config) HostRootUID() (int, error) {
return c.HostUID(0)
}
// HostGID gets the translated gid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) { if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil { if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
} }
id, found := c.hostIDFromMapping(0, c.GidMappings) id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found { if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.") return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
} }
return id, nil return id, nil
} }
// Return default root gid 0 // Return unchanged id.
return 0, nil return containerId, nil
}
// HostRootGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootGID() (int, error) {
return c.HostGID(0)
} }
// Utility function that gets a host ID for a container ID from user namespace map // Utility function that gets a host ID for a container ID from user namespace map

View File

@ -65,11 +65,11 @@ func TestRemoveNamespace(t *testing.T) {
} }
} }
func TestHostUIDNoUSERNS(t *testing.T) { func TestHostRootUIDNoUSERNS(t *testing.T) {
config := &Config{ config := &Config{
Namespaces: Namespaces{}, Namespaces: Namespaces{},
} }
uid, err := config.HostUID() uid, err := config.HostRootUID()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -78,7 +78,7 @@ func TestHostUIDNoUSERNS(t *testing.T) {
} }
} }
func TestHostUIDWithUSERNS(t *testing.T) { func TestHostRootUIDWithUSERNS(t *testing.T) {
config := &Config{ config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}}, Namespaces: Namespaces{{Type: NEWUSER}},
UidMappings: []IDMap{ UidMappings: []IDMap{
@ -89,7 +89,7 @@ func TestHostUIDWithUSERNS(t *testing.T) {
}, },
}, },
} }
uid, err := config.HostUID() uid, err := config.HostRootUID()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -98,11 +98,11 @@ func TestHostUIDWithUSERNS(t *testing.T) {
} }
} }
func TestHostGIDNoUSERNS(t *testing.T) { func TestHostRootGIDNoUSERNS(t *testing.T) {
config := &Config{ config := &Config{
Namespaces: Namespaces{}, Namespaces: Namespaces{},
} }
uid, err := config.HostGID() uid, err := config.HostRootGID()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -111,7 +111,7 @@ func TestHostGIDNoUSERNS(t *testing.T) {
} }
} }
func TestHostGIDWithUSERNS(t *testing.T) { func TestHostRootGIDWithUSERNS(t *testing.T) {
config := &Config{ config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}}, Namespaces: Namespaces{{Type: NEWUSER}},
GidMappings: []IDMap{ GidMappings: []IDMap{
@ -122,7 +122,7 @@ func TestHostGIDWithUSERNS(t *testing.T) {
}, },
}, },
} }
uid, err := config.HostGID() uid, err := config.HostRootGID()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View File

@ -0,0 +1,117 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
// geteuid and getegid are package-level indirections over os.Geteuid and
// os.Getegid. The validation code calls these variables instead of the os
// functions directly, which lets the tests in this package substitute fixed
// ids.
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
// rootless runs every rootless-specific check against config, in the order
// mappings, mounts, cgroups, and returns the first error encountered. It
// returns nil when all checks pass.
func (v *ConfigValidator) rootless(config *configs.Config) error {
	checks := []func(*configs.Config) error{
		rootlessMappings,
		rootlessMount,
		// Currently, cgroups cannot effectively be used in rootless
		// containers. The new cgroup namespace doesn't really help us either
		// because it doesn't have nice interactions with the user namespace
		// (we're working with upstream to fix this).
		rootlessCgroup,
	}
	for _, check := range checks {
		if err := check(config); err != nil {
			return err
		}
	}

	// XXX: We currently can't verify the user config at all, because
	//      configs.Config doesn't store the user-related configs. So this
	//      has to be verified by setupUser() in init_linux.go.
	return nil
}
// rootlessMappings checks the uid/gid mappings of a rootless config. In
// order, it requires that:
//
//   - the host uid for container root can be determined;
//   - when the effective uid is non-zero, a user namespace is configured and
//     container root maps to that effective uid;
//   - the host gid for container root can be determined and equals the
//     effective gid;
//   - there is exactly one uid mapping and one gid mapping, each of size 1.
//
// The first violated requirement is reported as an error.
func rootlessMappings(config *configs.Config) error {
	hostRootUID, err := config.HostRootUID()
	if err != nil {
		return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
	}
	euid := geteuid()
	if euid != 0 && !config.Namespaces.Contains(configs.NEWUSER) {
		return fmt.Errorf("rootless containers require user namespaces")
	}
	if euid != 0 && hostRootUID != euid {
		return fmt.Errorf("rootless containers cannot map container root to a different host user")
	}

	hostRootGID, err := config.HostRootGID()
	if err != nil {
		return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
	}
	// Similar to the uid test, we need to make sure that we aren't trying to
	// map to a group ID that we don't have the right to be.
	if egid := getegid(); hostRootGID != egid {
		return fmt.Errorf("rootless containers cannot map container root to a different host group")
	}

	// We can only map one user and group inside a container (our own).
	uidMaps, gidMaps := config.UidMappings, config.GidMappings
	if len(uidMaps) != 1 || uidMaps[0].Size != 1 {
		return fmt.Errorf("rootless containers cannot map more than one user")
	}
	if len(gidMaps) != 1 || gidMaps[0].Size != 1 {
		return fmt.Errorf("rootless containers cannot map more than one group")
	}
	return nil
}
// rootlessCgroup verifies that the user isn't trying to set any cgroup
// resource limits. A config with no cgroup section, no resources section, or
// a resources struct equal to its zero value is accepted; anything else is an
// error.
func rootlessCgroup(config *configs.Config) error {
	cg := config.Cgroups
	if cg == nil || cg.Resources == nil {
		// Nothing set at all.
		return nil
	}

	// Compare against the zero value of the resources type. This is all we
	// need to do, since specconv won't add cgroup options in rootless mode.
	resources := *cg.Resources
	zero := reflect.Zero(reflect.TypeOf(resources)).Interface()
	if reflect.DeepEqual(resources, zero) {
		return nil
	}
	return fmt.Errorf("cannot specify resource limits in rootless container")
}
// rootlessMount scans the comma-separated data string of every configured
// mount and rejects any `uid=` or `gid=` option whose value is not exactly 0.
// No other property of the mounts is checked here.
func rootlessMount(config *configs.Config) error {
	// XXX: We could whitelist allowed devices at this point, but I'm not
	//      convinced that's a good idea. The kernel is the best arbiter of
	//      access control.
	for _, m := range config.Mounts {
		for _, option := range strings.Split(m.Data, ",") {
			switch {
			case strings.HasPrefix(option, "uid=") && option != "uid=0":
				return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
			case strings.HasPrefix(option, "gid=") && option != "gid=0":
				return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
			}
		}
	}
	return nil
}

View File

@ -0,0 +1,195 @@
package validate
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
// init replaces the geteuid/getegid hooks with fixed, non-zero ids so that the
// rootless validation tests do not depend on the ids of the user running them.
func init() {
	const (
		fakeEUID = 1337
		fakeEGID = 7331
	)
	geteuid = func() int { return fakeEUID }
	getegid = func() int { return fakeEGID }
}
// rootlessConfig returns a fresh rootless config that the tests treat as the
// valid baseline: a user namespace plus a single uid and a single gid mapping
// of size 1, from container id 0 to the current geteuid()/getegid() values.
// Every call builds new slices, so callers may mutate the result freely.
func rootlessConfig() *configs.Config {
	singleRootMap := func(hostID int) []configs.IDMap {
		return []configs.IDMap{
			{HostID: hostID, ContainerID: 0, Size: 1},
		}
	}

	return &configs.Config{
		Rootfs:      "/var",
		Rootless:    true,
		Namespaces:  configs.Namespaces{{Type: configs.NEWUSER}},
		UidMappings: singleRootMap(geteuid()),
		GidMappings: singleRootMap(getegid()),
	}
}
// TestValidateRootless checks that the baseline rootless config validates.
func TestValidateRootless(t *testing.T) {
	err := New().Validate(rootlessConfig())
	if err != nil {
		t.Errorf("Expected error to not occur: %+v", err)
	}
}
/* rootlessMappings() */
func TestValidateRootlessUserns(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Namespaces = nil
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if user namespaces not set")
}
}
// TestValidateRootlessMappingUid checks that each broken uid mapping variant
// (missing, wrong host id, size > 1, more than one extent) is rejected. Every
// variant starts from a fresh baseline config.
func TestValidateRootlessMappingUid(t *testing.T) {
	validator := New()

	// expectInvalid applies mutate to a fresh baseline config and reports
	// failMsg if validation unexpectedly succeeds.
	expectInvalid := func(failMsg string, mutate func(cfg *configs.Config)) {
		cfg := rootlessConfig()
		mutate(cfg)
		if validator.Validate(cfg) == nil {
			t.Error(failMsg)
		}
	}

	expectInvalid("Expected error to occur if no uid mappings provided", func(cfg *configs.Config) {
		cfg.UidMappings = nil
	})
	expectInvalid("Expected error to occur if geteuid() != mapped uid", func(cfg *configs.Config) {
		cfg.UidMappings[0].HostID = geteuid() + 1
	})
	expectInvalid("Expected error to occur if more than one uid mapped", func(cfg *configs.Config) {
		cfg.UidMappings[0].Size = 1024
	})
	expectInvalid("Expected error to occur if more than one uid extent mapped", func(cfg *configs.Config) {
		extra := configs.IDMap{
			HostID:      geteuid() + 1,
			ContainerID: 0,
			Size:        1,
		}
		cfg.UidMappings = append(cfg.UidMappings, extra)
	})
}
// TestValidateRootlessMappingGid checks that each broken gid mapping variant
// (missing, wrong host id, size > 1, more than one extent) is rejected. Every
// variant starts from a fresh baseline config.
func TestValidateRootlessMappingGid(t *testing.T) {
	validator := New()

	// expectInvalid applies mutate to a fresh baseline config and reports
	// failMsg if validation unexpectedly succeeds.
	expectInvalid := func(failMsg string, mutate func(cfg *configs.Config)) {
		cfg := rootlessConfig()
		mutate(cfg)
		if validator.Validate(cfg) == nil {
			t.Error(failMsg)
		}
	}

	expectInvalid("Expected error to occur if no gid mappings provided", func(cfg *configs.Config) {
		cfg.GidMappings = nil
	})
	expectInvalid("Expected error to occur if getegid() != mapped gid", func(cfg *configs.Config) {
		cfg.GidMappings[0].HostID = getegid() + 1
	})
	expectInvalid("Expected error to occur if more than one gid mapped", func(cfg *configs.Config) {
		cfg.GidMappings[0].Size = 1024
	})
	expectInvalid("Expected error to occur if more than one gid extent mapped", func(cfg *configs.Config) {
		extra := configs.IDMap{
			HostID:      getegid() + 1,
			ContainerID: 0,
			Size:        1,
		}
		cfg.GidMappings = append(cfg.GidMappings, extra)
	})
}
/* rootlessMount() */
// TestValidateRootlessMountUid checks the uid= mount option rule on a single
// devpts mount: no option and uid=0 are accepted, uid=5 is rejected. The same
// config is reused across the three steps; only the mount data changes.
func TestValidateRootlessMountUid(t *testing.T) {
	cfg := rootlessConfig()
	validator := New()

	devpts := &configs.Mount{
		Source:      "devpts",
		Destination: "/dev/pts",
		Device:      "devpts",
	}
	cfg.Mounts = []*configs.Mount{devpts}

	// validateWith sets the mount data and validates the shared config.
	validateWith := func(data string) error {
		devpts.Data = data
		return validator.Validate(cfg)
	}

	if err := validateWith(""); err != nil {
		t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err)
	}
	if validateWith("uid=5") == nil {
		t.Errorf("Expected error to occur when setting uid=5 in mount options")
	}
	if err := validateWith("uid=0"); err != nil {
		t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err)
	}
}
// TestValidateRootlessMountGid checks the gid= mount option rule on a single
// devpts mount: no option and gid=0 are accepted, gid=5 is rejected. The same
// config is reused across the three steps; only the mount data changes.
func TestValidateRootlessMountGid(t *testing.T) {
	cfg := rootlessConfig()
	validator := New()

	devpts := &configs.Mount{
		Source:      "devpts",
		Destination: "/dev/pts",
		Device:      "devpts",
	}
	cfg.Mounts = []*configs.Mount{devpts}

	// validateWith sets the mount data and validates the shared config.
	validateWith := func(data string) error {
		devpts.Data = data
		return validator.Validate(cfg)
	}

	if err := validateWith(""); err != nil {
		t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err)
	}
	if validateWith("gid=5") == nil {
		t.Errorf("Expected error to occur when setting gid=5 in mount options")
	}
	if err := validateWith("gid=0"); err != nil {
		t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err)
	}
}
/* rootlessCgroup() */
func TestValidateRootlessCgroup(t *testing.T) {
validator := New()
config := rootlessConfig()
config.Cgroups = &configs.Cgroup{
Resources: &configs.Resources{
PidsLimit: 1337,
},
}
if err := validator.Validate(config); err == nil {
t.Errorf("Expected error to occur if cgroup limits set")
}
}

View File

@ -40,6 +40,11 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil { if err := v.sysctl(config); err != nil {
return err return err
} }
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil return nil
} }

View File

@ -51,6 +51,9 @@ type State struct {
// Platform specific fields below here // Platform specific fields below here
// Specifies if the container was started under the rootless mode.
Rootless bool `json:"rootless"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name // Path to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path. // with the value as the path.
CgroupPaths map[string]string `json:"cgroup_paths"` CgroupPaths map[string]string `json:"cgroup_paths"`
@ -304,11 +307,11 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
} }
func (c *linuxContainer) createExecFifo() error { func (c *linuxContainer) createExecFifo() error {
rootuid, err := c.Config().HostUID() rootuid, err := c.Config().HostRootUID()
if err != nil { if err != nil {
return err return err
} }
rootgid, err := c.Config().HostGID() rootgid, err := c.Config().HostRootGID()
if err != nil { if err != nil {
return err return err
} }
@ -452,6 +455,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
PassedFilesCount: len(process.ExtraFiles), PassedFilesCount: len(process.ExtraFiles),
ContainerId: c.ID(), ContainerId: c.ID(),
NoNewPrivileges: c.config.NoNewPrivileges, NoNewPrivileges: c.config.NoNewPrivileges,
Rootless: c.config.Rootless,
AppArmorProfile: c.config.AppArmorProfile, AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel, ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits, Rlimits: c.config.Rlimits,
@ -516,10 +520,18 @@ func (c *linuxContainer) Resume() error {
} }
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
// XXX(cyphar): This requires cgroups.
if c.config.Rootless {
return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
}
return notifyOnOOM(c.cgroupManager.GetPaths()) return notifyOnOOM(c.cgroupManager.GetPaths())
} }
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
// XXX(cyphar): This requires cgroups.
if c.config.Rootless {
return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
}
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
} }
@ -622,6 +634,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock() c.m.Lock()
defer c.m.Unlock() defer c.m.Unlock()
// TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
// support for doing unprivileged dumps, but the setup of
// rootless containers might make this complicated.
if c.config.Rootless {
return fmt.Errorf("cannot checkpoint a rootless container")
}
if err := c.checkCriuVersion("1.5.2"); err != nil { if err := c.checkCriuVersion("1.5.2"); err != nil {
return err return err
} }
@ -791,6 +810,13 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock() c.m.Lock()
defer c.m.Unlock() defer c.m.Unlock()
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
// support for unprivileged restore at the moment.
if c.config.Rootless {
return fmt.Errorf("cannot restore a rootless container")
}
if err := c.checkCriuVersion("1.5.2"); err != nil { if err := c.checkCriuVersion("1.5.2"); err != nil {
return err return err
} }
@ -918,6 +944,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
} }
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
if err := c.cgroupManager.Apply(pid); err != nil { if err := c.cgroupManager.Apply(pid); err != nil {
return err return err
} }
@ -1314,6 +1341,7 @@ func (c *linuxContainer) currentState() (*State, error) {
InitProcessStartTime: startTime, InitProcessStartTime: startTime,
Created: c.created, Created: c.created,
}, },
Rootless: c.config.Rootless,
CgroupPaths: c.cgroupManager.GetPaths(), CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string), NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: externalDescriptors, ExternalDescriptors: externalDescriptors,
@ -1441,19 +1469,34 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Type: GidmapAttr, Type: GidmapAttr,
Value: b, Value: b,
}) })
// check if we have CAP_SETGID to setgroup properly // The following only applies if we are root.
pid, err := capability.NewPid(os.Getpid()) if !c.config.Rootless {
if err != nil { // check if we have CAP_SETGID to setgroup properly
return nil, err pid, err := capability.NewPid(os.Getpid())
} if err != nil {
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) { return nil, err
r.AddData(&Boolmsg{ }
Type: SetgroupAttr, if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
Value: true, r.AddData(&Boolmsg{
}) Type: SetgroupAttr,
Value: true,
})
}
} }
} }
} }
// write oom_score_adj
r.AddData(&Bytemsg{
Type: OomScoreAdjAttr,
Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)),
})
// write rootless
r.AddData(&Boolmsg{
Type: RootlessAttr,
Value: c.config.Rootless,
})
return bytes.NewReader(r.Serialize()), nil return bytes.NewReader(r.Serialize()), nil
} }

View File

@ -15,6 +15,7 @@ import (
"github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate" "github.com/opencontainers/runc/libcontainer/configs/validate"
@ -73,6 +74,20 @@ func Cgroupfs(l *LinuxFactory) error {
return nil return nil
} }
// RootlessCgroups is an option func for LinuxFactory. It makes the factory
// hand out containers backed by the "rootless" cgroup manager, which fails
// any operation that cannot be performed by an unprivileged user. Only use it
// together with rootless containers.
func RootlessCgroups(l *LinuxFactory) error {
	// Constructor installed on the factory: wraps the given cgroup config and
	// paths in a rootless manager.
	newManager := func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
		m := new(rootless.Manager)
		m.Cgroups = config
		m.Paths = paths
		return m
	}

	l.NewCgroupsManager = newManager
	return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error { func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root) mounted, err := mount.Mounted(l.Root)
@ -149,11 +164,11 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := l.Validator.Validate(config); err != nil { if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid) return nil, newGenericError(err, ConfigInvalid)
} }
uid, err := config.HostUID() uid, err := config.HostRootUID()
if err != nil { if err != nil {
return nil, newGenericError(err, SystemError) return nil, newGenericError(err, SystemError)
} }
gid, err := config.HostGID() gid, err := config.HostRootGID()
if err != nil { if err != nil {
return nil, newGenericError(err, SystemError) return nil, newGenericError(err, SystemError)
} }
@ -169,6 +184,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := os.Chown(containerRoot, uid, gid); err != nil { if err := os.Chown(containerRoot, uid, gid); err != nil {
return nil, newGenericError(err, SystemError) return nil, newGenericError(err, SystemError)
} }
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{ c := &linuxContainer{
id: id, id: id,
root: containerRoot, root: containerRoot,
@ -195,6 +213,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
processStartTime: state.InitProcessStartTime, processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors, fds: state.ExternalDescriptors,
} }
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{ c := &linuxContainer{
initProcess: r, initProcess: r,
initProcessStartTime: state.InitProcessStartTime, initProcessStartTime: state.InitProcessStartTime,

View File

@ -6,10 +6,8 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"net" "net"
"os" "os"
"strconv"
"strings" "strings"
"syscall" "syscall"
"unsafe" "unsafe"
@ -60,6 +58,7 @@ type initConfig struct {
ContainerId string `json:"containerid"` ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"` Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"` CreateConsole bool `json:"create_console"`
Rootless bool `json:"rootless"`
} }
type initer interface { type initer interface {
@ -231,18 +230,21 @@ func syncParentHooks(pipe io.ReadWriter) error {
func setupUser(config *initConfig) error { func setupUser(config *initConfig) error {
// Set up defaults. // Set up defaults.
defaultExecUser := user.ExecUser{ defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(), Uid: 0,
Gid: syscall.Getgid(), Gid: 0,
Home: "/", Home: "/",
} }
passwdPath, err := user.GetPasswdPath() passwdPath, err := user.GetPasswdPath()
if err != nil { if err != nil {
return err return err
} }
groupPath, err := user.GetGroupPath() groupPath, err := user.GetGroupPath()
if err != nil { if err != nil {
return err return err
} }
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil { if err != nil {
return err return err
@ -255,22 +257,49 @@ func setupUser(config *initConfig) error {
return err return err
} }
} }
if config.Rootless {
if execUser.Uid != 0 {
return fmt.Errorf("cannot run as a non-root user in a rootless container")
}
if execUser.Gid != 0 {
return fmt.Errorf("cannot run as a non-root group in a rootless container")
}
// We cannot set any additional groups in a rootless container and thus we
// bail if the user asked us to do so. TODO: We currently can't do this
// earlier, but if libcontainer.Process.User was typesafe this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// before we change to the container's user make sure that the processes STDIO // before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to. // is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil { if err := fixStdioPermissions(config, execUser); err != nil {
return err return err
} }
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil { // This isn't allowed in an unprivileged user namespace since Linux 3.19.
return err // There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}
} }
if err := system.Setgid(execUser.Gid); err != nil { if err := system.Setgid(execUser.Gid); err != nil {
return err return err
} }
if err := system.Setuid(execUser.Uid); err != nil { if err := system.Setuid(execUser.Uid); err != nil {
return err return err
} }
// if we didn't get HOME already, set it based on the user's HOME // if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" { if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil { if err := os.Setenv("HOME", execUser.Home); err != nil {
@ -283,7 +312,7 @@ func setupUser(config *initConfig) error {
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. // fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be // The ownership needs to match because it is created outside of the container and needs to be
// localized. // localized.
func fixStdioPermissions(u *user.ExecUser) error { func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
var null syscall.Stat_t var null syscall.Stat_t
if err := syscall.Stat("/dev/null", &null); err != nil { if err := syscall.Stat("/dev/null", &null); err != nil {
return err return err
@ -297,10 +326,20 @@ func fixStdioPermissions(u *user.ExecUser) error {
if err := syscall.Fstat(int(fd), &s); err != nil { if err := syscall.Fstat(int(fd), &s); err != nil {
return err return err
} }
// Skip chown of /dev/null if it was used as one of the STDIO fds. // Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev { if s.Rdev == null.Rdev {
continue continue
} }
// Skip chown if s.Gid is actually an unmapped gid in the host. While
// this is a bit dodgy if it just so happens that the console _is_
// owned by overflow_gid, there's no way for us to disambiguate this as
// a userspace program.
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
continue
}
// We only change the uid owner (as it is possible for the mount to // We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it). // prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is // The reason why we don't just leave the default uid=X mount setup is
@ -369,12 +408,6 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
return nil return nil
} }
func setOomScoreAdj(oomScoreAdj int, pid int) error {
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
}
const _P_PID = 1 const _P_PID = 1
type siginfo struct { type siginfo struct {

View File

@ -11,12 +11,15 @@ import (
// list of known message types we want to send to bootstrap program // list of known message types we want to send to bootstrap program
// The number is randomly chosen to not conflict with known netlink types // The number is randomly chosen to not conflict with known netlink types
const ( const (
InitMsg uint16 = 62000 InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281 CloneFlagsAttr uint16 = 27281
NsPathsAttr uint16 = 27282 NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283 UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284 GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285 SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287
// When syscall.NLA_HDRLEN is in gccgo, take this out. // When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1) syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
) )

View File

@ -72,18 +72,23 @@ struct nlconfig_t {
char *namespaces; char *namespaces;
size_t namespaces_len; size_t namespaces_len;
uint8_t is_setgroup; uint8_t is_setgroup;
uint8_t is_rootless;
char *oom_score_adj;
size_t oom_score_adj_len;
}; };
/* /*
* List of netlink message types sent to us as part of bootstrapping the init. * List of netlink message types sent to us as part of bootstrapping the init.
* These constants are defined in libcontainer/message_linux.go. * These constants are defined in libcontainer/message_linux.go.
*/ */
#define INIT_MSG 62000 #define INIT_MSG 62000
#define CLONE_FLAGS_ATTR 27281 #define CLONE_FLAGS_ATTR 27281
#define NS_PATHS_ATTR 27282 #define NS_PATHS_ATTR 27282
#define UIDMAP_ATTR 27283 #define UIDMAP_ATTR 27283
#define GIDMAP_ATTR 27284 #define GIDMAP_ATTR 27284
#define SETGROUP_ATTR 27285 #define SETGROUP_ATTR 27285
#define OOM_SCORE_ADJ_ATTR 27286
#define ROOTLESS_ATTR 27287
/* /*
* Use the raw syscall for versions of glibc which don't include a function for * Use the raw syscall for versions of glibc which don't include a function for
@ -172,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
policy = "deny"; policy = "deny";
break; break;
case SETGROUPS_DEFAULT: case SETGROUPS_DEFAULT:
default:
/* Nothing to do. */ /* Nothing to do. */
return; return;
} }
@ -186,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
} }
} }
static void update_uidmap(int pid, char *map, int map_len) static void update_uidmap(int pid, char *map, size_t map_len)
{ {
if (map == NULL || map_len <= 0) if (map == NULL || map_len <= 0)
return; return;
@ -195,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
bail("failed to update /proc/%d/uid_map", pid); bail("failed to update /proc/%d/uid_map", pid);
} }
static void update_gidmap(int pid, char *map, int map_len) static void update_gidmap(int pid, char *map, size_t map_len)
{ {
if (map == NULL || map_len <= 0) if (map == NULL || map_len <= 0)
return; return;
@ -204,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
bail("failed to update /proc/%d/gid_map", pid); bail("failed to update /proc/%d/gid_map", pid);
} }
/*
 * Hands the first len bytes of data to write_file() for
 * /proc/self/oom_score_adj. Does nothing when no value was supplied (data is
 * NULL or len is zero), and bails if write_file() reports an error.
 */
static void update_oom_score_adj(char *data, size_t len)
{
	/* No value supplied: leave the current oom_score_adj untouched. */
	if (!data || len == 0)
		return;

	if (write_file(data, len, "/proc/self/oom_score_adj") >= 0)
		return;

	bail("failed to update /proc/self/oom_score_adj");
}
/* A dummy function that just jumps to the given jumpval. */ /* A dummy function that just jumps to the given jumpval. */
static int child_func(void *arg) __attribute__ ((noinline)); static int child_func(void *arg) __attribute__ ((noinline));
static int child_func(void *arg) static int child_func(void *arg)
@ -317,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
case CLONE_FLAGS_ATTR: case CLONE_FLAGS_ATTR:
config->cloneflags = readint32(current); config->cloneflags = readint32(current);
break; break;
case ROOTLESS_ATTR:
config->is_rootless = readint8(current);
break;
case OOM_SCORE_ADJ_ATTR:
config->oom_score_adj = current;
config->oom_score_adj_len = payload_len;
break;
case NS_PATHS_ATTR: case NS_PATHS_ATTR:
config->namespaces = current; config->namespaces = current;
config->namespaces_len = payload_len; config->namespaces_len = payload_len;
@ -425,14 +447,32 @@ void nsexec(void)
if (pipenum == -1) if (pipenum == -1)
return; return;
/* make the process non-dumpable */
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
bail("failed to set process as non-dumpable");
}
/* Parse all of the netlink configuration. */ /* Parse all of the netlink configuration. */
nl_parse(pipenum, &config); nl_parse(pipenum, &config);
/* Set oom_score_adj. This has to be done before !dumpable because
* /proc/self/oom_score_adj is not writeable unless you're a privileged
* user (if !dumpable is set). All children inherit their parent's
* oom_score_adj value on fork(2) so this will always be propagated
* properly.
*/
update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
/*
* Make the process non-dumpable, to avoid various race conditions that
* could cause processes in namespaces we're joining to access host
* resources (or potentially execute code).
*
* However, if the number of namespaces we are joining is 0, we are not
* going to be switching to a different security context. Thus setting
* ourselves to be non-dumpable only breaks things (like rootless
* containers), which is the recommendation from the kernel folks.
*/
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to set process as non-dumpable");
}
/* Pipe so we can tell the child when we've finished setting up. */ /* Pipe so we can tell the child when we've finished setting up. */
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0) if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
bail("failed to setup sync pipe between parent and child"); bail("failed to setup sync pipe between parent and child");
@ -540,9 +580,21 @@ void nsexec(void)
exit(ret); exit(ret);
case SYNC_USERMAP_PLS: case SYNC_USERMAP_PLS:
/* Enable setgroups(2) if we've been asked to. */ /*
* Enable setgroups(2) if we've been asked to. But we also
* have to explicitly disable setgroups(2) if we're
* creating a rootless container (this is required since
* Linux 3.19).
*/
if (config.is_rootless && config.is_setgroup) {
kill(child, SIGKILL);
bail("cannot allow setgroup in an unprivileged user namespace setup");
}
if (config.is_setgroup) if (config.is_setgroup)
update_setgroups(child, SETGROUPS_ALLOW); update_setgroups(child, SETGROUPS_ALLOW);
if (config.is_rootless)
update_setgroups(child, SETGROUPS_DENY);
/* Set up mappings. */ /* Set up mappings. */
update_uidmap(child, config.uidmap, config.uidmap_len); update_uidmap(child, config.uidmap, config.uidmap_len);
@ -681,6 +733,11 @@ void nsexec(void)
* clone_parent rant). So signal our parent to hook us up. * clone_parent rant). So signal our parent to hook us up.
*/ */
/* Switching is only necessary if we joined namespaces. */
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
bail("failed to set process as dumpable");
}
s = SYNC_USERMAP_PLS; s = SYNC_USERMAP_PLS;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
@ -691,6 +748,11 @@ void nsexec(void)
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
if (s != SYNC_USERMAP_ACK) if (s != SYNC_USERMAP_ACK)
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
/* Switching is only necessary if we joined namespaces. */
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to set process as dumpable");
}
} }
/* /*
@ -774,8 +836,10 @@ void nsexec(void)
if (setgid(0) < 0) if (setgid(0) < 0)
bail("setgid failed"); bail("setgid failed");
if (setgroups(0, NULL) < 0) if (!config.is_rootless && config.is_setgroup) {
bail("setgroups failed"); if (setgroups(0, NULL) < 0)
bail("setgroups failed");
}
s = SYNC_CHILD_READY; s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) if (write(syncfd, &s, sizeof(s)) != sizeof(s))

View File

@ -80,15 +80,12 @@ func (p *setnsProcess) start() (err error) {
if err = p.execSetns(); err != nil { if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process") return newSystemErrorWithCause(err, "executing setns process")
} }
if len(p.cgroupPaths) > 0 { // We can't join cgroups if we're in a rootless container.
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
} }
} }
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting oom score")
}
// set rlimits, this has to be done here because we lose permissions // set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace // to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@ -257,8 +254,9 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
} }
p.setExternalDescriptors(fds) p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children // Do this before syncing with child so that no children can escape the
// can escape the cgroup // cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil { if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process") return newSystemErrorWithCause(err, "applying cgroup configuration for process")
} }
@ -285,10 +283,6 @@ func (p *initProcess) start() error {
if err := p.manager.Set(p.config.Config); err != nil { if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process") return newSystemErrorWithCause(err, "setting cgroup config for ready process")
} }
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting oom score for ready process")
}
// set rlimits, this has to be done here because we lose permissions // set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace // to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@ -432,6 +426,12 @@ func getPipeFds(pid int) ([]string, error) {
f := filepath.Join(dirPath, strconv.Itoa(i)) f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f) target, err := os.Readlink(f)
if err != nil { if err != nil {
// Ignore permission errors, for rootless containers and other
// non-dumpable processes. if we can't get the fd for a particular
// file, there's not much we can do.
if os.IsPermission(err) {
continue
}
return fds, err return fds, err
} }
fds[i] = target fds[i] = target

View File

@ -348,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
var binds []*configs.Mount var binds []*configs.Mount
for _, mm := range mounts { for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir(cgroupPaths) dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -0,0 +1,227 @@
package specconv
import (
"os"
"runtime"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
)
// sPtr returns a pointer to a freshly allocated string holding a copy of s.
func sPtr(s string) *string {
	out := new(string)
	*out = s
	return out
}
// Example builds a sample spec with many options filled in, so that a user
// can see what a standard spec file looks like.
func Example() *specs.Spec {
	// All five capability sets of the example process list the same three
	// capabilities. A new slice is built on every call so that the sets do
	// not share a backing array.
	defaultCaps := func() []string {
		return []string{
			"CAP_AUDIT_WRITE",
			"CAP_KILL",
			"CAP_NET_BIND_SERVICE",
		}
	}

	// The process run inside the container: a shell attached to a terminal.
	process := specs.Process{
		Terminal: true,
		User:     specs.User{},
		Args:     []string{"sh"},
		Env: []string{
			"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM=xterm",
		},
		Cwd:             "/",
		NoNewPrivileges: true,
		Capabilities: &specs.LinuxCapabilities{
			Bounding:    defaultCaps(),
			Permitted:   defaultCaps(),
			Inheritable: defaultCaps(),
			Ambient:     defaultCaps(),
			Effective:   defaultCaps(),
		},
		Rlimits: []specs.LinuxRlimit{{
			Type: "RLIMIT_NOFILE",
			Hard: uint64(1024),
			Soft: uint64(1024),
		}},
	}

	// Filesystems mounted into the container.
	mounts := []specs.Mount{
		{
			Destination: "/proc",
			Type:        "proc",
			Source:      "proc",
		},
		{
			Destination: "/dev",
			Type:        "tmpfs",
			Source:      "tmpfs",
			Options:     []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
		},
		{
			Destination: "/dev/pts",
			Type:        "devpts",
			Source:      "devpts",
			Options:     []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
		},
		{
			Destination: "/dev/shm",
			Type:        "tmpfs",
			Source:      "shm",
			Options:     []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
		},
		{
			Destination: "/dev/mqueue",
			Type:        "mqueue",
			Source:      "mqueue",
			Options:     []string{"nosuid", "noexec", "nodev"},
		},
		{
			Destination: "/sys",
			Type:        "sysfs",
			Source:      "sysfs",
			Options:     []string{"nosuid", "noexec", "nodev", "ro"},
		},
		{
			Destination: "/sys/fs/cgroup",
			Type:        "cgroup",
			Source:      "cgroup",
			Options:     []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
		},
	}

	// Linux-specific settings: masked and read-only paths, the device cgroup
	// rule, and the namespaces of the container.
	linux := &specs.Linux{
		MaskedPaths: []string{
			"/proc/kcore",
			"/proc/latency_stats",
			"/proc/timer_list",
			"/proc/timer_stats",
			"/proc/sched_debug",
			"/sys/firmware",
		},
		ReadonlyPaths: []string{
			"/proc/asound",
			"/proc/bus",
			"/proc/fs",
			"/proc/irq",
			"/proc/sys",
			"/proc/sysrq-trigger",
		},
		Resources: &specs.LinuxResources{
			Devices: []specs.LinuxDeviceCgroup{{
				Allow:  false,
				Access: "rwm",
			}},
		},
		Namespaces: []specs.LinuxNamespace{
			{Type: "pid"},
			{Type: "network"},
			{Type: "ipc"},
			{Type: "uts"},
			{Type: "mount"},
		},
	}

	return &specs.Spec{
		Version: specs.Version,
		Platform: specs.Platform{
			OS:   runtime.GOOS,
			Arch: runtime.GOARCH,
		},
		Root: specs.Root{
			Path:     "rootfs",
			Readonly: true,
		},
		Process:  process,
		Hostname: "runc",
		Mounts:   mounts,
		Linux:    linux,
	}
}
// ToRootless converts the given spec, in place, into one that should work
// with rootless containers. It was written against the spec produced by
// Example(). The following changes are made to spec:
//
//   - network and user namespace entries are removed, and a single user
//     namespace entry (without a path) is appended;
//   - the UID and GID mappings are replaced by a mapping of the effective
//     uid/gid of the calling process to ID 0 inside the container (size 1);
//   - mounts whose destination is /sys or below it are dropped and replaced
//     by a single read-only rbind mount of /sys;
//   - "uid=" and "gid=" options are stripped from the remaining mounts;
//   - the cgroup resource settings are cleared.
//
// A spec without a Linux section gets an empty one before conversion.
func ToRootless(spec *specs.Spec) {
	// Tolerate specs that have no Linux section instead of dereferencing nil.
	if spec.Linux == nil {
		spec.Linux = &specs.Linux{}
	}

	var namespaces []specs.LinuxNamespace

	// Drop the network namespace, and any existing user namespace entry so
	// that the one appended below is the only one.
	for _, ns := range spec.Linux.Namespaces {
		switch ns.Type {
		case specs.NetworkNamespace, specs.UserNamespace:
			// Do nothing.
		default:
			namespaces = append(namespaces, ns)
		}
	}
	// Add userns to the spec.
	namespaces = append(namespaces, specs.LinuxNamespace{
		Type: specs.UserNamespace,
	})
	spec.Linux.Namespaces = namespaces

	// Add mappings for the current user.
	spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
		HostID:      uint32(os.Geteuid()),
		ContainerID: 0,
		Size:        1,
	}}
	spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
		HostID:      uint32(os.Getegid()),
		ContainerID: 0,
		Size:        1,
	}}

	// Fix up mounts.
	var mounts []specs.Mount
	for _, mount := range spec.Mounts {
		// Ignore all mounts that are at or under /sys; they are replaced by
		// the rbind mount appended below.
		if isSysMount(mount.Destination) {
			continue
		}

		// Remove all gid= and uid= mappings.
		var options []string
		for _, option := range mount.Options {
			if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
				options = append(options, option)
			}
		}

		mount.Options = options
		mounts = append(mounts, mount)
	}
	// Add the sysfs mount as an rbind.
	mounts = append(mounts, specs.Mount{
		Source:      "/sys",
		Destination: "/sys",
		Type:        "none",
		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
	})
	spec.Mounts = mounts

	// Remove cgroup settings.
	spec.Linux.Resources = nil
}

// isSysMount reports whether dest is /sys itself or a path below it. A plain
// prefix match on "/sys" would also catch unrelated paths such as /sysroot.
func isSysMount(dest string) bool {
	return dest == "/sys" || strings.HasPrefix(dest, "/sys/")
}

View File

@ -145,6 +145,7 @@ type CreateOpts struct {
NoPivotRoot bool NoPivotRoot bool
NoNewKeyring bool NoNewKeyring bool
Spec *specs.Spec Spec *specs.Spec
Rootless bool
} }
// CreateLibcontainerConfig creates a new libcontainer configuration from a // CreateLibcontainerConfig creates a new libcontainer configuration from a
@ -175,6 +176,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Hostname: spec.Hostname, Hostname: spec.Hostname,
Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)),
NoNewKeyring: opts.NoNewKeyring, NoNewKeyring: opts.NoNewKeyring,
Rootless: opts.Rootless,
} }
exists := false exists := false
@ -208,7 +210,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
if err := setupUserNamespace(spec, config); err != nil { if err := setupUserNamespace(spec, config); err != nil {
return nil, err return nil, err
} }
c, err := createCgroupConfig(opts.CgroupName, opts.UseSystemdCgroup, spec) c, err := createCgroupConfig(opts)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -264,8 +266,14 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
} }
} }
func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) { func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
var myCgroupPath string var (
myCgroupPath string
spec = opts.Spec
useSystemdCgroup = opts.UseSystemdCgroup
name = opts.CgroupName
)
c := &configs.Cgroup{ c := &configs.Cgroup{
Resources: &configs.Resources{}, Resources: &configs.Resources{},
@ -301,9 +309,14 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
c.Path = myCgroupPath c.Path = myCgroupPath
} }
c.Resources.AllowedDevices = allowedDevices // In rootless containers, any attempt to make cgroup changes will fail.
if spec.Linux == nil { // libcontainer will validate this and we shouldn't add any cgroup options
return c, nil // the user didn't specify.
if !opts.Rootless {
c.Resources.AllowedDevices = allowedDevices
if spec.Linux == nil {
return c, nil
}
} }
r := spec.Linux.Resources r := spec.Linux.Resources
if r == nil { if r == nil {
@ -340,8 +353,10 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
} }
c.Resources.Devices = append(c.Resources.Devices, dd) c.Resources.Devices = append(c.Resources.Devices, dd)
} }
// append the default allowed devices to the end of the list if !opts.Rootless {
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) // append the default allowed devices to the end of the list
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
}
if r.Memory != nil { if r.Memory != nil {
if r.Memory.Limit != nil { if r.Memory.Limit != nil {
c.Resources.Memory = *r.Memory.Limit c.Resources.Memory = *r.Memory.Limit
@ -595,11 +610,11 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
for _, m := range spec.Linux.GIDMappings { for _, m := range spec.Linux.GIDMappings {
config.GidMappings = append(config.GidMappings, create(m)) config.GidMappings = append(config.GidMappings, create(m))
} }
rootUID, err := config.HostUID() rootUID, err := config.HostRootUID()
if err != nil { if err != nil {
return err return err
} }
rootGID, err := config.HostGID() rootGID, err := config.HostRootGID()
if err != nil { if err != nil {
return err return err
} }

View File

@ -5,6 +5,7 @@ package specconv
import ( import (
"testing" "testing"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
) )
@ -16,7 +17,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) {
CgroupsPath: cgroupsPath, CgroupsPath: cgroupsPath,
} }
cgroup, err := createCgroupConfig("ContainerID", false, spec) opts := &CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
}
cgroup, err := createCgroupConfig(opts)
if err != nil { if err != nil {
t.Errorf("Couldn't create Cgroup config: %v", err) t.Errorf("Couldn't create Cgroup config: %v", err)
} }
@ -28,8 +35,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) {
func TestLinuxCgroupsPathNotSpecified(t *testing.T) { func TestLinuxCgroupsPathNotSpecified(t *testing.T) {
spec := &specs.Spec{} spec := &specs.Spec{}
opts := &CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
}
cgroup, err := createCgroupConfig("ContainerID", false, spec) cgroup, err := createCgroupConfig(opts)
if err != nil { if err != nil {
t.Errorf("Couldn't create Cgroup config: %v", err) t.Errorf("Couldn't create Cgroup config: %v", err)
} }
@ -39,6 +51,27 @@ func TestLinuxCgroupsPathNotSpecified(t *testing.T) {
} }
} }
// TestSpecconvExampleValidate checks that the spec returned by Example() can
// be converted into a libcontainer config, and that the resulting config
// passes validation.
func TestSpecconvExampleValidate(t *testing.T) {
	spec := Example()
	// Use "/" instead of the example's "rootfs" path.
	// NOTE(review): presumably so that the rootfs check of the validator
	// finds an existing directory -- confirm.
	spec.Root.Path = "/"

	opts := &CreateOpts{
		CgroupName:       "ContainerID",
		UseSystemdCgroup: false,
		Spec:             spec,
	}

	config, err := CreateLibcontainerConfig(opts)
	if err != nil {
		// Stop here: there is no config to hand to the validator.
		t.Fatalf("Couldn't create libcontainer config: %v", err)
	}

	validator := validate.New()
	if err := validator.Validate(config); err != nil {
		t.Errorf("Expected specconv to produce valid container config: %v", err)
	}
}
func TestDupNamespaces(t *testing.T) { func TestDupNamespaces(t *testing.T) {
spec := &specs.Spec{ spec := &specs.Spec{
Linux: &specs.Linux{ Linux: &specs.Linux{
@ -62,3 +95,26 @@ func TestDupNamespaces(t *testing.T) {
t.Errorf("Duplicated namespaces should be forbidden") t.Errorf("Duplicated namespaces should be forbidden")
} }
} }
// TestRootlessSpecconvValidate checks that the Example() spec, after being
// passed through ToRootless, can be converted into a rootless libcontainer
// config, and that the resulting config passes validation.
func TestRootlessSpecconvValidate(t *testing.T) {
	spec := Example()
	// Use "/" instead of the example's "rootfs" path.
	// NOTE(review): presumably so that the rootfs check of the validator
	// finds an existing directory -- confirm.
	spec.Root.Path = "/"
	ToRootless(spec)

	opts := &CreateOpts{
		CgroupName:       "ContainerID",
		UseSystemdCgroup: false,
		Spec:             spec,
		Rootless:         true,
	}

	config, err := CreateLibcontainerConfig(opts)
	if err != nil {
		// Stop here: there is no config to hand to the validator.
		t.Fatalf("Couldn't create libcontainer config: %v", err)
	}

	validator := validate.New()
	if err := validator.Validate(config); err != nil {
		t.Errorf("Expected specconv to produce valid rootless container config: %v", err)
	}
}

19
list.go
View File

@ -7,12 +7,14 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"syscall"
"text/tabwriter" "text/tabwriter"
"time" "time"
"encoding/json" "encoding/json"
"github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
"github.com/urfave/cli" "github.com/urfave/cli"
) )
@ -38,6 +40,8 @@ type containerState struct {
Created time.Time `json:"created"` Created time.Time `json:"created"`
// Annotations is the user defined annotations added to the config. // Annotations is the user defined annotations added to the config.
Annotations map[string]string `json:"annotations,omitempty"` Annotations map[string]string `json:"annotations,omitempty"`
// The owner of the state directory (the owner of the container).
Owner string `json:"owner"`
} }
var listCommand = cli.Command{ var listCommand = cli.Command{
@ -85,14 +89,15 @@ To list containers created using a non-default value for "--root":
switch context.String("format") { switch context.String("format") {
case "table": case "table":
w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0) w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0)
fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\n") fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n")
for _, item := range s { for _, item := range s {
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n", fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n",
item.ID, item.ID,
item.InitProcessPid, item.InitProcessPid,
item.Status, item.Status,
item.Bundle, item.Bundle,
item.Created.Format(time.RFC3339Nano)) item.Created.Format(time.RFC3339Nano),
item.Owner)
} }
if err := w.Flush(); err != nil { if err := w.Flush(); err != nil {
return err return err
@ -126,6 +131,13 @@ func getContainers(context *cli.Context) ([]containerState, error) {
var s []containerState var s []containerState
for _, item := range list { for _, item := range list {
if item.IsDir() { if item.IsDir() {
// This cast is safe on Linux.
stat := item.Sys().(*syscall.Stat_t)
owner, err := user.LookupUid(int(stat.Uid))
if err != nil {
owner.Name = string(stat.Uid)
}
container, err := factory.Load(item.Name()) container, err := factory.Load(item.Name())
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err) fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err)
@ -155,6 +167,7 @@ func getContainers(context *cli.Context) ([]containerState, error) {
Rootfs: state.BaseState.Config.Rootfs, Rootfs: state.BaseState.Config.Rootfs,
Created: state.BaseState.Created, Created: state.BaseState.Created,
Annotations: annotations, Annotations: annotations,
Owner: owner.Name,
}) })
} }
} }

5
ps.go
View File

@ -28,6 +28,11 @@ var psCommand = cli.Command{
if err := checkArgs(context, 1, minArgs); err != nil { if err := checkArgs(context, 1, minArgs); err != nil {
return err return err
} }
// XXX: Currently not supported with rootless containers.
if isRootless() {
return fmt.Errorf("runc ps requires root")
}
container, err := getContainer(context) container, err := getContainer(context)
if err != nil { if err != nil {
return err return err

View File

@ -3,6 +3,7 @@
package main package main
import ( import (
"fmt"
"os" "os"
"syscall" "syscall"
@ -86,6 +87,11 @@ using the runc checkpoint command.`,
if err := checkArgs(context, 1, exactArgs); err != nil { if err := checkArgs(context, 1, exactArgs); err != nil {
return err return err
} }
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc restore requires root")
}
imagePath := context.String("image-path") imagePath := context.String("image-path")
id := context.Args().First() id := context.Args().First()
if id == "" { if id == "" {

157
spec.go
View File

@ -10,6 +10,7 @@ import (
"runtime" "runtime"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli" "github.com/urfave/cli"
) )
@ -63,156 +64,20 @@ container on your host.`,
Value: "", Value: "",
Usage: "path to the root of the bundle directory", Usage: "path to the root of the bundle directory",
}, },
cli.BoolFlag{
Name: "rootless",
Usage: "generate a configuration for a rootless container",
},
}, },
Action: func(context *cli.Context) error { Action: func(context *cli.Context) error {
if err := checkArgs(context, 0, exactArgs); err != nil { if err := checkArgs(context, 0, exactArgs); err != nil {
return err return err
} }
spec := specs.Spec{ spec := specconv.Example()
Version: specs.Version,
Platform: specs.Platform{ rootless := context.Bool("rootless")
OS: runtime.GOOS, if rootless {
Arch: runtime.GOARCH, specconv.ToRootless(spec)
},
Root: specs.Root{
Path: "rootfs",
Readonly: true,
},
Process: specs.Process{
Terminal: true,
User: specs.User{},
Args: []string{
"sh",
},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
},
Cwd: "/",
NoNewPrivileges: true,
Capabilities: &specs.LinuxCapabilities{
Bounding: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Permitted: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Inheritable: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Ambient: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
Effective: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
},
Rlimits: []specs.LinuxRlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
Soft: uint64(1024),
},
},
},
Hostname: "runc",
Mounts: []specs.Mount{
{
Destination: "/proc",
Type: "proc",
Source: "proc",
Options: nil,
},
{
Destination: "/dev",
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
{
Destination: "/dev/pts",
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
},
{
Destination: "/dev/shm",
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
},
{
Destination: "/dev/mqueue",
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/sys",
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev", "ro"},
},
{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
},
},
Linux: &specs.Linux{
MaskedPaths: []string{
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
Resources: &specs.LinuxResources{
Devices: []specs.LinuxDeviceCgroup{
{
Allow: false,
Access: "rwm",
},
},
},
Namespaces: []specs.LinuxNamespace{
{
Type: "pid",
},
{
Type: "network",
},
{
Type: "ipc",
},
{
Type: "uts",
},
{
Type: "mount",
},
},
},
} }
checkNoFile := func(name string) error { checkNoFile := func(name string) error {
@ -234,7 +99,7 @@ container on your host.`,
if err := checkNoFile(specConfig); err != nil { if err := checkNoFile(specConfig); err != nil {
return err return err
} }
data, err := json.MarshalIndent(&spec, "", "\t") data, err := json.MarshalIndent(spec, "", "\t")
if err != nil { if err != nil {
return err return err
} }

View File

@ -28,7 +28,9 @@ function check_cgroup_value() {
} }
@test "runc update --kernel-memory (initialized)" { @test "runc update --kernel-memory (initialized)" {
requires cgroups_kmem # XXX: currently cgroups require root containers.
requires cgroups_kmem root
# Add cgroup path # Add cgroup path
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
@ -56,7 +58,9 @@ EOF
} }
@test "runc update --kernel-memory (uninitialized)" { @test "runc update --kernel-memory (uninitialized)" {
requires cgroups_kmem # XXX: currently cgroups require root containers.
requires cgroups_kmem root
# Add cgroup path # Add cgroup path
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json

View File

@ -12,7 +12,8 @@ function teardown() {
} }
@test "checkpoint and restore" { @test "checkpoint and restore" {
requires criu # XXX: currently criu require root containers.
requires criu root
# criu does not work with external terminals so.. # criu does not work with external terminals so..
# setting terminal and root:readonly: to false # setting terminal and root:readonly: to false
@ -58,8 +59,9 @@ function teardown() {
[[ "${output}" == *"running"* ]] [[ "${output}" == *"running"* ]]
} }
@test "checkpoint(pre-dump) and restore" { @test "checkpoint --pre-dump and restore" {
requires criu # XXX: currently criu require root containers.
requires criu root
# criu does not work with external terminals so.. # criu does not work with external terminals so..
# setting terminal and root:readonly: to false # setting terminal and root:readonly: to false

View File

@ -22,11 +22,13 @@ function teardown() {
testcontainer test_busybox running testcontainer test_busybox running
runc kill test_busybox KILL runc kill test_busybox KILL
[ "$status" -eq 0 ]
# wait for busybox to be in the destroyed state # wait for busybox to be in the destroyed state
retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'"
# delete test_busybox # delete test_busybox
runc delete test_busybox runc delete test_busybox
[ "$status" -eq 0 ]
runc state test_busybox runc state test_busybox
[ "$status" -ne 0 ] [ "$status" -ne 0 ]

View File

@ -12,6 +12,9 @@ function teardown() {
} }
@test "events --stats" { @test "events --stats" {
# XXX: currently cgroups require root containers.
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -27,6 +30,9 @@ function teardown() {
} }
@test "events --interval default " { @test "events --interval default " {
# XXX: currently cgroups require root containers.
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -54,6 +60,9 @@ function teardown() {
} }
@test "events --interval 1s " { @test "events --interval 1s " {
# XXX: currently cgroups require root containers.
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -80,6 +89,9 @@ function teardown() {
} }
@test "events --interval 100ms " { @test "events --interval 100ms " {
# XXX: currently cgroups require root containers.
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]

View File

@ -112,6 +112,9 @@ function teardown() {
} }
@test "runc exec --user" { @test "runc exec --user" {
# --user can't work in rootless containers
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]

View File

@ -57,6 +57,7 @@ load helpers
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
[[ ${lines[1]} =~ runc\ resume+ ]] [[ ${lines[1]} =~ runc\ resume+ ]]
# We don't use runc_spec here, because we're just testing the help page.
runc spec -h runc spec -h
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
[[ ${lines[1]} =~ runc\ spec+ ]] [[ ${lines[1]} =~ runc\ spec+ ]]

View File

@ -4,7 +4,7 @@
INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")") INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")")
RUNC="${INTEGRATION_ROOT}/../../runc" RUNC="${INTEGRATION_ROOT}/../../runc"
RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty" RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty"
GOPATH="${INTEGRATION_ROOT}/../../../.." GOPATH="$(mktemp -d --tmpdir runc-integration-gopath.XXXXXX)"
# Test data path. # Test data path.
TESTDATA="${INTEGRATION_ROOT}/testdata" TESTDATA="${INTEGRATION_ROOT}/testdata"
@ -27,7 +27,7 @@ KERNEL_MINOR="${KERNEL_VERSION#$KERNEL_MAJOR.}"
KERNEL_MINOR="${KERNEL_MINOR%%.*}" KERNEL_MINOR="${KERNEL_MINOR%%.*}"
# Root state path. # Root state path.
ROOT="$BATS_TMPDIR/runc" ROOT=$(mktemp -d "$BATS_TMPDIR/runc.XXXXXX")
# Path to console socket. # Path to console socket.
CONSOLE_SOCKET="$BATS_TMPDIR/console.sock" CONSOLE_SOCKET="$BATS_TMPDIR/console.sock"
@ -40,6 +40,9 @@ CGROUP_CPU_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~
KMEM="${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes" KMEM="${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes"
RT_PERIOD="${CGROUP_CPU_BASE_PATH}/cpu.rt_period_us" RT_PERIOD="${CGROUP_CPU_BASE_PATH}/cpu.rt_period_us"
# Check if we're in rootless mode.
ROOTLESS=$(id -u)
# Wrapper for runc. # Wrapper for runc.
function runc() { function runc() {
run __runc "$@" run __runc "$@"
@ -55,6 +58,17 @@ function __runc() {
"$RUNC" --root "$ROOT" "$@" "$RUNC" --root "$ROOT" "$@"
} }
# Wrapper for "runc spec": forwards all arguments to "runc spec", and
# additionally passes --rootless when ROOTLESS is non-zero.
function runc_spec() {
	local -a extra=()

	if [ "$ROOTLESS" -ne 0 ]; then
		extra+=("--rootless")
	fi

	runc spec "${extra[@]}" "$@"
}
# Fails the current test, providing the error given. # Fails the current test, providing the error given.
function fail() { function fail() {
echo "$@" >&2 echo "$@" >&2
@ -68,7 +82,12 @@ function requires() {
case $var in case $var in
criu) criu)
if [ ! -e "$CRIU" ]; then if [ ! -e "$CRIU" ]; then
skip "Test requires ${var}." skip "test requires ${var}"
fi
;;
root)
if [ "$ROOTLESS" -ne 0 ]; then
skip "test requires ${var}"
fi fi
;; ;;
cgroups_kmem) cgroups_kmem)
@ -179,18 +198,18 @@ function setup_busybox() {
if [ ! -e $BUSYBOX_IMAGE ]; then if [ ! -e $BUSYBOX_IMAGE ]; then
curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz' curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz'
fi fi
tar -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE" tar --exclude './dev/*' -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE"
cd "$BUSYBOX_BUNDLE" cd "$BUSYBOX_BUNDLE"
runc spec runc_spec
} }
function setup_hello() { function setup_hello() {
setup_recvtty setup_recvtty
run mkdir "$HELLO_BUNDLE" run mkdir "$HELLO_BUNDLE"
run mkdir "$HELLO_BUNDLE"/rootfs run mkdir "$HELLO_BUNDLE"/rootfs
tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" tar --exclude './dev/*' -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE"
cd "$HELLO_BUNDLE" cd "$HELLO_BUNDLE"
runc spec runc_spec
sed -i 's;"sh";"/hello";' config.json sed -i 's;"sh";"/hello";' config.json
} }

View File

@ -13,7 +13,6 @@ function teardown() {
@test "kill detached busybox" { @test "kill detached busybox" {
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]

View File

@ -12,6 +12,9 @@ function teardown() {
} }
@test "runc pause and resume" { @test "runc pause and resume" {
# XXX: currently cgroups require root containers.
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -34,6 +37,9 @@ function teardown() {
} }
@test "runc pause and resume with nonexist container" { @test "runc pause and resume with nonexist container" {
# XXX: currently cgroups require root containers.
requires root
# run test_busybox detached # run test_busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]

View File

@ -12,6 +12,9 @@ function teardown() {
} }
@test "ps" { @test "ps" {
# ps is not supported, it requires cgroups
requires root
# start busybox detached # start busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -24,10 +27,13 @@ function teardown() {
runc ps test_busybox runc ps test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
[[ ${lines[0]} =~ UID\ +PID\ +PPID\ +C\ +STIME\ +TTY\ +TIME\ +CMD+ ]] [[ ${lines[0]} =~ UID\ +PID\ +PPID\ +C\ +STIME\ +TTY\ +TIME\ +CMD+ ]]
[[ "${lines[1]}" == *"root"*[0-9]* ]] [[ "${lines[1]}" == *"$(id -un 2>/dev/null)"*[0-9]* ]]
} }
@test "ps -f json" { @test "ps -f json" {
# ps is not supported, it requires cgroups
requires root
# start busybox detached # start busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -43,6 +49,9 @@ function teardown() {
} }
@test "ps -e -x" { @test "ps -e -x" {
# ps is not supported, it requires cgroups
requires root
# start busybox detached # start busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]

View File

@ -26,7 +26,7 @@ function teardown() {
[ ! -e config.json ] [ ! -e config.json ]
# test generation of spec does not return an error # test generation of spec does not return an error
runc spec runc_spec
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
# test generation of spec created our config.json (spec) # test generation of spec created our config.json (spec)
@ -51,7 +51,7 @@ function teardown() {
[ ! -e "$HELLO_BUNDLE"/config.json ] [ ! -e "$HELLO_BUNDLE"/config.json ]
# test generation of spec does not return an error # test generation of spec does not return an error
runc spec --bundle "$HELLO_BUNDLE" runc_spec --bundle "$HELLO_BUNDLE"
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
# test generation of spec created our config.json (spec) # test generation of spec created our config.json (spec)

View File

@ -23,6 +23,9 @@ function teardown() {
} }
@test "runc run detached ({u,g}id != 0)" { @test "runc run detached ({u,g}id != 0)" {
# cannot start containers as another user in rootless setup
requires root
# replace "uid": 0 with "uid": 1000 # replace "uid": 0 with "uid": 1000
# and do a similar thing for gid. # and do a similar thing for gid.
sed -i 's;"uid": 0;"uid": 1000;g' config.json sed -i 's;"uid": 0;"uid": 1000;g' config.json

View File

@ -21,6 +21,9 @@ function teardown() {
} }
@test "runc run ({u,g}id != 0)" { @test "runc run ({u,g}id != 0)" {
# cannot start containers as another user in rootless setup
requires root
# replace "uid": 0 with "uid": 1000 # replace "uid": 0 with "uid": 1000
# and do a similar thing for gid. # and do a similar thing for gid.
sed -i 's;"uid": 0;"uid": 1000;g' config.json sed -i 's;"uid": 0;"uid": 1000;g' config.json

View File

@ -11,7 +11,37 @@ function teardown() {
teardown_busybox teardown_busybox
} }
@test "state" { @test "state (kill + delete)" {
runc state test_busybox
[ "$status" -ne 0 ]
# run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
wait_for_container 15 1 test_busybox
testcontainer test_busybox running
runc kill test_busybox KILL
[ "$status" -eq 0 ]
# wait for busybox to be in the destroyed state
retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'"
# delete test_busybox
runc delete test_busybox
[ "$status" -eq 0 ]
runc state test_busybox
[ "$status" -ne 0 ]
}
@test "state (pause + resume)" {
# XXX: pause and resume require cgroups.
requires root
runc state test_busybox runc state test_busybox
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
@ -37,14 +67,4 @@ function teardown() {
# test state of busybox is back to running # test state of busybox is back to running
testcontainer test_busybox running testcontainer test_busybox running
runc kill test_busybox KILL
# wait for busybox to be in the destroyed state
retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'"
# delete test_busybox
runc delete test_busybox
runc state test_busybox
[ "$status" -ne 0 ]
} }

View File

@ -24,6 +24,10 @@ function teardown() {
} }
@test "runc run [tty owner]" { @test "runc run [tty owner]" {
# tty chmod is not doable in rootless containers.
# TODO: this can be made as a change to the gid test.
requires root
# Replace sh script with stat. # Replace sh script with stat.
sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json
@ -36,6 +40,9 @@ function teardown() {
} }
@test "runc run [tty owner] ({u,g}id != 0)" { @test "runc run [tty owner] ({u,g}id != 0)" {
# tty chmod is not doable in rootless containers.
requires root
# replace "uid": 0 with "uid": 1000 # replace "uid": 0 with "uid": 1000
# and do a similar thing for gid. # and do a similar thing for gid.
sed -i 's;"uid": 0;"uid": 1000;g' config.json sed -i 's;"uid": 0;"uid": 1000;g' config.json
@ -72,6 +79,10 @@ function teardown() {
} }
@test "runc exec [tty owner]" { @test "runc exec [tty owner]" {
# tty chmod is not doable in rootless containers.
# TODO: this can be made as a change to the gid test.
requires root
# run busybox detached # run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
@ -90,6 +101,9 @@ function teardown() {
} }
@test "runc exec [tty owner] ({u,g}id != 0)" { @test "runc exec [tty owner] ({u,g}id != 0)" {
# tty chmod is not doable in rootless containers.
requires root
# replace "uid": 0 with "uid": 1000 # replace "uid": 0 with "uid": 1000
# and do a similar thing for gid. # and do a similar thing for gid.
sed -i 's;"uid": 0;"uid": 1000;g' config.json sed -i 's;"uid": 0;"uid": 1000;g' config.json

View File

@ -50,7 +50,11 @@ function check_cgroup_value() {
# TODO: test rt cgroup updating # TODO: test rt cgroup updating
@test "update" { @test "update" {
requires cgroups_kmem # XXX: currently cgroups require root containers.
# XXX: Also, this test should be split into separate sections so that we
# can skip kmem without skipping update tests overall.
requires cgroups_kmem root
# run a few busyboxes detached # run a few busyboxes detached
runc run -d --console-socket $CONSOLE_SOCKET test_update runc run -d --console-socket $CONSOLE_SOCKET test_update
[ "$status" -eq 0 ] [ "$status" -eq 0 ]

View File

@ -63,9 +63,6 @@ func setupSpec(context *cli.Context) (*specs.Spec, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
if os.Geteuid() != 0 {
return nil, fmt.Errorf("runc should be run as root")
}
return spec, nil return spec, nil
} }

View File

@ -186,6 +186,11 @@ func createPidFile(path string, process *libcontainer.Process) error {
return os.Rename(tmpName, path) return os.Rename(tmpName, path)
} }
// isRootless reports whether the effective UID of the current process is
// non-zero.
// XXX: Rootless mode is currently autodetected this way.
func isRootless() bool {
	euid := os.Geteuid()
	return euid != 0
}
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) { func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
CgroupName: id, CgroupName: id,
@ -193,6 +198,7 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcont
NoPivotRoot: context.Bool("no-pivot"), NoPivotRoot: context.Bool("no-pivot"),
NoNewKeyring: context.Bool("no-new-keyring"), NoNewKeyring: context.Bool("no-new-keyring"),
Spec: spec, Spec: spec,
Rootless: isRootless(),
}) })
if err != nil { if err != nil {
return nil, err return nil, err
@ -236,12 +242,12 @@ func (r *runner) run(config *specs.Process) (int, error) {
for i := baseFd; i < baseFd+r.preserveFDs; i++ { for i := baseFd; i < baseFd+r.preserveFDs; i++ {
process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i))) process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
} }
rootuid, err := r.container.Config().HostUID() rootuid, err := r.container.Config().HostRootUID()
if err != nil { if err != nil {
r.destroy() r.destroy()
return -1, err return -1, err
} }
rootgid, err := r.container.Config().HostGID() rootgid, err := r.container.Config().HostRootGID()
if err != nil { if err != nil {
r.destroy() r.destroy()
return -1, err return -1, err