Merge pull request #774 from cyphar/rootless-containers
Rootless Containers
This commit is contained in:
commit
653207bc29
|
@ -6,6 +6,7 @@ RUN echo 'deb http://httpredir.debian.org/debian jessie-backports main' > /etc/a
|
|||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
curl \
|
||||
sudo \
|
||||
gawk \
|
||||
iptables \
|
||||
jq \
|
||||
|
@ -22,6 +23,12 @@ RUN apt-get update && apt-get install -y \
|
|||
--no-install-recommends \
|
||||
&& apt-get clean
|
||||
|
||||
# Add a dummy user for the rootless integration tests. While runC does
|
||||
# not require an entry in /etc/passwd to operate, one of the tests uses
|
||||
# `git clone` -- and `git clone` does not allow you to clone a
|
||||
# repository if the current uid does not have an entry in /etc/passwd.
|
||||
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
|
||||
|
||||
# install bats
|
||||
RUN cd /tmp \
|
||||
&& git clone https://github.com/sstephenson/bats.git \
|
||||
|
|
13
Makefile
13
Makefile
|
@ -4,7 +4,7 @@
|
|||
|
||||
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
|
||||
PREFIX := $(DESTDIR)/usr/local
|
||||
BINDIR := $(PREFIX)/sbin
|
||||
BINDIR := $(PREFIX)/bin
|
||||
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
|
||||
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
|
||||
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
|
||||
|
@ -79,10 +79,10 @@ runcimage:
|
|||
docker build -t $(RUNC_IMAGE) .
|
||||
|
||||
test:
|
||||
make unittest integration
|
||||
make unittest integration rootlessintegration
|
||||
|
||||
localtest:
|
||||
make localunittest localintegration
|
||||
make localunittest localintegration localrootlessintegration
|
||||
|
||||
unittest: runcimage
|
||||
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest
|
||||
|
@ -96,6 +96,13 @@ integration: runcimage
|
|||
localintegration: all
|
||||
bats -t tests/integration${TESTFLAGS}
|
||||
|
||||
rootlessintegration: runcimage
|
||||
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) --cap-drop=ALL -u rootless $(RUNC_IMAGE) make localintegration
|
||||
|
||||
# FIXME: This should not be separate from rootlessintegration's method of running.
|
||||
localrootlessintegration: all
|
||||
sudo -u rootless -H PATH="${PATH}" bats -t tests/integration${TESTFLAGS}
|
||||
|
||||
shell: all
|
||||
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
|
||||
|
||||
|
|
|
@ -39,6 +39,11 @@ checkpointed.`,
|
|||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX: Currently this is untested with rootless containers.
|
||||
if isRootless() {
|
||||
return fmt.Errorf("runc checkpoint requires root")
|
||||
}
|
||||
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
3
exec.go
3
exec.go
|
@ -90,9 +90,6 @@ following will output a list of processes running in the container:
|
|||
if err := checkArgs(context, 1, minArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
if os.Geteuid() != 0 {
|
||||
return fmt.Errorf("runc should be run as root")
|
||||
}
|
||||
if err := revisePidFile(context); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
|
||||
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
initPath, err := cgroups.GetThisCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relDir, err := filepath.Rel(root, initPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(mountpoint, relDir), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
|
||||
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
|
|||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
parentPath, err := raw.parentPath(subsystem, mnt, root)
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
// +build linux
|
||||
|
||||
package rootless
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
)
|
||||
|
||||
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
|
||||
// needlessly. We should probably export this list.
|
||||
|
||||
var subsystems = []subsystem{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
}
|
||||
|
||||
// The noop cgroup manager is used for rootless containers, because we currently
|
||||
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
|
||||
// by factory if we are in rootless mode. We error out if any cgroup options are
|
||||
// set in the config -- this may change in the future with upcoming kernel features
|
||||
// like the cgroup namespace.
|
||||
|
||||
type Manager struct {
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
// If there are no cgroup settings, there's nothing to do.
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// We can't set paths.
|
||||
// TODO(cyphar): Implement the case where the runner of a rootless container
|
||||
// owns their own cgroup, which would allow us to set up a
|
||||
// cgroup for each path.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return fmt.Errorf("cannot change cgroup path in rootless container")
|
||||
}
|
||||
|
||||
// We load the paths into the manager.
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
|
||||
path, err := cgroups.GetOwnCgroupPath(name)
|
||||
if err != nil {
|
||||
// Ignore paths we couldn't resolve.
|
||||
continue
|
||||
}
|
||||
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return m.Paths
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
// We have to re-do the validation here, since someone might decide to
|
||||
// update a rootless container.
|
||||
return validate.New().Validate(container)
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
dir, err := cgroups.GetOwnCgroupPath("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
dir, err := cgroups.GetOwnCgroupPath("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
// TODO(cyphar): We can make this work if we figure out a way to allow usage
|
||||
// of cgroups with a rootless container. While this doesn't
|
||||
// actually require write access to a cgroup directory, the
|
||||
// statistics are not useful if they can be affected by
|
||||
// non-container processes.
|
||||
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
// TODO(cyphar): We can make this work if we figure out a way to allow usage
|
||||
// of cgroups with a rootless container.
|
||||
return fmt.Errorf("cannot use freezer cgroup in rootless container")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
// We don't have to do anything here because we didn't do any setup.
|
||||
return nil
|
||||
}
|
|
@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
||||
initPath, err := cgroups.GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ type Mount struct {
|
|||
Subsystems []string
|
||||
}
|
||||
|
||||
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
}
|
||||
|
@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
|
|||
return subsystems, nil
|
||||
}
|
||||
|
||||
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
|
||||
func GetThisCgroupDir(subsystem string) (string, error) {
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
|
|||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupDir(subsystem string) (string, error) {
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
|
|||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
|
||||
if err != nil {
|
||||
|
|
|
@ -183,6 +183,9 @@ type Config struct {
|
|||
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
|
||||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// Rootless specifies whether the container is a rootless container.
|
||||
Rootless bool `json:"rootless"`
|
||||
}
|
||||
|
||||
type Hooks struct {
|
||||
|
|
|
@ -4,38 +4,50 @@ package configs
|
|||
|
||||
import "fmt"
|
||||
|
||||
// HostUID gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostUID() (int, error) {
|
||||
// HostUID gets the translated uid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostUID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.UidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(0, c.UidMappings)
|
||||
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return default root uid 0
|
||||
return 0, nil
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostGID gets the root gid for the process on host which could be non-zero
|
||||
// HostRootUID gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostGID() (int, error) {
|
||||
func (c Config) HostRootUID() (int, error) {
|
||||
return c.HostUID(0)
|
||||
}
|
||||
|
||||
// HostGID gets the translated gid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostGID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.GidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(0, c.GidMappings)
|
||||
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return default root gid 0
|
||||
return 0, nil
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootGID gets the root gid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootGID() (int, error) {
|
||||
return c.HostGID(0)
|
||||
}
|
||||
|
||||
// Utility function that gets a host ID for a container ID from user namespace map
|
||||
|
|
|
@ -65,11 +65,11 @@ func TestRemoveNamespace(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHostUIDNoUSERNS(t *testing.T) {
|
||||
func TestHostRootUIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
uid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ func TestHostUIDNoUSERNS(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHostUIDWithUSERNS(t *testing.T) {
|
||||
func TestHostRootUIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
UidMappings: []IDMap{
|
||||
|
@ -89,7 +89,7 @@ func TestHostUIDWithUSERNS(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
uid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -98,11 +98,11 @@ func TestHostUIDWithUSERNS(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHostGIDNoUSERNS(t *testing.T) {
|
||||
func TestHostRootGIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostGID()
|
||||
uid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ func TestHostGIDNoUSERNS(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHostGIDWithUSERNS(t *testing.T) {
|
||||
func TestHostRootGIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
GidMappings: []IDMap{
|
||||
|
@ -122,7 +122,7 @@ func TestHostGIDWithUSERNS(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostGID()
|
||||
uid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
geteuid = os.Geteuid
|
||||
getegid = os.Getegid
|
||||
)
|
||||
|
||||
func (v *ConfigValidator) rootless(config *configs.Config) error {
|
||||
if err := rootlessMappings(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rootlessMount(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// Currently, cgroups cannot effectively be used in rootless containers.
|
||||
// The new cgroup namespace doesn't really help us either because it doesn't
|
||||
// have nice interactions with the user namespace (we're working with upstream
|
||||
// to fix this).
|
||||
if err := rootlessCgroup(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX: We currently can't verify the user config at all, because
|
||||
// configs.Config doesn't store the user-related configs. So this
|
||||
// has to be verified by setupUser() in init_linux.go.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func rootlessMappings(config *configs.Config) error {
|
||||
rootuid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
|
||||
}
|
||||
if euid := geteuid(); euid != 0 {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless containers require user namespaces")
|
||||
}
|
||||
if rootuid != euid {
|
||||
return fmt.Errorf("rootless containers cannot map container root to a different host user")
|
||||
}
|
||||
}
|
||||
|
||||
rootgid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
|
||||
}
|
||||
|
||||
// Similar to the above test, we need to make sure that we aren't trying to
|
||||
// map to a group ID that we don't have the right to be.
|
||||
if rootgid != getegid() {
|
||||
return fmt.Errorf("rootless containers cannot map container root to a different host group")
|
||||
}
|
||||
|
||||
// We can only map one user and group inside a container (our own).
|
||||
if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
|
||||
return fmt.Errorf("rootless containers cannot map more than one user")
|
||||
}
|
||||
if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
|
||||
return fmt.Errorf("rootless containers cannot map more than one group")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
|
||||
func rootlessCgroup(config *configs.Config) error {
|
||||
// Nothing set at all.
|
||||
if config.Cgroups == nil || config.Cgroups.Resources == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for comparing to the zero value.
|
||||
left := reflect.ValueOf(*config.Cgroups.Resources)
|
||||
right := reflect.Zero(left.Type())
|
||||
|
||||
// This is all we need to do, since specconv won't add cgroup options in
|
||||
// rootless mode.
|
||||
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
|
||||
return fmt.Errorf("cannot specify resource limits in rootless container")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount verifies that the user isn't trying to set up any mounts they don't have
|
||||
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
|
||||
// `gid=` option that doesn't resolve to root.
|
||||
func rootlessMount(config *configs.Config) error {
|
||||
// XXX: We could whitelist allowed devices at this point, but I'm not
|
||||
// convinced that's a good idea. The kernel is the best arbiter of
|
||||
// access control.
|
||||
|
||||
for _, mount := range config.Mounts {
|
||||
// Check that the options list doesn't contain any uid= or gid= entries
|
||||
// that don't resolve to root.
|
||||
for _, opt := range strings.Split(mount.Data, ",") {
|
||||
if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
|
||||
return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
|
||||
}
|
||||
if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
|
||||
return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,195 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func init() {
|
||||
geteuid = func() int { return 1337 }
|
||||
getegid = func() int { return 7331 }
|
||||
}
|
||||
|
||||
func rootlessConfig() *configs.Config {
|
||||
return &configs.Config{
|
||||
Rootfs: "/var",
|
||||
Rootless: true,
|
||||
Namespaces: configs.Namespaces(
|
||||
[]configs.Namespace{
|
||||
{Type: configs.NEWUSER},
|
||||
},
|
||||
),
|
||||
UidMappings: []configs.IDMap{
|
||||
{
|
||||
HostID: geteuid(),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
GidMappings: []configs.IDMap{
|
||||
{
|
||||
HostID: getegid(),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRootless(t *testing.T) {
|
||||
validator := New()
|
||||
|
||||
config := rootlessConfig()
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected error to not occur: %+v", err)
|
||||
}
|
||||
}
|
||||
|
||||
/* rootlessMappings() */
|
||||
|
||||
func TestValidateRootlessUserns(t *testing.T) {
|
||||
validator := New()
|
||||
|
||||
config := rootlessConfig()
|
||||
config.Namespaces = nil
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if user namespaces not set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRootlessMappingUid(t *testing.T) {
|
||||
validator := New()
|
||||
|
||||
config := rootlessConfig()
|
||||
config.UidMappings = nil
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if no uid mappings provided")
|
||||
}
|
||||
|
||||
config = rootlessConfig()
|
||||
config.UidMappings[0].HostID = geteuid() + 1
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if geteuid() != mapped uid")
|
||||
}
|
||||
|
||||
config = rootlessConfig()
|
||||
config.UidMappings[0].Size = 1024
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if more than one uid mapped")
|
||||
}
|
||||
|
||||
config = rootlessConfig()
|
||||
config.UidMappings = append(config.UidMappings, configs.IDMap{
|
||||
HostID: geteuid() + 1,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
})
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if more than one uid extent mapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRootlessMappingGid(t *testing.T) {
|
||||
validator := New()
|
||||
|
||||
config := rootlessConfig()
|
||||
config.GidMappings = nil
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if no gid mappings provided")
|
||||
}
|
||||
|
||||
config = rootlessConfig()
|
||||
config.GidMappings[0].HostID = getegid() + 1
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if getegid() != mapped gid")
|
||||
}
|
||||
|
||||
config = rootlessConfig()
|
||||
config.GidMappings[0].Size = 1024
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if more than one gid mapped")
|
||||
}
|
||||
|
||||
config = rootlessConfig()
|
||||
config.GidMappings = append(config.GidMappings, configs.IDMap{
|
||||
HostID: getegid() + 1,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
})
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if more than one gid extent mapped")
|
||||
}
|
||||
}
|
||||
|
||||
/* rootlessMount() */
|
||||
|
||||
func TestValidateRootlessMountUid(t *testing.T) {
|
||||
config := rootlessConfig()
|
||||
validator := New()
|
||||
|
||||
config.Mounts = []*configs.Mount{
|
||||
{
|
||||
Source: "devpts",
|
||||
Destination: "/dev/pts",
|
||||
Device: "devpts",
|
||||
},
|
||||
}
|
||||
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err)
|
||||
}
|
||||
|
||||
config.Mounts[0].Data = "uid=5"
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur when setting uid=5 in mount options")
|
||||
}
|
||||
|
||||
config.Mounts[0].Data = "uid=0"
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateRootlessMountGid(t *testing.T) {
|
||||
config := rootlessConfig()
|
||||
validator := New()
|
||||
|
||||
config.Mounts = []*configs.Mount{
|
||||
{
|
||||
Source: "devpts",
|
||||
Destination: "/dev/pts",
|
||||
Device: "devpts",
|
||||
},
|
||||
}
|
||||
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err)
|
||||
}
|
||||
|
||||
config.Mounts[0].Data = "gid=5"
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur when setting gid=5 in mount options")
|
||||
}
|
||||
|
||||
config.Mounts[0].Data = "gid=0"
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err)
|
||||
}
|
||||
}
|
||||
|
||||
/* rootlessCgroup() */
|
||||
|
||||
func TestValidateRootlessCgroup(t *testing.T) {
|
||||
validator := New()
|
||||
|
||||
config := rootlessConfig()
|
||||
config.Cgroups = &configs.Cgroup{
|
||||
Resources: &configs.Resources{
|
||||
PidsLimit: 1337,
|
||||
},
|
||||
}
|
||||
if err := validator.Validate(config); err == nil {
|
||||
t.Errorf("Expected error to occur if cgroup limits set")
|
||||
}
|
||||
}
|
|
@ -40,6 +40,11 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
|
|||
if err := v.sysctl(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootless {
|
||||
if err := v.rootless(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -51,6 +51,9 @@ type State struct {
|
|||
|
||||
// Platform specific fields below here
|
||||
|
||||
// Specifies if the container was started under the rootless mode.
|
||||
Rootless bool `json:"rootless"`
|
||||
|
||||
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
|
||||
// with the value as the path.
|
||||
CgroupPaths map[string]string `json:"cgroup_paths"`
|
||||
|
@ -304,11 +307,11 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) createExecFifo() error {
|
||||
rootuid, err := c.Config().HostUID()
|
||||
rootuid, err := c.Config().HostRootUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rootgid, err := c.Config().HostGID()
|
||||
rootgid, err := c.Config().HostRootGID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -452,6 +455,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
|||
PassedFilesCount: len(process.ExtraFiles),
|
||||
ContainerId: c.ID(),
|
||||
NoNewPrivileges: c.config.NoNewPrivileges,
|
||||
Rootless: c.config.Rootless,
|
||||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
|
@ -516,10 +520,18 @@ func (c *linuxContainer) Resume() error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
||||
// XXX(cyphar): This requires cgroups.
|
||||
if c.config.Rootless {
|
||||
return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
|
||||
}
|
||||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
|
||||
// XXX(cyphar): This requires cgroups.
|
||||
if c.config.Rootless {
|
||||
return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
|
||||
}
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
}
|
||||
|
||||
|
@ -622,6 +634,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
||||
// TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
|
||||
// support for doing unprivileged dumps, but the setup of
|
||||
// rootless containers might make this complicated.
|
||||
if c.config.Rootless {
|
||||
return fmt.Errorf("cannot checkpoint a rootless container")
|
||||
}
|
||||
|
||||
if err := c.checkCriuVersion("1.5.2"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -791,6 +810,13 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
|
|||
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
||||
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
|
||||
// support for unprivileged restore at the moment.
|
||||
if c.config.Rootless {
|
||||
return fmt.Errorf("cannot restore a rootless container")
|
||||
}
|
||||
|
||||
if err := c.checkCriuVersion("1.5.2"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -918,6 +944,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
|
||||
if err := c.cgroupManager.Apply(pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1314,6 +1341,7 @@ func (c *linuxContainer) currentState() (*State, error) {
|
|||
InitProcessStartTime: startTime,
|
||||
Created: c.created,
|
||||
},
|
||||
Rootless: c.config.Rootless,
|
||||
CgroupPaths: c.cgroupManager.GetPaths(),
|
||||
NamespacePaths: make(map[configs.NamespaceType]string),
|
||||
ExternalDescriptors: externalDescriptors,
|
||||
|
@ -1441,19 +1469,34 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
|
|||
Type: GidmapAttr,
|
||||
Value: b,
|
||||
})
|
||||
// check if we have CAP_SETGID to setgroup properly
|
||||
pid, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
|
||||
r.AddData(&Boolmsg{
|
||||
Type: SetgroupAttr,
|
||||
Value: true,
|
||||
})
|
||||
// The following only applies if we are root.
|
||||
if !c.config.Rootless {
|
||||
// check if we have CAP_SETGID to setgroup properly
|
||||
pid, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
|
||||
r.AddData(&Boolmsg{
|
||||
Type: SetgroupAttr,
|
||||
Value: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write oom_score_adj
|
||||
r.AddData(&Bytemsg{
|
||||
Type: OomScoreAdjAttr,
|
||||
Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)),
|
||||
})
|
||||
|
||||
// write rootless
|
||||
r.AddData(&Boolmsg{
|
||||
Type: RootlessAttr,
|
||||
Value: c.config.Rootless,
|
||||
})
|
||||
|
||||
return bytes.NewReader(r.Serialize()), nil
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
|
@ -73,6 +74,20 @@ func Cgroupfs(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// RootlessCgroups is an options func to configure a LinuxFactory to
|
||||
// return containers that use the "rootless" cgroup manager, which will
|
||||
// fail to do any operations not possible to do with an unprivileged user.
|
||||
// It should only be used in conjunction with rootless containers.
|
||||
func RootlessCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &rootless.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
|
||||
func TmpfsRoot(l *LinuxFactory) error {
|
||||
mounted, err := mount.Mounted(l.Root)
|
||||
|
@ -149,11 +164,11 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := l.Validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
uid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
gid, err := config.HostGID()
|
||||
gid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
|
@ -169,6 +184,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := os.Chown(containerRoot, uid, gid); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if config.Rootless {
|
||||
RootlessCgroups(l)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
|
@ -195,6 +213,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
// We have to use the RootlessManager.
|
||||
if state.Rootless {
|
||||
RootlessCgroups(l)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
initProcessStartTime: state.InitProcessStartTime,
|
||||
|
|
|
@ -6,10 +6,8 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
@ -60,6 +58,7 @@ type initConfig struct {
|
|||
ContainerId string `json:"containerid"`
|
||||
Rlimits []configs.Rlimit `json:"rlimits"`
|
||||
CreateConsole bool `json:"create_console"`
|
||||
Rootless bool `json:"rootless"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
|
@ -231,18 +230,21 @@ func syncParentHooks(pipe io.ReadWriter) error {
|
|||
func setupUser(config *initConfig) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
Home: "/",
|
||||
}
|
||||
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -255,22 +257,49 @@ func setupUser(config *initConfig) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if config.Rootless {
|
||||
if execUser.Uid != 0 {
|
||||
return fmt.Errorf("cannot run as a non-root user in a rootless container")
|
||||
}
|
||||
|
||||
if execUser.Gid != 0 {
|
||||
return fmt.Errorf("cannot run as a non-root group in a rootless container")
|
||||
}
|
||||
|
||||
// We cannot set any additional groups in a rootless container and thus we
|
||||
// bail if the user asked us to do so. TODO: We currently can't do this
|
||||
// earlier, but if libcontainer.Process.User was typesafe this might work.
|
||||
if len(addGroups) > 0 {
|
||||
return fmt.Errorf("cannot set any additional groups in a rootless container")
|
||||
}
|
||||
}
|
||||
|
||||
// before we change to the container's user make sure that the processes STDIO
|
||||
// is correctly owned by the user that we are switching to.
|
||||
if err := fixStdioPermissions(execUser); err != nil {
|
||||
if err := fixStdioPermissions(config, execUser); err != nil {
|
||||
return err
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
|
||||
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
|
||||
// There's nothing we can do about /etc/group entries, so we silently
|
||||
// ignore setting groups here (since the user didn't explicitly ask us to
|
||||
// set the group).
|
||||
if !config.Rootless {
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
|
@ -283,7 +312,7 @@ func setupUser(config *initConfig) error {
|
|||
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
|
||||
// The ownership needs to match because it is created outside of the container and needs to be
|
||||
// localized.
|
||||
func fixStdioPermissions(u *user.ExecUser) error {
|
||||
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
|
||||
var null syscall.Stat_t
|
||||
if err := syscall.Stat("/dev/null", &null); err != nil {
|
||||
return err
|
||||
|
@ -297,10 +326,20 @@ func fixStdioPermissions(u *user.ExecUser) error {
|
|||
if err := syscall.Fstat(int(fd), &s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip chown of /dev/null if it was used as one of the STDIO fds.
|
||||
if s.Rdev == null.Rdev {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip chown if s.Gid is actually an unmapped gid in the host. While
|
||||
// this is a bit dodgy if it just so happens that the console _is_
|
||||
// owned by overflow_gid, there's no way for us to disambiguate this as
|
||||
// a userspace program.
|
||||
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// We only change the uid owner (as it is possible for the mount to
|
||||
// prefer a different gid, and there's no reason for us to change it).
|
||||
// The reason why we don't just leave the default uid=X mount setup is
|
||||
|
@ -369,12 +408,6 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func setOomScoreAdj(oomScoreAdj int, pid int) error {
|
||||
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
|
||||
|
||||
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
|
||||
}
|
||||
|
||||
const _P_PID = 1
|
||||
|
||||
type siginfo struct {
|
||||
|
|
|
@ -11,12 +11,15 @@ import (
|
|||
// list of known message types we want to send to bootstrap program
|
||||
// The number is randomly chosen to not conflict with known netlink types
|
||||
const (
|
||||
InitMsg uint16 = 62000
|
||||
CloneFlagsAttr uint16 = 27281
|
||||
NsPathsAttr uint16 = 27282
|
||||
UidmapAttr uint16 = 27283
|
||||
GidmapAttr uint16 = 27284
|
||||
SetgroupAttr uint16 = 27285
|
||||
InitMsg uint16 = 62000
|
||||
CloneFlagsAttr uint16 = 27281
|
||||
NsPathsAttr uint16 = 27282
|
||||
UidmapAttr uint16 = 27283
|
||||
GidmapAttr uint16 = 27284
|
||||
SetgroupAttr uint16 = 27285
|
||||
OomScoreAdjAttr uint16 = 27286
|
||||
RootlessAttr uint16 = 27287
|
||||
|
||||
// When syscall.NLA_HDRLEN is in gccgo, take this out.
|
||||
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
|
||||
)
|
||||
|
|
|
@ -72,18 +72,23 @@ struct nlconfig_t {
|
|||
char *namespaces;
|
||||
size_t namespaces_len;
|
||||
uint8_t is_setgroup;
|
||||
uint8_t is_rootless;
|
||||
char *oom_score_adj;
|
||||
size_t oom_score_adj_len;
|
||||
};
|
||||
|
||||
/*
|
||||
* List of netlink message types sent to us as part of bootstrapping the init.
|
||||
* These constants are defined in libcontainer/message_linux.go.
|
||||
*/
|
||||
#define INIT_MSG 62000
|
||||
#define INIT_MSG 62000
|
||||
#define CLONE_FLAGS_ATTR 27281
|
||||
#define NS_PATHS_ATTR 27282
|
||||
#define UIDMAP_ATTR 27283
|
||||
#define GIDMAP_ATTR 27284
|
||||
#define UIDMAP_ATTR 27283
|
||||
#define GIDMAP_ATTR 27284
|
||||
#define SETGROUP_ATTR 27285
|
||||
#define OOM_SCORE_ADJ_ATTR 27286
|
||||
#define ROOTLESS_ATTR 27287
|
||||
|
||||
/*
|
||||
* Use the raw syscall for versions of glibc which don't include a function for
|
||||
|
@ -172,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
|
|||
policy = "deny";
|
||||
break;
|
||||
case SETGROUPS_DEFAULT:
|
||||
default:
|
||||
/* Nothing to do. */
|
||||
return;
|
||||
}
|
||||
|
@ -186,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
|
|||
}
|
||||
}
|
||||
|
||||
static void update_uidmap(int pid, char *map, int map_len)
|
||||
static void update_uidmap(int pid, char *map, size_t map_len)
|
||||
{
|
||||
if (map == NULL || map_len <= 0)
|
||||
return;
|
||||
|
@ -195,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
|
|||
bail("failed to update /proc/%d/uid_map", pid);
|
||||
}
|
||||
|
||||
static void update_gidmap(int pid, char *map, int map_len)
|
||||
static void update_gidmap(int pid, char *map, size_t map_len)
|
||||
{
|
||||
if (map == NULL || map_len <= 0)
|
||||
return;
|
||||
|
@ -204,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
|
|||
bail("failed to update /proc/%d/gid_map", pid);
|
||||
}
|
||||
|
||||
static void update_oom_score_adj(char *data, size_t len)
|
||||
{
|
||||
if (data == NULL || len <= 0)
|
||||
return;
|
||||
|
||||
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
|
||||
bail("failed to update /proc/self/oom_score_adj");
|
||||
}
|
||||
|
||||
/* A dummy function that just jumps to the given jumpval. */
|
||||
static int child_func(void *arg) __attribute__ ((noinline));
|
||||
static int child_func(void *arg)
|
||||
|
@ -317,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
|
|||
case CLONE_FLAGS_ATTR:
|
||||
config->cloneflags = readint32(current);
|
||||
break;
|
||||
case ROOTLESS_ATTR:
|
||||
config->is_rootless = readint8(current);
|
||||
break;
|
||||
case OOM_SCORE_ADJ_ATTR:
|
||||
config->oom_score_adj = current;
|
||||
config->oom_score_adj_len = payload_len;
|
||||
break;
|
||||
case NS_PATHS_ATTR:
|
||||
config->namespaces = current;
|
||||
config->namespaces_len = payload_len;
|
||||
|
@ -425,14 +447,32 @@ void nsexec(void)
|
|||
if (pipenum == -1)
|
||||
return;
|
||||
|
||||
/* make the process non-dumpable */
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
|
||||
bail("failed to set process as non-dumpable");
|
||||
}
|
||||
|
||||
/* Parse all of the netlink configuration. */
|
||||
nl_parse(pipenum, &config);
|
||||
|
||||
/* Set oom_score_adj. This has to be done before !dumpable because
|
||||
* /proc/self/oom_score_adj is not writeable unless you're an privileged
|
||||
* user (if !dumpable is set). All children inherit their parent's
|
||||
* oom_score_adj value on fork(2) so this will always be propagated
|
||||
* properly.
|
||||
*/
|
||||
update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
|
||||
|
||||
/*
|
||||
* Make the process non-dumpable, to avoid various race conditions that
|
||||
* could cause processes in namespaces we're joining to access host
|
||||
* resources (or potentially execute code).
|
||||
*
|
||||
* However, if the number of namespaces we are joining is 0, we are not
|
||||
* going to be switching to a different security context. Thus setting
|
||||
* ourselves to be non-dumpable only breaks things (like rootless
|
||||
* containers), which is the recommendation from the kernel folks.
|
||||
*/
|
||||
if (config.namespaces) {
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||
bail("failed to set process as non-dumpable");
|
||||
}
|
||||
|
||||
/* Pipe so we can tell the child when we've finished setting up. */
|
||||
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
|
||||
bail("failed to setup sync pipe between parent and child");
|
||||
|
@ -540,9 +580,21 @@ void nsexec(void)
|
|||
|
||||
exit(ret);
|
||||
case SYNC_USERMAP_PLS:
|
||||
/* Enable setgroups(2) if we've been asked to. */
|
||||
/*
|
||||
* Enable setgroups(2) if we've been asked to. But we also
|
||||
* have to explicitly disable setgroups(2) if we're
|
||||
* creating a rootless container (this is required since
|
||||
* Linux 3.19).
|
||||
*/
|
||||
if (config.is_rootless && config.is_setgroup) {
|
||||
kill(child, SIGKILL);
|
||||
bail("cannot allow setgroup in an unprivileged user namespace setup");
|
||||
}
|
||||
|
||||
if (config.is_setgroup)
|
||||
update_setgroups(child, SETGROUPS_ALLOW);
|
||||
if (config.is_rootless)
|
||||
update_setgroups(child, SETGROUPS_DENY);
|
||||
|
||||
/* Set up mappings. */
|
||||
update_uidmap(child, config.uidmap, config.uidmap_len);
|
||||
|
@ -681,6 +733,11 @@ void nsexec(void)
|
|||
* clone_parent rant). So signal our parent to hook us up.
|
||||
*/
|
||||
|
||||
/* Switching is only necessary if we joined namespaces. */
|
||||
if (config.namespaces) {
|
||||
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
|
||||
bail("failed to set process as dumpable");
|
||||
}
|
||||
s = SYNC_USERMAP_PLS;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
||||
|
@ -691,6 +748,11 @@ void nsexec(void)
|
|||
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
||||
if (s != SYNC_USERMAP_ACK)
|
||||
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
||||
/* Switching is only necessary if we joined namespaces. */
|
||||
if (config.namespaces) {
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||
bail("failed to set process as dumpable");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -774,8 +836,10 @@ void nsexec(void)
|
|||
if (setgid(0) < 0)
|
||||
bail("setgid failed");
|
||||
|
||||
if (setgroups(0, NULL) < 0)
|
||||
bail("setgroups failed");
|
||||
if (!config.is_rootless && config.is_setgroup) {
|
||||
if (setgroups(0, NULL) < 0)
|
||||
bail("setgroups failed");
|
||||
}
|
||||
|
||||
s = SYNC_CHILD_READY;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
|
|
|
@ -80,15 +80,12 @@ func (p *setnsProcess) start() (err error) {
|
|||
if err = p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
// We can't join cgroups if we're in a rootless container.
|
||||
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
}
|
||||
}
|
||||
// set oom_score_adj
|
||||
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting oom score")
|
||||
}
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
|
@ -257,8 +254,9 @@ func (p *initProcess) start() error {
|
|||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
// Do this before syncing with child so that no children can escape the
|
||||
// cgroup. We don't need to worry about not doing this and not being root
|
||||
// because we'd be using the rootless cgroup manager in that case.
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
}
|
||||
|
@ -285,10 +283,6 @@ func (p *initProcess) start() error {
|
|||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
// set oom_score_adj
|
||||
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting oom score for ready process")
|
||||
}
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
|
@ -432,6 +426,12 @@ func getPipeFds(pid int) ([]string, error) {
|
|||
f := filepath.Join(dirPath, strconv.Itoa(i))
|
||||
target, err := os.Readlink(f)
|
||||
if err != nil {
|
||||
// Ignore permission errors, for rootless containers and other
|
||||
// non-dumpable processes. if we can't get the fd for a particular
|
||||
// file, there's not much we can do.
|
||||
if os.IsPermission(err) {
|
||||
continue
|
||||
}
|
||||
return fds, err
|
||||
}
|
||||
fds[i] = target
|
||||
|
|
|
@ -348,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
|
|||
var binds []*configs.Mount
|
||||
|
||||
for _, mm := range mounts {
|
||||
dir, err := mm.GetThisCgroupDir(cgroupPaths)
|
||||
dir, err := mm.GetOwnCgroup(cgroupPaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -0,0 +1,227 @@
|
|||
package specconv
|
||||
|
||||
import (
|
||||
"os"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
func sPtr(s string) *string { return &s }
|
||||
|
||||
// Example returns an example spec file, with many options set so a user can
|
||||
// see what a standard spec file looks like.
|
||||
func Example() *specs.Spec {
|
||||
return &specs.Spec{
|
||||
Version: specs.Version,
|
||||
Platform: specs.Platform{
|
||||
OS: runtime.GOOS,
|
||||
Arch: runtime.GOARCH,
|
||||
},
|
||||
Root: specs.Root{
|
||||
Path: "rootfs",
|
||||
Readonly: true,
|
||||
},
|
||||
Process: specs.Process{
|
||||
Terminal: true,
|
||||
User: specs.User{},
|
||||
Args: []string{
|
||||
"sh",
|
||||
},
|
||||
Env: []string{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"TERM=xterm",
|
||||
},
|
||||
Cwd: "/",
|
||||
NoNewPrivileges: true,
|
||||
Capabilities: &specs.LinuxCapabilities{
|
||||
Bounding: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
},
|
||||
Rlimits: []specs.LinuxRlimit{
|
||||
{
|
||||
Type: "RLIMIT_NOFILE",
|
||||
Hard: uint64(1024),
|
||||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
},
|
||||
Hostname: "runc",
|
||||
Mounts: []specs.Mount{
|
||||
{
|
||||
Destination: "/proc",
|
||||
Type: "proc",
|
||||
Source: "proc",
|
||||
Options: nil,
|
||||
},
|
||||
{
|
||||
Destination: "/dev",
|
||||
Type: "tmpfs",
|
||||
Source: "tmpfs",
|
||||
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/pts",
|
||||
Type: "devpts",
|
||||
Source: "devpts",
|
||||
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/shm",
|
||||
Type: "tmpfs",
|
||||
Source: "shm",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/mqueue",
|
||||
Type: "mqueue",
|
||||
Source: "mqueue",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys",
|
||||
Type: "sysfs",
|
||||
Source: "sysfs",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "ro"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys/fs/cgroup",
|
||||
Type: "cgroup",
|
||||
Source: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
||||
},
|
||||
},
|
||||
Linux: &specs.Linux{
|
||||
MaskedPaths: []string{
|
||||
"/proc/kcore",
|
||||
"/proc/latency_stats",
|
||||
"/proc/timer_list",
|
||||
"/proc/timer_stats",
|
||||
"/proc/sched_debug",
|
||||
"/sys/firmware",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/asound",
|
||||
"/proc/bus",
|
||||
"/proc/fs",
|
||||
"/proc/irq",
|
||||
"/proc/sys",
|
||||
"/proc/sysrq-trigger",
|
||||
},
|
||||
Resources: &specs.LinuxResources{
|
||||
Devices: []specs.LinuxDeviceCgroup{
|
||||
{
|
||||
Allow: false,
|
||||
Access: "rwm",
|
||||
},
|
||||
},
|
||||
},
|
||||
Namespaces: []specs.LinuxNamespace{
|
||||
{
|
||||
Type: "pid",
|
||||
},
|
||||
{
|
||||
Type: "network",
|
||||
},
|
||||
{
|
||||
Type: "ipc",
|
||||
},
|
||||
{
|
||||
Type: "uts",
|
||||
},
|
||||
{
|
||||
Type: "mount",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ExampleRootless returns an example spec file that works with rootless
|
||||
// containers. It's essentially a modified version of the specfile from
|
||||
// Example().
|
||||
func ToRootless(spec *specs.Spec) {
|
||||
var namespaces []specs.LinuxNamespace
|
||||
|
||||
// Remove networkns from the spec.
|
||||
for _, ns := range spec.Linux.Namespaces {
|
||||
switch ns.Type {
|
||||
case specs.NetworkNamespace, specs.UserNamespace:
|
||||
// Do nothing.
|
||||
default:
|
||||
namespaces = append(namespaces, ns)
|
||||
}
|
||||
}
|
||||
// Add userns to the spec.
|
||||
namespaces = append(namespaces, specs.LinuxNamespace{
|
||||
Type: specs.UserNamespace,
|
||||
})
|
||||
spec.Linux.Namespaces = namespaces
|
||||
|
||||
// Add mappings for the current user.
|
||||
spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
|
||||
HostID: uint32(os.Geteuid()),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
}}
|
||||
spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
|
||||
HostID: uint32(os.Getegid()),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
}}
|
||||
|
||||
// Fix up mounts.
|
||||
var mounts []specs.Mount
|
||||
for _, mount := range spec.Mounts {
|
||||
// Ignore all mounts that are under /sys.
|
||||
if strings.HasPrefix(mount.Destination, "/sys") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Remove all gid= and uid= mappings.
|
||||
var options []string
|
||||
for _, option := range mount.Options {
|
||||
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
|
||||
options = append(options, option)
|
||||
}
|
||||
}
|
||||
|
||||
mount.Options = options
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
// Add the sysfs mount as an rbind.
|
||||
mounts = append(mounts, specs.Mount{
|
||||
Source: "/sys",
|
||||
Destination: "/sys",
|
||||
Type: "none",
|
||||
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
|
||||
})
|
||||
spec.Mounts = mounts
|
||||
|
||||
// Remove cgroup settings.
|
||||
spec.Linux.Resources = nil
|
||||
}
|
|
@ -145,6 +145,7 @@ type CreateOpts struct {
|
|||
NoPivotRoot bool
|
||||
NoNewKeyring bool
|
||||
Spec *specs.Spec
|
||||
Rootless bool
|
||||
}
|
||||
|
||||
// CreateLibcontainerConfig creates a new libcontainer configuration from a
|
||||
|
@ -175,6 +176,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
|||
Hostname: spec.Hostname,
|
||||
Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)),
|
||||
NoNewKeyring: opts.NoNewKeyring,
|
||||
Rootless: opts.Rootless,
|
||||
}
|
||||
|
||||
exists := false
|
||||
|
@ -208,7 +210,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
|||
if err := setupUserNamespace(spec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c, err := createCgroupConfig(opts.CgroupName, opts.UseSystemdCgroup, spec)
|
||||
c, err := createCgroupConfig(opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -264,8 +266,14 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
|
|||
}
|
||||
}
|
||||
|
||||
func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) {
|
||||
var myCgroupPath string
|
||||
func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
||||
var (
|
||||
myCgroupPath string
|
||||
|
||||
spec = opts.Spec
|
||||
useSystemdCgroup = opts.UseSystemdCgroup
|
||||
name = opts.CgroupName
|
||||
)
|
||||
|
||||
c := &configs.Cgroup{
|
||||
Resources: &configs.Resources{},
|
||||
|
@ -301,9 +309,14 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
|
|||
c.Path = myCgroupPath
|
||||
}
|
||||
|
||||
c.Resources.AllowedDevices = allowedDevices
|
||||
if spec.Linux == nil {
|
||||
return c, nil
|
||||
// In rootless containers, any attempt to make cgroup changes will fail.
|
||||
// libcontainer will validate this and we shouldn't add any cgroup options
|
||||
// the user didn't specify.
|
||||
if !opts.Rootless {
|
||||
c.Resources.AllowedDevices = allowedDevices
|
||||
if spec.Linux == nil {
|
||||
return c, nil
|
||||
}
|
||||
}
|
||||
r := spec.Linux.Resources
|
||||
if r == nil {
|
||||
|
@ -340,8 +353,10 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
|
|||
}
|
||||
c.Resources.Devices = append(c.Resources.Devices, dd)
|
||||
}
|
||||
// append the default allowed devices to the end of the list
|
||||
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
|
||||
if !opts.Rootless {
|
||||
// append the default allowed devices to the end of the list
|
||||
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
|
||||
}
|
||||
if r.Memory != nil {
|
||||
if r.Memory.Limit != nil {
|
||||
c.Resources.Memory = *r.Memory.Limit
|
||||
|
@ -595,11 +610,11 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
|
|||
for _, m := range spec.Linux.GIDMappings {
|
||||
config.GidMappings = append(config.GidMappings, create(m))
|
||||
}
|
||||
rootUID, err := config.HostUID()
|
||||
rootUID, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rootGID, err := config.HostGID()
|
||||
rootGID, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ package specconv
|
|||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
|
@ -16,7 +17,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) {
|
|||
CgroupsPath: cgroupsPath,
|
||||
}
|
||||
|
||||
cgroup, err := createCgroupConfig("ContainerID", false, spec)
|
||||
opts := &CreateOpts{
|
||||
CgroupName: "ContainerID",
|
||||
UseSystemdCgroup: false,
|
||||
Spec: spec,
|
||||
}
|
||||
|
||||
cgroup, err := createCgroupConfig(opts)
|
||||
if err != nil {
|
||||
t.Errorf("Couldn't create Cgroup config: %v", err)
|
||||
}
|
||||
|
@ -28,8 +35,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) {
|
|||
|
||||
func TestLinuxCgroupsPathNotSpecified(t *testing.T) {
|
||||
spec := &specs.Spec{}
|
||||
opts := &CreateOpts{
|
||||
CgroupName: "ContainerID",
|
||||
UseSystemdCgroup: false,
|
||||
Spec: spec,
|
||||
}
|
||||
|
||||
cgroup, err := createCgroupConfig("ContainerID", false, spec)
|
||||
cgroup, err := createCgroupConfig(opts)
|
||||
if err != nil {
|
||||
t.Errorf("Couldn't create Cgroup config: %v", err)
|
||||
}
|
||||
|
@ -39,6 +51,27 @@ func TestLinuxCgroupsPathNotSpecified(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestSpecconvExampleValidate(t *testing.T) {
|
||||
spec := Example()
|
||||
spec.Root.Path = "/"
|
||||
|
||||
opts := &CreateOpts{
|
||||
CgroupName: "ContainerID",
|
||||
UseSystemdCgroup: false,
|
||||
Spec: spec,
|
||||
}
|
||||
|
||||
config, err := CreateLibcontainerConfig(opts)
|
||||
if err != nil {
|
||||
t.Errorf("Couldn't create libcontainer config: %v", err)
|
||||
}
|
||||
|
||||
validator := validate.New()
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected specconv to produce valid container config: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDupNamespaces(t *testing.T) {
|
||||
spec := &specs.Spec{
|
||||
Linux: &specs.Linux{
|
||||
|
@ -62,3 +95,26 @@ func TestDupNamespaces(t *testing.T) {
|
|||
t.Errorf("Duplicated namespaces should be forbidden")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRootlessSpecconvValidate(t *testing.T) {
|
||||
spec := Example()
|
||||
spec.Root.Path = "/"
|
||||
ToRootless(spec)
|
||||
|
||||
opts := &CreateOpts{
|
||||
CgroupName: "ContainerID",
|
||||
UseSystemdCgroup: false,
|
||||
Spec: spec,
|
||||
Rootless: true,
|
||||
}
|
||||
|
||||
config, err := CreateLibcontainerConfig(opts)
|
||||
if err != nil {
|
||||
t.Errorf("Couldn't create libcontainer config: %v", err)
|
||||
}
|
||||
|
||||
validator := validate.New()
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected specconv to produce valid rootless container config: %v", err)
|
||||
}
|
||||
}
|
||||
|
|
19
list.go
19
list.go
|
@ -7,12 +7,14 @@ import (
|
|||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"encoding/json"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
@ -38,6 +40,8 @@ type containerState struct {
|
|||
Created time.Time `json:"created"`
|
||||
// Annotations is the user defined annotations added to the config.
|
||||
Annotations map[string]string `json:"annotations,omitempty"`
|
||||
// The owner of the state directory (the owner of the container).
|
||||
Owner string `json:"owner"`
|
||||
}
|
||||
|
||||
var listCommand = cli.Command{
|
||||
|
@ -85,14 +89,15 @@ To list containers created using a non-default value for "--root":
|
|||
switch context.String("format") {
|
||||
case "table":
|
||||
w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0)
|
||||
fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\n")
|
||||
fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n")
|
||||
for _, item := range s {
|
||||
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n",
|
||||
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n",
|
||||
item.ID,
|
||||
item.InitProcessPid,
|
||||
item.Status,
|
||||
item.Bundle,
|
||||
item.Created.Format(time.RFC3339Nano))
|
||||
item.Created.Format(time.RFC3339Nano),
|
||||
item.Owner)
|
||||
}
|
||||
if err := w.Flush(); err != nil {
|
||||
return err
|
||||
|
@ -126,6 +131,13 @@ func getContainers(context *cli.Context) ([]containerState, error) {
|
|||
var s []containerState
|
||||
for _, item := range list {
|
||||
if item.IsDir() {
|
||||
// This cast is safe on Linux.
|
||||
stat := item.Sys().(*syscall.Stat_t)
|
||||
owner, err := user.LookupUid(int(stat.Uid))
|
||||
if err != nil {
|
||||
owner.Name = string(stat.Uid)
|
||||
}
|
||||
|
||||
container, err := factory.Load(item.Name())
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err)
|
||||
|
@ -155,6 +167,7 @@ func getContainers(context *cli.Context) ([]containerState, error) {
|
|||
Rootfs: state.BaseState.Config.Rootfs,
|
||||
Created: state.BaseState.Created,
|
||||
Annotations: annotations,
|
||||
Owner: owner.Name,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
5
ps.go
5
ps.go
|
@ -28,6 +28,11 @@ var psCommand = cli.Command{
|
|||
if err := checkArgs(context, 1, minArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX: Currently not supported with rootless containers.
|
||||
if isRootless() {
|
||||
return fmt.Errorf("runc ps requires root")
|
||||
}
|
||||
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
|
@ -86,6 +87,11 @@ using the runc checkpoint command.`,
|
|||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX: Currently this is untested with rootless containers.
|
||||
if isRootless() {
|
||||
return fmt.Errorf("runc restore requires root")
|
||||
}
|
||||
|
||||
imagePath := context.String("image-path")
|
||||
id := context.Args().First()
|
||||
if id == "" {
|
||||
|
|
157
spec.go
157
spec.go
|
@ -10,6 +10,7 @@ import (
|
|||
"runtime"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
@ -63,156 +64,20 @@ container on your host.`,
|
|||
Value: "",
|
||||
Usage: "path to the root of the bundle directory",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "rootless",
|
||||
Usage: "generate a configuration for a rootless container",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 0, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
spec := specs.Spec{
|
||||
Version: specs.Version,
|
||||
Platform: specs.Platform{
|
||||
OS: runtime.GOOS,
|
||||
Arch: runtime.GOARCH,
|
||||
},
|
||||
Root: specs.Root{
|
||||
Path: "rootfs",
|
||||
Readonly: true,
|
||||
},
|
||||
Process: specs.Process{
|
||||
Terminal: true,
|
||||
User: specs.User{},
|
||||
Args: []string{
|
||||
"sh",
|
||||
},
|
||||
Env: []string{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"TERM=xterm",
|
||||
},
|
||||
Cwd: "/",
|
||||
NoNewPrivileges: true,
|
||||
Capabilities: &specs.LinuxCapabilities{
|
||||
Bounding: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
},
|
||||
Rlimits: []specs.LinuxRlimit{
|
||||
{
|
||||
Type: "RLIMIT_NOFILE",
|
||||
Hard: uint64(1024),
|
||||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
},
|
||||
Hostname: "runc",
|
||||
Mounts: []specs.Mount{
|
||||
{
|
||||
Destination: "/proc",
|
||||
Type: "proc",
|
||||
Source: "proc",
|
||||
Options: nil,
|
||||
},
|
||||
{
|
||||
Destination: "/dev",
|
||||
Type: "tmpfs",
|
||||
Source: "tmpfs",
|
||||
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/pts",
|
||||
Type: "devpts",
|
||||
Source: "devpts",
|
||||
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/shm",
|
||||
Type: "tmpfs",
|
||||
Source: "shm",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/mqueue",
|
||||
Type: "mqueue",
|
||||
Source: "mqueue",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys",
|
||||
Type: "sysfs",
|
||||
Source: "sysfs",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "ro"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys/fs/cgroup",
|
||||
Type: "cgroup",
|
||||
Source: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
||||
},
|
||||
},
|
||||
Linux: &specs.Linux{
|
||||
MaskedPaths: []string{
|
||||
"/proc/kcore",
|
||||
"/proc/latency_stats",
|
||||
"/proc/timer_list",
|
||||
"/proc/timer_stats",
|
||||
"/proc/sched_debug",
|
||||
"/sys/firmware",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/asound",
|
||||
"/proc/bus",
|
||||
"/proc/fs",
|
||||
"/proc/irq",
|
||||
"/proc/sys",
|
||||
"/proc/sysrq-trigger",
|
||||
},
|
||||
Resources: &specs.LinuxResources{
|
||||
Devices: []specs.LinuxDeviceCgroup{
|
||||
{
|
||||
Allow: false,
|
||||
Access: "rwm",
|
||||
},
|
||||
},
|
||||
},
|
||||
Namespaces: []specs.LinuxNamespace{
|
||||
{
|
||||
Type: "pid",
|
||||
},
|
||||
{
|
||||
Type: "network",
|
||||
},
|
||||
{
|
||||
Type: "ipc",
|
||||
},
|
||||
{
|
||||
Type: "uts",
|
||||
},
|
||||
{
|
||||
Type: "mount",
|
||||
},
|
||||
},
|
||||
},
|
||||
spec := specconv.Example()
|
||||
|
||||
rootless := context.Bool("rootless")
|
||||
if rootless {
|
||||
specconv.ToRootless(spec)
|
||||
}
|
||||
|
||||
checkNoFile := func(name string) error {
|
||||
|
@ -234,7 +99,7 @@ container on your host.`,
|
|||
if err := checkNoFile(specConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
data, err := json.MarshalIndent(&spec, "", "\t")
|
||||
data, err := json.MarshalIndent(spec, "", "\t")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -28,7 +28,9 @@ function check_cgroup_value() {
|
|||
}
|
||||
|
||||
@test "runc update --kernel-memory (initialized)" {
|
||||
requires cgroups_kmem
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires cgroups_kmem root
|
||||
|
||||
# Add cgroup path
|
||||
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
|
||||
|
||||
|
@ -56,7 +58,9 @@ EOF
|
|||
}
|
||||
|
||||
@test "runc update --kernel-memory (uninitialized)" {
|
||||
requires cgroups_kmem
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires cgroups_kmem root
|
||||
|
||||
# Add cgroup path
|
||||
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
|
||||
|
||||
|
|
|
@ -12,7 +12,8 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "checkpoint and restore" {
|
||||
requires criu
|
||||
# XXX: currently criu require root containers.
|
||||
requires criu root
|
||||
|
||||
# criu does not work with external terminals so..
|
||||
# setting terminal and root:readonly: to false
|
||||
|
@ -58,8 +59,9 @@ function teardown() {
|
|||
[[ "${output}" == *"running"* ]]
|
||||
}
|
||||
|
||||
@test "checkpoint(pre-dump) and restore" {
|
||||
requires criu
|
||||
@test "checkpoint --pre-dump and restore" {
|
||||
# XXX: currently criu require root containers.
|
||||
requires criu root
|
||||
|
||||
# criu does not work with external terminals so..
|
||||
# setting terminal and root:readonly: to false
|
||||
|
|
|
@ -22,11 +22,13 @@ function teardown() {
|
|||
testcontainer test_busybox running
|
||||
|
||||
runc kill test_busybox KILL
|
||||
[ "$status" -eq 0 ]
|
||||
# wait for busybox to be in the destroyed state
|
||||
retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'"
|
||||
|
||||
# delete test_busybox
|
||||
runc delete test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
runc state test_busybox
|
||||
[ "$status" -ne 0 ]
|
||||
|
|
|
@ -12,6 +12,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "events --stats" {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -27,6 +30,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "events --interval default " {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -54,6 +60,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "events --interval 1s " {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -80,6 +89,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "events --interval 100ms " {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
|
|
@ -112,6 +112,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc exec --user" {
|
||||
# --user can't work in rootless containers
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
|
|
@ -57,6 +57,7 @@ load helpers
|
|||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[1]} =~ runc\ resume+ ]]
|
||||
|
||||
# We don't use runc_spec here, because we're just testing the help page.
|
||||
runc spec -h
|
||||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[1]} =~ runc\ spec+ ]]
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")")
|
||||
RUNC="${INTEGRATION_ROOT}/../../runc"
|
||||
RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty"
|
||||
GOPATH="${INTEGRATION_ROOT}/../../../.."
|
||||
GOPATH="$(mktemp -d --tmpdir runc-integration-gopath.XXXXXX)"
|
||||
|
||||
# Test data path.
|
||||
TESTDATA="${INTEGRATION_ROOT}/testdata"
|
||||
|
@ -27,7 +27,7 @@ KERNEL_MINOR="${KERNEL_VERSION#$KERNEL_MAJOR.}"
|
|||
KERNEL_MINOR="${KERNEL_MINOR%%.*}"
|
||||
|
||||
# Root state path.
|
||||
ROOT="$BATS_TMPDIR/runc"
|
||||
ROOT=$(mktemp -d "$BATS_TMPDIR/runc.XXXXXX")
|
||||
|
||||
# Path to console socket.
|
||||
CONSOLE_SOCKET="$BATS_TMPDIR/console.sock"
|
||||
|
@ -40,6 +40,9 @@ CGROUP_CPU_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~
|
|||
KMEM="${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes"
|
||||
RT_PERIOD="${CGROUP_CPU_BASE_PATH}/cpu.rt_period_us"
|
||||
|
||||
# Check if we're in rootless mode.
|
||||
ROOTLESS=$(id -u)
|
||||
|
||||
# Wrapper for runc.
|
||||
function runc() {
|
||||
run __runc "$@"
|
||||
|
@ -55,6 +58,17 @@ function __runc() {
|
|||
"$RUNC" --root "$ROOT" "$@"
|
||||
}
|
||||
|
||||
# Wrapper for runc spec.
|
||||
function runc_spec() {
|
||||
local args=""
|
||||
|
||||
if [ "$ROOTLESS" -ne 0 ]; then
|
||||
args+="--rootless"
|
||||
fi
|
||||
|
||||
runc spec $args "$@"
|
||||
}
|
||||
|
||||
# Fails the current test, providing the error given.
|
||||
function fail() {
|
||||
echo "$@" >&2
|
||||
|
@ -68,7 +82,12 @@ function requires() {
|
|||
case $var in
|
||||
criu)
|
||||
if [ ! -e "$CRIU" ]; then
|
||||
skip "Test requires ${var}."
|
||||
skip "test requires ${var}"
|
||||
fi
|
||||
;;
|
||||
root)
|
||||
if [ "$ROOTLESS" -ne 0 ]; then
|
||||
skip "test requires ${var}"
|
||||
fi
|
||||
;;
|
||||
cgroups_kmem)
|
||||
|
@ -179,18 +198,18 @@ function setup_busybox() {
|
|||
if [ ! -e $BUSYBOX_IMAGE ]; then
|
||||
curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz'
|
||||
fi
|
||||
tar -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE"
|
||||
tar --exclude './dev/*' -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE"
|
||||
cd "$BUSYBOX_BUNDLE"
|
||||
runc spec
|
||||
runc_spec
|
||||
}
|
||||
|
||||
function setup_hello() {
|
||||
setup_recvtty
|
||||
run mkdir "$HELLO_BUNDLE"
|
||||
run mkdir "$HELLO_BUNDLE"/rootfs
|
||||
tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE"
|
||||
tar --exclude './dev/*' -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE"
|
||||
cd "$HELLO_BUNDLE"
|
||||
runc spec
|
||||
runc_spec
|
||||
sed -i 's;"sh";"/hello";' config.json
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@ function teardown() {
|
|||
|
||||
|
||||
@test "kill detached busybox" {
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
|
|
@ -12,6 +12,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc pause and resume" {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -34,6 +37,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc pause and resume with nonexist container" {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
|
||||
# run test_busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
|
|
@ -12,6 +12,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "ps" {
|
||||
# ps is not supported, it requires cgroups
|
||||
requires root
|
||||
|
||||
# start busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -24,10 +27,13 @@ function teardown() {
|
|||
runc ps test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
[[ ${lines[0]} =~ UID\ +PID\ +PPID\ +C\ +STIME\ +TTY\ +TIME\ +CMD+ ]]
|
||||
[[ "${lines[1]}" == *"root"*[0-9]* ]]
|
||||
[[ "${lines[1]}" == *"$(id -un 2>/dev/null)"*[0-9]* ]]
|
||||
}
|
||||
|
||||
@test "ps -f json" {
|
||||
# ps is not supported, it requires cgroups
|
||||
requires root
|
||||
|
||||
# start busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -43,6 +49,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "ps -e -x" {
|
||||
# ps is not supported, it requires cgroups
|
||||
requires root
|
||||
|
||||
# start busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
|
|
@ -26,7 +26,7 @@ function teardown() {
|
|||
[ ! -e config.json ]
|
||||
|
||||
# test generation of spec does not return an error
|
||||
runc spec
|
||||
runc_spec
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# test generation of spec created our config.json (spec)
|
||||
|
@ -51,7 +51,7 @@ function teardown() {
|
|||
[ ! -e "$HELLO_BUNDLE"/config.json ]
|
||||
|
||||
# test generation of spec does not return an error
|
||||
runc spec --bundle "$HELLO_BUNDLE"
|
||||
runc_spec --bundle "$HELLO_BUNDLE"
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# test generation of spec created our config.json (spec)
|
||||
|
|
|
@ -23,6 +23,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc run detached ({u,g}id != 0)" {
|
||||
# cannot start containers as another user in rootless setup
|
||||
requires root
|
||||
|
||||
# replace "uid": 0 with "uid": 1000
|
||||
# and do a similar thing for gid.
|
||||
sed -i 's;"uid": 0;"uid": 1000;g' config.json
|
||||
|
|
|
@ -21,6 +21,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc run ({u,g}id != 0)" {
|
||||
# cannot start containers as another user in rootless setup
|
||||
requires root
|
||||
|
||||
# replace "uid": 0 with "uid": 1000
|
||||
# and do a similar thing for gid.
|
||||
sed -i 's;"uid": 0;"uid": 1000;g' config.json
|
||||
|
|
|
@ -11,7 +11,37 @@ function teardown() {
|
|||
teardown_busybox
|
||||
}
|
||||
|
||||
@test "state" {
|
||||
@test "state (kill + delete)" {
|
||||
runc state test_busybox
|
||||
[ "$status" -ne 0 ]
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# check state
|
||||
wait_for_container 15 1 test_busybox
|
||||
|
||||
testcontainer test_busybox running
|
||||
|
||||
runc kill test_busybox KILL
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# wait for busybox to be in the destroyed state
|
||||
retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'"
|
||||
|
||||
# delete test_busybox
|
||||
runc delete test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
runc state test_busybox
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "state (pause + resume)" {
|
||||
# XXX: pause and resume require cgroups.
|
||||
requires root
|
||||
|
||||
runc state test_busybox
|
||||
[ "$status" -ne 0 ]
|
||||
|
||||
|
@ -37,14 +67,4 @@ function teardown() {
|
|||
|
||||
# test state of busybox is back to running
|
||||
testcontainer test_busybox running
|
||||
|
||||
runc kill test_busybox KILL
|
||||
# wait for busybox to be in the destroyed state
|
||||
retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'"
|
||||
|
||||
# delete test_busybox
|
||||
runc delete test_busybox
|
||||
|
||||
runc state test_busybox
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
|
|
@ -24,6 +24,10 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc run [tty owner]" {
|
||||
# tty chmod is not doable in rootless containers.
|
||||
# TODO: this can be made as a change to the gid test.
|
||||
requires root
|
||||
|
||||
# Replace sh script with stat.
|
||||
sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json
|
||||
|
||||
|
@ -36,6 +40,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc run [tty owner] ({u,g}id != 0)" {
|
||||
# tty chmod is not doable in rootless containers.
|
||||
requires root
|
||||
|
||||
# replace "uid": 0 with "uid": 1000
|
||||
# and do a similar thing for gid.
|
||||
sed -i 's;"uid": 0;"uid": 1000;g' config.json
|
||||
|
@ -72,6 +79,10 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc exec [tty owner]" {
|
||||
# tty chmod is not doable in rootless containers.
|
||||
# TODO: this can be made as a change to the gid test.
|
||||
requires root
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
@ -90,6 +101,9 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc exec [tty owner] ({u,g}id != 0)" {
|
||||
# tty chmod is not doable in rootless containers.
|
||||
requires root
|
||||
|
||||
# replace "uid": 0 with "uid": 1000
|
||||
# and do a similar thing for gid.
|
||||
sed -i 's;"uid": 0;"uid": 1000;g' config.json
|
||||
|
|
|
@ -50,7 +50,11 @@ function check_cgroup_value() {
|
|||
|
||||
# TODO: test rt cgroup updating
|
||||
@test "update" {
|
||||
requires cgroups_kmem
|
||||
# XXX: currently cgroups require root containers.
|
||||
# XXX: Also, this test should be split into separate sections so that we
|
||||
# can skip kmem without skipping update tests overall.
|
||||
requires cgroups_kmem root
|
||||
|
||||
# run a few busyboxes detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_update
|
||||
[ "$status" -eq 0 ]
|
||||
|
|
3
utils.go
3
utils.go
|
@ -63,9 +63,6 @@ func setupSpec(context *cli.Context) (*specs.Spec, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if os.Geteuid() != 0 {
|
||||
return nil, fmt.Errorf("runc should be run as root")
|
||||
}
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -186,6 +186,11 @@ func createPidFile(path string, process *libcontainer.Process) error {
|
|||
return os.Rename(tmpName, path)
|
||||
}
|
||||
|
||||
// XXX: Currently we autodetect rootless mode.
|
||||
func isRootless() bool {
|
||||
return os.Geteuid() != 0
|
||||
}
|
||||
|
||||
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
|
||||
config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
|
||||
CgroupName: id,
|
||||
|
@ -193,6 +198,7 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcont
|
|||
NoPivotRoot: context.Bool("no-pivot"),
|
||||
NoNewKeyring: context.Bool("no-new-keyring"),
|
||||
Spec: spec,
|
||||
Rootless: isRootless(),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -236,12 +242,12 @@ func (r *runner) run(config *specs.Process) (int, error) {
|
|||
for i := baseFd; i < baseFd+r.preserveFDs; i++ {
|
||||
process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
|
||||
}
|
||||
rootuid, err := r.container.Config().HostUID()
|
||||
rootuid, err := r.container.Config().HostRootUID()
|
||||
if err != nil {
|
||||
r.destroy()
|
||||
return -1, err
|
||||
}
|
||||
rootgid, err := r.container.Config().HostGID()
|
||||
rootgid, err := r.container.Config().HostRootGID()
|
||||
if err != nil {
|
||||
r.destroy()
|
||||
return -1, err
|
||||
|
|
Loading…
Reference in New Issue