rootless: add rootless cgroup manager

The rootless cgroup manager acts as a noop for all set and apply
operations. It is just used for rootless setups. Currently this is far
too simple (we need to add opportunistic cgroup management), but is good
enough as a first-pass at a noop cgroup manager.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
Aleksa Sarai 2016-04-26 02:19:39 +10:00
parent d2f49696b0
commit baeef29858
No known key found for this signature in database
GPG Key ID: 9E18AA267DDB8DB4
8 changed files with 210 additions and 37 deletions

View File

@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil }, nil
} }
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *cgroupData) path(subsystem string) (string, error) { func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem) mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path. // If we didn't mount the subsystem, there is no point we make the path.
if err != nil { if err != nil {
return "", err return "", err
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
} }
parentPath, err := raw.parentPath(subsystem, mnt, root) // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -0,0 +1,128 @@
// +build linux
package rootless
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
}
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}
// The noop cgroup manager is used for rootless containers, because we currently
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
// by factory if we are in rootless mode. We error out if any cgroup options are
// set in the config -- this may change in the future with upcoming kernel features
// like the cgroup namespace.
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func (m *Manager) Apply(pid int) error {
// If there are no cgroup settings, there's nothing to do.
if m.Cgroups == nil {
return nil
}
// We can't set paths.
// TODO(cyphar): Implement the case where the runner of a rootless container
// owns their own cgroup, which would allow us to set up a
// cgroup for each path.
if m.Cgroups.Paths != nil {
return fmt.Errorf("cannot change cgroup path in rootless container")
}
// We load the paths into the manager.
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()
path, err := cgroups.GetOwnCgroupPath(name)
if err != nil {
// Ignore paths we couldn't resolve.
continue
}
paths[name] = path
}
m.Paths = paths
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
}
func (m *Manager) Set(container *configs.Config) error {
// We have to re-do the validation here, since someone might decide to
// update a rootless container.
return validate.New().Validate(container)
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container. While this doesn't
// actually require write access to a cgroup directory, the
// statistics are not useful if they can be affected by
// non-container processes.
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container.
return fmt.Errorf("cannot use freezer cgroup in rootless container")
}
func (m *Manager) Destroy() error {
// We don't have to do anything here because we didn't do any setup.
return nil
}

View File

@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err return "", err
} }
initPath, err := cgroups.GetInitCgroupDir(subsystem) initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -109,7 +109,7 @@ type Mount struct {
Subsystems []string Subsystems []string
} }
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) { func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 { if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount") return "", fmt.Errorf("no subsystem for mount")
} }
@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil return subsystems, nil
} }
// GetThisCgroupDir returns the relative path to the cgroup docker is running in. // GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) { func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup") cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil { if err != nil {
return "", err return "", err
@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups) return getControllerPath(subsystem, cgroups)
} }
func GetInitCgroupDir(subsystem string) (string, error) { func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup") cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil { if err != nil {
return "", err return "", err
@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups) return getControllerPath(subsystem, cgroups)
} }
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(dir string) ([]int, error) { func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses)) f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil { if err != nil {

View File

@ -520,10 +520,18 @@ func (c *linuxContainer) Resume() error {
} }
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
// XXX(cyphar): This requires cgroups.
if c.config.Rootless {
return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
}
return notifyOnOOM(c.cgroupManager.GetPaths()) return notifyOnOOM(c.cgroupManager.GetPaths())
} }
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
// XXX(cyphar): This requires cgroups.
if c.config.Rootless {
return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
}
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
} }

View File

@ -15,6 +15,7 @@ import (
"github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate" "github.com/opencontainers/runc/libcontainer/configs/validate"
@ -73,6 +74,20 @@ func Cgroupfs(l *LinuxFactory) error {
return nil return nil
} }
// RootlessCgroups is an options func to configure a LinuxFactory to
// return containers that use the "rootless" cgroup manager, which will
// fail to do any operations not possible to do with an unprivileged user.
// It should only be used in conjunction with rootless containers.
func RootlessCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &rootless.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error { func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root) mounted, err := mount.Mounted(l.Root)
@ -169,6 +184,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := os.Chown(containerRoot, uid, gid); err != nil { if err := os.Chown(containerRoot, uid, gid); err != nil {
return nil, newGenericError(err, SystemError) return nil, newGenericError(err, SystemError)
} }
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{ c := &linuxContainer{
id: id, id: id,
root: containerRoot, root: containerRoot,
@ -195,6 +213,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
processStartTime: state.InitProcessStartTime, processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors, fds: state.ExternalDescriptors,
} }
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{ c := &linuxContainer{
initProcess: r, initProcess: r,
initProcessStartTime: state.InitProcessStartTime, initProcessStartTime: state.InitProcessStartTime,

View File

@ -254,15 +254,14 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
} }
p.setExternalDescriptors(fds) p.setExternalDescriptors(fds)
if !p.container.config.Rootless {
// Do this before syncing with child so that no children can escape the // Do this before syncing with child so that no children can escape the
// cgroup. We can't do this if we're not running as root. // cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil { if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process") return newSystemErrorWithCause(err, "applying cgroup configuration for process")
} }
}
defer func() { defer func() {
if err != nil && !p.container.config.Rootless { if err != nil {
// TODO: should not be the responsibility to call here // TODO: should not be the responsibility to call here
p.manager.Destroy() p.manager.Destroy()
} }
@ -281,12 +280,9 @@ func (p *initProcess) start() error {
ierr := parseSync(p.parentPipe, func(sync *syncT) error { ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type { switch sync.Type {
case procReady: case procReady:
// We can't set cgroups if we're in a rootless container.
if !p.container.config.Rootless {
if err := p.manager.Set(p.config.Config); err != nil { if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process") return newSystemErrorWithCause(err, "setting cgroup config for ready process")
} }
}
// set rlimits, this has to be done here because we lose permissions // set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace // to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {

View File

@ -348,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
var binds []*configs.Mount var binds []*configs.Mount
for _, mm := range mounts { for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir(cgroupPaths) dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil { if err != nil {
return nil, err return nil, err
} }