From 781855b62ac1e72ffa14e3d7c3c347120f5588b1 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 13 Aug 2014 18:00:15 -0700 Subject: [PATCH] Cleanup systemd cgroup code The current paths for the different systemd cgroup subsystems that systemd manages and that we have to manage are very inconsistent. This patch cleans up those differences and allows consistent paths to be used. Signed-off-by: Michael Crosby --- cgroups/fs/cpuset.go | 31 ++-- cgroups/systemd/apply_systemd.go | 283 ++++++++++--------------------- namespaces/exec.go | 4 +- 3 files changed, 112 insertions(+), 206 deletions(-) diff --git a/cgroups/fs/cpuset.go b/cgroups/fs/cpuset.go index 9570125f..88477394 100644 --- a/cgroups/fs/cpuset.go +++ b/cgroups/fs/cpuset.go @@ -20,19 +20,10 @@ func (s *CpusetGroup) Set(d *data) error { if err != nil { return err } - if err := s.ensureParent(dir); err != nil { - return err - } - // because we are not using d.join we need to place the pid into the procs file - // unlike the other subsystems - if err := writeFile(dir, "cgroup.procs", strconv.Itoa(d.pid)); err != nil { - return err - } - if err := writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil { - return err - } + return s.SetDir(dir, d.c.CpusetCpus, d.pid) } + return nil } @@ -44,6 +35,24 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } +func (s *CpusetGroup) SetDir(dir, value string, pid int) error { + if err := s.ensureParent(dir); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil { + return err + } + + if err := writeFile(dir, "cpuset.cpus", value); err != nil { + return err + } + + return nil +} + func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { return diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 8a0e69a2..7fcd99bc 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -21,8 +21,7 @@ import ( ) type systemdCgroup struct { - cleanupDirs []string - cgroup *cgroups.Cgroup + cgroup *cgroups.Cgroup } type subsystem interface { @@ -85,40 +84,15 @@ func getIfaceForUnit(unitName string) string { return "Unit" } -type cgroupArg struct { - File string - Value string -} - func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { var ( unitName = getUnitName(c) slice = "system.slice" properties []systemd1.Property - cpuArgs []cgroupArg - cpusetArgs []cgroupArg - memoryArgs []cgroupArg - res systemdCgroup + res = &systemdCgroup{} ) res.cgroup = c - // First set up things not supported by systemd - - // -1 disables memorySwap - if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { - memorySwap := c.MemorySwap - - if memorySwap == 0 { - // By default, MemorySwap is set to twice the size of RAM. - memorySwap = c.Memory * 2 - } - - memoryArgs = append(memoryArgs, cgroupArg{"memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)}) - } - - if c.CpusetCpus != "" { - cpusetArgs = append(cpusetArgs, cgroupArg{"cpuset.cpus", c.CpusetCpus}) - } if c.Slice != "" { slice = c.Slice @@ -152,170 +126,33 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { return nil, err } - // To work around the lack of /dev/pts/* support above we need to manually add these - // so, ask systemd for the cgroup used - props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) - if err != nil { - return nil, err - } - - cgroup := props["ControlGroup"].(string) - if !c.AllowAllDevices { - // Atm we can't use the systemd device support because of two missing things: - // * Support for wildcards to allow mknod on any device - // * Support for wildcards to allow /dev/pts support - // - // The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is - // in wide use. When both these are availalable we will be able to switch, but need to keep the old - // implementation for backwards compat. - // - // Note: we can't use systemd to set up the initial limits, and then change the cgroup - // because systemd will re-write the device settings if it needs to re-apply the cgroup context. - // This happens at least for v208 when any sibling unit is started. - - mountpoint, err := cgroups.FindCgroupMountpoint("devices") - if err != nil { + if err := joinDevices(c, pid); err != nil { return nil, err } - - initPath, err := cgroups.GetInitCgroupDir("devices") - if err != nil { - return nil, err - } - - dir := filepath.Join(mountpoint, initPath, c.Parent, c.Name) - - res.cleanupDirs = append(res.cleanupDirs, dir) - - if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) { - return nil, err - } - - if err := ioutil.WriteFile(filepath.Join(dir, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { - return nil, err - } - - if err := writeFile(dir, "devices.deny", "a"); err != nil { - return nil, err - } - - for _, dev := range c.AllowedDevices { - if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { - return nil, err - } - } } - if len(cpuArgs) != 0 { - mountpoint, err := cgroups.FindCgroupMountpoint("cpu") - if err != nil { + // -1 disables memorySwap + if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { + if err := joinMemory(c, pid); err != nil { return nil, err } - path := filepath.Join(mountpoint, cgroup) - - for _, arg := range cpuArgs { - if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { - return nil, err - } - } - } - - if len(memoryArgs) != 0 { - mountpoint, err := cgroups.FindCgroupMountpoint("memory") - if err != nil { - return nil, err - } - - path := filepath.Join(mountpoint, cgroup) - - for _, arg := range memoryArgs { - if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { - return nil, err - } - } } // we need to manually join the freezer cgroup in systemd because it does not currently support it // via the dbus api - freezerPath, err := joinFreezer(c, pid) - if err != nil { + if err := joinFreezer(c, pid); err != nil { return nil, err } - res.cleanupDirs = append(res.cleanupDirs, freezerPath) - if len(cpusetArgs) != 0 { - // systemd does not atm set up the cpuset controller, so we must manually - // join it. Additionally that is a very finicky controller where each - // level must have a full setup as the default for a new directory is "no cpus", - // so we avoid using any hierarchies here, creating a toplevel directory. - mountpoint, err := cgroups.FindCgroupMountpoint("cpuset") - if err != nil { - return nil, err - } - - initPath, err := cgroups.GetInitCgroupDir("cpuset") - if err != nil { - return nil, err - } - - var ( - foundCpus bool - foundMems bool - - rootPath = filepath.Join(mountpoint, initPath) - path = filepath.Join(mountpoint, initPath, c.Parent+"-"+c.Name) - ) - - res.cleanupDirs = append(res.cleanupDirs, path) - - if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return nil, err - } - - for _, arg := range cpusetArgs { - if arg.File == "cpuset.cpus" { - foundCpus = true - } - if arg.File == "cpuset.mems" { - foundMems = true - } - if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { - return nil, err - } - } - - // These are required, if not specified inherit from parent - if !foundCpus { - s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.cpus")) - if err != nil { - return nil, err - } - - if err := ioutil.WriteFile(filepath.Join(path, "cpuset.cpus"), s, 0700); err != nil { - return nil, err - } - } - - // These are required, if not specified inherit from parent - if !foundMems { - s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.mems")) - if err != nil { - return nil, err - } - - if err := ioutil.WriteFile(filepath.Join(path, "cpuset.mems"), s, 0700); err != nil { - return nil, err - } - } - - if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { + if c.CpusetCpus != "" { + if err := joinCpuset(c, pid); err != nil { return nil, err } } - return &res, nil + return res, nil } func writeFile(dir, file, data string) error { @@ -324,6 +161,7 @@ func writeFile(dir, file, data string) error { func (c *systemdCgroup) Paths() (map[string]string, error) { paths := make(map[string]string) + for sysname := range subsystems { subsystemPath, err := getSubsystemPath(c.cgroup, sysname) if err != nil { @@ -334,6 +172,7 @@ func (c *systemdCgroup) Paths() (map[string]string, error) { return nil, err } + paths[sysname] = subsystemPath } @@ -342,29 +181,29 @@ func (c *systemdCgroup) Paths() (map[string]string, error) { func (c *systemdCgroup) Cleanup() error { // systemd cleans up, we don't need to do much + paths, err := c.Paths() + if err != nil { + return err + } - for _, path := range c.cleanupDirs { + for _, path := range paths { os.RemoveAll(path) } return nil } -func joinFreezer(c *cgroups.Cgroup, pid int) (string, error) { +func joinFreezer(c *cgroups.Cgroup, pid int) error { path, err := getSubsystemPath(c, "freezer") if err != nil { - return "", err + return err } if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { - return "", err + return err } - if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { - return "", err - } - - return path, nil + return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700) } func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { @@ -409,20 +248,12 @@ func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { } func GetPids(c *cgroups.Cgroup) ([]int, error) { - unitName := getUnitName(c) - - mountpoint, err := cgroups.FindCgroupMountpoint("cpu") + path, err := getSubsystemPath(c, "cpu") if err != nil { return nil, err } - props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) - if err != nil { - return nil, err - } - cgroup := props["ControlGroup"].(string) - - return cgroups.ReadProcsFile(filepath.Join(mountpoint, cgroup)) + return cgroups.ReadProcsFile(path) } func getUnitName(c *cgroups.Cgroup) string { @@ -457,3 +288,71 @@ func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) { return stats, nil } + +// Atm we can't use the systemd device support because of two missing things: +// * Support for wildcards to allow mknod on any device +// * Support for wildcards to allow /dev/pts support +// +// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is +// in wide use. When both these are availalable we will be able to switch, but need to keep the old +// implementation for backwards compat. +// +// Note: we can't use systemd to set up the initial limits, and then change the cgroup +// because systemd will re-write the device settings if it needs to re-apply the cgroup context. +// This happens at least for v208 when any sibling unit is started. +func joinDevices(c *cgroups.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "devices") + if err != nil { + return err + } + + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return err + } + + if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { + return err + } + + if err := writeFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range c.AllowedDevices { + if err := writeFile(path, "devices.allow", dev.GetCgroupAllowString()); err != nil { + return err + } + } + + return nil +} + +func joinMemory(c *cgroups.Cgroup, pid int) error { + memorySwap := c.MemorySwap + + if memorySwap == 0 { + // By default, MemorySwap is set to twice the size of RAM. + memorySwap = c.Memory * 2 + } + + path, err := getSubsystemPath(c, "memory") + if err != nil { + return err + } + + return ioutil.WriteFile(filepath.Join(path, "memory.memsw.limit_in_bytes"), []byte(strconv.FormatInt(memorySwap, 10)), 0700) +} + +// systemd does not atm set up the cpuset controller, so we must manually +// join it. Additionally that is a very finicky controller where each +// level must have a full setup as the default for a new directory is "no cpus" +func joinCpuset(c *cgroups.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "cpuset") + if err != nil { + return err + } + + s := &fs.CpusetGroup{} + + return s.SetDir(path, c.CpusetCpus, pid) +} diff --git a/namespaces/exec.go b/namespaces/exec.go index eb92a71e..382abfbc 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -62,9 +62,7 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri command.Wait() return -1, err } - if cgroupRef != nil { - defer cgroupRef.Cleanup() - } + defer cgroupRef.Cleanup() cgroupPaths, err := cgroupRef.Paths() if err != nil {