From 1932917b716d688b2a56de30ec053c4f7a9d8fb6 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 2 Sep 2019 10:25:04 +0200 Subject: [PATCH 1/3] libcontainer: add initial support for cgroups v2 allow to set what subsystems are used by libcontainer/cgroups/fs.Manager. subsystemsUnified is used on a system running with cgroups v2 unified mode. Signed-off-by: Giuseppe Scrivano --- libcontainer/cgroups/fs/apply_raw.go | 25 +++- libcontainer/cgroups/fs/cpu_v2.go | 92 +++++++++++++ libcontainer/cgroups/fs/cpuset_v2.go | 159 +++++++++++++++++++++ libcontainer/cgroups/fs/freezer_v2.go | 74 ++++++++++ libcontainer/cgroups/fs/io_v2.go | 191 ++++++++++++++++++++++++++ libcontainer/cgroups/fs/memory_v2.go | 164 ++++++++++++++++++++++ libcontainer/cgroups/fs/pids_v2.go | 107 +++++++++++++++ libcontainer/cgroups/fs/utils.go | 7 +- libcontainer/cgroups/utils.go | 58 +++++++- libcontainer/configs/blkio_device.go | 5 + libcontainer/configs/cgroup_linux.go | 8 ++ libcontainer/container_linux.go | 11 +- 12 files changed, 890 insertions(+), 11 deletions(-) create mode 100644 libcontainer/cgroups/fs/cpu_v2.go create mode 100644 libcontainer/cgroups/fs/cpuset_v2.go create mode 100644 libcontainer/cgroups/fs/freezer_v2.go create mode 100644 libcontainer/cgroups/fs/io_v2.go create mode 100644 libcontainer/cgroups/fs/memory_v2.go create mode 100644 libcontainer/cgroups/fs/pids_v2.go diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go index d4f271d6..512fd700 100644 --- a/libcontainer/cgroups/fs/apply_raw.go +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -18,7 +18,7 @@ import ( ) var ( - subsystems = subsystemSet{ + subsystemsLegacy = subsystemSet{ &CpusetGroup{}, &DevicesGroup{}, &MemoryGroup{}, @@ -33,6 +33,14 @@ var ( &FreezerGroup{}, &NameGroup{GroupName: "name=systemd", Join: true}, } + subsystemsUnified = subsystemSet{ + &CpusetGroupV2{}, + &FreezerGroupV2{}, + &CpuGroupV2{}, + &MemoryGroupV2{}, + &IOGroupV2{}, + &PidsGroupV2{}, + } HugePageSizes, _ = cgroups.GetHugePageSize() ) @@ -129,6 +137,13 @@ func isIgnorableError(rootless bool, err error) bool { return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES } +func (m *Manager) getSubsystems() subsystemSet { + if cgroups.IsCgroup2UnifiedMode() { + return subsystemsUnified + } + return subsystemsLegacy +} + func (m *Manager) Apply(pid int) (err error) { if m.Cgroups == nil { return nil @@ -158,7 +173,7 @@ func (m *Manager) Apply(pid int) (err error) { return cgroups.EnterPid(m.Paths, pid) } - for _, sys := range subsystems { + for _, sys := range m.getSubsystems() { // TODO: Apply should, ideally, be reentrant or be broken up into a separate // create and join phase so that the cgroup hierarchy for a container can be // created then join consists of writing the process pids to cgroup.procs @@ -214,7 +229,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { - sys, err := subsystems.Get(name) + sys, err := m.getSubsystems().Get(name) if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { continue } @@ -237,7 +252,7 @@ func (m *Manager) Set(container *configs.Config) error { } paths := m.GetPaths() - for _, sys := range subsystems { + for _, sys := range m.getSubsystems() { path := paths[sys.Name()] if err := sys.Set(path, container.Cgroups); err != nil { if m.Rootless && sys.Name() == "devices" { @@ -274,7 +289,7 @@ func (m *Manager) Freeze(state configs.FreezerState) error { dir := paths["freezer"] prevState := m.Cgroups.Resources.Freezer m.Cgroups.Resources.Freezer = state - freezer, err := subsystems.Get("freezer") + freezer, err := m.getSubsystems().Get("freezer") if err != nil { return err } diff --git a/libcontainer/cgroups/fs/cpu_v2.go b/libcontainer/cgroups/fs/cpu_v2.go new file mode 100644 index 00000000..245071ae --- /dev/null +++ b/libcontainer/cgroups/fs/cpu_v2.go @@ -0,0 +1,92 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpuGroupV2 struct { +} + +func (s *CpuGroupV2) Name() string { + return "cpu" +} + +func (s *CpuGroupV2) Apply(d *cgroupData) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + path, err := d.path("cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(path, d.config, d.pid) +} + +func (s *CpuGroupV2) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpu cgroup mounted. + // Just do nothing and don't fail. + if path == "" { + return nil + } + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + return cgroups.WriteCgroupProc(path, pid) +} + +func (s *CpuGroupV2) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuWeight != 0 { + if err := writeFile(path, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil { + return err + } + } + + if cgroup.Resources.CpuMax != "" { + if err := writeFile(path, "cpu.max", cgroup.Resources.CpuMax); err != nil { + return err + } + } + + return nil +} + +func (s *CpuGroupV2) Remove(d *cgroupData) error { + return removePath(d.path("cpu")) +} + +func (s *CpuGroupV2) GetStats(path string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "usage_usec": + stats.CpuStats.CpuUsage.TotalUsage = v * 1000 + + case "user_usec": + stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000 + + case "system_usec": + stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000 + } + } + return nil +} diff --git a/libcontainer/cgroups/fs/cpuset_v2.go b/libcontainer/cgroups/fs/cpuset_v2.go new file mode 100644 index 00000000..fa88cfb7 --- /dev/null +++ b/libcontainer/cgroups/fs/cpuset_v2.go @@ -0,0 +1,159 @@ +// +build linux + +package fs + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +type CpusetGroupV2 struct { +} + +func (s *CpusetGroupV2) Name() string { + return "cpuset" +} + +func (s *CpusetGroupV2) Apply(d *cgroupData) error { + dir, err := d.path("cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(dir, d.config, d.pid) +} + +func (s *CpusetGroupV2) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpusetCpus != "" { + if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { + return err + } + } + if cgroup.Resources.CpusetMems != "" { + if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroupV2) Remove(d *cgroupData) error { + return removePath(d.path("cpuset")) +} + +func (s *CpusetGroupV2) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s *CpusetGroupV2) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. + if dir == "" { + return nil + } + mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo") + if err != nil { + return err + } + root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo))) + // 'ensureParent' start with parent because we don't want to + // explicitly inherit from parent, it could conflict with + // 'cpuset.cpu_exclusive'. + if err := s.ensureParent(filepath.Dir(dir), root); err != nil { + return err + } + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + // We didn't inherit cpuset configs from parent, but we have + // to ensure cpuset configs are set before moving task into the + // cgroup. + // The logic is, if user specified cpuset configs, use these + // specified configs, otherwise, inherit from parent. This makes + // cpuset configs work correctly with 'cpuset.cpu_exclusive', and + // keep backward compatibility. + if err := s.ensureCpusAndMems(dir, cgroup); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + return cgroups.WriteCgroupProc(dir, pid) +} + +func (s *CpusetGroupV2) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus.effective")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems.effective")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (s *CpusetGroupV2) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if libcontainerUtils.CleanPath(parent) == root { + return nil + } + // Avoid infinite recursion. + if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if err := s.ensureParent(parent, root); err != nil { + return err + } + if err := os.MkdirAll(current, 0755); err != nil { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *CpusetGroupV2) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroupV2) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} + +func (s *CpusetGroupV2) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error { + if err := s.Set(path, cgroup); err != nil { + return err + } + return s.copyIfNeeded(path, filepath.Dir(path)) +} diff --git a/libcontainer/cgroups/fs/freezer_v2.go b/libcontainer/cgroups/fs/freezer_v2.go new file mode 100644 index 00000000..186de9ab --- /dev/null +++ b/libcontainer/cgroups/fs/freezer_v2.go @@ -0,0 +1,74 @@ +// +build linux + +package fs + +import ( + "fmt" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type FreezerGroupV2 struct { +} + +func (s *FreezerGroupV2) Name() string { + return "freezer" +} + +func (s *FreezerGroupV2) Apply(d *cgroupData) error { + _, err := d.join("freezer") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *FreezerGroupV2) Set(path string, cgroup *configs.Cgroup) error { + var desiredState string + filename := "cgroup.freeze" + if cgroup.Resources.Freezer == configs.Frozen { + desiredState = "1" + } else { + desiredState = "0" + } + + switch cgroup.Resources.Freezer { + case configs.Frozen, configs.Thawed: + for { + // In case this loop does not exit because it doesn't get the expected + // state, let's write again this state, hoping it's going to be properly + // set this time. Otherwise, this loop could run infinitely, waiting for + // a state change that would never happen. + if err := writeFile(path, filename, desiredState); err != nil { + return err + } + + state, err := readFile(path, filename) + if err != nil { + return err + } + if strings.TrimSpace(state) == desiredState { + break + } + + time.Sleep(1 * time.Millisecond) + } + case configs.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) + } + + return nil +} + +func (s *FreezerGroupV2) Remove(d *cgroupData) error { + return removePath(d.path("freezer")) +} + +func (s *FreezerGroupV2) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/io_v2.go b/libcontainer/cgroups/fs/io_v2.go new file mode 100644 index 00000000..84b18294 --- /dev/null +++ b/libcontainer/cgroups/fs/io_v2.go @@ -0,0 +1,191 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type IOGroupV2 struct { +} + +func (s *IOGroupV2) Name() string { + return "blkio" +} + +func (s *IOGroupV2) Apply(d *cgroupData) error { + _, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *IOGroupV2) Set(path string, cgroup *configs.Cgroup) error { + cgroupsv2 := cgroups.IsCgroup2UnifiedMode() + + if cgroup.Resources.BlkioWeight != 0 { + filename := "blkio.weight" + if cgroupsv2 { + filename = "io.bfq.weight" + } + if err := writeFile(path, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil { + return err + } + } + + if cgroup.Resources.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil { + return err + } + } + for _, wd := range cgroup.Resources.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { + if cgroupsv2 { + if err := writeFile(path, "io.max", td.StringName("rbps")); err != nil { + return err + } + } else { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { + return err + } + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { + if cgroupsv2 { + if err := writeFile(path, "io.max", td.StringName("wbps")); err != nil { + return err + } + } else { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { + return err + } + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { + if cgroupsv2 { + if err := writeFile(path, "io.max", td.StringName("riops")); err != nil { + return err + } + } else { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { + if cgroupsv2 { + if err := writeFile(path, "io.max", td.StringName("wiops")); err != nil { + return err + } + } else { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { + return err + } + } + } + + return nil +} + +func (s *IOGroupV2) Remove(d *cgroupData) error { + return removePath(d.path("blkio")) +} + +func readCgroup2MapFile(path string, name string) (map[string][]string, error) { + ret := map[string][]string{} + p := filepath.Join("/sys/fs/cgroup", path, name) + f, err := os.Open(p) + if err != nil { + if os.IsNotExist(err) { + return ret, nil + } + return nil, err + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(line) + if len(parts) < 2 { + continue + } + ret[parts[0]] = parts[1:] + } + if err := scanner.Err(); err != nil { + return nil, err + } + return ret, nil +} + +func (s *IOGroupV2) getCgroupV2Stats(path string, stats *cgroups.Stats) error { + // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt + var ioServiceBytesRecursive []cgroups.BlkioStatEntry + values, err := readCgroup2MapFile(path, "io.stat") + if err != nil { + return err + } + for k, v := range values { + d := strings.Split(k, ":") + if len(d) != 2 { + continue + } + minor, err := strconv.ParseUint(d[0], 10, 0) + if err != nil { + return err + } + major, err := strconv.ParseUint(d[1], 10, 0) + if err != nil { + return err + } + + for _, item := range v { + d := strings.Split(item, "=") + if len(d) != 2 { + continue + } + op := d[0] + + // Accommodate the cgroup v1 naming + switch op { + case "rbytes": + op = "read" + case "wbytes": + op = "write" + } + + value, err := strconv.ParseUint(d[1], 10, 0) + if err != nil { + return err + } + + entry := cgroups.BlkioStatEntry{ + Op: op, + Major: major, + Minor: minor, + Value: value, + } + ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry) + } + } + stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive} + return nil +} + +func (s *IOGroupV2) GetStats(path string, stats *cgroups.Stats) error { + return s.getCgroupV2Stats(path, stats) +} diff --git a/libcontainer/cgroups/fs/memory_v2.go b/libcontainer/cgroups/fs/memory_v2.go new file mode 100644 index 00000000..2ad997bc --- /dev/null +++ b/libcontainer/cgroups/fs/memory_v2.go @@ -0,0 +1,164 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type MemoryGroupV2 struct { +} + +func (s *MemoryGroupV2) Name() string { + return "memory" +} + +func (s *MemoryGroupV2) Apply(d *cgroupData) (err error) { + path, err := d.path("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } else if path == "" { + return nil + } + if memoryAssigned(d.config) { + if _, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // Only enable kernel memory accouting when this cgroup + // is created by libcontainer, otherwise we might get + // error when people use `cgroupsPath` to join an existed + // cgroup whose kernel memory is not initialized. + if err := EnableKernelMemoryAccounting(path); err != nil { + return err + } + } + } + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + + // We need to join memory cgroup after set memory limits, because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + _, err = d.join("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func setMemoryAndSwapCgroups(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.MemorySwap != 0 { + if err := writeFile(path, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + if cgroup.Resources.Memory != 0 { + if err := writeFile(path, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } + return nil +} + +func (s *MemoryGroupV2) Set(path string, cgroup *configs.Cgroup) error { + + if err := setMemoryAndSwapCgroups(path, cgroup); err != nil { + return err + } + + if cgroup.Resources.KernelMemory != 0 { + if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil { + return err + } + } + + if cgroup.Resources.MemoryReservation != 0 { + if err := writeFile(path, "memory.high", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *MemoryGroupV2) Remove(d *cgroupData) error { + return removePath(d.path("memory")) +} + +func (s *MemoryGroupV2) GetStats(path string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryDataV2(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryDataV2(path, "swap") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + + stats.MemoryStats.UseHierarchy = true + return nil +} + +func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "current"}, ".") + limit := strings.Join([]string{moduleName, "max"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + + value, err = getCgroupParamUint(path, limit) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) + } + memoryData.Limit = value + + return memoryData, nil +} diff --git a/libcontainer/cgroups/fs/pids_v2.go b/libcontainer/cgroups/fs/pids_v2.go new file mode 100644 index 00000000..3413a2a0 --- /dev/null +++ b/libcontainer/cgroups/fs/pids_v2.go @@ -0,0 +1,107 @@ +// +build linux + +package fs + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "golang.org/x/sys/unix" +) + +type PidsGroupV2 struct { +} + +func (s *PidsGroupV2) Name() string { + return "pids" +} + +func (s *PidsGroupV2) Apply(d *cgroupData) error { + _, err := d.join("pids") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PidsGroupV2) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.PidsLimit != 0 { + // "max" is the fallback value. + limit := "max" + + if cgroup.Resources.PidsLimit > 0 { + limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) + } + + if err := writeFile(path, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func (s *PidsGroupV2) Remove(d *cgroupData) error { + return removePath(d.path("pids")) +} + +func isNOTSUP(err error) bool { + switch err := err.(type) { + case *os.PathError: + return err.Err == unix.ENOTSUP + default: + return false + } +} + +func (s *PidsGroupV2) GetStats(path string, stats *cgroups.Stats) error { + current, err := getCgroupParamUint(path, "pids.current") + if os.IsNotExist(err) { + // if the controller is not enabled, let's read the list + // PIDs (or threads if cgroup.threads is enabled) + contents, err := ioutil.ReadFile(filepath.Join(path, "cgroup.procs")) + if err != nil && isNOTSUP(err) { + contents, err = ioutil.ReadFile(filepath.Join(path, "cgroup.threads")) + } + if err != nil { + return err + } + pids := make(map[string]string) + for _, i := range strings.Split(string(contents), "\n") { + if i != "" { + pids[i] = i + } + } + stats.PidsStats.Current = uint64(len(pids)) + stats.PidsStats.Limit = 0 + return nil + + } + if err != nil { + return fmt.Errorf("failed to parse pids.current - %s", err) + } + + maxString, err := getCgroupParamString(path, "pids.max") + if err != nil { + return fmt.Errorf("failed to parse pids.max - %s", err) + } + + // Default if pids.max == "max" is 0 -- which represents "no limit". + var max uint64 + if maxString != "max" { + max, err = parseUint(maxString, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) + } + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/libcontainer/cgroups/fs/utils.go b/libcontainer/cgroups/fs/utils.go index 5ff0a161..30922777 100644 --- a/libcontainer/cgroups/fs/utils.go +++ b/libcontainer/cgroups/fs/utils.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io/ioutil" + "math" "path/filepath" "strconv" "strings" @@ -59,8 +60,12 @@ func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { if err != nil { return 0, err } + trimmed := strings.TrimSpace(string(contents)) + if trimmed == "max" { + return math.MaxUint64, nil + } - res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + res, err := parseUint(trimmed, 10, 64) if err != nil { return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName) } diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index ec79ae76..60790f83 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -11,6 +11,8 @@ import ( "path/filepath" "strconv" "strings" + "sync" + "syscall" "time" units "github.com/docker/go-units" @@ -22,6 +24,11 @@ const ( CgroupProcesses = "cgroup.procs" ) +var ( + isUnifiedOnce sync.Once + isUnified bool +) + // HugePageSizeUnitList is a list of the units used by the linux kernel when // naming the HugePage control files. // https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt @@ -29,6 +36,18 @@ const ( // depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393 var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"} +// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. +func IsCgroup2UnifiedMode() bool { + isUnifiedOnce.Do(func() { + var st syscall.Statfs_t + if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil { + panic("cannot statfs cgroup root") + } + isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC + }) + return isUnified +} + // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) @@ -49,6 +68,10 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, } defer f.Close() + if IsCgroup2UnifiedMode() { + subsystem = "" + } + return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) } @@ -57,12 +80,12 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst for scanner.Scan() { txt := scanner.Text() fields := strings.Fields(txt) - if len(fields) < 5 { + if len(fields) < 9 { continue } if strings.HasPrefix(fields[4], cgroupPath) { for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if opt == subsystem { + if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem { return fields[4], fields[3], nil } } @@ -76,6 +99,19 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst } func isSubsystemAvailable(subsystem string) bool { + if IsCgroup2UnifiedMode() { + controllers, err := GetAllSubsystems() + if err != nil { + return false + } + for _, c := range controllers { + if c == subsystem { + return true + } + } + return false + } + cgroups, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { return false @@ -120,7 +156,7 @@ func FindCgroupMountpointDir() (string, error) { return "", fmt.Errorf("Found no fields post '-' in %q", text) } - if postSeparatorFields[0] == "cgroup" { + if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" { // Check that the mount is properly formatted. if numPostFields < 3 { return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) @@ -193,6 +229,19 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, // GetCgroupMounts returns the mounts for the cgroup subsystems. // all indicates whether to return just the first instance or all the mounts. func GetCgroupMounts(all bool) ([]Mount, error) { + if IsCgroup2UnifiedMode() { + availableControllers, err := GetAllSubsystems() + if err != nil { + return nil, err + } + m := Mount{ + Mountpoint: "/sys/fs/cgroup", + Root: "/sys/fs/cgroup", + Subsystems: availableControllers, + } + return []Mount{m}, nil + } + f, err := os.Open("/proc/self/mountinfo") if err != nil { return nil, err @@ -356,6 +405,9 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { } func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + if IsCgroup2UnifiedMode() { + return "/", nil + } if p, ok := cgroups[subsystem]; ok { return p, nil diff --git a/libcontainer/configs/blkio_device.go b/libcontainer/configs/blkio_device.go index e0f3ca16..fa195bf9 100644 --- a/libcontainer/configs/blkio_device.go +++ b/libcontainer/configs/blkio_device.go @@ -59,3 +59,8 @@ func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { func (td *ThrottleDevice) String() string { return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) } + +// StringName formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) StringName(name string) string { + return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate) +} diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index e15a662f..58ed19c9 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -119,4 +119,12 @@ type Resources struct { // Set class identifier for container's network packets NetClsClassid uint32 `json:"net_cls_classid_u"` + + // Used on cgroups v2: + + // CpuWeight sets a proportional bandwidth limit. + CpuWeight uint64 `json:"cpu_weight"` + + // CpuMax sets she maximum bandwidth limit (format: max period). + CpuMax string `json:"cpu_max"` } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index d6c4ebda..09279204 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1814,7 +1814,14 @@ func (c *linuxContainer) isPaused() (bool, error) { // A container doesn't have a freezer cgroup return false, nil } - data, err := ioutil.ReadFile(filepath.Join(fcg, "freezer.state")) + pausedState := "FROZEN" + filename := "freezer.state" + if cgroups.IsCgroup2UnifiedMode() { + filename = "cgroup.freeze" + pausedState = "1" + } + + data, err := ioutil.ReadFile(filepath.Join(fcg, filename)) if err != nil { // If freezer cgroup is not mounted, the container would just be not paused. if os.IsNotExist(err) { @@ -1822,7 +1829,7 @@ func (c *linuxContainer) isPaused() (bool, error) { } return false, newSystemErrorWithCause(err, "checking if container is paused") } - return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil + return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil } func (c *linuxContainer) currentState() (*State, error) { From ec11136828c9558db61de95daec25475549dbe7e Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 2 Sep 2019 11:09:55 +0200 Subject: [PATCH 2/3] libcontainer, cgroups: rename systemd.Manager to LegacyManager Signed-off-by: Giuseppe Scrivano --- libcontainer/cgroups/systemd/apply_systemd.go | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index 7a62f5c3..a956cbc5 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -21,7 +21,7 @@ import ( "github.com/sirupsen/logrus" ) -type Manager struct { +type LegacyManager struct { mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string @@ -49,7 +49,7 @@ func (s subsystemSet) Get(name string) (subsystem, error) { return nil, errSubsystemDoesNotExist } -var subsystems = subsystemSet{ +var legacySubsystems = subsystemSet{ &fs.CpusetGroup{}, &fs.DevicesGroup{}, &fs.MemoryGroup{}, @@ -120,14 +120,14 @@ func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]s return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager") } return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return &Manager{ + return &LegacyManager{ Cgroups: config, Paths: paths, } }, nil } -func (m *Manager) Apply(pid int) error { +func (m *LegacyManager) Apply(pid int) error { var ( c = m.Cgroups unitName = getUnitName(c) @@ -253,7 +253,7 @@ func (m *Manager) Apply(pid int) error { } paths := make(map[string]string) - for _, s := range subsystems { + for _, s := range legacySubsystems { subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) if err != nil { // Don't fail if a cgroup hierarchy was not found, just skip this subsystem @@ -268,7 +268,7 @@ func (m *Manager) Apply(pid int) error { return nil } -func (m *Manager) Destroy() error { +func (m *LegacyManager) Destroy() error { if m.Cgroups.Paths != nil { return nil } @@ -282,7 +282,7 @@ func (m *Manager) Destroy() error { return nil } -func (m *Manager) GetPaths() map[string]string { +func (m *LegacyManager) GetPaths() map[string]string { m.mu.Lock() paths := m.Paths m.mu.Unlock() @@ -294,6 +294,7 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { if err != nil { return "", err } + if err := os.MkdirAll(path, 0755); err != nil { return "", err } @@ -304,7 +305,7 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { } func joinCgroups(c *configs.Cgroup, pid int) error { - for _, sys := range subsystems { + for _, sys := range legacySubsystems { name := sys.Name() switch name { case "name=systemd": @@ -399,7 +400,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil } -func (m *Manager) Freeze(state configs.FreezerState) error { +func (m *LegacyManager) Freeze(state configs.FreezerState) error { path, err := getSubsystemPath(m.Cgroups, "freezer") if err != nil { return err @@ -418,7 +419,7 @@ func (m *Manager) Freeze(state configs.FreezerState) error { return nil } -func (m *Manager) GetPids() ([]int, error) { +func (m *LegacyManager) GetPids() ([]int, error) { path, err := getSubsystemPath(m.Cgroups, "devices") if err != nil { return nil, err @@ -426,7 +427,7 @@ func (m *Manager) GetPids() ([]int, error) { return cgroups.GetPids(path) } -func (m *Manager) GetAllPids() ([]int, error) { +func (m *LegacyManager) GetAllPids() ([]int, error) { path, err := getSubsystemPath(m.Cgroups, "devices") if err != nil { return nil, err @@ -434,7 +435,7 @@ func (m *Manager) GetAllPids() ([]int, error) { return cgroups.GetAllPids(path) } -func (m *Manager) GetStats() (*cgroups.Stats, error) { +func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() stats := cgroups.NewStats() @@ -451,13 +452,13 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { return stats, nil } -func (m *Manager) Set(container *configs.Config) error { +func (m *LegacyManager) Set(container *configs.Config) error { // If Paths are set, then we are just joining cgroups paths // and there is no need to set any values. if m.Cgroups.Paths != nil { return nil } - for _, sys := range subsystems { + for _, sys := range legacySubsystems { // Get the subsystem path, but don't error out for not found cgroups. path, err := getSubsystemPath(container.Cgroups, sys.Name()) if err != nil && !cgroups.IsNotFound(err) { From 524cb7c318207032518ebbf9df507a549d6de646 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 2 Sep 2019 11:10:52 +0200 Subject: [PATCH 3/3] libcontainer: add systemd.UnifiedManager Signed-off-by: Giuseppe Scrivano --- libcontainer/cgroups/systemd/apply_systemd.go | 12 +- .../cgroups/systemd/unified_hierarchy.go | 329 ++++++++++++++++++ 2 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 libcontainer/cgroups/systemd/unified_hierarchy.go diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index a956cbc5..f274a49a 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -119,6 +119,14 @@ func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]s if !isRunningSystemd() { return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager") } + if cgroups.IsCgroup2UnifiedMode() { + return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &UnifiedManager{ + Cgroups: config, + Paths: paths, + } + }, nil + } return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { return &LegacyManager{ Cgroups: config, @@ -407,7 +415,7 @@ func (m *LegacyManager) Freeze(state configs.FreezerState) error { } prevState := m.Cgroups.Resources.Freezer m.Cgroups.Resources.Freezer = state - freezer, err := subsystems.Get("freezer") + freezer, err := legacySubsystems.Get("freezer") if err != nil { return err } @@ -440,7 +448,7 @@ func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { - sys, err := subsystems.Get(name) + sys, err := legacySubsystems.Get(name) if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { continue } diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go new file mode 100644 index 00000000..d394e3a5 --- /dev/null +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -0,0 +1,329 @@ +// +build linux,!static_build + +package systemd + +import ( + "fmt" + "io/ioutil" + "math" + "os" + "path/filepath" + "strings" + "sync" + "time" + + systemdDbus "github.com/coreos/go-systemd/dbus" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/sirupsen/logrus" +) + +type UnifiedManager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +var unifiedSubsystems = subsystemSet{ + &fs.CpusetGroupV2{}, + &fs.FreezerGroupV2{}, + &fs.CpuGroupV2{}, + &fs.MemoryGroupV2{}, + &fs.IOGroupV2{}, + &fs.PidsGroupV2{}, +} + +func (m *UnifiedManager) Apply(pid int) error { + var ( + c = m.Cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemdDbus.Property + ) + + if c.Paths != nil { + paths := make(map[string]string) + for name, path := range c.Paths { + _, err := getSubsystemPath(m.Cgroups, name) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = path + } + m.Paths = paths + return cgroups.EnterPid(m.Paths, pid) + } + + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + // if we create a slice, the parent is defined via a Wants= + if strings.HasSuffix(unitName, ".slice") { + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // otherwise, we use Slice= + properties = append(properties, systemdDbus.PropSlice(slice)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + // Check if we can delegate. This is only supported on systemd versions 218 and above. + if !strings.HasSuffix(unitName, ".slice") { + // Assume scopes always support delegation. + properties = append(properties, newProp("Delegate", true)) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) + + // Assume DefaultDependencies= will always work (the check for it was previously broken.) + properties = append(properties, + newProp("DefaultDependencies", false)) + + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(c.Resources.Memory))) + } + + if c.Resources.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", c.Resources.CpuShares)) + } + + // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. + if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { + // corresponds to USEC_INFINITY in systemd + // if USEC_INFINITY is provided, CPUQuota is left unbound by systemd + // always setting a property value ensures we can apply a quota and remove it later + cpuQuotaPerSecUSec := uint64(math.MaxUint64) + if c.Resources.CpuQuota > 0 { + // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota + // (integer percentage of CPU) internally. This means that if a fractional percent of + // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest + // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. + cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod + if cpuQuotaPerSecUSec%10000 != 0 { + cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + } + } + properties = append(properties, + newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } + + if c.Resources.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight))) + } + + if c.Resources.PidsLimit > 0 { + properties = append(properties, + newProp("TasksAccounting", true), + newProp("TasksMax", uint64(c.Resources.PidsLimit))) + } + + // We have to set kernel memory here, as we can't change it once + // processes have been attached to the cgroup. + if c.Resources.KernelMemory != 0 { + if err := setKernelMemory(c); err != nil { + return err + } + } + + statusChan := make(chan string, 1) + if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil { + select { + case <-statusChan: + case <-time.After(time.Second): + logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName) + } + } else if !isUnitExists(err) { + return err + } + + if err := joinCgroupsV2(c, pid); err != nil { + return err + } + + paths := make(map[string]string) + for _, s := range unifiedSubsystems { + subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[s.Name()] = subsystemPath + } + m.Paths = paths + return nil +} + +func (m *UnifiedManager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil) + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *UnifiedManager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func createCgroupsv2Path(path string) (Err error) { + content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers") + if err != nil { + return err + } + if !filepath.HasPrefix(path, "/sys/fs/cgroup") { + return fmt.Errorf("invalid cgroup path %s", path) + } + + res := "" + for i, c := range strings.Split(strings.TrimSpace(string(content)), " ") { + if i == 0 { + res = fmt.Sprintf("+%s", c) + } else { + res = res + fmt.Sprintf(" +%s", c) + } + } + resByte := []byte(res) + + current := "/sys/fs" + elements := strings.Split(path, "/") + for i, e := range elements[3:] { + current = filepath.Join(current, e) + if i > 0 { + if err := os.Mkdir(current, 0755); err != nil { + if !os.IsExist(err) { + return err + } + } else { + // If the directory was created, be sure it is not left around on errors. + defer func() { + if Err != nil { + os.Remove(current) + } + }() + } + } + if i < len(elements[3:])-1 { + if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), resByte, 0755); err != nil { + return err + } + } + } + return nil +} + +func joinCgroupsV2(c *configs.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "memory") + if err != nil { + return err + } + return createCgroupsv2Path(path) +} + +func (m *UnifiedManager) Freeze(state configs.FreezerState) error { + path, err := getSubsystemPath(m.Cgroups, "freezer") + if err != nil { + return err + } + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := unifiedSubsystems.Get("freezer") + if err != nil { + return err + } + err = freezer.Set(path, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *UnifiedManager) GetPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "devices") + if err != nil { + return nil, err + } + return cgroups.GetPids(path) +} + +func (m *UnifiedManager) GetAllPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "devices") + if err != nil { + return nil, err + } + return cgroups.GetAllPids(path) +} + +func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := unifiedSubsystems.Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + + return stats, nil +} + +func (m *UnifiedManager) Set(container *configs.Config) error { + // If Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. + if m.Cgroups.Paths != nil { + return nil + } + for _, sys := range unifiedSubsystems { + // Get the subsystem path, but don't error out for not found cgroups. + path, err := getSubsystemPath(container.Cgroups, sys.Name()) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +}