Merge pull request #2411 from kolyshkin/v1-specific

libct/cgroups/utils: fix/separate cgroupv1 code
This commit is contained in:
Mrunal Patel 2020-06-17 06:45:19 -07:00 committed by GitHub
commit 12a7c8fc2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 350 additions and 318 deletions

View File

@ -3,8 +3,6 @@
package cgroups
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/configs"
)
@ -51,25 +49,3 @@ type Manager interface {
// Whether the cgroup path exists or not
Exists() bool
}
type NotFoundError struct {
Subsystem string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}

View File

@ -3,9 +3,11 @@
package fs
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/opencontainers/runc/libcontainer/cgroups"
@ -88,10 +90,43 @@ func getCgroupRoot() (string, error) {
return cgroupRoot, nil
}
root, err := cgroups.FindCgroupMountpointDir()
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
var root string
scanner := bufio.NewScanner(f)
for scanner.Scan() {
text := scanner.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "cgroup"
if numPostFields == 0 {
return "", fmt.Errorf("mountinfo: found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "cgroup" {
// Check that the mount is properly formatted.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
root = filepath.Dir(fields[4])
break
}
}
if err := scanner.Err(); err != nil {
return "", err
}
if root == "" {
return "", errors.New("no cgroup mount found in mountinfo")
}
if _, err := os.Stat(root); err != nil {
return "", err

View File

@ -20,7 +20,6 @@ import (
)
const (
CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
unifiedMountpoint = "/sys/fs/cgroup"
)
@ -28,8 +27,6 @@ const (
var (
isUnifiedOnce sync.Once
isUnified bool
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
)
// HugePageSizeUnitList is a list of the units used by the linux kernel when
@ -51,176 +48,19 @@ func IsCgroup2UnifiedMode() bool {
return isUnified
}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return unifiedMountpoint, nil
}
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
if IsCgroup2UnifiedMode() {
subsystem = ""
}
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Fields(txt)
if len(fields) < 9 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
return fields[4], fields[3], nil
}
}
}
}
if err := scanner.Err(); err != nil {
return "", "", err
}
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
if IsCgroup2UnifiedMode() {
controllers, err := GetAllSubsystems()
if err != nil {
return false
}
for _, c := range controllers {
if c == subsystem {
return true
}
}
return false
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
text := scanner.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "cgroup"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
// Check that the mount is properly formatted.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
return filepath.Dir(fields[4]), nil
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("cgroup")
}
type Mount struct {
Mountpoint string
Root string
Subsystems []string
}
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
m := Mount{
Mountpoint: fields[4],
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
seen, known := ss[opt]
if !known || (!all && seen) {
continue
}
ss[opt] = true
if strings.HasPrefix(opt, CgroupNamePrefix) {
opt = opt[len(CgroupNamePrefix):]
}
m.Subsystems = append(m.Subsystems, opt)
numFound++
}
if len(m.Subsystems) > 0 || all {
res = append(res, m)
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return res, nil
}
// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
// This function should not be used from cgroupv2 code, as in this case
// all the controllers are available under the constant unifiedMountpoint.
func GetCgroupMounts(all bool) ([]Mount, error) {
if IsCgroup2UnifiedMode() {
// TODO: remove cgroupv2 case once all external users are converted
availableControllers, err := GetAllSubsystems()
if err != nil {
return nil, err
@ -233,22 +73,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
return []Mount{m}, nil
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range allSubsystems {
allMap[s] = false
}
return getCgroupMountsHelper(allMap, f, all)
return getCgroupMountsV1(all)
}
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
@ -292,65 +117,6 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(file string) ([]int, error) {
f, err := os.Open(file)
if err != nil {
@ -375,8 +141,15 @@ func readProcsFile(file string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
// "cpu": "/user.slice/user-1000.slice"
// "pids": "/user.slice/user-1000.slice"
// etc.
//
// Note that for cgroup v2 unified hierarchy, there are no per-controller
// cgroup paths, so the resulting map will have a single element where the key
// is empty string ("") and the value is the cgroup path the <pid> is in.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@ -416,22 +189,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
return cgroups, nil
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
if IsCgroup2UnifiedMode() {
return "/", nil
}
if p, ok := cgroups[subsystem]; ok {
return p, nil
}
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
return "", NewNotFoundError(subsystem)
}
func PathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
return false

View File

@ -189,46 +189,52 @@ func TestGetCgroupMounts(t *testing.T) {
mountInfo: fedoraMountinfo,
root: "/",
subsystems: map[string]bool{
"cpuset": false,
"cpu": false,
"cpuacct": false,
"memory": false,
"devices": false,
"freezer": false,
"net_cls": false,
"blkio": false,
"perf_event": false,
"hugetlb": false,
"name=systemd": false,
"cpuset": false,
"cpu": false,
"cpuacct": false,
"memory": false,
"devices": false,
"freezer": false,
"net_cls": false,
"blkio": false,
"perf_event": false,
"hugetlb": false,
},
},
{
mountInfo: systemdMountinfo,
root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope",
subsystems: map[string]bool{
"cpuset": false,
"cpu": false,
"cpuacct": false,
"memory": false,
"devices": false,
"freezer": false,
"net_cls": false,
"blkio": false,
"perf_event": false,
"name=systemd": false,
"cpuset": false,
"cpu": false,
"cpuacct": false,
"memory": false,
"devices": false,
"freezer": false,
"net_cls": false,
"net_prio": false,
"blkio": false,
"perf_event": false,
},
},
{
mountInfo: bedrockMountinfo,
root: "/",
subsystems: map[string]bool{
"cpuset": false,
"cpu": false,
"cpuacct": false,
"memory": false,
"devices": false,
"freezer": false,
"net_cls": false,
"blkio": false,
"perf_event": false,
"name=systemd": false,
"cpuset": false,
"cpu": false,
"cpuacct": false,
"memory": false,
"devices": false,
"freezer": false,
"net_cls": false,
"net_prio": false,
"blkio": false,
"perf_event": false,
"pids": false,
},
},
}
@ -245,6 +251,7 @@ func TestGetCgroupMounts(t *testing.T) {
}
}
for ss := range td.subsystems {
ss = strings.TrimPrefix(ss, CgroupNamePrefix)
m, ok := cgMap[ss]
if !ok {
t.Fatalf("%s not found", ss)

View File

@ -0,0 +1,250 @@
package cgroups
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// Code in this source file are specific to cgroup v1,
// and must not be used from any cgroup v2 code.
const (
CgroupNamePrefix = "name="
)
var (
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
)
type NotFoundError struct {
Subsystem string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
if IsCgroup2UnifiedMode() {
return "", "", errUnified
}
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Fields(txt)
if len(fields) < 9 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
}
}
}
}
if err := scanner.Err(); err != nil {
return "", "", err
}
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
if IsCgroup2UnifiedMode() {
panic("don't call isSubsystemAvailable from cgroupv2 code")
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
m := Mount{
Mountpoint: fields[4],
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
seen, known := ss[opt]
if !known || (!all && seen) {
continue
}
ss[opt] = true
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
m.Subsystems = append(m.Subsystems, opt)
numFound++
}
if len(m.Subsystems) > 0 || all {
res = append(res, m)
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return res, nil
}
func getCgroupMountsV1(all bool) ([]Mount, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range allSubsystems {
allMap[s] = false
}
return getCgroupMountsHelper(allMap, f, all)
}
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
if p, ok := cgroups[subsystem]; ok {
return p, nil
}
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
return "", NewNotFoundError(subsystem)
}

View File

@ -703,7 +703,7 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
Features: criuFeat,
}
err := c.criuSwrk(nil, req, criuOpts, false, nil)
err := c.criuSwrk(nil, req, criuOpts, nil)
if err != nil {
logrus.Debugf("%s", err)
return errors.New("CRIU feature check failed")
@ -1045,7 +1045,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
}
}
err = c.criuSwrk(nil, req, criuOpts, false, nil)
err = c.criuSwrk(nil, req, criuOpts, nil)
if err != nil {
return err
}
@ -1339,10 +1339,15 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
return c.criuSwrk(process, req, criuOpts, true, extraFiles)
return c.criuSwrk(process, req, criuOpts, extraFiles)
}
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
// need to apply cgroups only on restore
if req.GetType() != criurpc.CriuReqType_RESTORE {
return nil
}
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
if err := c.cgroupManager.Apply(pid); err != nil {
return err
@ -1352,6 +1357,11 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return newSystemError(err)
}
if cgroups.IsCgroup2UnifiedMode() {
return nil
}
// the stuff below is cgroupv1-specific
path := fmt.Sprintf("/proc/%d/cgroup", pid)
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
if err != nil {
@ -1369,7 +1379,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
if err != nil {
return err
@ -1433,11 +1443,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
}
}()
if applyCgroups {
err := c.criuApplyCgroups(criuProcess.Pid, req)
if err != nil {
return err
}
if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil {
return err
}
var extFds []string