From 9280e3566d6f160fef8071887b58ab2e392c2f0a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 20 Apr 2020 18:06:59 -0700 Subject: [PATCH] checkpoint/restore: fix cgroupv2 handling In the case of the cgroup v2 unified hierarchy, the /sys/fs/cgroup mount is a real mount with an fstype of cgroup2 (rather than a set of external bind mounts, as is the case for cgroup v1). So, we should not add it to the list of "external bind mounts" on either checkpoint or restore. Without this fix, checkpoint integration tests fail on cgroup v2. The same is true for cgroup v1 with cgroupns. Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 26 +++++++++++++++++--------- libcontainer/rootfs_linux.go | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 23484ed2..bb67b9e6 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1085,18 +1085,19 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { go waitForCriuLazyServer(statusRead, criuOpts.StatusFd) } - //no need to dump these information in pre-dump + // no need to dump all this in pre-dump if !criuOpts.PreDump { + hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP) for _, m := range c.config.Mounts { switch m.Device { case "bind": c.addCriuDumpMount(req, m) case "cgroup": - if cgroups.IsCgroup2UnifiedMode() { - c.addCriuDumpMount(req, m) + if cgroups.IsCgroup2UnifiedMode() || hasCgroupns { + // real mount(s) continue } - // cgroup v1 + // a set of "external" bind mounts binds, err := getCgroupMounts(m) if err != nil { return err @@ -1174,7 +1175,14 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error { switch m.Device { case "cgroup": - // Do nothing for cgroup, CRIU should handle it + // No mount point(s) need to be created: + // + // * for v1, mount points are saved by CRIU because + 
// /sys/fs/cgroup is a tmpfs mount + // + // * for v2, /sys/fs/cgroup is a real mount, but + // the mountpoint appears as soon as /sys is mounted + return nil case "bind": // The prepareBindMount() function checks if source // exists. So it cannot be used for other filesystem types. @@ -1182,7 +1190,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error { return err } default: - // for all other file-systems just create the mountpoints + // for all other filesystems just create the mountpoints dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination) if err != nil { return err @@ -1356,16 +1364,16 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { return err } + hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP) for _, m := range c.config.Mounts { switch m.Device { case "bind": c.addCriuRestoreMount(req, m) case "cgroup": - if cgroups.IsCgroup2UnifiedMode() { - c.addCriuRestoreMount(req, m) + if cgroups.IsCgroup2UnifiedMode() || hasCgroupns { continue } - // cgroup v1 + // cgroup v1 is a set of bind mounts, unless cgroupns is used binds, err := getCgroupMounts(m) if err != nil { return err diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 84c5143a..e6cc0878 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -244,7 +244,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b } cgroupmount := &configs.Mount{ Source: "cgroup", - Device: "cgroup", + Device: "cgroup", // this is actually fstype Destination: subsystemPath, Flags: flags, Data: filepath.Base(subsystemPath),