Factor out checkpointing with external namespace code

To checkpoint and restore a container with an external network namespace
(like with Podman and CNI), runc tells CRIU to ignore the network
namespace during checkpoint and restore.

This commit moves that code to their own functions to be able to reuse
the same code path for external PID namespaces which are necessary for
checkpointing and restoring containers out of a pod in cri-o.

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber 2020-07-21 11:33:51 +00:00 committed by Adrian Reber
parent d65df61dc5
commit 610c5ad75c
No known key found for this signature in database
GPG Key ID: 82C9378ED3C4906A
1 changed files with 78 additions and 47 deletions

View File

@ -836,6 +836,72 @@ func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts)
}
}
func (c *linuxContainer) criuSupportsExtNS(t configs.NamespaceType) bool {
switch t {
case configs.NEWNET:
// CRIU supports different external namespace with different released CRIU versions.
// For network namespaces to work we need at least criu 3.11.0 => 31100.
return c.checkCriuVersion(31100) == nil
}
return false
}
func (c *linuxContainer) criuNsToKey(t configs.NamespaceType) string {
return "extRoot" + strings.Title(configs.NsName(t)) + "NS"
}
func (c *linuxContainer) handleCheckpointingExternalNamespaces(rpcOpts *criurpc.CriuOpts, t configs.NamespaceType) error {
if !c.criuSupportsExtNS(t) {
return nil
}
nsPath := c.config.Namespaces.PathOf(t)
if nsPath == "" {
return nil
}
// CRIU expects the information about an external namespace
// like this: --external <TYPE>[<inode>]:<key>
// This <key> is always 'extRoot<TYPE>NS'.
var ns unix.Stat_t
if err := unix.Stat(nsPath, &ns); err != nil {
return err
}
criuExternal := fmt.Sprintf("%s[%d]:%s", configs.NsName(t), ns.Ino, c.criuNsToKey(t))
rpcOpts.External = append(rpcOpts.External, criuExternal)
return nil
}
func (c *linuxContainer) handleRestoringExternalNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File, t configs.NamespaceType) error {
if !c.criuSupportsExtNS(t) {
return nil
}
nsPath := c.config.Namespaces.PathOf(t)
if nsPath == "" {
return nil
}
// CRIU wants the information about an existing namespace
// like this: --inherit-fd fd[<fd>]:<key>
// The <key> needs to be the same as during checkpointing.
// We are always using 'extRoot<TYPE>NS' as the key in this.
nsFd, err := os.Open(nsPath)
if err != nil {
logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
}
inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String(c.criuNsToKey(t))
// The offset of four is necessary because 0, 1, 2 and 3 is already
// used by stdin, stdout, stderr, 'criu swrk' socket.
inheritFd.Fd = proto.Int32(int32(4 + len(*extraFiles)))
rpcOpts.InheritFd = append(rpcOpts.InheritFd, inheritFd)
// All open FDs need to be transferred to CRIU via extraFiles
*extraFiles = append(*extraFiles, nsFd)
return nil
}
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@ -909,26 +975,9 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
// will expect that the namespace exists during restore.
// This basically means that CRIU will ignore the namespace
// and expect to be setup correctly.
nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
if nsPath != "" {
// For this to work we need at least criu 3.11.0 => 31100.
// As there was already a successful version check we will
// not error out if it fails. runc will just behave as it used
// to do and ignore external network namespaces.
err := c.checkCriuVersion(31100)
if err == nil {
// CRIU expects the information about an external namespace
// like this: --external net[<inode>]:<key>
// This <key> is always 'extRootNetNS'.
var netns unix.Stat_t
err = unix.Stat(nsPath, &netns)
if err != nil {
if err := c.handleCheckpointingExternalNamespaces(&rpcOpts, configs.NEWNET); err != nil {
return err
}
criuExternal := fmt.Sprintf("net[%d]:extRootNetNS", netns.Ino)
rpcOpts.External = append(rpcOpts.External, criuExternal)
}
}
// CRIU can use cgroup freezer; when rpcOpts.FreezeCgroup
// is not set, CRIU uses ptrace() to pause the processes.
@ -1251,33 +1300,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// Same as during checkpointing. If the container has a specific network namespace
// assigned to it, this now expects that the checkpoint will be restored in a
// already created network namespace.
nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
if nsPath != "" {
// For this to work we need at least criu 3.11.0 => 31100.
// As there was already a successful version check we will
// not error out if it fails. runc will just behave as it used
// to do and ignore external network namespaces.
err := c.checkCriuVersion(31100)
if err == nil {
// CRIU wants the information about an existing network namespace
// like this: --inherit-fd fd[<fd>]:<key>
// The <key> needs to be the same as during checkpointing.
// We are always using 'extRootNetNS' as the key in this.
netns, err := os.Open(nsPath)
if err != nil {
logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
}
defer netns.Close()
inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String("extRootNetNS")
// The offset of four is necessary because 0, 1, 2 and 3 is already
// used by stdin, stdout, stderr, 'criu swrk' socket.
inheritFd.Fd = proto.Int32(int32(4 + len(extraFiles)))
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
// All open FDs need to be transferred to CRIU via extraFiles
extraFiles = append(extraFiles, netns)
}
if err := c.handleRestoringExternalNamespaces(req.Opts, &extraFiles, configs.NEWNET); err != nil {
return err
}
// This will modify the rootfs of the container in the same way runc
@ -1345,7 +1369,14 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
return c.criuSwrk(process, req, criuOpts, extraFiles)
err = c.criuSwrk(process, req, criuOpts, extraFiles)
// Now that CRIU is done let's close all opened FDs CRIU needed.
for _, fd := range extraFiles {
fd.Close()
}
return err
}
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {