criu: restore into existing namespace when specified
Using CRIU to checkpoint and restore a container into an existing network namespace is currently not possible. If the network namespace is defined like { "type": "network", "path": "/run/netns/test" } there is the expectation that the restored container is again running in the network namespace specified with 'path'. This adds support for the new CRIU 'external namespace' feature to runc: during checkpointing that specific namespace is referenced as an external namespace, and during restore CRIU tries to restore the container in exactly that namespace. This breaks/fixes current runc behavior. If, without this patch, runc restores a container with such a network namespace definition, the path is ignored and CRIU recreates a network namespace without a name. With this patch runc uses the network namespace path (if available) to checkpoint and restore the container in just that network namespace. Restore will now fail if a container was checkpointed with a network namespace path set and that network namespace path does not exist during restore. runc still falls back to the old behavior if a CRIU older than 3.11 is installed. Fixes #1786 Related to https://github.com/projectatomic/libpod/pull/469 Thanks to Andrei Vagin for all the help in getting the interface between CRIU and runc right! Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
parent
308daade45
commit
fa43a72aba
|
@ -657,7 +657,7 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||||
Features: criuFeat,
|
Features: criuFeat,
|
||||||
}
|
}
|
||||||
|
|
||||||
err := c.criuSwrk(nil, req, criuOpts, false)
|
err := c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Debugf("%s", err)
|
logrus.Debugf("%s", err)
|
||||||
return fmt.Errorf("CRIU feature check failed")
|
return fmt.Errorf("CRIU feature check failed")
|
||||||
|
@ -770,7 +770,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
||||||
Type: &t,
|
Type: &t,
|
||||||
}
|
}
|
||||||
|
|
||||||
err := c.criuSwrk(nil, req, nil, false)
|
err := c.criuSwrk(nil, req, nil, false, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("CRIU version check failed: %s", err)
|
return fmt.Errorf("CRIU version check failed: %s", err)
|
||||||
}
|
}
|
||||||
|
@ -928,6 +928,33 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||||
LazyPages: proto.Bool(criuOpts.LazyPages),
|
LazyPages: proto.Bool(criuOpts.LazyPages),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the container is running in a network namespace and has
|
||||||
|
// a path to the network namespace configured, we will dump
|
||||||
|
// that network namespace as an external namespace and we
|
||||||
|
// will expect that the namespace exists during restore.
|
||||||
|
// This basically means that CRIU will ignore the namespace
|
||||||
|
// and expect it to be set up correctly.
|
||||||
|
nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
|
||||||
|
if nsPath != "" {
|
||||||
|
// For this to work we need at least criu 3.11.0 => 31100.
|
||||||
|
// As there was already a successful version check we will
|
||||||
|
// not error out if it fails. runc will just behave as it used
|
||||||
|
// to do and ignore external network namespaces.
|
||||||
|
err := c.checkCriuVersion(31100)
|
||||||
|
if err == nil {
|
||||||
|
// CRIU expects the information about an external namespace
|
||||||
|
// like this: --external net[<inode>]:<key>
|
||||||
|
// This <key> is always 'extRootNetNS'.
|
||||||
|
var netns syscall.Stat_t
|
||||||
|
err = syscall.Stat(nsPath, &netns)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
criuExternal := fmt.Sprintf("net[%d]:extRootNetNS", netns.Ino)
|
||||||
|
rpcOpts.External = append(rpcOpts.External, criuExternal)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fcg := c.cgroupManager.GetPaths()["freezer"]
|
fcg := c.cgroupManager.GetPaths()["freezer"]
|
||||||
if fcg != "" {
|
if fcg != "" {
|
||||||
rpcOpts.FreezeCgroup = proto.String(fcg)
|
rpcOpts.FreezeCgroup = proto.String(fcg)
|
||||||
|
@ -1032,7 +1059,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.criuSwrk(nil, req, criuOpts, false)
|
err = c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -1076,6 +1103,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||||
c.m.Lock()
|
c.m.Lock()
|
||||||
defer c.m.Unlock()
|
defer c.m.Unlock()
|
||||||
|
|
||||||
|
var extraFiles []*os.File
|
||||||
|
|
||||||
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
|
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
|
||||||
// support for unprivileged restore at the moment.
|
// support for unprivileged restore at the moment.
|
||||||
if c.config.Rootless {
|
if c.config.Rootless {
|
||||||
|
@ -1150,6 +1179,38 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Same as during checkpointing. If the container has a specific network namespace
|
||||||
|
// assigned to it, this now expects that the checkpoint will be restored in an
|
||||||
|
// already created network namespace.
|
||||||
|
nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
|
||||||
|
if nsPath != "" {
|
||||||
|
// For this to work we need at least criu 3.11.0 => 31100.
|
||||||
|
// As there was already a successful version check we will
|
||||||
|
// not error out if it fails. runc will just behave as it used
|
||||||
|
// to do and ignore external network namespaces.
|
||||||
|
err := c.checkCriuVersion(31100)
|
||||||
|
if err == nil {
|
||||||
|
// CRIU wants the information about an existing network namespace
|
||||||
|
// like this: --inherit-fd fd[<fd>]:<key>
|
||||||
|
// The <key> needs to be the same as during checkpointing.
|
||||||
|
// We are always using 'extRootNetNS' as the key in this.
|
||||||
|
netns, err := os.Open(nsPath)
|
||||||
|
defer netns.Close()
|
||||||
|
if err != nil {
|
||||||
|
logrus.Error("If a specific network namespace is defined it must exist: %s", err)
|
||||||
|
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
|
||||||
|
}
|
||||||
|
inheritFd := new(criurpc.InheritFd)
|
||||||
|
inheritFd.Key = proto.String("extRootNetNS")
|
||||||
|
// The offset of four is necessary because 0, 1, 2 and 3 are already
|
||||||
|
// used by stdin, stdout, stderr, 'criu swrk' socket.
|
||||||
|
inheritFd.Fd = proto.Int32(int32(4 + len(extraFiles)))
|
||||||
|
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||||
|
// All open FDs need to be transferred to CRIU via extraFiles
|
||||||
|
extraFiles = append(extraFiles, netns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, m := range c.config.Mounts {
|
for _, m := range c.config.Mounts {
|
||||||
switch m.Device {
|
switch m.Device {
|
||||||
case "bind":
|
case "bind":
|
||||||
|
@ -1208,7 +1269,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||||
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return c.criuSwrk(process, req, criuOpts, true)
|
return c.criuSwrk(process, req, criuOpts, true, extraFiles)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||||
|
@ -1238,7 +1299,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
|
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
|
||||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
|
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -1279,6 +1340,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
cmd.Stderr = process.Stderr
|
cmd.Stderr = process.Stderr
|
||||||
}
|
}
|
||||||
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
|
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
|
||||||
|
if extraFiles != nil {
|
||||||
|
cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...)
|
||||||
|
}
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
Loading…
Reference in New Issue