cr: use the RPC protocol for communication with criu

criu swrk is a special mode for cases where we don't want to run a service,
but still want to use RPC.

Here are more details:
http://lists.openvz.org/pipermail/criu/2015-March/019400.html

Another benefit of this mode is that we don't need to create
action scripts, and we will be able to remove the hack of saving StdFds.

criu swrk is supported starting with CRIU 1.5.1.

Signed-off-by: Andrey Vagin <avagin@openvz.org>
This commit is contained in:
Andrey Vagin 2015-03-26 14:20:59 +03:00 committed by Michael Crosby
parent c920f5fe75
commit b836046422
3 changed files with 165 additions and 49 deletions

View File

@ -5,6 +5,7 @@ package libcontainer
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
@ -16,6 +17,8 @@ import (
"github.com/Sirupsen/logrus" "github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/criurpc"
"github.com/golang/protobuf/proto"
) )
const stdioFdCount = 3 const stdioFdCount = 3
@ -299,6 +302,7 @@ func (c *linuxContainer) Checkpoint() error {
if err := exec.Command(c.criuPath, args...).Run(); err != nil { if err := exec.Command(c.criuPath, args...).Run(); err != nil {
return err return err
} }
log.Info("Checkpointed")
return nil return nil
} }
@ -312,34 +316,56 @@ func (c *linuxContainer) Restore(process *Process) error {
if err := os.Remove(pidfile); err != nil && !os.IsNotExist(err) { if err := os.Remove(pidfile); err != nil && !os.IsNotExist(err) {
return err return err
} }
// XXX We should do the restore in detached mode (-d).
// To do this, we need an "init" process that executes fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
// CRIU and waits for it, reaping its children, and if err != nil {
// waiting for the container. return err
args := []string{ }
"restore", "-v4",
"-D", filepath.Join(c.root, "checkpoint"), criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
"-o", "restore.log", criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
"--restore-detached", defer criuClient.Close()
"--restore-sibling", defer criuServer.Close()
"--root", c.config.Rootfs,
"--pidfile", pidfile, imagePath := filepath.Join(c.root, "checkpoint")
"--manage-cgroups", "--evasive-devices", imageDir, err := os.Open(imagePath)
if err != nil {
return err
}
defer imageDir.Close()
t := criurpc.CriuReqType_RESTORE
req := criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String("restore.log"),
RstSibling: proto.Bool(true),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
},
} }
for _, m := range c.config.Mounts { for _, m := range c.config.Mounts {
if m.Device == "bind" { if m.Device == "bind" {
args = append(args, "--ext-mount-map", extMnt := new(criurpc.ExtMountMap)
fmt.Sprintf("%s:%s", m.Destination, m.Source)) extMnt.Key = proto.String(m.Destination)
extMnt.Val = proto.String(m.Source)
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
} }
} }
// Pipes that were previously set up for std{in,out,err} // Pipes that were previously set up for std{in,out,err}
// were removed after checkpoint. Use the new ones. // were removed after checkpoint. Use the new ones.
for i := 0; i < 3; i++ { var i int32
for i = 0; i < 3; i++ {
if s := c.config.StdFds[i]; strings.Contains(s, "pipe:") { if s := c.config.StdFds[i]; strings.Contains(s, "pipe:") {
args = append(args, "--inherit-fd", fmt.Sprintf("fd[%d]:%s", i, s)) inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String(s)
inheritFd.Fd = proto.Int32(i)
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
} }
} }
addArgsFromEnv("CRIU_R", &args) // XXX debug
// XXX This doesn't really belong here as our caller should have // XXX This doesn't really belong here as our caller should have
// already set up root (including devices) and mounted it. // already set up root (including devices) and mounted it.
@ -351,13 +377,126 @@ func (c *linuxContainer) Restore(process *Process) error {
defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH) defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH)
*/ */
args := []string{"swrk", "3"}
cmd := exec.Command(c.criuPath, args...) cmd := exec.Command(c.criuPath, args...)
cmd.Stdin = process.Stdin cmd.Stdin = process.Stdin
cmd.Stdout = process.Stdout cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr cmd.Stderr = process.Stderr
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
if err := cmd.Start(); err != nil { if err := cmd.Start(); err != nil {
return err return err
} }
criuServer.Close()
defer func() {
if err != nil {
log.Errorf(filepath.Join(imagePath, "restore.log"))
}
criuClient.Close()
st, err := cmd.Process.Wait()
if err != nil {
return
}
log.Warn(st.String())
}()
err = saveStdPipes(cmd.Process.Pid, c.config)
if err != nil {
return err
}
data, err := proto.Marshal(&req)
if err != nil {
return err
}
_, err = criuClient.Write(data)
if err != nil {
return err
}
var pid int32 = math.MinInt32
buf := make([]byte, 10*4096)
for true {
n, err := criuClient.Read(buf)
if err != nil {
return err
}
if n == 0 {
return fmt.Errorf("unexpected EOF")
}
if n == len(buf) {
return fmt.Errorf("buffer is too small")
}
resp := new(criurpc.CriuResp)
err = proto.Unmarshal(buf[:n], resp)
if err != nil {
return err
}
log.Debug(resp.String())
if !resp.GetSuccess() {
return fmt.Errorf("criu failed: type %d errno %d", t, resp.GetCrErrno())
}
t = resp.GetType()
switch {
case t == criurpc.CriuReqType_NOTIFY:
notify := resp.GetNotify()
if notify == nil {
return fmt.Errorf("invalid response: %s", resp.String())
}
if notify.GetScript() == "setup-namespaces" {
pid = notify.GetPid()
}
if notify.GetScript() == "post-restore" {
// In many cases, restore from the images can be done only once.
// If we want to create snapshots, we need to snapshot the file system.
os.RemoveAll(imagePath)
r, err := newRestoredProcess(int(pid))
if err != nil {
return err
}
// TODO: crosbymichael restore previous process information by saving the init process information in
// the container's state file or separate process state files.
if err := c.updateState(r); err != nil {
return err
}
process.ops = r
}
t = criurpc.CriuReqType_NOTIFY
req = criurpc.CriuReq{
Type: &t,
NotifySuccess: proto.Bool(true),
}
data, err = proto.Marshal(&req)
if err != nil {
return err
}
n, err = criuClient.Write(data)
if err != nil {
return err
}
continue
case t == criurpc.CriuReqType_RESTORE:
restore := resp.GetRestore()
if restore != nil {
pid = restore.GetPid()
break
}
default:
return fmt.Errorf("unable to parse the response %s", resp.String())
}
break
}
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process. // Here we want to wait only the CRIU process.
@ -368,17 +507,7 @@ func (c *linuxContainer) Restore(process *Process) error {
if !st.Success() { if !st.Success() {
return fmt.Errorf("criu failed: %s", st.String()) return fmt.Errorf("criu failed: %s", st.String())
} }
r, err := newRestoredProcess(pidfile) log.Info("Restored")
if err != nil {
return err
}
// TODO: crosbymichael restore previous process information by saving the init process information in
// the conatiner's state file or separate process state files.
if err := c.updateState(r); err != nil {
return err
}
process.ops = r
return nil return nil
} }

View File

@ -13,6 +13,7 @@ import (
"syscall" "syscall"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
) )
@ -166,7 +167,7 @@ func (p *initProcess) start() error {
// Save the standard descriptor names before the container process // Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now, // can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up. // we won't know at checkpoint time which file descriptor to look up.
if err = p.saveStdPipes(); err != nil { if err = saveStdPipes(p.pid(), p.config.Config); err != nil {
return newSystemError(err) return newSystemError(err)
} }
// Do this before syncing with child so that no children // Do this before syncing with child so that no children
@ -262,15 +263,15 @@ func (p *initProcess) signal(sig os.Signal) error {
// Save process's std{in,out,err} file names as these will be // Save process's std{in,out,err} file names as these will be
// removed if/when the container is checkpointed. We will need // removed if/when the container is checkpointed. We will need
// this info to restore the container. // this info to restore the container.
func (p *initProcess) saveStdPipes() error { func saveStdPipes(pid int, config *configs.Config) error {
dirPath := filepath.Join("/proc", strconv.Itoa(p.pid()), "/fd") dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
f := filepath.Join(dirPath, strconv.Itoa(i)) f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f) target, err := os.Readlink(f)
if err != nil { if err != nil {
return err return err
} }
p.config.Config.StdFds[i] = target config.StdFds[i] = target
} }
return nil return nil
} }

View File

@ -4,29 +4,15 @@ package libcontainer
import ( import (
"fmt" "fmt"
"io/ioutil"
"os" "os"
"strconv"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
) )
func newRestoredProcess(pidfile string) (*restoredProcess, error) { func newRestoredProcess(pid int) (*restoredProcess, error) {
var ( var (
data []byte err error
err error
) )
data, err = ioutil.ReadFile(pidfile)
if err != nil {
return nil, err
}
if len(data) == 0 {
return nil, fmt.Errorf("empty pidfile, restore failed")
}
pid, err := strconv.Atoi(string(data))
if err != nil {
return nil, err
}
proc, err := os.FindProcess(pid) proc, err := os.FindProcess(pid)
if err != nil { if err != nil {
return nil, err return nil, err