cr: use the RPC protocol for communication with criu
criu swrk is a special mode for when we don't want to run a service but still want to use RPC. More details are here: http://lists.openvz.org/pipermail/criu/2015-March/019400.html Another good feature of this mode is that we don't need to create action scripts, and we will be able to remove the hack of saving StdFds. criu swrk is supported starting with CRIU 1.5.1. Signed-off-by: Andrey Vagin <avagin@openvz.org>
This commit is contained in:
parent
c920f5fe75
commit
b836046422
|
@ -5,6 +5,7 @@ package libcontainer
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -16,6 +17,8 @@ import (
|
||||||
"github.com/Sirupsen/logrus"
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer/cgroups"
|
||||||
"github.com/docker/libcontainer/configs"
|
"github.com/docker/libcontainer/configs"
|
||||||
|
"github.com/docker/libcontainer/criurpc"
|
||||||
|
"github.com/golang/protobuf/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
const stdioFdCount = 3
|
const stdioFdCount = 3
|
||||||
|
@ -299,6 +302,7 @@ func (c *linuxContainer) Checkpoint() error {
|
||||||
if err := exec.Command(c.criuPath, args...).Run(); err != nil {
|
if err := exec.Command(c.criuPath, args...).Run(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
log.Info("Checkpointed")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -312,34 +316,56 @@ func (c *linuxContainer) Restore(process *Process) error {
|
||||||
if err := os.Remove(pidfile); err != nil && !os.IsNotExist(err) {
|
if err := os.Remove(pidfile); err != nil && !os.IsNotExist(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// XXX We should do the restore in detached mode (-d).
|
|
||||||
// To do this, we need an "init" process that executes
|
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
|
||||||
// CRIU and waits for it, reaping its children, and
|
if err != nil {
|
||||||
// waiting for the container.
|
return err
|
||||||
args := []string{
|
}
|
||||||
"restore", "-v4",
|
|
||||||
"-D", filepath.Join(c.root, "checkpoint"),
|
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
|
||||||
"-o", "restore.log",
|
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
|
||||||
"--restore-detached",
|
defer criuClient.Close()
|
||||||
"--restore-sibling",
|
defer criuServer.Close()
|
||||||
"--root", c.config.Rootfs,
|
|
||||||
"--pidfile", pidfile,
|
imagePath := filepath.Join(c.root, "checkpoint")
|
||||||
"--manage-cgroups", "--evasive-devices",
|
imageDir, err := os.Open(imagePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer imageDir.Close()
|
||||||
|
t := criurpc.CriuReqType_RESTORE
|
||||||
|
req := criurpc.CriuReq{
|
||||||
|
Type: &t,
|
||||||
|
Opts: &criurpc.CriuOpts{
|
||||||
|
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
|
||||||
|
EvasiveDevices: proto.Bool(true),
|
||||||
|
LogLevel: proto.Int32(4),
|
||||||
|
LogFile: proto.String("restore.log"),
|
||||||
|
RstSibling: proto.Bool(true),
|
||||||
|
Root: proto.String(c.config.Rootfs),
|
||||||
|
ManageCgroups: proto.Bool(true),
|
||||||
|
NotifyScripts: proto.Bool(true),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, m := range c.config.Mounts {
|
for _, m := range c.config.Mounts {
|
||||||
if m.Device == "bind" {
|
if m.Device == "bind" {
|
||||||
args = append(args, "--ext-mount-map",
|
extMnt := new(criurpc.ExtMountMap)
|
||||||
fmt.Sprintf("%s:%s", m.Destination, m.Source))
|
extMnt.Key = proto.String(m.Destination)
|
||||||
|
extMnt.Val = proto.String(m.Source)
|
||||||
|
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Pipes that were previously set up for std{in,out,err}
|
// Pipes that were previously set up for std{in,out,err}
|
||||||
// were removed after checkpoint. Use the new ones.
|
// were removed after checkpoint. Use the new ones.
|
||||||
for i := 0; i < 3; i++ {
|
var i int32
|
||||||
|
for i = 0; i < 3; i++ {
|
||||||
if s := c.config.StdFds[i]; strings.Contains(s, "pipe:") {
|
if s := c.config.StdFds[i]; strings.Contains(s, "pipe:") {
|
||||||
args = append(args, "--inherit-fd", fmt.Sprintf("fd[%d]:%s", i, s))
|
inheritFd := new(criurpc.InheritFd)
|
||||||
|
inheritFd.Key = proto.String(s)
|
||||||
|
inheritFd.Fd = proto.Int32(i)
|
||||||
|
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
addArgsFromEnv("CRIU_R", &args) // XXX debug
|
|
||||||
|
|
||||||
// XXX This doesn't really belong here as our caller should have
|
// XXX This doesn't really belong here as our caller should have
|
||||||
// already set up root (including devices) and mounted it.
|
// already set up root (including devices) and mounted it.
|
||||||
|
@ -351,13 +377,126 @@ func (c *linuxContainer) Restore(process *Process) error {
|
||||||
|
|
||||||
defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH)
|
defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH)
|
||||||
*/
|
*/
|
||||||
|
args := []string{"swrk", "3"}
|
||||||
cmd := exec.Command(c.criuPath, args...)
|
cmd := exec.Command(c.criuPath, args...)
|
||||||
cmd.Stdin = process.Stdin
|
cmd.Stdin = process.Stdin
|
||||||
cmd.Stdout = process.Stdout
|
cmd.Stdout = process.Stdout
|
||||||
cmd.Stderr = process.Stderr
|
cmd.Stderr = process.Stderr
|
||||||
|
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
criuServer.Close()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf(filepath.Join(imagePath, "restore.log"))
|
||||||
|
}
|
||||||
|
criuClient.Close()
|
||||||
|
st, err := cmd.Process.Wait()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warn(st.String())
|
||||||
|
}()
|
||||||
|
|
||||||
|
err = saveStdPipes(cmd.Process.Pid, c.config)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := proto.Marshal(&req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = criuClient.Write(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var pid int32 = math.MinInt32
|
||||||
|
|
||||||
|
buf := make([]byte, 10*4096)
|
||||||
|
for true {
|
||||||
|
n, err := criuClient.Read(buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if n == 0 {
|
||||||
|
return fmt.Errorf("unexpected EOF")
|
||||||
|
}
|
||||||
|
if n == len(buf) {
|
||||||
|
return fmt.Errorf("buffer is too small")
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := new(criurpc.CriuResp)
|
||||||
|
err = proto.Unmarshal(buf[:n], resp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug(resp.String())
|
||||||
|
if !resp.GetSuccess() {
|
||||||
|
return fmt.Errorf("criu failed: type %d errno %d", t, resp.GetCrErrno())
|
||||||
|
}
|
||||||
|
|
||||||
|
t = resp.GetType()
|
||||||
|
switch {
|
||||||
|
case t == criurpc.CriuReqType_NOTIFY:
|
||||||
|
notify := resp.GetNotify()
|
||||||
|
if notify == nil {
|
||||||
|
return fmt.Errorf("invalid response: %s", resp.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
if notify.GetScript() == "setup-namespaces" {
|
||||||
|
pid = notify.GetPid()
|
||||||
|
}
|
||||||
|
|
||||||
|
if notify.GetScript() == "post-restore" {
|
||||||
|
// In many cases, a restore from the images can be done only once.
|
||||||
|
// If we want to create snapshots, we need to snapshot the file system.
|
||||||
|
os.RemoveAll(imagePath)
|
||||||
|
|
||||||
|
r, err := newRestoredProcess(int(pid))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: crosbymichael restore previous process information by saving the init process information in
|
||||||
|
// the container's state file or separate process state files.
|
||||||
|
if err := c.updateState(r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
process.ops = r
|
||||||
|
}
|
||||||
|
|
||||||
|
t = criurpc.CriuReqType_NOTIFY
|
||||||
|
req = criurpc.CriuReq{
|
||||||
|
Type: &t,
|
||||||
|
NotifySuccess: proto.Bool(true),
|
||||||
|
}
|
||||||
|
data, err = proto.Marshal(&req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
n, err = criuClient.Write(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
case t == criurpc.CriuReqType_RESTORE:
|
||||||
|
restore := resp.GetRestore()
|
||||||
|
if restore != nil {
|
||||||
|
pid = restore.GetPid()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unable to parse the response %s", resp.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
|
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
|
||||||
// Here we want to wait only the CRIU process.
|
// Here we want to wait only the CRIU process.
|
||||||
|
@ -368,17 +507,7 @@ func (c *linuxContainer) Restore(process *Process) error {
|
||||||
if !st.Success() {
|
if !st.Success() {
|
||||||
return fmt.Errorf("criu failed: %s", st.String())
|
return fmt.Errorf("criu failed: %s", st.String())
|
||||||
}
|
}
|
||||||
r, err := newRestoredProcess(pidfile)
|
log.Info("Restored")
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: crosbymichael restore previous process information by saving the init process information in
|
|
||||||
// the conatiner's state file or separate process state files.
|
|
||||||
if err := c.updateState(r); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
process.ops = r
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer/cgroups"
|
||||||
|
"github.com/docker/libcontainer/configs"
|
||||||
"github.com/docker/libcontainer/system"
|
"github.com/docker/libcontainer/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -166,7 +167,7 @@ func (p *initProcess) start() error {
|
||||||
// Save the standard descriptor names before the container process
|
// Save the standard descriptor names before the container process
|
||||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||||
// we won't know at checkpoint time which file descriptor to look up.
|
// we won't know at checkpoint time which file descriptor to look up.
|
||||||
if err = p.saveStdPipes(); err != nil {
|
if err = saveStdPipes(p.pid(), p.config.Config); err != nil {
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
}
|
}
|
||||||
// Do this before syncing with child so that no children
|
// Do this before syncing with child so that no children
|
||||||
|
@ -262,15 +263,15 @@ func (p *initProcess) signal(sig os.Signal) error {
|
||||||
// Save process's std{in,out,err} file names as these will be
|
// Save process's std{in,out,err} file names as these will be
|
||||||
// removed if/when the container is checkpointed. We will need
|
// removed if/when the container is checkpointed. We will need
|
||||||
// this info to restore the container.
|
// this info to restore the container.
|
||||||
func (p *initProcess) saveStdPipes() error {
|
func saveStdPipes(pid int, config *configs.Config) error {
|
||||||
dirPath := filepath.Join("/proc", strconv.Itoa(p.pid()), "/fd")
|
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
|
||||||
for i := 0; i < 3; i++ {
|
for i := 0; i < 3; i++ {
|
||||||
f := filepath.Join(dirPath, strconv.Itoa(i))
|
f := filepath.Join(dirPath, strconv.Itoa(i))
|
||||||
target, err := os.Readlink(f)
|
target, err := os.Readlink(f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
p.config.Config.StdFds[i] = target
|
config.StdFds[i] = target
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,29 +4,15 @@ package libcontainer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/docker/libcontainer/system"
|
"github.com/docker/libcontainer/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
func newRestoredProcess(pidfile string) (*restoredProcess, error) {
|
func newRestoredProcess(pid int) (*restoredProcess, error) {
|
||||||
var (
|
var (
|
||||||
data []byte
|
err error
|
||||||
err error
|
|
||||||
)
|
)
|
||||||
data, err = ioutil.ReadFile(pidfile)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if len(data) == 0 {
|
|
||||||
return nil, fmt.Errorf("empty pidfile, restore failed")
|
|
||||||
}
|
|
||||||
pid, err := strconv.Atoi(string(data))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
proc, err := os.FindProcess(pid)
|
proc, err := os.FindProcess(pid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
Loading…
Reference in New Issue