checkpoint: add support for containers with terminals
CRIU was extended to report about orphaned master pty-s via RPC. Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
This commit is contained in:
parent
a4fcbfb704
commit
1c43d091a1
|
@ -8,6 +8,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
@ -17,6 +18,8 @@ import (
|
|||
"syscall"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
|
@ -741,6 +744,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
|
||||
FileLocks: proto.Bool(criuOpts.FileLocks),
|
||||
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
||||
OrphanPtsMaster: proto.Bool(true),
|
||||
}
|
||||
|
||||
// append optional criu opts, e.g., page-server and port
|
||||
|
@ -937,6 +941,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
|
||||
FileLocks: proto.Bool(criuOpts.FileLocks),
|
||||
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
||||
OrphanPtsMaster: proto.Bool(true),
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -1030,15 +1035,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
|
||||
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
|
||||
fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
|
||||
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
|
||||
criuClientFileCon, err := net.FileConn(criuClient)
|
||||
criuClient.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
criuClientCon := criuClientFileCon.(*net.UnixConn)
|
||||
defer criuClientCon.Close()
|
||||
|
||||
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
|
||||
defer criuClient.Close()
|
||||
defer criuServer.Close()
|
||||
|
||||
args := []string{"swrk", "3"}
|
||||
|
@ -1058,7 +1071,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
criuServer.Close()
|
||||
|
||||
defer func() {
|
||||
criuClient.Close()
|
||||
criuClientCon.Close()
|
||||
_, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return
|
||||
|
@ -1101,14 +1114,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = criuClient.Write(data)
|
||||
_, err = criuClientCon.Write(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
buf := make([]byte, 10*4096)
|
||||
oob := make([]byte, 4096)
|
||||
for true {
|
||||
n, err := criuClient.Read(buf)
|
||||
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1136,7 +1150,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
criuFeatures = resp.GetFeatures()
|
||||
break
|
||||
case t == criurpc.CriuReqType_NOTIFY:
|
||||
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
|
||||
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
|
||||
return err
|
||||
}
|
||||
t = criurpc.CriuReqType_NOTIFY
|
||||
|
@ -1148,31 +1162,14 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = criuClient.Write(data)
|
||||
_, err = criuClientCon.Write(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
case t == criurpc.CriuReqType_RESTORE:
|
||||
case t == criurpc.CriuReqType_DUMP:
|
||||
break
|
||||
case t == criurpc.CriuReqType_PRE_DUMP:
|
||||
// In pre-dump mode CRIU is in a loop and waits for
|
||||
// the final DUMP command.
|
||||
// The current runc pre-dump approach, however, is
|
||||
// start criu in PRE_DUMP once for a single pre-dump
|
||||
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
||||
// If we got the message CriuReqType_PRE_DUMP it means
|
||||
// CRIU was successful and we need to forcefully stop CRIU
|
||||
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
|
||||
criuClient.Close()
|
||||
// Process status won't be success, because one end of sockets is closed
|
||||
_, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unable to parse the response %s", resp.String())
|
||||
}
|
||||
|
@ -1180,13 +1177,22 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
break
|
||||
}
|
||||
|
||||
criuClientCon.CloseWrite()
|
||||
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
|
||||
// Here we want to wait only the CRIU process.
|
||||
st, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !st.Success() {
|
||||
|
||||
// In pre-dump mode CRIU is in a loop and waits for
|
||||
// the final DUMP command.
|
||||
// The current runc pre-dump approach, however, is
|
||||
// start criu in PRE_DUMP once for a single pre-dump
|
||||
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
||||
// If we got the message CriuReqType_PRE_DUMP it means
|
||||
// CRIU was successful and we need to forcefully stop CRIU
|
||||
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
|
||||
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
|
||||
}
|
||||
return nil
|
||||
|
@ -1220,11 +1226,12 @@ func unlockNetwork(config *configs.Config) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
|
||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
|
||||
notify := resp.GetNotify()
|
||||
if notify == nil {
|
||||
return fmt.Errorf("invalid response: %s", resp.String())
|
||||
}
|
||||
logrus.Debugf("notify: %s\n", notify.GetScript())
|
||||
switch {
|
||||
case notify.GetScript() == "post-dump":
|
||||
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
||||
|
@ -1277,6 +1284,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
|||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
case notify.GetScript() == "orphan-pts-master":
|
||||
scm, err := syscall.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fds, err := syscall.ParseUnixRights(&scm[0])
|
||||
|
||||
master := os.NewFile(uintptr(fds[0]), "orphan-pts-master")
|
||||
defer master.Close()
|
||||
|
||||
// While we can access console.master, using the API is a good idea.
|
||||
if err := utils.SendFd(process.ConsoleSocket, master); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -20,6 +20,11 @@ restored.`,
|
|||
Description: `Restores the saved state of the container instance that was previously saved
|
||||
using the runc checkpoint command.`,
|
||||
Flags: []cli.Flag{
|
||||
cli.StringFlag{
|
||||
Name: "console-socket",
|
||||
Value: "",
|
||||
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "image-path",
|
||||
Value: "",
|
||||
|
|
Loading…
Reference in New Issue