checkpoint: add support for containers with terminals
CRIU was extended to report orphaned pty masters via RPC. Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
This commit is contained in:
parent
a4fcbfb704
commit
1c43d091a1
|
@ -8,6 +8,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -17,6 +18,8 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"github.com/Sirupsen/logrus"
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
@ -741,6 +744,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
|
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
|
||||||
FileLocks: proto.Bool(criuOpts.FileLocks),
|
FileLocks: proto.Bool(criuOpts.FileLocks),
|
||||||
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
||||||
|
OrphanPtsMaster: proto.Bool(true),
|
||||||
}
|
}
|
||||||
|
|
||||||
// append optional criu opts, e.g., page-server and port
|
// append optional criu opts, e.g., page-server and port
|
||||||
|
@ -937,6 +941,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
|
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
|
||||||
FileLocks: proto.Bool(criuOpts.FileLocks),
|
FileLocks: proto.Bool(criuOpts.FileLocks),
|
||||||
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
||||||
|
OrphanPtsMaster: proto.Bool(true),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1030,15 +1035,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
|
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
|
||||||
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
|
fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
|
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
|
||||||
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
|
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
|
||||||
|
criuClientFileCon, err := net.FileConn(criuClient)
|
||||||
|
criuClient.Close()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
criuClientCon := criuClientFileCon.(*net.UnixConn)
|
||||||
|
defer criuClientCon.Close()
|
||||||
|
|
||||||
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
|
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
|
||||||
defer criuClient.Close()
|
|
||||||
defer criuServer.Close()
|
defer criuServer.Close()
|
||||||
|
|
||||||
args := []string{"swrk", "3"}
|
args := []string{"swrk", "3"}
|
||||||
|
@ -1058,7 +1071,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
criuServer.Close()
|
criuServer.Close()
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
criuClient.Close()
|
criuClientCon.Close()
|
||||||
_, err := cmd.Process.Wait()
|
_, err := cmd.Process.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
|
@ -1101,14 +1114,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
_, err = criuClient.Write(data)
|
_, err = criuClientCon.Write(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
buf := make([]byte, 10*4096)
|
buf := make([]byte, 10*4096)
|
||||||
|
oob := make([]byte, 4096)
|
||||||
for true {
|
for true {
|
||||||
n, err := criuClient.Read(buf)
|
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -1136,7 +1150,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
criuFeatures = resp.GetFeatures()
|
criuFeatures = resp.GetFeatures()
|
||||||
break
|
break
|
||||||
case t == criurpc.CriuReqType_NOTIFY:
|
case t == criurpc.CriuReqType_NOTIFY:
|
||||||
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
|
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
t = criurpc.CriuReqType_NOTIFY
|
t = criurpc.CriuReqType_NOTIFY
|
||||||
|
@ -1148,31 +1162,14 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
_, err = criuClient.Write(data)
|
_, err = criuClientCon.Write(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
case t == criurpc.CriuReqType_RESTORE:
|
case t == criurpc.CriuReqType_RESTORE:
|
||||||
case t == criurpc.CriuReqType_DUMP:
|
case t == criurpc.CriuReqType_DUMP:
|
||||||
break
|
|
||||||
case t == criurpc.CriuReqType_PRE_DUMP:
|
case t == criurpc.CriuReqType_PRE_DUMP:
|
||||||
// In pre-dump mode CRIU is in a loop and waits for
|
|
||||||
// the final DUMP command.
|
|
||||||
// The current runc pre-dump approach, however, is
|
|
||||||
// start criu in PRE_DUMP once for a single pre-dump
|
|
||||||
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
|
||||||
// If we got the message CriuReqType_PRE_DUMP it means
|
|
||||||
// CRIU was successful and we need to forcefully stop CRIU
|
|
||||||
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
|
|
||||||
criuClient.Close()
|
|
||||||
// Process status won't be success, because one end of sockets is closed
|
|
||||||
_, err := cmd.Process.Wait()
|
|
||||||
if err != nil {
|
|
||||||
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unable to parse the response %s", resp.String())
|
return fmt.Errorf("unable to parse the response %s", resp.String())
|
||||||
}
|
}
|
||||||
|
@ -1180,13 +1177,22 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
criuClientCon.CloseWrite()
|
||||||
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
|
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
|
||||||
// Here we want to wait only the CRIU process.
|
// Here we want to wait only the CRIU process.
|
||||||
st, err := cmd.Process.Wait()
|
st, err := cmd.Process.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !st.Success() {
|
|
||||||
|
// In pre-dump mode CRIU is in a loop and waits for
|
||||||
|
// the final DUMP command.
|
||||||
|
// The current runc pre-dump approach, however, is
|
||||||
|
// start criu in PRE_DUMP once for a single pre-dump
|
||||||
|
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
||||||
|
// If we got the message CriuReqType_PRE_DUMP it means
|
||||||
|
// CRIU was successful and we need to forcefully stop CRIU
|
||||||
|
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
|
||||||
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
|
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -1220,11 +1226,12 @@ func unlockNetwork(config *configs.Config) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
|
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
|
||||||
notify := resp.GetNotify()
|
notify := resp.GetNotify()
|
||||||
if notify == nil {
|
if notify == nil {
|
||||||
return fmt.Errorf("invalid response: %s", resp.String())
|
return fmt.Errorf("invalid response: %s", resp.String())
|
||||||
}
|
}
|
||||||
|
logrus.Debugf("notify: %s\n", notify.GetScript())
|
||||||
switch {
|
switch {
|
||||||
case notify.GetScript() == "post-dump":
|
case notify.GetScript() == "post-dump":
|
||||||
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
||||||
|
@ -1277,6 +1284,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||||
logrus.Error(err)
|
logrus.Error(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case notify.GetScript() == "orphan-pts-master":
|
||||||
|
scm, err := syscall.ParseSocketControlMessage(oob)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fds, err := syscall.ParseUnixRights(&scm[0])
|
||||||
|
|
||||||
|
master := os.NewFile(uintptr(fds[0]), "orphan-pts-master")
|
||||||
|
defer master.Close()
|
||||||
|
|
||||||
|
// While we can access console.master, using the API is a good idea.
|
||||||
|
if err := utils.SendFd(process.ConsoleSocket, master); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,11 @@ restored.`,
|
||||||
Description: `Restores the saved state of the container instance that was previously saved
|
Description: `Restores the saved state of the container instance that was previously saved
|
||||||
using the runc checkpoint command.`,
|
using the runc checkpoint command.`,
|
||||||
Flags: []cli.Flag{
|
Flags: []cli.Flag{
|
||||||
|
cli.StringFlag{
|
||||||
|
Name: "console-socket",
|
||||||
|
Value: "",
|
||||||
|
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
|
||||||
|
},
|
||||||
cli.StringFlag{
|
cli.StringFlag{
|
||||||
Name: "image-path",
|
Name: "image-path",
|
||||||
Value: "",
|
Value: "",
|
||||||
|
|
Loading…
Reference in New Issue