From 1c43d091a18a2f2dd73b46d49c678c265cf1ace3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 2 Mar 2017 11:02:15 +0300 Subject: [PATCH] checkpoint: add support for containers with terminals CRIU was extended to report orphaned master ptys via RPC. Signed-off-by: Andrei Vagin --- libcontainer/container_linux.go | 129 +++++++++++++++++++------------- restore.go | 5 ++ 2 files changed, 80 insertions(+), 54 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 73fa92b1..3143f17d 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "io/ioutil" + "net" "os" "os/exec" "path/filepath" @@ -17,6 +18,8 @@ import ( "syscall" "time" + "golang.org/x/sys/unix" + "github.com/Sirupsen/logrus" "github.com/golang/protobuf/proto" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -727,20 +730,21 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { defer imageDir.Close() rpcOpts := criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - WorkDirFd: proto.Int32(int32(workDir.Fd())), - LogLevel: proto.Int32(4), - LogFile: proto.String("dump.log"), - Root: proto.String(c.config.Rootfs), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - Pid: proto.Int32(int32(c.initProcess.pid())), - ShellJob: proto.Bool(criuOpts.ShellJob), - LeaveRunning: proto.Bool(criuOpts.LeaveRunning), - TcpEstablished: proto.Bool(criuOpts.TcpEstablished), - ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), - FileLocks: proto.Bool(criuOpts.FileLocks), - EmptyNs: proto.Uint32(criuOpts.EmptyNs), + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + WorkDirFd: proto.Int32(int32(workDir.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String("dump.log"), + Root: proto.String(c.config.Rootfs), + ManageCgroups: proto.Bool(true), + NotifyScripts: proto.Bool(true), + Pid: proto.Int32(int32(c.initProcess.pid())), + ShellJob: 
proto.Bool(criuOpts.ShellJob), + LeaveRunning: proto.Bool(criuOpts.LeaveRunning), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), } // append optional criu opts, e.g., page-server and port @@ -923,20 +927,21 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { req := &criurpc.CriuReq{ Type: &t, Opts: &criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - WorkDirFd: proto.Int32(int32(workDir.Fd())), - EvasiveDevices: proto.Bool(true), - LogLevel: proto.Int32(4), - LogFile: proto.String("restore.log"), - RstSibling: proto.Bool(true), - Root: proto.String(root), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - ShellJob: proto.Bool(criuOpts.ShellJob), - ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), - TcpEstablished: proto.Bool(criuOpts.TcpEstablished), - FileLocks: proto.Bool(criuOpts.FileLocks), - EmptyNs: proto.Uint32(criuOpts.EmptyNs), + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + WorkDirFd: proto.Int32(int32(workDir.Fd())), + EvasiveDevices: proto.Bool(true), + LogLevel: proto.Int32(4), + LogFile: proto.String("restore.log"), + RstSibling: proto.Bool(true), + Root: proto.String(root), + ManageCgroups: proto.Bool(true), + NotifyScripts: proto.Bool(true), + ShellJob: proto.Bool(criuOpts.ShellJob), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), }, } @@ -1030,15 +1035,23 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { } func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error { - fds, err := 
syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) + fds, err := unix.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0) if err != nil { return err } logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") + criuClientFileCon, err := net.FileConn(criuClient) + criuClient.Close() + if err != nil { + return err + } + + criuClientCon := criuClientFileCon.(*net.UnixConn) + defer criuClientCon.Close() + criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") - defer criuClient.Close() defer criuServer.Close() args := []string{"swrk", "3"} @@ -1058,7 +1071,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * criuServer.Close() defer func() { - criuClient.Close() + criuClientCon.Close() _, err := cmd.Process.Wait() if err != nil { return @@ -1101,14 +1114,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * if err != nil { return err } - _, err = criuClient.Write(data) + _, err = criuClientCon.Write(data) if err != nil { return err } buf := make([]byte, 10*4096) + oob := make([]byte, 4096) for true { - n, err := criuClient.Read(buf) + n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob) if err != nil { return err } @@ -1136,7 +1150,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * criuFeatures = resp.GetFeatures() break case t == criurpc.CriuReqType_NOTIFY: - if err := c.criuNotifications(resp, process, opts, extFds); err != nil { + if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil { return err } t = criurpc.CriuReqType_NOTIFY @@ -1148,31 +1162,14 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * if err != nil { return err } - _, err = criuClient.Write(data) + _, err = criuClientCon.Write(data) if err != nil { return err } continue case t == 
criurpc.CriuReqType_RESTORE: case t == criurpc.CriuReqType_DUMP: - break case t == criurpc.CriuReqType_PRE_DUMP: - // In pre-dump mode CRIU is in a loop and waits for - // the final DUMP command. - // The current runc pre-dump approach, however, is - // start criu in PRE_DUMP once for a single pre-dump - // and not the whole series of pre-dump, pre-dump, ...m, dump - // If we got the message CriuReqType_PRE_DUMP it means - // CRIU was successful and we need to forcefully stop CRIU - logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service") - criuClient.Close() - // Process status won't be success, because one end of sockets is closed - _, err := cmd.Process.Wait() - if err != nil { - logrus.Debugf("After PRE_DUMP CRIU exiting failed") - return err - } - return nil default: return fmt.Errorf("unable to parse the response %s", resp.String()) } @@ -1180,13 +1177,22 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * break } + criuClientCon.CloseWrite() // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. // Here we want to wait only the CRIU process. st, err := cmd.Process.Wait() if err != nil { return err } - if !st.Success() { + + // In pre-dump mode CRIU is in a loop and waits for + // the final DUMP command. 
+ // The current runc pre-dump approach, however, is + // start criu in PRE_DUMP once for a single pre-dump + // and not the whole series of pre-dump, pre-dump, ...m, dump + // If we got the message CriuReqType_PRE_DUMP it means + // CRIU was successful and we need to forcefully stop CRIU + if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP { return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath) } return nil @@ -1220,11 +1226,12 @@ func unlockNetwork(config *configs.Config) error { return nil } -func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error { +func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error { notify := resp.GetNotify() if notify == nil { return fmt.Errorf("invalid response: %s", resp.String()) } + logrus.Debugf("notify: %s\n", notify.GetScript()) switch { case notify.GetScript() == "post-dump": f, err := os.Create(filepath.Join(c.root, "checkpoint")) @@ -1277,6 +1284,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc logrus.Error(err) } } + case notify.GetScript() == "orphan-pts-master": + scm, err := syscall.ParseSocketControlMessage(oob) + if err != nil { + return err + } + fds, err := syscall.ParseUnixRights(&scm[0]) + + master := os.NewFile(uintptr(fds[0]), "orphan-pts-master") + defer master.Close() + + // While we can access console.master, using the API is a good idea. 
+ if err := utils.SendFd(process.ConsoleSocket, master); err != nil { + return err + } } return nil } diff --git a/restore.go b/restore.go index ecd1d626..ca9e1e89 100644 --- a/restore.go +++ b/restore.go @@ -20,6 +20,11 @@ restored.`, Description: `Restores the saved state of the container instance that was previously saved using the runc checkpoint command.`, Flags: []cli.Flag{ + cli.StringFlag{ + Name: "console-socket", + Value: "", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", + }, cli.StringFlag{ Name: "image-path", Value: "",