From a8d5fdf1fd0e41f9b4ed3648942a60f01dc1db22 Mon Sep 17 00:00:00 2001 From: boucher Date: Sat, 18 Apr 2015 18:28:40 -0700 Subject: [PATCH] Add support for providing options to CRIU. In order to do more complex things with checkpointing and restoring of containers it's necessary to have control over where the image files are being saved and whether or not to kill the running process. It's possible more flags will be wanted in the future. Some things probably should always be auto-configured by libcontainer though. Docker-DCO-1.1-Signed-off-by: Ross Boucher (github: boucher) --- container.go | 4 +-- container_linux.go | 81 +++++++++++++++++++++++++++++++------------- criu_opts.go | 10 ++++++ nsinit/checkpoint.go | 25 +++++++++----- nsinit/restore.go | 14 ++++++-- 5 files changed, 97 insertions(+), 37 deletions(-) create mode 100644 criu_opts.go diff --git a/container.go b/container.go index fe18f59a..9faa7926 100644 --- a/container.go +++ b/container.go @@ -115,13 +115,13 @@ type Container interface { // // errors: // Systemerror - System error. - Checkpoint(string) error + Checkpoint(criuOpts *CriuOpts) error // Restore restores the checkpointed container to a running state using the criu(8) utiity. // // errors: // Systemerror - System error. - Restore(*Process, string) error + Restore(process *Process, criuOpts *CriuOpts) error // Destroys the container after killing all running processes. 
// diff --git a/container_linux.go b/container_linux.go index 51a42b35..bb5d3312 100644 --- a/container_linux.go +++ b/container_linux.go @@ -287,7 +287,7 @@ func (c *linuxContainer) checkCriuVersion() error { return nil } -func (c *linuxContainer) Checkpoint(imagePath string) error { +func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() @@ -295,36 +295,57 @@ func (c *linuxContainer) Checkpoint(imagePath string) error { return err } - workPath := filepath.Join(c.root, "criu.work") - if err := os.Mkdir(workPath, 0655); err != nil && !os.IsExist(err) { + if criuOpts.ImagesDirectory == "" { + criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image") + } + + // Since a container can be C/R'ed multiple times, + // the checkpoint directory may already exist. + if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) { return err } - workDir, err := os.Open(workPath) + if criuOpts.WorkDirectory == "" { + criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") + } + + if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } + + workDir, err := os.Open(criuOpts.WorkDirectory) if err != nil { return err } defer workDir.Close() - imageDir, err := os.Open(imagePath) + imageDir, err := os.Open(criuOpts.ImagesDirectory) if err != nil { return err } defer imageDir.Close() + + rpcOpts := criurpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + WorkDirFd: proto.Int32(int32(workDir.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String("dump.log"), + Root: proto.String(c.config.Rootfs), + ManageCgroups: proto.Bool(true), + NotifyScripts: proto.Bool(true), + Pid: proto.Int32(int32(c.initProcess.pid())), + ShellJob: proto.Bool(criuOpts.ShellJob), + LeaveRunning: proto.Bool(criuOpts.LeaveRunning), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + } + t := criurpc.CriuReqType_DUMP req := 
criurpc.CriuReq{ Type: &t, - Opts: &criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - WorkDirFd: proto.Int32(int32(workDir.Fd())), - LogLevel: proto.Int32(4), - LogFile: proto.String("dump.log"), - Root: proto.String(c.config.Rootfs), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - Pid: proto.Int32(int32(c.initProcess.pid())), - }, + Opts: &rpcOpts, } + for _, m := range c.config.Mounts { if m.Device == "bind" { mountDest := m.Destination @@ -340,9 +361,9 @@ func (c *linuxContainer) Checkpoint(imagePath string) error { } } - err = c.criuSwrk(nil, &req, imagePath) + err = c.criuSwrk(nil, &req, criuOpts.ImagesDirectory) if err != nil { - log.Errorf(filepath.Join(workPath, "dump.log")) + log.Errorf(filepath.Join(criuOpts.WorkDirectory, "dump.log")) return err } @@ -350,7 +371,7 @@ func (c *linuxContainer) Checkpoint(imagePath string) error { return nil } -func (c *linuxContainer) Restore(process *Process, imagePath string) error { +func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() @@ -358,19 +379,25 @@ func (c *linuxContainer) Restore(process *Process, imagePath string) error { return err } - workPath := filepath.Join(c.root, "criu.work") + if criuOpts.WorkDirectory == "" { + criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") + } // Since a container can be C/R'ed multiple times, // the work directory may already exist. 
- if err := os.Mkdir(workPath, 0755); err != nil && !os.IsExist(err) { + if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) { return err } - workDir, err := os.Open(workPath) + + workDir, err := os.Open(criuOpts.WorkDirectory) if err != nil { return err } defer workDir.Close() - imageDir, err := os.Open(imagePath) + if criuOpts.ImagesDirectory == "" { + criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image") + } + imageDir, err := os.Open(criuOpts.ImagesDirectory) if err != nil { return err } @@ -412,6 +439,9 @@ func (c *linuxContainer) Restore(process *Process, imagePath string) error { Root: proto.String(root), ManageCgroups: proto.Bool(true), NotifyScripts: proto.Bool(true), + ShellJob: proto.Bool(criuOpts.ShellJob), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), }, } for _, m := range c.config.Mounts { @@ -446,9 +476,9 @@ func (c *linuxContainer) Restore(process *Process, imagePath string) error { } } - err = c.criuSwrk(process, &req, imagePath) + err = c.criuSwrk(process, &req, criuOpts.ImagesDirectory) if err != nil { - log.Errorf(filepath.Join(workPath, "restore.log")) + log.Errorf(filepath.Join(criuOpts.WorkDirectory, "restore.log")) return err } @@ -462,6 +492,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, imageP return err } + stringOpts, _ := json.Marshal(*req.Opts) + log.Debugf("stringOpts: %s", stringOpts) + criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") defer criuClient.Close() diff --git a/criu_opts.go b/criu_opts.go new file mode 100644 index 00000000..c2474030 --- /dev/null +++ b/criu_opts.go @@ -0,0 +1,10 @@ +package libcontainer + +type CriuOpts struct { + ImagesDirectory string // directory for storing image files + WorkDirectory string // directory to cd and write logs/pidfiles/stats to + LeaveRunning bool // leave container
in running state after checkpoint + TcpEstablished bool // checkpoint/restore established TCP connections + ExternalUnixConnections bool // allow external unix connections + ShellJob bool // allow to dump and restore shell jobs +} diff --git a/nsinit/checkpoint.go b/nsinit/checkpoint.go index a2a95740..39290cb9 100644 --- a/nsinit/checkpoint.go +++ b/nsinit/checkpoint.go @@ -2,9 +2,8 @@ package main import ( "fmt" - "os" - "github.com/codegangsta/cli" + "github.com/docker/libcontainer" ) var checkpointCommand = cli.Command{ @@ -12,23 +11,31 @@ var checkpointCommand = cli.Command{ Usage: "checkpoint a running container", Flags: []cli.Flag{ cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, - cli.StringFlag{Name: "image-path", Value: "", Usage: "path where to save images"}, + cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"}, + cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"}, + cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"}, + cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"}, + cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"}, + cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, }, Action: func(context *cli.Context) { imagePath := context.String("image-path") if imagePath == "" { fatal(fmt.Errorf("The --image-path option isn't specified")) } + container, err := getContainer(context) if err != nil { fatal(err) } - // Since a container can be C/R'ed multiple times, - // the checkpoint directory may already exist. 
- if err := os.Mkdir(imagePath, 0655); err != nil && !os.IsExist(err) { - fatal(err) - } - if err := container.Checkpoint(imagePath); err != nil { + if err := container.Checkpoint(&libcontainer.CriuOpts{ + ImagesDirectory: imagePath, + WorkDirectory: context.String("work-path"), + LeaveRunning: context.Bool("leave-running"), + TcpEstablished: context.Bool("tcp-established"), + ExternalUnixConnections: context.Bool("ext-unix-sk"), + ShellJob: context.Bool("shell-job"), + }); err != nil { fatal(err) } }, diff --git a/nsinit/restore.go b/nsinit/restore.go index df0d4852..8d7f0a41 100644 --- a/nsinit/restore.go +++ b/nsinit/restore.go @@ -16,7 +16,11 @@ var restoreCommand = cli.Command{ Usage: "restore a container from a previous checkpoint", Flags: []cli.Flag{ cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, - cli.StringFlag{Name: "image-path", Value: "", Usage: "path where to save images"}, + cli.StringFlag{Name: "image-path", Value: "", Usage: "path to criu image files for restoring"}, + cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"}, + cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"}, + cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"}, + cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, }, Action: func(context *cli.Context) { imagePath := context.String("image-path") @@ -44,7 +48,13 @@ var restoreCommand = cli.Command{ if err := tty.attach(process); err != nil { fatal(err) } - err = container.Restore(process, imagePath) + err = container.Restore(process, &libcontainer.CriuOpts{ + ImagesDirectory: imagePath, + WorkDirectory: context.String("work-path"), + TcpEstablished: context.Bool("tcp-established"), + ExternalUnixConnections: context.Bool("ext-unix-sk"), + ShellJob: context.Bool("shell-job"), + }) if err != nil { fatal(err) }