Add support for providing options to CRIU.

In order to do more complex things with checkpointing
and restoring of containers, it's necessary to have
control over where the image files are saved
and whether or not to kill the running process. It's
possible that more flags will be wanted in the future.

Some things probably should always be auto-configured
by libcontainer though.

Docker-DCO-1.1-Signed-off-by: Ross Boucher <rboucher@gmail.com> (github: boucher)
This commit is contained in:
boucher 2015-04-18 18:28:40 -07:00 committed by Michael Crosby
parent cbe747d989
commit a8d5fdf1fd
5 changed files with 97 additions and 37 deletions

View File

@ -115,13 +115,13 @@ type Container interface {
//
// errors:
// Systemerror - System error.
Checkpoint(string) error
Checkpoint(criuOpts *CriuOpts) error
// Restore restores the checkpointed container to a running state using the criu(8) utility.
//
// errors:
// Systemerror - System error.
Restore(*Process, string) error
Restore(process *Process, criuOpts *CriuOpts) error
// Destroys the container after killing all running processes.
//

View File

@ -287,7 +287,7 @@ func (c *linuxContainer) checkCriuVersion() error {
return nil
}
func (c *linuxContainer) Checkpoint(imagePath string) error {
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@ -295,36 +295,57 @@ func (c *linuxContainer) Checkpoint(imagePath string) error {
return err
}
workPath := filepath.Join(c.root, "criu.work")
if err := os.Mkdir(workPath, 0655); err != nil && !os.IsExist(err) {
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(workPath)
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
imageDir, err := os.Open(imagePath)
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
rpcOpts := criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
}
t := criurpc.CriuReqType_DUMP
req := criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
},
Opts: &rpcOpts,
}
for _, m := range c.config.Mounts {
if m.Device == "bind" {
mountDest := m.Destination
@ -340,9 +361,9 @@ func (c *linuxContainer) Checkpoint(imagePath string) error {
}
}
err = c.criuSwrk(nil, &req, imagePath)
err = c.criuSwrk(nil, &req, criuOpts.ImagesDirectory)
if err != nil {
log.Errorf(filepath.Join(workPath, "dump.log"))
log.Errorf(filepath.Join(criuOpts.WorkDirectory, "dump.log"))
return err
}
@ -350,7 +371,7 @@ func (c *linuxContainer) Checkpoint(imagePath string) error {
return nil
}
func (c *linuxContainer) Restore(process *Process, imagePath string) error {
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@ -358,19 +379,25 @@ func (c *linuxContainer) Restore(process *Process, imagePath string) error {
return err
}
workPath := filepath.Join(c.root, "criu.work")
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
// Since a container can be C/R'ed multiple times,
// the work directory may already exist.
if err := os.Mkdir(workPath, 0755); err != nil && !os.IsExist(err) {
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(workPath)
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
imageDir, err := os.Open(imagePath)
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
@ -412,6 +439,9 @@ func (c *linuxContainer) Restore(process *Process, imagePath string) error {
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
},
}
for _, m := range c.config.Mounts {
@ -446,9 +476,9 @@ func (c *linuxContainer) Restore(process *Process, imagePath string) error {
}
}
err = c.criuSwrk(process, &req, imagePath)
err = c.criuSwrk(process, &req, criuOpts.ImagesDirectory)
if err != nil {
log.Errorf(filepath.Join(workPath, "restore.log"))
log.Errorf(filepath.Join(criuOpts.WorkDirectory, "restore.log"))
return err
}
@ -462,6 +492,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, imageP
return err
}
stringOpts, _ := json.Marshal(*req.Opts)
log.Debugf("stringOpts: %s", stringOpts)
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
defer criuClient.Close()

10
criu_opts.go Normal file
View File

@ -0,0 +1,10 @@
package libcontainer
// CriuOpts holds the caller-supplied options for checkpointing and restoring
// a container with criu(8). Container.Checkpoint and Container.Restore take a
// pointer to it and copy the relevant fields into the CRIU RPC request.
// Both methods fill in empty directory fields on the passed-in value, so the
// struct may be modified by the call.
type CriuOpts struct {
ImagesDirectory string // directory for storing image files; when empty, defaults to "criu.image" under the container's root (c.root)
WorkDirectory string // directory to cd and write logs/pidfiles/stats to; when empty, defaults to "criu.work" under the container's root (c.root)
LeaveRunning bool // leave container in running state after checkpoint (passed to CRIU by Checkpoint)
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections (sent to CRIU as ExtUnixSk)
ShellJob bool // allow to dump and restore shell jobs
}

View File

@ -2,9 +2,8 @@ package main
import (
"fmt"
"os"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
)
var checkpointCommand = cli.Command{
@ -12,23 +11,31 @@ var checkpointCommand = cli.Command{
Usage: "checkpoint a running container",
Flags: []cli.Flag{
cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"},
cli.StringFlag{Name: "image-path", Value: "", Usage: "path where to save images"},
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
},
Action: func(context *cli.Context) {
imagePath := context.String("image-path")
if imagePath == "" {
fatal(fmt.Errorf("The --image-path option isn't specified"))
}
container, err := getContainer(context)
if err != nil {
fatal(err)
}
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(imagePath, 0655); err != nil && !os.IsExist(err) {
fatal(err)
}
if err := container.Checkpoint(imagePath); err != nil {
if err := container.Checkpoint(&libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
}); err != nil {
fatal(err)
}
},

View File

@ -16,7 +16,11 @@ var restoreCommand = cli.Command{
Usage: "restore a container from a previous checkpoint",
Flags: []cli.Flag{
cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"},
cli.StringFlag{Name: "image-path", Value: "", Usage: "path where to save images"},
cli.StringFlag{Name: "image-path", Value: "", Usage: "path to criu image files for restoring"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
},
Action: func(context *cli.Context) {
imagePath := context.String("image-path")
@ -44,7 +48,13 @@ var restoreCommand = cli.Command{
if err := tty.attach(process); err != nil {
fatal(err)
}
err = container.Restore(process, imagePath)
err = container.Restore(process, &libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
})
if err != nil {
fatal(err)
}