Merge pull request #1001 from x1022as/predump

add pre-dump and parent-path to checkpoint
This commit is contained in:
Qiang Huang 2017-02-24 10:55:06 -08:00 committed by GitHub
commit 707dd48b2f
7 changed files with 155 additions and 32 deletions

View File

@ -24,12 +24,14 @@ checkpointed.`,
Flags: []cli.Flag{ Flags: []cli.Flag{
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"}, cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"}, cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"}, cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"}, cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"}, cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"}, cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"}, cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"}, cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properies"}, cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properies"},
}, },

View File

@ -695,6 +695,12 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
} }
} }
//pre-dump may need parentImage param to complete iterative migration
if criuOpts.ParentImage != "" {
rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
rpcOpts.TrackMem = proto.Bool(true)
}
// append optional manage cgroups mode // append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 { if criuOpts.ManageCgroupsMode != 0 {
if err := c.checkCriuVersion("1.7"); err != nil { if err := c.checkCriuVersion("1.7"); err != nil {
@ -704,48 +710,55 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
rpcOpts.ManageCgroupsMode = &mode rpcOpts.ManageCgroupsMode = &mode
} }
t := criurpc.CriuReqType_DUMP var t criurpc.CriuReqType
if criuOpts.PreDump {
t = criurpc.CriuReqType_PRE_DUMP
} else {
t = criurpc.CriuReqType_DUMP
}
req := &criurpc.CriuReq{ req := &criurpc.CriuReq{
Type: &t, Type: &t,
Opts: &rpcOpts, Opts: &rpcOpts,
} }
for _, m := range c.config.Mounts { //no need to dump these information in pre-dump
switch m.Device { if !criuOpts.PreDump {
case "bind": for _, m := range c.config.Mounts {
c.addCriuDumpMount(req, m) switch m.Device {
break case "bind":
case "cgroup": c.addCriuDumpMount(req, m)
binds, err := getCgroupMounts(m) break
if err != nil { case "cgroup":
return err binds, err := getCgroupMounts(m)
if err != nil {
return err
}
for _, b := range binds {
c.addCriuDumpMount(req, b)
}
break
} }
for _, b := range binds {
c.addCriuDumpMount(req, b)
}
break
} }
}
if err := c.addMaskPaths(req); err != nil { if err := c.addMaskPaths(req); err != nil {
return err return err
} }
for _, node := range c.config.Devices { for _, node := range c.config.Devices {
m := &configs.Mount{Destination: node.Path, Source: node.Path} m := &configs.Mount{Destination: node.Path, Source: node.Path}
c.addCriuDumpMount(req, m) c.addCriuDumpMount(req, m)
} }
// Write the FD info to a file in the image directory // Write the FD info to a file in the image directory
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
if err != nil {
return err
}
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
if err != nil { if err != nil {
return err return err
} }
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
if err != nil {
return err
} }
err = c.criuSwrk(nil, req, criuOpts, false) err = c.criuSwrk(nil, req, criuOpts, false)
@ -1058,6 +1071,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
case t == criurpc.CriuReqType_RESTORE: case t == criurpc.CriuReqType_RESTORE:
case t == criurpc.CriuReqType_DUMP: case t == criurpc.CriuReqType_DUMP:
break break
case t == criurpc.CriuReqType_PRE_DUMP:
// In pre-dump mode CRIU stays in a loop and waits for
// the final DUMP command.
// The current runc pre-dump approach, however, is to
// start CRIU in PRE_DUMP once for a single pre-dump
// and not for the whole series of pre-dump, pre-dump, ..., dump.
// If we got the message CriuReqType_PRE_DUMP it means
// CRIU was successful and we need to forcefully stop CRIU.
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
criuClient.Close()
// Process status won't be success, because one end of sockets is closed
_, err := cmd.Process.Wait()
if err != nil {
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
return err
}
return nil
default: default:
return fmt.Errorf("unable to parse the response %s", resp.String()) return fmt.Errorf("unable to parse the response %s", resp.String())
} }

View File

@ -25,11 +25,13 @@ type VethPairName struct {
type CriuOpts struct { type CriuOpts struct {
ImagesDirectory string // directory for storing image files ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // directory for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs ShellJob bool // allow to dump and restore shell jobs
FileLocks bool // handle file locks, for safety FileLocks bool // handle file locks, for safety
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow to dump to criu page server PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode ManageCgroupsMode cgMode // dump or restore cgroup mode

View File

@ -106,6 +106,33 @@ func TestCheckpoint(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
parentDir, err := ioutil.TempDir("", "criu-parent")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(parentDir)
preDumpOpts := &libcontainer.CriuOpts{
ImagesDirectory: parentDir,
WorkDirectory: parentDir,
PreDump: true,
}
preDumpLog := filepath.Join(preDumpOpts.WorkDirectory, "dump.log")
if err := container.Checkpoint(preDumpOpts); err != nil {
showFile(t, preDumpLog)
t.Fatal(err)
}
state, err := container.Status()
if err != nil {
t.Fatal(err)
}
if state != libcontainer.Running {
t.Fatal("Unexpected preDump state: ", state)
}
imagesDir, err := ioutil.TempDir("", "criu") imagesDir, err := ioutil.TempDir("", "criu")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -115,6 +142,7 @@ func TestCheckpoint(t *testing.T) {
checkpointOpts := &libcontainer.CriuOpts{ checkpointOpts := &libcontainer.CriuOpts{
ImagesDirectory: imagesDir, ImagesDirectory: imagesDir,
WorkDirectory: imagesDir, WorkDirectory: imagesDir,
ParentImage: "../criu-parent",
} }
dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log") dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log")
restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log") restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log")
@ -124,7 +152,7 @@ func TestCheckpoint(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
state, err := container.Status() state, err = container.Status()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View File

@ -13,11 +13,13 @@ checkpointed.
# OPTIONS # OPTIONS
--image-path value path for saving criu image files --image-path value path for saving criu image files
--work-path value path for saving work files and logs --work-path value path for saving work files and logs
--parent-path value path for previous criu image files in pre-dump
--leave-running leave the process running after checkpointing --leave-running leave the process running after checkpointing
--tcp-established allow open tcp connections --tcp-established allow open tcp connections
--ext-unix-sk allow external unix sockets --ext-unix-sk allow external unix sockets
--shell-job allow shell jobs --shell-job allow shell jobs
--page-server value ADDRESS:PORT of the page server --page-server value ADDRESS:PORT of the page server
--file-locks handle file locks, for safety --file-locks handle file locks, for safety
--pre-dump dump container's memory information only, leave the container running after this
--manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict' --manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict'
--empty-ns value create a namespace, but don't restore its properties --empty-ns value create a namespace, but don't restore its properties

View File

@ -195,10 +195,12 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
return &libcontainer.CriuOpts{ return &libcontainer.CriuOpts{
ImagesDirectory: imagePath, ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"), WorkDirectory: context.String("work-path"),
ParentImage: context.String("parent-path"),
LeaveRunning: context.Bool("leave-running"), LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"), TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"), ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"), ShellJob: context.Bool("shell-job"),
FileLocks: context.Bool("file-locks"), FileLocks: context.Bool("file-locks"),
PreDump: context.Bool("pre-dump"),
} }
} }

View File

@ -57,3 +57,60 @@ function teardown() {
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
[[ "${output}" == *"running"* ]] [[ "${output}" == *"running"* ]]
} }
@test "checkpoint(pre-dump) and restore" {
	# Exercises the iterative checkpoint flow:
	#   1. pre-dump the running container into ./parent-dir,
	#   2. do the final dump into ./image-dir using the pre-dump as parent,
	#   3. restore the container from ./image-dir.
	requires criu

	# criu does not work with external terminals, so set
	# "terminal" and the rootfs "readonly" flag to false.
	sed -i 's;"terminal": true;"terminal": false;' config.json
	sed -i 's;"readonly": true;"readonly": false;' config.json
	# Keep the container busy so there is a live process to checkpoint.
	sed -i 's/"sh"/"sh","-c","while :; do date; sleep 1; done"/' config.json

	(
		# run busybox (not detached)
		runc run test_busybox
		[ "$status" -eq 0 ]
	) &

	# check state
	wait_for_container 15 1 test_busybox

	runc state test_busybox
	[ "$status" -eq 0 ]
	[[ "${output}" == *"running"* ]]

	# test checkpoint pre-dump
	mkdir parent-dir
	runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox
	[ "$status" -eq 0 ]

	# busybox should still be running
	runc state test_busybox
	[ "$status" -eq 0 ]
	[[ "${output}" == *"running"* ]]

	# checkpoint the running container
	mkdir image-dir
	# The parent path is handed to CRIU as-is, and CRIU resolves it relative
	# to the images directory (./image-dir), not to the current directory.
	# Hence "../parent-dir": "./parent-dir" would point at the non-existent
	# ./image-dir/parent-dir and the pre-dump images would not be used.
	runc --criu "$CRIU" checkpoint --parent-path ../parent-dir --image-path ./image-dir test_busybox
	[ "$status" -eq 0 ]

	# after checkpoint busybox is no longer running
	runc state test_busybox
	[ "$status" -ne 0 ]

	# restore from checkpoint
	(
		runc --criu "$CRIU" restore --image-path ./image-dir test_busybox
		[ "$status" -eq 0 ]
	) &

	# check state
	wait_for_container 15 1 test_busybox

	# busybox should be back up and running
	runc state test_busybox
	[ "$status" -eq 0 ]
	[[ "${output}" == *"running"* ]]
}