Merge pull request #1001 from x1022as/predump
add pre-dump and parent-path to checkpoint
This commit is contained in:
commit
707dd48b2f
|
@ -24,12 +24,14 @@ checkpointed.`,
|
||||||
Flags: []cli.Flag{
|
Flags: []cli.Flag{
|
||||||
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
|
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
|
||||||
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
|
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
|
||||||
|
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
|
||||||
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
|
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
|
||||||
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
|
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
|
||||||
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
||||||
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
||||||
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
||||||
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
||||||
|
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
||||||
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
|
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
|
||||||
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properies"},
|
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properies"},
|
||||||
},
|
},
|
||||||
|
|
|
@ -695,6 +695,12 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//pre-dump may need parentImage param to complete iterative migration
|
||||||
|
if criuOpts.ParentImage != "" {
|
||||||
|
rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
|
||||||
|
rpcOpts.TrackMem = proto.Bool(true)
|
||||||
|
}
|
||||||
|
|
||||||
// append optional manage cgroups mode
|
// append optional manage cgroups mode
|
||||||
if criuOpts.ManageCgroupsMode != 0 {
|
if criuOpts.ManageCgroupsMode != 0 {
|
||||||
if err := c.checkCriuVersion("1.7"); err != nil {
|
if err := c.checkCriuVersion("1.7"); err != nil {
|
||||||
|
@ -704,48 +710,55 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||||
rpcOpts.ManageCgroupsMode = &mode
|
rpcOpts.ManageCgroupsMode = &mode
|
||||||
}
|
}
|
||||||
|
|
||||||
t := criurpc.CriuReqType_DUMP
|
var t criurpc.CriuReqType
|
||||||
|
if criuOpts.PreDump {
|
||||||
|
t = criurpc.CriuReqType_PRE_DUMP
|
||||||
|
} else {
|
||||||
|
t = criurpc.CriuReqType_DUMP
|
||||||
|
}
|
||||||
req := &criurpc.CriuReq{
|
req := &criurpc.CriuReq{
|
||||||
Type: &t,
|
Type: &t,
|
||||||
Opts: &rpcOpts,
|
Opts: &rpcOpts,
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, m := range c.config.Mounts {
|
// no need to dump this information in pre-dump
|
||||||
switch m.Device {
|
if !criuOpts.PreDump {
|
||||||
case "bind":
|
for _, m := range c.config.Mounts {
|
||||||
c.addCriuDumpMount(req, m)
|
switch m.Device {
|
||||||
break
|
case "bind":
|
||||||
case "cgroup":
|
c.addCriuDumpMount(req, m)
|
||||||
binds, err := getCgroupMounts(m)
|
break
|
||||||
if err != nil {
|
case "cgroup":
|
||||||
return err
|
binds, err := getCgroupMounts(m)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, b := range binds {
|
||||||
|
c.addCriuDumpMount(req, b)
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
for _, b := range binds {
|
|
||||||
c.addCriuDumpMount(req, b)
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if err := c.addMaskPaths(req); err != nil {
|
if err := c.addMaskPaths(req); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, node := range c.config.Devices {
|
for _, node := range c.config.Devices {
|
||||||
m := &configs.Mount{Destination: node.Path, Source: node.Path}
|
m := &configs.Mount{Destination: node.Path, Source: node.Path}
|
||||||
c.addCriuDumpMount(req, m)
|
c.addCriuDumpMount(req, m)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write the FD info to a file in the image directory
|
// Write the FD info to a file in the image directory
|
||||||
|
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
|
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.criuSwrk(nil, req, criuOpts, false)
|
err = c.criuSwrk(nil, req, criuOpts, false)
|
||||||
|
@ -1058,6 +1071,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||||
case t == criurpc.CriuReqType_RESTORE:
|
case t == criurpc.CriuReqType_RESTORE:
|
||||||
case t == criurpc.CriuReqType_DUMP:
|
case t == criurpc.CriuReqType_DUMP:
|
||||||
break
|
break
|
||||||
|
case t == criurpc.CriuReqType_PRE_DUMP:
|
||||||
|
// In pre-dump mode CRIU is in a loop and waits for
|
||||||
|
// the final DUMP command.
|
||||||
|
// The current runc pre-dump approach, however, is
|
||||||
|
// to start criu in PRE_DUMP once for a single pre-dump
|
||||||
|
// and not the whole series of pre-dump, pre-dump, ..., dump
|
||||||
|
// If we got the message CriuReqType_PRE_DUMP it means
|
||||||
|
// CRIU was successful and we need to forcefully stop CRIU
|
||||||
|
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
|
||||||
|
criuClient.Close()
|
||||||
|
// Process status won't be success, because one end of sockets is closed
|
||||||
|
_, err := cmd.Process.Wait()
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unable to parse the response %s", resp.String())
|
return fmt.Errorf("unable to parse the response %s", resp.String())
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,11 +25,13 @@ type VethPairName struct {
|
||||||
type CriuOpts struct {
|
type CriuOpts struct {
|
||||||
ImagesDirectory string // directory for storing image files
|
ImagesDirectory string // directory for storing image files
|
||||||
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
||||||
|
ParentImage string // directory for storing parent image files in pre-dump and dump
|
||||||
LeaveRunning bool // leave container in running state after checkpoint
|
LeaveRunning bool // leave container in running state after checkpoint
|
||||||
TcpEstablished bool // checkpoint/restore established TCP connections
|
TcpEstablished bool // checkpoint/restore established TCP connections
|
||||||
ExternalUnixConnections bool // allow external unix connections
|
ExternalUnixConnections bool // allow external unix connections
|
||||||
ShellJob bool // allow to dump and restore shell jobs
|
ShellJob bool // allow to dump and restore shell jobs
|
||||||
FileLocks bool // handle file locks, for safety
|
FileLocks bool // handle file locks, for safety
|
||||||
|
PreDump bool // call criu predump to perform iterative checkpoint
|
||||||
PageServer CriuPageServerInfo // allow to dump to criu page server
|
PageServer CriuPageServerInfo // allow to dump to criu page server
|
||||||
VethPairs []VethPairName // pass the veth to criu when restore
|
VethPairs []VethPairName // pass the veth to criu when restore
|
||||||
ManageCgroupsMode cgMode // dump or restore cgroup mode
|
ManageCgroupsMode cgMode // dump or restore cgroup mode
|
||||||
|
|
|
@ -106,6 +106,33 @@ func TestCheckpoint(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parentDir, err := ioutil.TempDir("", "criu-parent")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(parentDir)
|
||||||
|
|
||||||
|
preDumpOpts := &libcontainer.CriuOpts{
|
||||||
|
ImagesDirectory: parentDir,
|
||||||
|
WorkDirectory: parentDir,
|
||||||
|
PreDump: true,
|
||||||
|
}
|
||||||
|
preDumpLog := filepath.Join(preDumpOpts.WorkDirectory, "dump.log")
|
||||||
|
|
||||||
|
if err := container.Checkpoint(preDumpOpts); err != nil {
|
||||||
|
showFile(t, preDumpLog)
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
state, err := container.Status()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if state != libcontainer.Running {
|
||||||
|
t.Fatal("Unexpected preDump state: ", state)
|
||||||
|
}
|
||||||
|
|
||||||
imagesDir, err := ioutil.TempDir("", "criu")
|
imagesDir, err := ioutil.TempDir("", "criu")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
@ -115,6 +142,7 @@ func TestCheckpoint(t *testing.T) {
|
||||||
checkpointOpts := &libcontainer.CriuOpts{
|
checkpointOpts := &libcontainer.CriuOpts{
|
||||||
ImagesDirectory: imagesDir,
|
ImagesDirectory: imagesDir,
|
||||||
WorkDirectory: imagesDir,
|
WorkDirectory: imagesDir,
|
||||||
|
ParentImage: "../criu-parent",
|
||||||
}
|
}
|
||||||
dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log")
|
dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log")
|
||||||
restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log")
|
restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log")
|
||||||
|
@ -124,7 +152,7 @@ func TestCheckpoint(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
state, err := container.Status()
|
state, err = container.Status()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,11 +13,13 @@ checkpointed.
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
--image-path value path for saving criu image files
|
--image-path value path for saving criu image files
|
||||||
--work-path value path for saving work files and logs
|
--work-path value path for saving work files and logs
|
||||||
|
--parent-path value path for previous criu image files in pre-dump
|
||||||
--leave-running leave the process running after checkpointing
|
--leave-running leave the process running after checkpointing
|
||||||
--tcp-established allow open tcp connections
|
--tcp-established allow open tcp connections
|
||||||
--ext-unix-sk allow external unix sockets
|
--ext-unix-sk allow external unix sockets
|
||||||
--shell-job allow shell jobs
|
--shell-job allow shell jobs
|
||||||
--page-server value ADDRESS:PORT of the page server
|
--page-server value ADDRESS:PORT of the page server
|
||||||
--file-locks handle file locks, for safety
|
--file-locks handle file locks, for safety
|
||||||
|
--pre-dump dump container's memory information only, leave the container running after this
|
||||||
--manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict'
|
--manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict'
|
||||||
--empty-ns value create a namespace, but don't restore its properties
|
--empty-ns value create a namespace, but don't restore its properties
|
||||||
|
|
|
@ -195,10 +195,12 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
|
||||||
return &libcontainer.CriuOpts{
|
return &libcontainer.CriuOpts{
|
||||||
ImagesDirectory: imagePath,
|
ImagesDirectory: imagePath,
|
||||||
WorkDirectory: context.String("work-path"),
|
WorkDirectory: context.String("work-path"),
|
||||||
|
ParentImage: context.String("parent-path"),
|
||||||
LeaveRunning: context.Bool("leave-running"),
|
LeaveRunning: context.Bool("leave-running"),
|
||||||
TcpEstablished: context.Bool("tcp-established"),
|
TcpEstablished: context.Bool("tcp-established"),
|
||||||
ExternalUnixConnections: context.Bool("ext-unix-sk"),
|
ExternalUnixConnections: context.Bool("ext-unix-sk"),
|
||||||
ShellJob: context.Bool("shell-job"),
|
ShellJob: context.Bool("shell-job"),
|
||||||
FileLocks: context.Bool("file-locks"),
|
FileLocks: context.Bool("file-locks"),
|
||||||
|
PreDump: context.Bool("pre-dump"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,3 +57,60 @@ function teardown() {
|
||||||
[ "$status" -eq 0 ]
|
[ "$status" -eq 0 ]
|
||||||
[[ "${output}" == *"running"* ]]
|
[[ "${output}" == *"running"* ]]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@test "checkpoint(pre-dump) and restore" {
|
||||||
|
requires criu
|
||||||
|
|
||||||
|
# criu does not work with external terminals so..
|
||||||
|
# setting terminal and root:readonly: to false
|
||||||
|
sed -i 's;"terminal": true;"terminal": false;' config.json
|
||||||
|
sed -i 's;"readonly": true;"readonly": false;' config.json
|
||||||
|
sed -i 's/"sh"/"sh","-c","while :; do date; sleep 1; done"/' config.json
|
||||||
|
|
||||||
|
(
|
||||||
|
# run busybox (not detached)
|
||||||
|
runc run test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
) &
|
||||||
|
|
||||||
|
# check state
|
||||||
|
wait_for_container 15 1 test_busybox
|
||||||
|
|
||||||
|
runc state test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "${output}" == *"running"* ]]
|
||||||
|
|
||||||
|
|
||||||
|
#test checkpoint pre-dump
|
||||||
|
mkdir parent-dir
|
||||||
|
runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
|
||||||
|
# busybox should still be running
|
||||||
|
runc state test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "${output}" == *"running"* ]]
|
||||||
|
|
||||||
|
# checkpoint the running container
|
||||||
|
mkdir image-dir
|
||||||
|
runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --image-path ./image-dir test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
|
||||||
|
# after checkpoint busybox is no longer running
|
||||||
|
runc state test_busybox
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
|
||||||
|
# restore from checkpoint
|
||||||
|
(
|
||||||
|
runc --criu "$CRIU" restore --image-path ./image-dir test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
) &
|
||||||
|
|
||||||
|
# check state
|
||||||
|
wait_for_container 15 1 test_busybox
|
||||||
|
|
||||||
|
# busybox should be back up and running
|
||||||
|
runc state test_busybox
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "${output}" == *"running"* ]]
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue