From 25da513c4bcbb72017ee642fd9a83e1af27f81ec Mon Sep 17 00:00:00 2001 From: Hui Kang Date: Thu, 6 Aug 2015 15:14:59 +0000 Subject: [PATCH] Add option to support criu manage cgroups mode for dump and restore CRIU supports cgroup-manage mode from v1.7 Signed-off-by: Hui Kang --- checkpoint.go | 17 +++ libcontainer/container_linux.go | 72 ++++++++++--- libcontainer/criu_opts.go | 11 ++ libcontainer/criurpc/criurpc.pb.go | 159 ++++++++++++++++++++++------- libcontainer/criurpc/criurpc.proto | 19 +++- restore.go | 9 ++ 6 files changed, 237 insertions(+), 50 deletions(-) diff --git a/checkpoint.go b/checkpoint.go index b853a411..9df558c8 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -23,6 +23,7 @@ var checkpointCommand = cli.Command{ cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"}, cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"}, + cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'."}, }, Action: func(context *cli.Context) { container, err := getContainer(context) @@ -39,6 +40,7 @@ var checkpointCommand = cli.Command{ } // these are the mandatory criu options for a container setPageServer(context, options) + setManageCgroupsMode(context, options) if err := container.Checkpoint(options); err != nil { fatal(err) } @@ -71,3 +73,18 @@ func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) { } } } + +func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) { + if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" { + switch cgOpt { + case "soft": + options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT + case "full": + options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL + case "strict": + options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT + default: + fatal(fmt.Errorf("Invalid manage cgroups mode")) + } + } +} diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 853aedf4..19c4701e 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -289,24 +289,56 @@ func addArgsFromEnv(evar string, args *[]string) { fmt.Printf(">>> criu %v\n", *args) } -func (c *linuxContainer) checkCriuVersion() error { - var x, y, z int +// check Criu version greater than or equal to min_version +func (c *linuxContainer) checkCriuVersion(min_version string) error { + var x, y, z, versionReq int + + _, err := fmt.Sscanf(min_version, "%d.%d.%d\n", &x, &y, &z) // 1.5.2 + if err != nil { + _, err = fmt.Sscanf(min_version, "Version: %d.%d\n", &x, &y) // 1.6 + } + versionReq = x*10000 + y*100 + z out, err := exec.Command(c.criuPath, "-V").Output() if err != nil { return fmt.Errorf("Unable to execute CRIU command: %s", c.criuPath) } - n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2 - if err != nil { - n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6 - } - if n < 2 || err != nil { - return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err) + x = 0 + y = 0 + z = 0 + if ep := strings.Index(string(out), "-"); ep >= 0 { + // criu Git version format + var version string + if sp := strings.Index(string(out), "GitID"); sp > 0 { + version = string(out)[sp:ep] + } else { + return fmt.Errorf("Unable to parse the CRIU version: %s", c.criuPath) + } + + n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2 + if err != nil { + n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6 + y++ + } else { + z++ + } + if n < 2 || err != nil { + return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err) + } + } else { + // criu release version format + n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2 + if err != nil { + n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6 + } + if n < 2 || err != nil { + return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err) + } } - if x*10000+y*100+z < 10502 { - return fmt.Errorf("CRIU version must be 1.5.2 or higher") + if x*10000+y*100+z < versionReq { + return fmt.Errorf("CRIU version must be %s or higher", min_version) } return nil @@ -331,7 +363,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() - if err := c.checkCriuVersion(); err != nil { + if err := c.checkCriuVersion("1.5.2"); err != nil { return err } @@ -389,6 +421,14 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { } } + // append optional manage cgroups mode + if criuOpts.ManageCgroupsMode != 0 { + if err := c.checkCriuVersion("1.7"); err != nil { + return err + } + rpcOpts.ManageCgroupsMode = proto.Uint32(uint32(criuOpts.ManageCgroupsMode)) + } + t := criurpc.CriuReqType_DUMP req := &criurpc.CriuReq{ Type: &t, @@ -448,7 +488,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() - if err := c.checkCriuVersion(); err != nil { + if err := c.checkCriuVersion("1.5.2"); err != nil { return err } @@ -553,6 +593,14 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { req.Opts.Veths = append(req.Opts.Veths, veth) } + // append optional manage cgroups mode + if criuOpts.ManageCgroupsMode != 0 { + if err := c.checkCriuVersion("1.7"); err != nil { + return err + } + req.Opts.ManageCgroupsMode = proto.Uint32(uint32(criuOpts.ManageCgroupsMode)) + } + var ( fds []string fdJSON []byte diff --git a/libcontainer/criu_opts.go b/libcontainer/criu_opts.go index 794d5bd5..6d6062a8 100644 --- a/libcontainer/criu_opts.go +++ b/libcontainer/criu_opts.go @@ -1,5 +1,15 @@ package libcontainer +// cgroup restoring strategy provided by criu +type cg_mode uint32 + +const ( + CRIU_CG_MODE_SOFT cg_mode = 3 + iota // restore cgroup properties if only dir created by criu + CRIU_CG_MODE_FULL // always restore all cgroups and their properties + CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system + CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT +) + type CriuPageServerInfo struct { Address string // IP address of CRIU page server Port int32 // port number of CRIU page server @@ -20,4 +30,5 @@ type CriuOpts struct { FileLocks bool // handle file locks, for safety PageServer CriuPageServerInfo // allow to dump to criu page server VethPairs []VethPairName // pass the veth to criu when restore + ManageCgroupsMode cg_mode // dump or restore cgroup mode } diff --git a/libcontainer/criurpc/criurpc.pb.go b/libcontainer/criurpc/criurpc.pb.go index af59d38c..193b6df1 100644 --- a/libcontainer/criurpc/criurpc.pb.go +++ b/libcontainer/criurpc/criurpc.pb.go @@ -2,15 +2,33 @@ // source: criurpc.proto // DO NOT EDIT! +/* +Package criurpc is a generated protocol buffer package. + +It is generated from these files: + criurpc.proto + +It has these top-level messages: + CriuPageServerInfo + CriuVethPair + ExtMountMap + InheritFd + CgroupRoot + UnixSk + CriuOpts + CriuDumpResp + CriuRestoreResp + CriuNotify + CriuReq + CriuResp +*/ package criurpc import proto "github.com/golang/protobuf/proto" -import json "encoding/json" import math "math" -// Reference proto, json, and math imports to suppress error if they are not otherwise used. +// Reference imports to suppress errors if they are not otherwise used. var _ = proto.Marshal -var _ = &json.SyntaxError{} var _ = math.Inf type CriuReqType int32 @@ -58,9 +76,6 @@ func (x CriuReqType) Enum() *CriuReqType { func (x CriuReqType) String() string { return proto.EnumName(CriuReqType_name, int32(x)) } -func (x CriuReqType) MarshalJSON() ([]byte, error) { - return json.Marshal(x.String()) -} func (x *CriuReqType) UnmarshalJSON(data []byte) error { value, err := proto.UnmarshalJSONEnum(CriuReqType_value, data, "CriuReqType") if err != nil { @@ -206,35 +221,58 @@ func (m *CgroupRoot) GetPath() string { return "" } +type UnixSk struct { + Inode *uint32 `protobuf:"varint,1,req,name=inode" json:"inode,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *UnixSk) Reset() { *m = UnixSk{} } +func (m *UnixSk) String() string { return proto.CompactTextString(m) } +func (*UnixSk) ProtoMessage() {} + +func (m *UnixSk) GetInode() uint32 { + if m != nil && m.Inode != nil { + return *m.Inode + } + return 0 +} + type CriuOpts struct { - ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"` - Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` - LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"` - ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"` - TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"` - EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"` - ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"` - FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"` - LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"` - LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"` - Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"` - NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"` - Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"` - ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"` - TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"` - AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"` - WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"` - LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"` - Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"` - CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"` - ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"` - ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"` - ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"` - ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"` - CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"` - RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"` - InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"` - XXX_unrecognized []byte `json:"-"` + ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` + LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"` + ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"` + TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"` + EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"` + ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"` + FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"` + LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"` + LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"` + Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"` + NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"` + Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"` + ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"` + TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"` + AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"` + WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"` + LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"` + Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"` + CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"` + ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"` + ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"` + ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"` + ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"` + CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"` + RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"` + InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"` + AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt" json:"auto_ext_mnt,omitempty"` + ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing" json:"ext_sharing,omitempty"` + ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters" json:"ext_masters,omitempty"` + SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt" json:"skip_mnt,omitempty"` + EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs" json:"enable_fs,omitempty"` + UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino" json:"unix_sk_ino,omitempty"` + ManageCgroupsMode *uint32 `protobuf:"varint,34,opt,name=manage_cgroups_mode" json:"manage_cgroups_mode,omitempty"` + XXX_unrecognized []byte `json:"-"` } func (m *CriuOpts) Reset() { *m = CriuOpts{} } @@ -433,6 +471,55 @@ func (m *CriuOpts) GetInheritFd() []*InheritFd { return nil } +func (m *CriuOpts) GetAutoExtMnt() bool { + if m != nil && m.AutoExtMnt != nil { + return *m.AutoExtMnt + } + return false +} + +func (m *CriuOpts) GetExtSharing() bool { + if m != nil && m.ExtSharing != nil { + return *m.ExtSharing + } + return false +} + +func (m *CriuOpts) GetExtMasters() bool { + if m != nil && m.ExtMasters != nil { + return *m.ExtMasters + } + return false +} + +func (m *CriuOpts) GetSkipMnt() []string { + if m != nil { + return m.SkipMnt + } + return nil +} + +func (m *CriuOpts) GetEnableFs() []string { + if m != nil { + return m.EnableFs + } + return nil +} + +func (m *CriuOpts) GetUnixSkIno() []*UnixSk { + if m != nil { + return m.UnixSkIno + } + return nil +} + +func (m *CriuOpts) GetManageCgroupsMode() uint32 { + if m != nil && m.ManageCgroupsMode != nil { + return *m.ManageCgroupsMode + } + return 0 +} + type CriuDumpResp struct { Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"` XXX_unrecognized []byte `json:"-"` @@ -509,7 +596,7 @@ func (m *CriuReq) GetType() CriuReqType { if m != nil && m.Type != nil { return *m.Type } - return 0 + return CriuReqType_EMPTY } func (m *CriuReq) GetOpts() *CriuOpts { @@ -552,7 +639,7 @@ func (m *CriuResp) GetType() CriuReqType { if m != nil && m.Type != nil { return *m.Type } - return 0 + return CriuReqType_EMPTY } func (m *CriuResp) GetSuccess() bool { diff --git a/libcontainer/criurpc/criurpc.proto b/libcontainer/criurpc/criurpc.proto index 5bc5d7d5..f49325e2 100644 --- a/libcontainer/criurpc/criurpc.proto +++ b/libcontainer/criurpc/criurpc.proto @@ -25,6 +25,10 @@ message cgroup_root { required string path = 2; }; +message unix_sk { + required uint32 inode = 1; +}; + message criu_opts { required int32 images_dir_fd = 1; optional int32 pid = 2; /* if not set on dump, will dump requesting process */ @@ -56,11 +60,22 @@ message criu_opts { repeated string exec_cmd = 22; repeated ext_mount_map ext_mnt = 23; - optional bool manage_cgroups = 24; + optional bool manage_cgroups = 24; /* backward compatibility */ repeated cgroup_root cg_root = 25; optional bool rst_sibling = 26; /* swrk only */ - repeated inherit_fd inherit_fd = 27; + repeated inherit_fd inherit_fd = 27; /* swrk only */ + + optional bool auto_ext_mnt = 28; + optional bool ext_sharing = 29; + optional bool ext_masters = 30; + + repeated string skip_mnt = 31; + repeated string enable_fs = 32; + + repeated unix_sk unix_sk_ino = 33; + + optional uint32 manage_cgroups_mode = 34; } message criu_dump_resp { diff --git a/restore.go b/restore.go index bf12cb47..919e9c97 100644 --- a/restore.go +++ b/restore.go @@ -44,6 +44,11 @@ var restoreCommand = cli.Command{ Name: "file-locks", Usage: "handle file locks, for safety", }, + cli.StringFlag{ + Name: "manage-cgroups-mode", + Value: "", + Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'.", + }, cli.StringFlag{ Name: "config-file, c", Value: "config.json", @@ -90,6 +95,7 @@ func restoreContainer(context *cli.Context, spec *specs.LinuxSpec, config *confi } } options := criuOptions(context) + status, err := container.Status() if err != nil { logrus.Error(err) @@ -97,6 +103,9 @@ func restoreContainer(context *cli.Context, spec *specs.LinuxSpec, config *confi if status == libcontainer.Running { fatal(fmt.Errorf("Container with id %s already running", context.GlobalString("id"))) } + + setManageCgroupsMode(context, options) + // ensure that the container is always removed if we were the process // that created it. defer func() {