2015-06-30 07:49:13 +08:00
|
|
|
// +build linux
|
|
|
|
|
2015-06-22 10:31:12 +08:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in a user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost the same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in a user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"os"
|
2015-06-22 10:31:12 +08:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/opencontainers/runc/libcontainer"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in a user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost the same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in a user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
2016-05-26 02:27:34 +08:00
|
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in a user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost the same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in a user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"github.com/sirupsen/logrus"
|
2016-06-07 02:45:46 +08:00
|
|
|
"github.com/urfave/cli"
|
2017-05-11 23:06:37 +08:00
|
|
|
|
|
|
|
"golang.org/x/sys/unix"
|
2015-06-22 10:31:12 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
var checkpointCommand = cli.Command{
|
|
|
|
Name: "checkpoint",
|
|
|
|
Usage: "checkpoint a running container",
|
2016-02-11 01:30:06 +08:00
|
|
|
ArgsUsage: `<container-id>
|
|
|
|
|
|
|
|
Where "<container-id>" is the name for the instance of the container to be
|
|
|
|
checkpointed.`,
|
|
|
|
Description: `The checkpoint command saves the state of the container instance.`,
|
2015-06-22 10:31:12 +08:00
|
|
|
Flags: []cli.Flag{
|
|
|
|
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
|
|
|
|
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
|
2016-08-24 17:48:56 +08:00
|
|
|
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
|
2015-06-22 10:31:12 +08:00
|
|
|
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
|
|
|
|
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
|
|
|
|
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
|
|
|
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
2017-07-24 23:43:14 +08:00
|
|
|
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
|
|
|
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
2015-06-22 10:31:12 +08:00
|
|
|
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
2015-06-27 17:56:24 +08:00
|
|
|
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
2016-08-24 17:48:56 +08:00
|
|
|
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
2016-05-28 13:33:57 +08:00
|
|
|
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
|
2017-04-21 10:41:02 +08:00
|
|
|
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
|
2017-08-18 22:19:21 +08:00
|
|
|
cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"},
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
2016-05-10 13:58:09 +08:00
|
|
|
Action: func(context *cli.Context) error {
|
2016-10-28 23:43:10 +08:00
|
|
|
if err := checkArgs(context, 1, exactArgs); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-04-23 21:39:42 +08:00
|
|
|
// XXX: Currently this is untested with rootless containers.
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to allow execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
if os.Geteuid() != 0 || system.RunningInUserNS() {
|
|
|
|
logrus.Warn("runc checkpoint is untested with rootless containers")
|
2016-04-23 21:39:42 +08:00
|
|
|
}
|
|
|
|
|
2015-06-22 10:31:12 +08:00
|
|
|
container, err := getContainer(context)
|
|
|
|
if err != nil {
|
2016-05-10 13:58:09 +08:00
|
|
|
return err
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
2016-09-25 23:39:23 +08:00
|
|
|
status, err := container.Status()
|
|
|
|
if err != nil {
|
2016-10-19 02:37:18 +08:00
|
|
|
return err
|
2016-09-25 23:39:23 +08:00
|
|
|
}
|
2017-12-07 15:43:56 +08:00
|
|
|
if status == libcontainer.Created || status == libcontainer.Stopped {
|
|
|
|
fatalf("Container cannot be checkpointed in %s state", status.String())
|
2016-09-25 23:39:23 +08:00
|
|
|
}
|
2015-06-22 10:31:12 +08:00
|
|
|
options := criuOptions(context)
|
2020-03-18 01:17:07 +08:00
|
|
|
if !options.LeaveRunning || !options.PreDump {
|
|
|
|
// destroy prints out an error if we tell CRIU to
|
|
|
|
// leave the container running:
|
|
|
|
// ERRO[0000] container is not destroyed
|
|
|
|
// The message is correct, but we actually do not want
|
|
|
|
// to destroy the container in this case.
|
|
|
|
defer destroy(container)
|
|
|
|
}
|
2015-06-22 10:31:12 +08:00
|
|
|
// these are the mandatory criu options for a container
|
|
|
|
setPageServer(context, options)
|
2015-08-06 23:14:59 +08:00
|
|
|
setManageCgroupsMode(context, options)
|
2016-05-26 02:27:34 +08:00
|
|
|
if err := setEmptyNsMask(context, options); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-10-14 03:14:03 +08:00
|
|
|
return container.Checkpoint(options)
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
func getCheckpointImagePath(context *cli.Context) string {
|
|
|
|
imagePath := context.String("image-path")
|
|
|
|
if imagePath == "" {
|
|
|
|
imagePath = getDefaultImagePath(context)
|
|
|
|
}
|
|
|
|
return imagePath
|
|
|
|
}
|
|
|
|
|
|
|
|
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
|
|
|
|
// xxx following criu opts are optional
|
|
|
|
// The dump image can be sent to a criu page server
|
|
|
|
if psOpt := context.String("page-server"); psOpt != "" {
|
|
|
|
addressPort := strings.Split(psOpt, ":")
|
|
|
|
if len(addressPort) != 2 {
|
|
|
|
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
|
|
|
|
}
|
2015-08-05 05:44:45 +08:00
|
|
|
portInt, err := strconv.Atoi(addressPort[1])
|
2015-06-22 10:31:12 +08:00
|
|
|
if err != nil {
|
|
|
|
fatal(fmt.Errorf("Invalid port number"))
|
|
|
|
}
|
|
|
|
options.PageServer = libcontainer.CriuPageServerInfo{
|
|
|
|
Address: addressPort[0],
|
2015-08-05 05:44:45 +08:00
|
|
|
Port: int32(portInt),
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-08-06 23:14:59 +08:00
|
|
|
|
|
|
|
func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) {
|
|
|
|
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
|
|
|
|
switch cgOpt {
|
|
|
|
case "soft":
|
|
|
|
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
|
|
|
|
case "full":
|
|
|
|
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
|
|
|
|
case "strict":
|
|
|
|
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
|
|
|
|
default:
|
|
|
|
fatal(fmt.Errorf("Invalid manage cgroups mode"))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-05-26 02:27:34 +08:00
|
|
|
|
2016-12-17 13:01:53 +08:00
|
|
|
var namespaceMapping = map[specs.LinuxNamespaceType]int{
|
2017-05-11 23:06:37 +08:00
|
|
|
specs.NetworkNamespace: unix.CLONE_NEWNET,
|
2016-05-26 02:27:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
|
2018-07-11 00:53:34 +08:00
|
|
|
/* Runc doesn't manage network devices and their configuration */
|
|
|
|
nsmask := unix.CLONE_NEWNET
|
2016-05-26 02:27:34 +08:00
|
|
|
|
|
|
|
for _, ns := range context.StringSlice("empty-ns") {
|
2016-12-17 13:01:53 +08:00
|
|
|
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
|
2016-05-26 02:27:34 +08:00
|
|
|
if !exists {
|
|
|
|
return fmt.Errorf("namespace %q is not supported", ns)
|
|
|
|
}
|
|
|
|
nsmask |= f
|
|
|
|
}
|
|
|
|
|
|
|
|
options.EmptyNs = uint32(nsmask)
|
|
|
|
return nil
|
|
|
|
}
|