2015-07-03 00:59:30 +08:00
|
|
|
// +build linux
|
|
|
|
|
2015-06-22 10:31:12 +08:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
2015-09-02 00:32:29 +08:00
|
|
|
"io/ioutil"
|
2015-07-03 00:59:30 +08:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2015-06-22 10:31:12 +08:00
|
|
|
"runtime"
|
2015-10-08 04:51:53 +08:00
|
|
|
"strconv"
|
2015-07-03 00:59:30 +08:00
|
|
|
"strings"
|
|
|
|
"syscall"
|
2015-06-22 10:31:12 +08:00
|
|
|
|
|
|
|
"github.com/codegangsta/cli"
|
2015-07-03 00:59:30 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
2015-08-25 02:30:45 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
2016-01-21 10:04:59 +08:00
|
|
|
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
2016-03-11 06:18:39 +08:00
|
|
|
"github.com/opencontainers/specs/specs-go"
|
2015-06-22 10:31:12 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
var specCommand = cli.Command{
|
2016-03-19 02:54:06 +08:00
|
|
|
Name: "spec",
|
|
|
|
Usage: "create a new specification file",
|
|
|
|
ArgsUsage: "",
|
|
|
|
Description: `The spec command creates the new specification file named "` + specConfig + `" for
|
|
|
|
the bundle.
|
|
|
|
|
|
|
|
The spec generated is just a starter file. Editing of the spec is required to
|
|
|
|
achieve desired results. For example, the newly generated spec includes an args
|
|
|
|
parameter that is initially set to call the "sh" command when the container is
|
|
|
|
started. Calling "sh" may work for an ubuntu container or busybox, but will not
|
|
|
|
work for containers that do not include the "sh" program.
|
|
|
|
|
|
|
|
EXAMPLE:
|
|
|
|
To run docker's hello-world container one needs to set the args parameter
|
|
|
|
in the spec to call hello. This can be done using the sed command or a text
|
|
|
|
editor. The following commands create a bundle for hello-world, change the
|
|
|
|
default args parameter in the spec from "sh" to "/hello", then run the hello
|
|
|
|
command in a new hello-world container named container1:
|
|
|
|
|
|
|
|
mkdir hello
|
|
|
|
cd hello
|
|
|
|
docker pull hello-world
|
|
|
|
docker export $(docker create hello-world) > hello-world.tar
|
|
|
|
mkdir rootfs
|
|
|
|
tar -C rootfs -xf hello-world.tar
|
|
|
|
runc spec
|
|
|
|
sed -i 's;"sh";"/hello";' ` + specConfig + `
|
|
|
|
runc start container1
|
|
|
|
|
|
|
|
In the start command above, "container1" is the name for the instance of the
|
|
|
|
container that you are starting. The name you provide for the container instance
|
|
|
|
must be unique on your host.
|
|
|
|
|
|
|
|
When starting a container through runc, runc needs root privilege. If not
|
|
|
|
already running as root, you can use sudo to give runc root privilege. For
|
|
|
|
example: "sudo runc start container1" will give runc root privilege to start the
|
|
|
|
container on your host.`,
|
2015-09-02 00:32:29 +08:00
|
|
|
Flags: []cli.Flag{
|
2015-09-16 10:06:59 +08:00
|
|
|
cli.StringFlag{
|
2015-10-28 03:23:44 +08:00
|
|
|
Name: "bundle, b",
|
|
|
|
Value: "",
|
|
|
|
Usage: "path to the root of the bundle directory",
|
2015-09-16 10:06:59 +08:00
|
|
|
},
|
2015-09-02 00:32:29 +08:00
|
|
|
},
|
2015-06-22 10:31:12 +08:00
|
|
|
Action: func(context *cli.Context) {
|
2016-03-11 06:18:39 +08:00
|
|
|
spec := specs.Spec{
|
|
|
|
Version: specs.Version,
|
|
|
|
Platform: specs.Platform{
|
|
|
|
OS: runtime.GOOS,
|
|
|
|
Arch: runtime.GOARCH,
|
|
|
|
},
|
|
|
|
Root: specs.Root{
|
|
|
|
Path: "rootfs",
|
|
|
|
Readonly: true,
|
|
|
|
},
|
|
|
|
Process: specs.Process{
|
|
|
|
Terminal: true,
|
|
|
|
User: specs.User{},
|
|
|
|
Args: []string{
|
|
|
|
"sh",
|
2015-06-30 02:21:05 +08:00
|
|
|
},
|
2016-03-11 06:18:39 +08:00
|
|
|
Env: []string{
|
|
|
|
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
|
|
|
"TERM=xterm",
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
2016-03-11 06:18:39 +08:00
|
|
|
Cwd: "/",
|
|
|
|
NoNewPrivileges: true,
|
|
|
|
Capabilities: []string{
|
|
|
|
"CAP_AUDIT_WRITE",
|
|
|
|
"CAP_KILL",
|
|
|
|
"CAP_NET_BIND_SERVICE",
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
2016-03-11 06:18:39 +08:00
|
|
|
Rlimits: []specs.Rlimit{
|
2015-07-15 09:31:39 +08:00
|
|
|
{
|
2016-03-11 06:18:39 +08:00
|
|
|
Type: "RLIMIT_NOFILE",
|
|
|
|
Hard: uint64(1024),
|
|
|
|
Soft: uint64(1024),
|
2015-07-15 09:31:39 +08:00
|
|
|
},
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
2015-07-03 00:59:30 +08:00
|
|
|
},
|
2016-03-11 06:18:39 +08:00
|
|
|
Hostname: "runc",
|
|
|
|
Mounts: []specs.Mount{
|
|
|
|
{
|
|
|
|
Destination: "/proc",
|
|
|
|
Type: "proc",
|
|
|
|
Source: "proc",
|
|
|
|
Options: nil,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Destination: "/dev",
|
|
|
|
Type: "tmpfs",
|
|
|
|
Source: "tmpfs",
|
|
|
|
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Destination: "/dev/pts",
|
|
|
|
Type: "devpts",
|
|
|
|
Source: "devpts",
|
|
|
|
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Destination: "/dev/shm",
|
|
|
|
Type: "tmpfs",
|
|
|
|
Source: "shm",
|
|
|
|
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Destination: "/dev/mqueue",
|
|
|
|
Type: "mqueue",
|
|
|
|
Source: "mqueue",
|
|
|
|
Options: []string{"nosuid", "noexec", "nodev"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Destination: "/sys",
|
|
|
|
Type: "sysfs",
|
|
|
|
Source: "sysfs",
|
|
|
|
Options: []string{"nosuid", "noexec", "nodev", "ro"},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Destination: "/sys/fs/cgroup",
|
|
|
|
Type: "cgroup",
|
|
|
|
Source: "cgroup",
|
|
|
|
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
|
|
|
},
|
|
|
|
},
|
2015-07-03 00:59:30 +08:00
|
|
|
Linux: specs.Linux{
|
2016-02-06 07:15:25 +08:00
|
|
|
Resources: &specs.Resources{
|
|
|
|
Devices: []specs.DeviceCgroup{
|
|
|
|
{
|
|
|
|
Allow: false,
|
|
|
|
Access: sPtr("rwm"),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
2015-07-03 00:59:30 +08:00
|
|
|
Namespaces: []specs.Namespace{
|
|
|
|
{
|
2015-08-05 05:12:18 +08:00
|
|
|
Type: "pid",
|
2015-07-03 00:59:30 +08:00
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: "network",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: "ipc",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: "uts",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: "mount",
|
|
|
|
},
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
2016-02-06 02:46:12 +08:00
|
|
|
|
2015-09-02 00:32:29 +08:00
|
|
|
checkNoFile := func(name string) error {
|
|
|
|
_, err := os.Stat(name)
|
|
|
|
if err == nil {
|
|
|
|
return fmt.Errorf("File %s exists. Remove it first", name)
|
|
|
|
}
|
|
|
|
if !os.IsNotExist(err) {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2015-10-28 03:23:44 +08:00
|
|
|
bundle := context.String("bundle")
|
|
|
|
if bundle != "" {
|
|
|
|
if err := os.Chdir(bundle); err != nil {
|
|
|
|
fatal(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := checkNoFile(specConfig); err != nil {
|
2016-03-09 10:05:50 +08:00
|
|
|
fatal(err)
|
2015-09-02 00:32:29 +08:00
|
|
|
}
|
2015-06-22 10:31:12 +08:00
|
|
|
data, err := json.MarshalIndent(&spec, "", "\t")
|
|
|
|
if err != nil {
|
2016-03-09 10:05:50 +08:00
|
|
|
fatal(err)
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
2015-10-28 03:23:44 +08:00
|
|
|
if err := ioutil.WriteFile(specConfig, data, 0666); err != nil {
|
2016-03-09 10:05:50 +08:00
|
|
|
fatal(err)
|
2015-09-02 00:32:29 +08:00
|
|
|
}
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
|
|
|
}
|
2015-07-03 00:59:30 +08:00
|
|
|
|
2016-02-06 02:46:12 +08:00
|
|
|
// sPtr returns a pointer to a fresh copy of s.
func sPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}
|
|
|
|
// rPtr returns a pointer to a fresh copy of r.
func rPtr(r rune) *rune {
	p := new(rune)
	*p = r
	return p
}
|
|
|
|
// iPtr returns a pointer to a fresh copy of i.
func iPtr(i int64) *int64 {
	p := new(int64)
	*p = i
	return p
}
|
|
|
|
// u32Ptr converts i to uint32 and returns a pointer to the converted value.
// The conversion is a plain Go integer conversion, so values outside the
// uint32 range wrap rather than produce an error.
func u32Ptr(i int64) *uint32 {
	p := new(uint32)
	*p = uint32(i)
	return p
}
|
|
|
|
// fmPtr converts i to an os.FileMode and returns a pointer to it.
func fmPtr(i int64) *os.FileMode {
	p := new(os.FileMode)
	*p = os.FileMode(i)
	return p
}
|
|
|
|
|
2015-09-02 00:32:29 +08:00
|
|
|
var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{
|
|
|
|
specs.PIDNamespace: configs.NEWPID,
|
|
|
|
specs.NetworkNamespace: configs.NEWNET,
|
|
|
|
specs.MountNamespace: configs.NEWNS,
|
|
|
|
specs.UserNamespace: configs.NEWUSER,
|
|
|
|
specs.IPCNamespace: configs.NEWIPC,
|
|
|
|
specs.UTSNamespace: configs.NEWUTS,
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
|
2015-10-02 05:03:02 +08:00
|
|
|
// mountPropagationMapping translates a rootfsPropagation value from the spec
// into the corresponding mount(2) flags. The "r"-prefixed names add MS_REC.
// The empty string (no value given) maps to the same flags as "rprivate".
var mountPropagationMapping = map[string]int{
	"":         syscall.MS_REC | syscall.MS_PRIVATE,
	"private":  syscall.MS_PRIVATE,
	"rprivate": syscall.MS_REC | syscall.MS_PRIVATE,
	"shared":   syscall.MS_SHARED,
	"rshared":  syscall.MS_REC | syscall.MS_SHARED,
	"slave":    syscall.MS_SLAVE,
	"rslave":   syscall.MS_REC | syscall.MS_SLAVE,
}
|
|
|
|
|
2015-07-03 00:59:30 +08:00
|
|
|
// loadSpec loads the specification from the provided path.
|
|
|
|
// If the path is empty then the default path will be "config.json"
|
2016-03-11 06:18:39 +08:00
|
|
|
func loadSpec(cPath string) (spec *specs.Spec, err error) {
|
2015-09-02 00:32:29 +08:00
|
|
|
cf, err := os.Open(cPath)
|
|
|
|
if err != nil {
|
|
|
|
if os.IsNotExist(err) {
|
2016-02-06 02:46:12 +08:00
|
|
|
return nil, fmt.Errorf("JSON specification file %s not found", cPath)
|
2015-09-02 00:32:29 +08:00
|
|
|
}
|
2016-03-06 11:52:26 +08:00
|
|
|
return nil, err
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
2015-09-16 10:54:53 +08:00
|
|
|
defer cf.Close()
|
|
|
|
|
2015-09-02 00:32:29 +08:00
|
|
|
if err = json.NewDecoder(cf).Decode(&spec); err != nil {
|
2016-03-06 11:52:26 +08:00
|
|
|
return nil, err
|
2015-09-02 00:32:29 +08:00
|
|
|
}
|
2016-03-08 01:36:14 +08:00
|
|
|
return spec, validateProcessSpec(&spec.Process)
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
|
2016-03-22 03:53:46 +08:00
|
|
|
func createLibcontainerConfig(cgroupName string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Config, error) {
|
2016-02-25 03:11:10 +08:00
|
|
|
// runc's cwd will always be the bundle path
|
|
|
|
rcwd, err := os.Getwd()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
cwd, err := filepath.Abs(rcwd)
|
2015-07-03 00:59:30 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
rootfsPath := spec.Root.Path
|
|
|
|
if !filepath.IsAbs(rootfsPath) {
|
|
|
|
rootfsPath = filepath.Join(cwd, rootfsPath)
|
|
|
|
}
|
|
|
|
config := &configs.Config{
|
2016-03-04 02:44:33 +08:00
|
|
|
Rootfs: rootfsPath,
|
|
|
|
Readonlyfs: spec.Root.Readonly,
|
|
|
|
Hostname: spec.Hostname,
|
2016-02-25 03:11:10 +08:00
|
|
|
Labels: []string{
|
|
|
|
"bundle=" + cwd,
|
|
|
|
},
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
2015-10-02 05:03:02 +08:00
|
|
|
|
|
|
|
exists := false
|
2016-02-06 02:46:12 +08:00
|
|
|
if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists {
|
|
|
|
return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation)
|
2015-10-02 05:03:02 +08:00
|
|
|
}
|
|
|
|
|
2016-02-06 02:46:12 +08:00
|
|
|
for _, ns := range spec.Linux.Namespaces {
|
2015-07-03 00:59:30 +08:00
|
|
|
t, exists := namespaceMapping[ns.Type]
|
|
|
|
if !exists {
|
|
|
|
return nil, fmt.Errorf("namespace %q does not exist", ns)
|
|
|
|
}
|
|
|
|
config.Namespaces.Add(t, ns.Path)
|
|
|
|
}
|
2015-07-28 07:36:28 +08:00
|
|
|
if config.Namespaces.Contains(configs.NEWNET) {
|
|
|
|
config.Networks = []*configs.Network{
|
|
|
|
{
|
|
|
|
Type: "loopback",
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
2016-02-06 02:46:12 +08:00
|
|
|
for _, m := range spec.Mounts {
|
|
|
|
config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m))
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
2016-02-06 02:46:12 +08:00
|
|
|
if err := createDevices(spec, config); err != nil {
|
2015-07-03 00:59:30 +08:00
|
|
|
return nil, err
|
|
|
|
}
|
2016-02-06 02:46:12 +08:00
|
|
|
if err := setupUserNamespace(spec, config); err != nil {
|
2015-07-03 00:59:30 +08:00
|
|
|
return nil, err
|
|
|
|
}
|
2016-03-22 03:53:46 +08:00
|
|
|
c, err := createCgroupConfig(cgroupName, useSystemdCgroup, spec)
|
2015-07-03 00:59:30 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
config.Cgroups = c
|
2016-02-27 07:54:53 +08:00
|
|
|
// set extra path masking for libcontainer for the various unsafe places in proc
|
|
|
|
config.MaskPaths = maskedPaths
|
|
|
|
config.ReadonlyPaths = readonlyPaths
|
2016-03-11 06:18:39 +08:00
|
|
|
if spec.Linux.Seccomp != nil {
|
|
|
|
seccomp, err := setupSeccomp(spec.Linux.Seccomp)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
config.Seccomp = seccomp
|
2015-08-25 02:30:45 +08:00
|
|
|
}
|
2016-02-06 02:46:12 +08:00
|
|
|
config.Sysctl = spec.Linux.Sysctl
|
2016-02-26 04:01:18 +08:00
|
|
|
if oomScoreAdj := spec.Linux.Resources.OOMScoreAdj; oomScoreAdj != nil {
|
|
|
|
config.OomScoreAdj = *oomScoreAdj
|
|
|
|
}
|
2015-10-08 04:51:53 +08:00
|
|
|
for _, g := range spec.Process.User.AdditionalGids {
|
|
|
|
config.AdditionalGroups = append(config.AdditionalGroups, strconv.FormatUint(uint64(g), 10))
|
|
|
|
}
|
2016-02-06 02:46:12 +08:00
|
|
|
createHooks(spec, config)
|
2015-09-24 08:13:00 +08:00
|
|
|
config.Version = specs.Version
|
2015-07-03 00:59:30 +08:00
|
|
|
return config, nil
|
|
|
|
}
|
|
|
|
|
2016-02-06 02:46:12 +08:00
|
|
|
func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
|
libcontainer: Allow passing mount propagation flags
Right now if one passes a mount propagation flag in spec file, it
does not take effect. For example, try following in spec json file.
{
"type": "bind",
"source": "/root/mnt-source",
"destination": "/root/mnt-dest",
"options": "rbind,shared"
}
One would expect that /root/mnt-dest will be shared inside the container
but that's not the case.
#findmnt -o TARGET,PROPAGATION
`-/root/mnt-dest private
Reason being that propagation flags can't be passed in along with other
regular flags. They need to be passed in a separate call to mount syscall.
That too, one propagation flag at a time. (from mount man page).
Hence, store propagation flags separately in a slice and apply these
in that order after the mount call wherever appropriate. This allows
user to control the propagation property of mount point inside
the container.
Storing them separately also solves another problem where recursive flag
(syscall.MS_REC) can get mixed up. For example, options "rbind,private"
and "bind,rprivate" will be same and there will be no way to differentiate
between these if all the flags are stored in a single integer.
This patch would allow one to pass propagation flags "[r]shared,[r]slave,
[r]private,[r]unbindable" in spec file as per mount property.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
2015-09-17 03:53:23 +08:00
|
|
|
flags, pgflags, data := parseMountOptions(m.Options)
|
2015-07-03 00:59:30 +08:00
|
|
|
source := m.Source
|
|
|
|
if m.Type == "bind" {
|
|
|
|
if !filepath.IsAbs(source) {
|
|
|
|
source = filepath.Join(cwd, m.Source)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &configs.Mount{
|
libcontainer: Allow passing mount propagation flags
Right now if one passes a mount propagation flag in spec file, it
does not take effect. For example, try following in spec json file.
{
"type": "bind",
"source": "/root/mnt-source",
"destination": "/root/mnt-dest",
"options": "rbind,shared"
}
One would expect that /root/mnt-dest will be shared inside the container
but that's not the case.
#findmnt -o TARGET,PROPAGATION
`-/root/mnt-dest private
Reason being that propagation flags can't be passed in along with other
regular flags. They need to be passed in a separate call to mount syscall.
That too, one propagation flag at a time. (from mount man page).
Hence, store propagation flags separately in a slice and apply these
in that order after the mount call wherever appropriate. This allows
user to control the propagation property of mount point inside
the container.
Storing them separately also solves another problem where recursive flag
(syscall.MS_REC) can get mixed up. For example, options "rbind,private"
and "bind,rprivate" will be same and there will be no way to differentiate
between these if all the flags are stored in a single integer.
This patch would allow one to pass propagation flags "[r]shared,[r]slave,
[r]private,[r]unbindable" in spec file as per mount property.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
2015-09-17 03:53:23 +08:00
|
|
|
Device: m.Type,
|
|
|
|
Source: source,
|
2016-02-06 02:46:12 +08:00
|
|
|
Destination: m.Destination,
|
libcontainer: Allow passing mount propagation flags
Right now if one passes a mount propagation flag in spec file, it
does not take effect. For example, try following in spec json file.
{
"type": "bind",
"source": "/root/mnt-source",
"destination": "/root/mnt-dest",
"options": "rbind,shared"
}
One would expect that /root/mnt-dest will be shared inside the container
but that's not the case.
#findmnt -o TARGET,PROPAGATION
`-/root/mnt-dest private
Reason being that propagation flags can't be passed in along with other
regular flags. They need to be passed in a separate call to mount syscall.
That too, one propagation flag at a time. (from mount man page).
Hence, store propagation flags separately in a slice and apply these
in that order after the mount call wherever appropriate. This allows
user to control the propagation property of mount point inside
the container.
Storing them separately also solves another problem where recursive flag
(syscall.MS_REC) can get mixed up. For example, options "rbind,private"
and "bind,rprivate" will be same and there will be no way to differentiate
between these if all the flags are stored in a single integer.
This patch would allow one to pass propagation flags "[r]shared,[r]slave,
[r]private,[r]unbindable" in spec file as per mount property.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
2015-09-17 03:53:23 +08:00
|
|
|
Data: data,
|
|
|
|
Flags: flags,
|
|
|
|
PropagationFlags: pgflags,
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-22 03:53:46 +08:00
|
|
|
func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) {
|
2016-01-21 10:04:59 +08:00
|
|
|
var (
|
|
|
|
err error
|
|
|
|
myCgroupPath string
|
|
|
|
)
|
|
|
|
|
2016-03-22 03:53:46 +08:00
|
|
|
c := &configs.Cgroup{
|
|
|
|
Resources: &configs.Resources{},
|
|
|
|
}
|
|
|
|
|
2016-01-21 10:04:59 +08:00
|
|
|
if spec.Linux.CgroupsPath != nil {
|
|
|
|
myCgroupPath = libcontainerUtils.CleanPath(*spec.Linux.CgroupsPath)
|
2016-03-25 01:40:16 +08:00
|
|
|
if useSystemdCgroup {
|
|
|
|
myCgroupPath = *spec.Linux.CgroupsPath
|
|
|
|
}
|
2016-03-22 03:53:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if useSystemdCgroup {
|
|
|
|
if myCgroupPath == "" {
|
|
|
|
c.Parent = "system.slice"
|
|
|
|
c.ScopePrefix = "runc"
|
|
|
|
c.Name = name
|
|
|
|
} else {
|
|
|
|
// Parse the path from expected "slice:prefix:name"
|
|
|
|
// for e.g. "system.slice:docker:1234"
|
|
|
|
parts := strings.Split(myCgroupPath, ":")
|
|
|
|
if len(parts) != 3 {
|
|
|
|
return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups")
|
|
|
|
}
|
|
|
|
c.Parent = parts[0]
|
|
|
|
c.ScopePrefix = parts[1]
|
|
|
|
c.Name = parts[2]
|
|
|
|
}
|
2016-01-21 10:04:59 +08:00
|
|
|
} else {
|
2016-03-22 03:53:46 +08:00
|
|
|
if myCgroupPath == "" {
|
|
|
|
myCgroupPath, err = cgroups.GetThisCgroupDir("devices")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
myCgroupPath = filepath.Join(myCgroupPath, name)
|
2016-01-21 10:04:59 +08:00
|
|
|
}
|
2016-03-22 03:53:46 +08:00
|
|
|
c.Path = myCgroupPath
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
2016-01-21 10:04:59 +08:00
|
|
|
|
2016-02-06 07:15:25 +08:00
|
|
|
c.Resources.AllowedDevices = allowedDevices
|
2015-07-03 00:59:30 +08:00
|
|
|
r := spec.Linux.Resources
|
2016-02-06 07:15:25 +08:00
|
|
|
if r == nil {
|
|
|
|
return c, nil
|
|
|
|
}
|
|
|
|
for i, d := range spec.Linux.Resources.Devices {
|
|
|
|
var (
|
2016-03-04 02:26:38 +08:00
|
|
|
t = "a"
|
2016-02-06 07:15:25 +08:00
|
|
|
major = int64(-1)
|
|
|
|
minor = int64(-1)
|
|
|
|
)
|
|
|
|
if d.Type != nil {
|
|
|
|
t = *d.Type
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
if d.Major != nil {
|
|
|
|
major = *d.Major
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
if d.Minor != nil {
|
|
|
|
minor = *d.Minor
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
if d.Access == nil || *d.Access == "" {
|
2016-03-21 22:49:06 +08:00
|
|
|
return nil, fmt.Errorf("device access at %d field cannot be empty", i)
|
2016-02-06 07:15:25 +08:00
|
|
|
}
|
2016-03-04 02:26:38 +08:00
|
|
|
dt, err := stringToDeviceRune(t)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
dd := &configs.Device{
|
2016-03-04 02:26:38 +08:00
|
|
|
Type: dt,
|
2016-02-06 07:15:25 +08:00
|
|
|
Major: major,
|
|
|
|
Minor: minor,
|
|
|
|
Permissions: *d.Access,
|
|
|
|
Allow: d.Allow,
|
|
|
|
}
|
|
|
|
c.Resources.Devices = append(c.Resources.Devices, dd)
|
|
|
|
}
|
|
|
|
// append the default allowed devices to the end of the list
|
|
|
|
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
|
|
|
|
if r.Memory != nil {
|
|
|
|
if r.Memory.Limit != nil {
|
|
|
|
c.Resources.Memory = int64(*r.Memory.Limit)
|
|
|
|
}
|
|
|
|
if r.Memory.Reservation != nil {
|
|
|
|
c.Resources.MemoryReservation = int64(*r.Memory.Reservation)
|
|
|
|
}
|
|
|
|
if r.Memory.Swap != nil {
|
|
|
|
c.Resources.MemorySwap = int64(*r.Memory.Swap)
|
|
|
|
}
|
|
|
|
if r.Memory.Kernel != nil {
|
|
|
|
c.Resources.KernelMemory = int64(*r.Memory.Kernel)
|
|
|
|
}
|
2016-03-20 18:45:52 +08:00
|
|
|
if r.Memory.KernelTCP != nil {
|
|
|
|
c.Resources.KernelMemoryTCP = int64(*r.Memory.KernelTCP)
|
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
if r.Memory.Swappiness != nil {
|
2016-02-21 09:29:53 +08:00
|
|
|
swappiness := int64(*r.Memory.Swappiness)
|
|
|
|
c.Resources.MemorySwappiness = &swappiness
|
2016-02-06 07:15:25 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if r.CPU != nil {
|
|
|
|
if r.CPU.Shares != nil {
|
|
|
|
c.Resources.CpuShares = int64(*r.CPU.Shares)
|
|
|
|
}
|
|
|
|
if r.CPU.Quota != nil {
|
|
|
|
c.Resources.CpuQuota = int64(*r.CPU.Quota)
|
|
|
|
}
|
|
|
|
if r.CPU.Period != nil {
|
|
|
|
c.Resources.CpuPeriod = int64(*r.CPU.Period)
|
|
|
|
}
|
|
|
|
if r.CPU.RealtimeRuntime != nil {
|
|
|
|
c.Resources.CpuRtRuntime = int64(*r.CPU.RealtimeRuntime)
|
|
|
|
}
|
|
|
|
if r.CPU.RealtimePeriod != nil {
|
|
|
|
c.Resources.CpuRtPeriod = int64(*r.CPU.RealtimePeriod)
|
|
|
|
}
|
|
|
|
if r.CPU.Cpus != nil {
|
|
|
|
c.Resources.CpusetCpus = *r.CPU.Cpus
|
|
|
|
}
|
|
|
|
if r.CPU.Mems != nil {
|
|
|
|
c.Resources.CpusetMems = *r.CPU.Mems
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if r.Pids != nil {
|
|
|
|
c.Resources.PidsLimit = *r.Pids.Limit
|
|
|
|
}
|
|
|
|
if r.BlockIO != nil {
|
|
|
|
if r.BlockIO.Weight != nil {
|
|
|
|
c.Resources.BlkioWeight = *r.BlockIO.Weight
|
|
|
|
}
|
|
|
|
if r.BlockIO.LeafWeight != nil {
|
|
|
|
c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight
|
|
|
|
}
|
|
|
|
if r.BlockIO.WeightDevice != nil {
|
|
|
|
for _, wd := range r.BlockIO.WeightDevice {
|
|
|
|
weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, *wd.Weight, *wd.LeafWeight)
|
|
|
|
c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice)
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
}
|
|
|
|
if r.BlockIO.ThrottleReadBpsDevice != nil {
|
|
|
|
for _, td := range r.BlockIO.ThrottleReadBpsDevice {
|
|
|
|
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
|
|
|
c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice)
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
}
|
|
|
|
if r.BlockIO.ThrottleWriteBpsDevice != nil {
|
|
|
|
for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
|
|
|
|
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
|
|
|
c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice)
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
}
|
|
|
|
if r.BlockIO.ThrottleReadIOPSDevice != nil {
|
|
|
|
for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
|
|
|
|
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
|
|
|
c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice)
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
if r.BlockIO.ThrottleWriteIOPSDevice != nil {
|
|
|
|
for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
|
|
|
|
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
|
|
|
|
c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice)
|
|
|
|
}
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
}
|
|
|
|
for _, l := range r.HugepageLimits {
|
|
|
|
c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{
|
|
|
|
Pagesize: *l.Pagesize,
|
|
|
|
Limit: *l.Limit,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
if r.DisableOOMKiller != nil {
|
|
|
|
c.Resources.OomKillDisable = *r.DisableOOMKiller
|
|
|
|
}
|
|
|
|
if r.Network != nil {
|
|
|
|
if r.Network.ClassID != nil {
|
|
|
|
c.Resources.NetClsClassid = string(*r.Network.ClassID)
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2016-02-06 07:15:25 +08:00
|
|
|
for _, m := range r.Network.Priorities {
|
|
|
|
c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{
|
|
|
|
Interface: m.Name,
|
|
|
|
Priority: int64(m.Priority),
|
|
|
|
})
|
2016-01-26 08:09:08 +08:00
|
|
|
}
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
return c, nil
|
|
|
|
}
|
|
|
|
|
2016-03-04 02:26:38 +08:00
|
|
|
// stringToDeviceRune maps the device type strings "a", "b" and "c" to the
// runes 'a', 'b' and 'c'. Any other string, including the empty string,
// yields an error that quotes the rejected value.
func stringToDeviceRune(s string) (rune, error) {
	if s == "a" || s == "b" || s == "c" {
		// s is exactly one ASCII byte here, so the byte is the rune.
		return rune(s[0]), nil
	}
	return 0, fmt.Errorf("invalid device type %q", s)
}
|
|
|
|
|
2016-03-11 06:18:39 +08:00
|
|
|
func createDevices(spec *specs.Spec, config *configs.Config) error {
|
2016-02-06 07:15:25 +08:00
|
|
|
// add whitelisted devices
|
|
|
|
config.Devices = []*configs.Device{
|
|
|
|
{
|
|
|
|
Type: 'c',
|
|
|
|
Path: "/dev/null",
|
|
|
|
Major: 1,
|
|
|
|
Minor: 3,
|
|
|
|
FileMode: 0666,
|
|
|
|
Uid: 0,
|
|
|
|
Gid: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: 'c',
|
|
|
|
Path: "/dev/random",
|
|
|
|
Major: 1,
|
|
|
|
Minor: 8,
|
|
|
|
FileMode: 0666,
|
|
|
|
Uid: 0,
|
|
|
|
Gid: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: 'c',
|
|
|
|
Path: "/dev/full",
|
|
|
|
Major: 1,
|
|
|
|
Minor: 7,
|
|
|
|
FileMode: 0666,
|
|
|
|
Uid: 0,
|
|
|
|
Gid: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: 'c',
|
|
|
|
Path: "/dev/tty",
|
|
|
|
Major: 5,
|
|
|
|
Minor: 0,
|
|
|
|
FileMode: 0666,
|
|
|
|
Uid: 0,
|
|
|
|
Gid: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: 'c',
|
|
|
|
Path: "/dev/zero",
|
|
|
|
Major: 1,
|
|
|
|
Minor: 5,
|
|
|
|
FileMode: 0666,
|
|
|
|
Uid: 0,
|
|
|
|
Gid: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Type: 'c',
|
|
|
|
Path: "/dev/urandom",
|
|
|
|
Major: 1,
|
|
|
|
Minor: 9,
|
|
|
|
FileMode: 0666,
|
|
|
|
Uid: 0,
|
|
|
|
Gid: 0,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
// merge in additional devices from the spec
|
2015-08-12 05:24:00 +08:00
|
|
|
for _, d := range spec.Linux.Devices {
|
2016-02-06 07:15:25 +08:00
|
|
|
var uid, gid uint32
|
|
|
|
if d.UID != nil {
|
|
|
|
uid = *d.UID
|
|
|
|
}
|
|
|
|
if d.GID != nil {
|
|
|
|
gid = *d.GID
|
|
|
|
}
|
2016-03-04 02:26:38 +08:00
|
|
|
dt, err := stringToDeviceRune(d.Type)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-12 05:24:00 +08:00
|
|
|
device := &configs.Device{
|
2016-03-04 02:26:38 +08:00
|
|
|
Type: dt,
|
2016-02-06 02:46:12 +08:00
|
|
|
Path: d.Path,
|
|
|
|
Major: d.Major,
|
|
|
|
Minor: d.Minor,
|
|
|
|
FileMode: *d.FileMode,
|
2016-02-06 07:15:25 +08:00
|
|
|
Uid: uid,
|
|
|
|
Gid: gid,
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
2015-08-12 05:24:00 +08:00
|
|
|
config.Devices = append(config.Devices, device)
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-03-11 06:18:39 +08:00
|
|
|
func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
|
2015-08-04 03:00:36 +08:00
|
|
|
if len(spec.Linux.UIDMappings) == 0 {
|
2015-07-03 00:59:30 +08:00
|
|
|
return nil
|
|
|
|
}
|
2015-09-14 08:33:17 +08:00
|
|
|
// do not override the specified user namespace path
|
|
|
|
if config.Namespaces.PathOf(configs.NEWUSER) == "" {
|
|
|
|
config.Namespaces.Add(configs.NEWUSER, "")
|
|
|
|
}
|
2015-07-03 00:59:30 +08:00
|
|
|
create := func(m specs.IDMapping) configs.IDMap {
|
|
|
|
return configs.IDMap{
|
2015-07-08 15:41:43 +08:00
|
|
|
HostID: int(m.HostID),
|
|
|
|
ContainerID: int(m.ContainerID),
|
|
|
|
Size: int(m.Size),
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
}
|
2015-08-04 03:00:36 +08:00
|
|
|
for _, m := range spec.Linux.UIDMappings {
|
2015-07-03 00:59:30 +08:00
|
|
|
config.UidMappings = append(config.UidMappings, create(m))
|
|
|
|
}
|
2015-08-04 03:00:36 +08:00
|
|
|
for _, m := range spec.Linux.GIDMappings {
|
2015-07-03 00:59:30 +08:00
|
|
|
config.GidMappings = append(config.GidMappings, create(m))
|
|
|
|
}
|
2015-08-05 05:44:45 +08:00
|
|
|
rootUID, err := config.HostUID()
|
2015-07-03 00:59:30 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-05 05:44:45 +08:00
|
|
|
rootGID, err := config.HostGID()
|
2015-07-03 00:59:30 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, node := range config.Devices {
|
2015-08-05 05:44:45 +08:00
|
|
|
node.Uid = uint32(rootUID)
|
|
|
|
node.Gid = uint32(rootGID)
|
2015-07-03 00:59:30 +08:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-09-02 00:32:29 +08:00
|
|
|
func createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) {
|
|
|
|
rl, err := strToRlimit(rlimit.Type)
|
|
|
|
if err != nil {
|
|
|
|
return configs.Rlimit{}, err
|
|
|
|
}
|
2015-08-23 19:17:31 +08:00
|
|
|
return configs.Rlimit{
|
2015-09-02 00:32:29 +08:00
|
|
|
Type: rl,
|
2015-08-23 19:17:31 +08:00
|
|
|
Hard: uint64(rlimit.Hard),
|
|
|
|
Soft: uint64(rlimit.Soft),
|
2015-09-02 00:32:29 +08:00
|
|
|
}, nil
|
2015-08-23 19:17:31 +08:00
|
|
|
}
|
|
|
|
|
libcontainer: Allow passing mount propagation flags
Right now if one passes a mount propagation flag in spec file, it
does not take effect. For example, try following in spec json file.
{
"type": "bind",
"source": "/root/mnt-source",
"destination": "/root/mnt-dest",
"options": "rbind,shared"
}
One would expect that /root/mnt-dest will be shared inside the container
but that's not the case.
#findmnt -o TARGET,PROPAGATION
`-/root/mnt-dest private
Reason being that propagation flags can't be passed in along with other
regular flags. They need to be passed in a separate call to mount syscall.
That too, one propagation flag at a time. (from mount man page).
Hence, store propagation flags separately in a slice and apply these
in that order after the mount call wherever appropriate. This allows
user to control the propagation property of mount point inside
the container.
Storing them separately also solves another problem where recursive flag
(syscall.MS_REC) can get mixed up. For example, options "rbind,private"
and "bind,rprivate" will be same and there will be no way to differentiate
between these if all the flags are stored in a single integer.
This patch would allow one to pass propagation flags "[r]shared,[r]slave,
[r]private,[r]unbindable" in spec file as per mount property.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
2015-09-17 03:53:23 +08:00
|
|
|
// parseMountOptions splits a list of mount options into the three pieces
// needed to perform a mount. It returns, in order:
//
//   - the regular mount flags OR-ed into a single integer; options whose
//     table entry has clear set remove their bit instead of adding it
//     (e.g. "rw" clears MS_RDONLY),
//   - the propagation flags ("[r]shared", "[r]slave", "[r]private",
//     "[r]unbindable") as a slice, in the order they were given; they are
//     kept apart from the regular flags so that MS_REC on a propagation
//     flag cannot be confused with MS_REC on "rbind",
//   - every remaining option joined with "," as filesystem-specific data.
//
// The "defaults" option is accepted and ignored: it carries no flag bits and
// must not leak into the data string.
func parseMountOptions(options []string) (int, []int, string) {
	var (
		flag   int
		pgflag []int
		data   []string
	)
	flags := map[string]struct {
		clear bool
		flag  int
	}{
		"async":         {true, syscall.MS_SYNCHRONOUS},
		"atime":         {true, syscall.MS_NOATIME},
		"bind":          {false, syscall.MS_BIND},
		"dev":           {true, syscall.MS_NODEV},
		"diratime":      {true, syscall.MS_NODIRATIME},
		"dirsync":       {false, syscall.MS_DIRSYNC},
		"exec":          {true, syscall.MS_NOEXEC},
		"mand":          {false, syscall.MS_MANDLOCK},
		"noatime":       {false, syscall.MS_NOATIME},
		"nodev":         {false, syscall.MS_NODEV},
		"nodiratime":    {false, syscall.MS_NODIRATIME},
		"noexec":        {false, syscall.MS_NOEXEC},
		"nomand":        {true, syscall.MS_MANDLOCK},
		"norelatime":    {true, syscall.MS_RELATIME},
		"nostrictatime": {true, syscall.MS_STRICTATIME},
		"nosuid":        {false, syscall.MS_NOSUID},
		"rbind":         {false, syscall.MS_BIND | syscall.MS_REC},
		"relatime":      {false, syscall.MS_RELATIME},
		"remount":       {false, syscall.MS_REMOUNT},
		"ro":            {false, syscall.MS_RDONLY},
		"rw":            {true, syscall.MS_RDONLY},
		"strictatime":   {false, syscall.MS_STRICTATIME},
		"suid":          {true, syscall.MS_NOSUID},
		"sync":          {false, syscall.MS_SYNCHRONOUS},
	}
	propagationFlags := map[string]int{
		"private":     syscall.MS_PRIVATE,
		"shared":      syscall.MS_SHARED,
		"slave":       syscall.MS_SLAVE,
		"unbindable":  syscall.MS_UNBINDABLE,
		"rprivate":    syscall.MS_PRIVATE | syscall.MS_REC,
		"rshared":     syscall.MS_SHARED | syscall.MS_REC,
		"rslave":      syscall.MS_SLAVE | syscall.MS_REC,
		"runbindable": syscall.MS_UNBINDABLE | syscall.MS_REC,
	}
	for _, o := range options {
		f, isFlag := flags[o]
		pg, isPropagation := propagationFlags[o]
		switch {
		case o == "defaults":
			// Recognized, but it contributes no flag bits; consume it here
			// so it is not forwarded as filesystem data.
		case isFlag && f.flag != 0:
			if f.clear {
				flag &= ^f.flag
			} else {
				flag |= f.flag
			}
		case isPropagation && pg != 0:
			pgflag = append(pgflag, pg)
		default:
			// If the option is not in either table, or its flag is not
			// supported on the platform (zero value), it is a data value
			// for a specific fs type.
			data = append(data, o)
		}
	}
	return flag, pgflag, strings.Join(data, ",")
}
|
2015-08-25 02:30:45 +08:00
|
|
|
|
|
|
|
func setupSeccomp(config *specs.Seccomp) (*configs.Seccomp, error) {
|
|
|
|
if config == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// No default action specified, no syscalls listed, assume seccomp disabled
|
|
|
|
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
newConfig := new(configs.Seccomp)
|
|
|
|
newConfig.Syscalls = []*configs.Syscall{}
|
|
|
|
|
2015-10-23 10:17:39 +08:00
|
|
|
if len(config.Architectures) > 0 {
|
|
|
|
newConfig.Architectures = []string{}
|
|
|
|
for _, arch := range config.Architectures {
|
|
|
|
newArch, err := seccomp.ConvertStringToArch(string(arch))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
newConfig.Architectures = append(newConfig.Architectures, newArch)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-25 02:30:45 +08:00
|
|
|
// Convert default action from string representation
|
|
|
|
newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
newConfig.DefaultAction = newDefaultAction
|
|
|
|
|
|
|
|
// Loop through all syscall blocks and convert them to libcontainer format
|
|
|
|
for _, call := range config.Syscalls {
|
|
|
|
newAction, err := seccomp.ConvertStringToAction(string(call.Action))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
newCall := configs.Syscall{
|
|
|
|
Name: call.Name,
|
|
|
|
Action: newAction,
|
|
|
|
Args: []*configs.Arg{},
|
|
|
|
}
|
|
|
|
|
|
|
|
// Loop through all the arguments of the syscall and convert them
|
|
|
|
for _, arg := range call.Args {
|
|
|
|
newOp, err := seccomp.ConvertStringToOperator(string(arg.Op))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
newArg := configs.Arg{
|
|
|
|
Index: arg.Index,
|
|
|
|
Value: arg.Value,
|
|
|
|
ValueTwo: arg.ValueTwo,
|
|
|
|
Op: newOp,
|
|
|
|
}
|
|
|
|
|
|
|
|
newCall.Args = append(newCall.Args, &newArg)
|
|
|
|
}
|
|
|
|
|
|
|
|
newConfig.Syscalls = append(newConfig.Syscalls, &newCall)
|
|
|
|
}
|
|
|
|
|
|
|
|
return newConfig, nil
|
|
|
|
}
|
2015-09-16 06:21:07 +08:00
|
|
|
|
2016-03-11 06:18:39 +08:00
|
|
|
func createHooks(rspec *specs.Spec, config *configs.Config) {
|
2015-09-16 06:21:07 +08:00
|
|
|
config.Hooks = &configs.Hooks{}
|
|
|
|
for _, h := range rspec.Hooks.Prestart {
|
|
|
|
cmd := configs.Command{
|
|
|
|
Path: h.Path,
|
|
|
|
Args: h.Args,
|
|
|
|
Env: h.Env,
|
|
|
|
}
|
|
|
|
config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(cmd))
|
|
|
|
}
|
2015-11-07 07:03:32 +08:00
|
|
|
for _, h := range rspec.Hooks.Poststart {
|
|
|
|
cmd := configs.Command{
|
|
|
|
Path: h.Path,
|
|
|
|
Args: h.Args,
|
|
|
|
Env: h.Env,
|
|
|
|
}
|
|
|
|
config.Hooks.Poststart = append(config.Hooks.Poststart, configs.NewCommandHook(cmd))
|
|
|
|
}
|
2015-09-16 06:21:07 +08:00
|
|
|
for _, h := range rspec.Hooks.Poststop {
|
|
|
|
cmd := configs.Command{
|
|
|
|
Path: h.Path,
|
|
|
|
Args: h.Args,
|
|
|
|
Env: h.Env,
|
|
|
|
}
|
|
|
|
config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd))
|
|
|
|
}
|
|
|
|
}
|