merges latest spec with runc
Signed-off-by: Mike Brown <brownwm@us.ibm.com>
This commit is contained in:
parent
4f601205d4
commit
c2c0458598
|
@ -53,8 +53,8 @@
|
|||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/specs",
|
||||
"Comment": "v0.2.0-40-ga7b5092",
|
||||
"Rev": "a7b50925d8996923d99e1c50750131e20b743067"
|
||||
"Comment": "v0.2.0-49-g25cbfc4",
|
||||
"Rev": "25cbfc427ba6154016f33c7ed1b8ed43b8b7b7ed"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/seccomp/libseccomp-golang",
|
||||
|
|
|
@ -1,5 +1,30 @@
|
|||
OpenContainers Specifications
|
||||
|
||||
Changes with v0.3.0:
|
||||
Breaking changes:
|
||||
|
||||
* config: Single, unified config file, #284
|
||||
* config: cwd is a required default, and must be absolute, #286,
|
||||
#307, #308, #312
|
||||
* config: qualify the name of the version field, #309
|
||||
* config-linux: Convert classID from hex to uint32, #296
|
||||
* config-linux: Separate mknod from cgroups, #298
|
||||
|
||||
Additions:
|
||||
|
||||
* config-linux: Add NoNewPrivileges setting for linux, #290
|
||||
|
||||
Minor fixes and documentation:
|
||||
|
||||
* config-linux: clarify oom_score_adj, #236, #292
|
||||
* config-linux: Update links to cgroups documentation, #318
|
||||
* config-linux: Remove pointers for slices preferring omitempty
|
||||
tag instead, #316
|
||||
* README: add runtime, bundle, and hook author user, #280
|
||||
* ROADMAP: reshuffled and split into GitHub issues, #300, #301,
|
||||
#304, #306
|
||||
* style: Collect established styles in a discoverable location, #287, #311
|
||||
|
||||
Changes with v0.2.0:
|
||||
* Add Apparmor, Selinux and Seccomp
|
||||
* Add Apparmor, Selinux and Seccomp sections
|
||||
|
|
|
@ -12,8 +12,6 @@ DOC_FILES := \
|
|||
runtime-linux.md \
|
||||
config.md \
|
||||
config-linux.md \
|
||||
runtime-config.md \
|
||||
runtime-config-linux.md \
|
||||
glossary.md
|
||||
EPOCH_TEST_COMMIT := 041eb73d2e0391463894c04c8ac938036143eba3
|
||||
|
||||
|
|
|
@ -9,10 +9,8 @@ Table of Contents
|
|||
- [Specification Style](style.md)
|
||||
- [Filesystem Bundle](bundle.md)
|
||||
- Configuration
|
||||
- [Container Configuration](config.md)
|
||||
- [Container Configuration (Linux-specific)](config-linux.md)
|
||||
- [Runtime Configuration](runtime-config.md)
|
||||
- [Runtime Configuration (Linux-specific)](runtime-config-linux.md)
|
||||
- [General](config.md)
|
||||
- [Linux-specific](config-linux.md)
|
||||
- [Runtime and Lifecycle](runtime.md)
|
||||
- [Linux Specific Runtime](runtime-linux.md)
|
||||
- [Implementations](implementations.md)
|
||||
|
@ -27,12 +25,12 @@ To provide context for users the following section gives example use cases for e
|
|||
#### Application Bundle Builders
|
||||
|
||||
Application bundle builders can create a [bundle](bundle.md) directory that includes all of the files required for launching an application as a container.
|
||||
The bundle contains OCI [configuration files](config.md) where the builder can specify host-independent details such as [which executable to launch](config.md#process-configuration) and host-specific settings such as [mount](runtime-config.md#mount-configuration) locations, [hook](runtime-config.md#hooks) paths, Linux [namespaces](runtime-config-linux.md#namespaces) and [cgroups](runtime-config-linux.md#control-groups).
|
||||
The bundle contains an OCI [configuration file](config.md) where the builder can specify host-independent details such as [which executable to launch](config.md#process-configuration) and host-specific settings such as [mount](config.md#mounts) locations, [hook](config.md#hooks) paths, Linux [namespaces](config-linux.md#namespaces) and [cgroups](config-linux.md#control-groups).
|
||||
Because the configuration includes host-specific settings, application bundle directories copied between two hosts may require configuration adjustments.
|
||||
|
||||
#### Hook Developers
|
||||
|
||||
[Hook](runtime-config.md#hooks) developers can extend the functionality of an OCI-compliant runtime by hooking into a container's lifecycle with an external application.
|
||||
[Hook](config.md#hooks) developers can extend the functionality of an OCI-compliant runtime by hooking into a container's lifecycle with an external application.
|
||||
Example use cases include sophisticated network configuration, volume garbage collection, etc.
|
||||
|
||||
#### Runtime Developers
|
||||
|
|
|
@ -17,13 +17,6 @@ Although OCI doesn't define a transport method we should have a cryptographic di
|
|||
|
||||
*Owner:* philips
|
||||
|
||||
### Review the need for runtime.json (Target release: v0.3.0)
|
||||
|
||||
There are some discussions about having `runtime.json` being optional for containers and specifying defaults.
|
||||
Runtimes would use this standard set of defaults for containers and `runtime.json` would provide overrides for fine tuning of these extra host or platform specific settings.
|
||||
|
||||
*Owner:* mrunalp
|
||||
|
||||
### Define Container Lifecycle
|
||||
|
||||
Containers have a lifecycle and being able to identify and document the lifecycle of a container is very helpful for implementations of the spec.
|
||||
|
|
|
@ -8,23 +8,17 @@ See also [OS X application bundles](http://en.wikipedia.org/wiki/Bundle_%28OS_X%
|
|||
The definition of a bundle is only concerned with how a container, and its configuration data, are stored on a local file system so that it can be consumed by a compliant runtime.
|
||||
|
||||
A Standard Container bundle contains all the information needed to load and run a container.
|
||||
This includes the following three artifacts which MUST all reside in the same directory on the local filesystem:
|
||||
This includes the following artifacts which MUST all reside in the same directory on the local filesystem:
|
||||
|
||||
1. `config.json` : contains host-independent configuration data.
|
||||
This REQUIRED file, which MUST be named `config.json`, contains settings that are host-independent and application-specific such as security permissions, environment variables and arguments.
|
||||
1. `config.json` : contains configuration data.
|
||||
This REQUIRED file, which MUST be named `config.json`.
|
||||
When the bundle is packaged up for distribution, this file MUST be included.
|
||||
See [`config.json`](config.md) for more details.
|
||||
|
||||
2. `runtime.json` : contains host-specific configuration data.
|
||||
This REQUIRED file, which MUST be named `runtime.json`, contains settings that are host-specific such as mount sources and hooks.
|
||||
The goal is that the bundle can be moved as a unit to another runtime and run the same application once a host-specific `runtime.json` is defined.
|
||||
When the bundle is packaged up for distribution, this file MUST NOT be included.
|
||||
See [`runtime.json`](runtime-config.md) for more details.
|
||||
|
||||
3. A directory representing the root filesystem of the container.
|
||||
2. A directory representing the root filesystem of the container.
|
||||
While the name of this REQUIRED directory may be arbitrary, users should consider using a conventional name, such as `rootfs`.
|
||||
When the bundle is packaged up for distribution, this directory MUST be included.
|
||||
This directory MUST be referenced from within the `config.json` file.
|
||||
|
||||
While these three artifacts MUST all be present in a single directory on the local filesystem, that directory itself is not part of the bundle.
|
||||
While these artifacts MUST all be present in a single directory on the local filesystem, that directory itself is not part of the bundle.
|
||||
In other words, a tar archive of a *bundle* will have these artifacts at the root of the archive, not nested within a top-level directory.
|
||||
|
|
|
@ -16,24 +16,594 @@ Valid values are the strings for capabilities defined in [the man page](http://m
|
|||
]
|
||||
```
|
||||
|
||||
## Default Devices and File Systems
|
||||
## Default File Systems
|
||||
|
||||
The Linux ABI includes both syscalls and several special file paths.
|
||||
Applications expecting a Linux environment will very likely expect these files paths to be setup correctly.
|
||||
|
||||
The following devices and filesystems MUST be made available in each application's filesystem
|
||||
The following filesystems MUST be made available in each application's filesystem
|
||||
|
||||
| Path | Type | Notes |
|
||||
| ------------ | ------ | ------- |
|
||||
| /proc | [procfs](https://www.kernel.org/doc/Documentation/filesystems/proc.txt) | |
|
||||
| /sys | [sysfs](https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt) | |
|
||||
| /dev/null | [device](http://man7.org/linux/man-pages/man4/null.4.html) | |
|
||||
| /dev/zero | [device](http://man7.org/linux/man-pages/man4/zero.4.html) | |
|
||||
| /dev/full | [device](http://man7.org/linux/man-pages/man4/full.4.html) | |
|
||||
| /dev/random | [device](http://man7.org/linux/man-pages/man4/random.4.html) | |
|
||||
| /dev/urandom | [device](http://man7.org/linux/man-pages/man4/random.4.html) | |
|
||||
| /dev/tty | [device](http://man7.org/linux/man-pages/man4/tty.4.html) | |
|
||||
| /dev/console | [device](http://man7.org/linux/man-pages/man4/console.4.html) | |
|
||||
| /dev/pts | [devpts](https://www.kernel.org/doc/Documentation/filesystems/devpts.txt) | |
|
||||
| /dev/ptmx | [device](https://www.kernel.org/doc/Documentation/filesystems/devpts.txt) | Bind-mount or symlink of /dev/pts/ptmx |
|
||||
| /dev/shm | [tmpfs](https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt) | |
|
||||
| Path | Type |
|
||||
| -------- | ------ |
|
||||
| /proc | [procfs](https://www.kernel.org/doc/Documentation/filesystems/proc.txt) |
|
||||
| /sys | [sysfs](https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt) |
|
||||
| /dev/pts | [devpts](https://www.kernel.org/doc/Documentation/filesystems/devpts.txt) |
|
||||
| /dev/shm | [tmpfs](https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt) |
|
||||
|
||||
## Namespaces
|
||||
|
||||
A namespace wraps a global system resource in an abstraction that makes it appear to the processes within the namespace that they have their own isolated instance of the global resource.
|
||||
Changes to the global resource are visible to other processes that are members of the namespace, but are invisible to other processes.
|
||||
For more information, see [the man page](http://man7.org/linux/man-pages/man7/namespaces.7.html).
|
||||
|
||||
Namespaces are specified as an array of entries inside the `namespaces` root field.
|
||||
The following parameters can be specified to setup namespaces:
|
||||
|
||||
* **`type`** *(string, required)* - namespace type. The following namespaces types are supported:
|
||||
* **`pid`** processes inside the container will only be able to see other processes inside the same container
|
||||
* **`network`** the container will have its own network stack
|
||||
* **`mount`** the container will have an isolated mount table
|
||||
* **`ipc`** processes inside the container will only be able to communicate to other processes inside the same container via system level IPC
|
||||
* **`uts`** the container will be able to have its own hostname and domain name
|
||||
* **`user`** the container will be able to remap user and group IDs from the host to local users and groups within the container
|
||||
|
||||
* **`path`** *(string, optional)* - path to namespace file
|
||||
|
||||
If a path is specified, that particular file is used to join that type of namespace.
|
||||
Also, when a path is specified, a runtime MUST assume that the setup for that particular namespace has already been done and error out if the config specifies anything else related to that namespace.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"namespaces": [
|
||||
{
|
||||
"type": "pid",
|
||||
"path": "/proc/1234/ns/pid"
|
||||
},
|
||||
{
|
||||
"type": "network",
|
||||
"path": "/var/run/netns/neta"
|
||||
},
|
||||
{
|
||||
"type": "mount"
|
||||
},
|
||||
{
|
||||
"type": "ipc"
|
||||
},
|
||||
{
|
||||
"type": "uts"
|
||||
},
|
||||
{
|
||||
"type": "user"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## User namespace mappings
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"uidMappings": [
|
||||
{
|
||||
"hostID": 1000,
|
||||
"containerID": 0,
|
||||
"size": 10
|
||||
}
|
||||
],
|
||||
"gidMappings": [
|
||||
{
|
||||
"hostID": 1000,
|
||||
"containerID": 0,
|
||||
"size": 10
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
uid/gid mappings describe the user namespace mappings from the host to the container.
|
||||
The mappings represent how the bundle `rootfs` expects the user namespace to be setup and the runtime SHOULD NOT modify the permissions on the rootfs to realize the mapping.
|
||||
*hostID* is the starting uid/gid on the host to be mapped to *containerID* which is the starting uid/gid in the container and *size* refers to the number of ids to be mapped.
|
||||
There is a limit of 5 mappings which is the Linux kernel hard limit.
|
||||
|
||||
## Devices
|
||||
|
||||
`devices` is an array specifying the list of devices that MUST be available in the container.
|
||||
The runtime may supply them however it likes (with [mknod][mknod.2], by bind mounting from the runtime mount namespace, etc.).
|
||||
|
||||
The following parameters can be specified:
|
||||
|
||||
* **`type`** *(char, required)* - type of device: `c`, `b`, `u` or `p`.
|
||||
More info in [mknod(1)][mknod.1].
|
||||
* **`path`** *(string, required)* - full path to device inside container.
|
||||
* **`major, minor`** *(int64, required unless **`type`** is `p`)* - [major, minor numbers][devices] for the device.
|
||||
* **`fileMode`** *(uint32, optional)* - file mode for the device.
|
||||
You can also control access to devices [with cgroups](#device-whitelist).
|
||||
* **`uid`** *(uint32, optional)* - id of device owner.
|
||||
* **`gid`** *(uint32, optional)* - id of device group.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"devices": [
|
||||
{
|
||||
"path": "/dev/fuse",
|
||||
"type": "c",
|
||||
"major": 10,
|
||||
"minor": 229,
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
{
|
||||
"path": "/dev/sda",
|
||||
"type": "b",
|
||||
"major": 8,
|
||||
"minor": 0,
|
||||
"fileMode": 0660,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
###### Default Devices
|
||||
|
||||
In addition to any devices configured with this setting, the runtime MUST also supply:
|
||||
|
||||
* [`/dev/null`][null.4]
|
||||
* [`/dev/zero`][zero.4]
|
||||
* [`/dev/full`][full.4]
|
||||
* [`/dev/random`][random.4]
|
||||
* [`/dev/urandom`][random.4]
|
||||
* [`/dev/tty`][tty.4]
|
||||
* [`/dev/console`][console.4]
|
||||
* [`/dev/ptmx`][pts.4].
|
||||
A [bind-mount or symlink of the container's `/dev/pts/ptmx`][devpts].
|
||||
|
||||
## Control groups
|
||||
|
||||
Also known as cgroups, they are used to restrict resource usage for a container and handle device access.
|
||||
cgroups provide controls to restrict cpu, memory, IO, pids and network for the container.
|
||||
For more information, see the [kernel cgroups documentation][cgroup-v1].
|
||||
|
||||
The path to the cgroups can be specified in the Spec via `cgroupsPath`.
|
||||
`cgroupsPath` is expected to be relative to the cgroups mount point.
|
||||
If `cgroupsPath` is not specified, implementations can define the default cgroup path.
|
||||
Implementations of the Spec can choose to name cgroups in any manner.
|
||||
The Spec does not include naming schema for cgroups.
|
||||
The Spec does not support [split hierarchy][cgroup-v2].
|
||||
The cgroups will be created if they don't exist.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"cgroupsPath": "/myRuntime/myContainer"
|
||||
```
|
||||
|
||||
`cgroupsPath` can be used to either control the cgroups hierarchy for containers or to run a new process in an existing container.
|
||||
|
||||
You can configure a container's cgroups via the `resources` field of the Linux configuration.
|
||||
Do not specify `resources` unless limits have to be updated.
|
||||
For example, to run a new process in an existing container without updating limits, `resources` need not be specified.
|
||||
|
||||
#### Device whitelist
|
||||
|
||||
`devices` is an array of entries to control the [device whitelist][cgroup-v1-devices].
|
||||
The runtime MUST apply entries in the listed order.
|
||||
|
||||
The following parameters can be specified:
|
||||
|
||||
* **`allow`** *(boolean, required)* - whether the entry is allowed or denied.
|
||||
* **`type`** *(char, optional)* - type of device: `a` (all), `c` (char), or `b` (block).
|
||||
`null` or unset values mean "all", mapping to `a`.
|
||||
* **`major, minor`** *(int64, optional)* - [major, minor numbers][devices] for the device.
|
||||
`null` or unset values mean "all", mapping to [`*` in the filesystem API][cgroup-v1-devices].
|
||||
* **`access`** *(string, optional)* - cgroup permissions for device.
|
||||
A composition of `r` (read), `w` (write), and `m` (mknod).
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"devices": [
|
||||
{
|
||||
"allow": false,
|
||||
"access": "rwm"
|
||||
},
|
||||
{
|
||||
"allow": true,
|
||||
"type": "c",
|
||||
"major": 10,
|
||||
"minor": 229,
|
||||
"access": "rw"
|
||||
},
|
||||
{
|
||||
"allow": true,
|
||||
"type": "b",
|
||||
"major": 8,
|
||||
"minor": 0,
|
||||
"access": "r"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
#### Disable out-of-memory killer
|
||||
|
||||
`disableOOMKiller` contains a boolean (`true` or `false`) that enables or disables the Out of Memory killer for a cgroup.
|
||||
If enabled (`false`), tasks that attempt to consume more memory than they are allowed are immediately killed by the OOM killer.
|
||||
The OOM killer is enabled by default in every cgroup using the `memory` subsystem.
|
||||
To disable it, specify a value of `true`.
|
||||
For more information, see [the memory cgroup man page][cgroup-v1-memory].
|
||||
|
||||
* **`disableOOMKiller`** *(bool, optional)* - enables or disables the OOM killer
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"disableOOMKiller": false
|
||||
```
|
||||
|
||||
#### Set oom_score_adj
|
||||
|
||||
`oomScoreAdj` sets heuristic regarding how the process is evaluated by the kernel during memory pressure.
|
||||
For more information, see [the proc filesystem documentation section 3.1](https://www.kernel.org/doc/Documentation/filesystems/proc.txt).
|
||||
This is a kernel/system level setting, where as `disableOOMKiller` is scoped for a memory cgroup.
|
||||
For more information on how these two settings work together, see [the memory cgroup documentation section 10. OOM Contol][cgroup-v1-memory].
|
||||
|
||||
* **`oomScoreAdj`** *(int, optional)* - adjust the oom-killer score
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"oomScoreAdj": 0
|
||||
```
|
||||
|
||||
#### Memory
|
||||
|
||||
`memory` represents the cgroup subsystem `memory` and it's used to set limits on the container's memory usage.
|
||||
For more information, see [the memory cgroup man page][cgroup-v1-memory].
|
||||
|
||||
The following parameters can be specified to setup the controller:
|
||||
|
||||
* **`limit`** *(uint64, optional)* - sets limit of memory usage
|
||||
|
||||
* **`reservation`** *(uint64, optional)* - sets soft limit of memory usage
|
||||
|
||||
* **`swap`** *(uint64, optional)* - sets limit of memory+Swap usage
|
||||
|
||||
* **`kernel`** *(uint64, optional)* - sets hard limit for kernel memory
|
||||
|
||||
* **`kernelTCP`** *(uint64, optional)* - sets hard limit for kernel memory in tcp using
|
||||
|
||||
* **`swappiness`** *(uint64, optional)* - sets swappiness parameter of vmscan (See sysctl's vm.swappiness)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"memory": {
|
||||
"limit": 0,
|
||||
"reservation": 0,
|
||||
"swap": 0,
|
||||
"kernel": 0,
|
||||
"kernelTCP": 0,
|
||||
"swappiness": 0
|
||||
}
|
||||
```
|
||||
|
||||
#### CPU
|
||||
|
||||
`cpu` represents the cgroup subsystems `cpu` and `cpusets`.
|
||||
For more information, see [the cpusets cgroup man page][cgroup-v1-cpusets].
|
||||
|
||||
The following parameters can be specified to setup the controller:
|
||||
|
||||
* **`shares`** *(uint64, optional)* - specifies a relative share of CPU time available to the tasks in a cgroup
|
||||
|
||||
* **`quota`** *(uint64, optional)* - specifies the total amount of time in microseconds for which all tasks in a cgroup can run during one period (as defined by **`period`** below)
|
||||
|
||||
* **`period`** *(uint64, optional)* - specifies a period of time in microseconds for how regularly a cgroup's access to CPU resources should be reallocated (CFS scheduler only)
|
||||
|
||||
* **`realtimeRuntime`** *(uint64, optional)* - specifies a period of time in microseconds for the longest continuous period in which the tasks in a cgroup have access to CPU resources
|
||||
|
||||
* **`realtimePeriod`** *(uint64, optional)* - same as **`period`** but applies to realtime scheduler only
|
||||
|
||||
* **`cpus`** *(string, optional)* - list of CPUs the container will run in
|
||||
|
||||
* **`mems`** *(string, optional)* - list of Memory Nodes the container will run in
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"cpu": {
|
||||
"shares": 0,
|
||||
"quota": 0,
|
||||
"period": 0,
|
||||
"realtimeRuntime": 0,
|
||||
"realtimePeriod": 0,
|
||||
"cpus": "",
|
||||
"mems": ""
|
||||
}
|
||||
```
|
||||
|
||||
#### Block IO Controller
|
||||
|
||||
`blockIO` represents the cgroup subsystem `blkio` which implements the block io controller.
|
||||
For more information, see [the kernel cgroups documentation about blkio][cgroup-v1-blkio].
|
||||
|
||||
The following parameters can be specified to setup the controller:
|
||||
|
||||
* **`blkioWeight`** *(uint16, optional)* - specifies per-cgroup weight. This is default weight of the group on all devices until and unless overridden by per-device rules. The range is from 10 to 1000.
|
||||
|
||||
* **`blkioLeafWeight`** *(uint16, optional)* - equivalents of `blkioWeight` for the purpose of deciding how much weight tasks in the given cgroup has while competing with the cgroup's child cgroups. The range is from 10 to 1000.
|
||||
|
||||
* **`blkioWeightDevice`** *(array, optional)* - specifies the list of devices which will be bandwidth rate limited. The following parameters can be specified per-device:
|
||||
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`.
|
||||
* **`weight`** *(uint16, optional)* - bandwidth rate for the device, range is from 10 to 1000
|
||||
* **`leafWeight`** *(uint16, optional)* - bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
||||
|
||||
You must specify at least one of `weight` or `leafWeight` in a given entry, and can specify both.
|
||||
|
||||
* **`blkioThrottleReadBpsDevice`**, **`blkioThrottleWriteBpsDevice`**, **`blkioThrottleReadIOPSDevice`**, **`blkioThrottleWriteIOPSDevice`** *(array, optional)* - specify the list of devices which will be IO rate limited. The following parameters can be specified per-device:
|
||||
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`.
|
||||
* **`rate`** *(uint64, required)* - IO rate limit for the device
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"blockIO": {
|
||||
"blkioWeight": 0,
|
||||
"blkioLeafWeight": 0,
|
||||
"blkioWeightDevice": [
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 0,
|
||||
"weight": 500,
|
||||
"leafWeight": 300
|
||||
},
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 16,
|
||||
"weight": 500
|
||||
}
|
||||
],
|
||||
"blkioThrottleReadBpsDevice": [
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 0,
|
||||
"rate": 600
|
||||
}
|
||||
],
|
||||
"blkioThrottleWriteIOPSDevice": [
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 16,
|
||||
"rate": 300
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Huge page limits
|
||||
|
||||
`hugepageLimits` represents the `hugetlb` controller which allows to limit the
|
||||
HugeTLB usage per control group and enforces the controller limit during page fault.
|
||||
For more information, see the [kernel cgroups documentation about HugeTLB][cgroup-v1-hugetlb].
|
||||
|
||||
`hugepageLimits` is an array of entries, each having the following structure:
|
||||
|
||||
* **`pageSize`** *(string, required)* - hugepage size
|
||||
|
||||
* **`limit`** *(uint64, required)* - limit in bytes of *hugepagesize* HugeTLB usage
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"hugepageLimits": [
|
||||
{
|
||||
"pageSize": "2MB",
|
||||
"limit": 9223372036854771712
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
#### Network
|
||||
|
||||
`network` represents the cgroup subsystems `net_cls` and `net_prio`.
|
||||
For more information, see [the net\_cls cgroup man page][cgroup-v1-net-cls] and [the net\_prio cgroup man page][cgroup-v1-net-prio].
|
||||
|
||||
The following parameters can be specified to setup these cgroup controllers:
|
||||
|
||||
* **`classID`** *(uint32, optional)* - is the network class identifier the cgroup's network packets will be tagged with
|
||||
|
||||
* **`priorities`** *(array, optional)* - specifies a list of objects of the priorities assigned to traffic originating from
|
||||
processes in the group and egressing the system on various interfaces. The following parameters can be specified per-priority:
|
||||
* **`name`** *(string, required)* - interface name
|
||||
* **`priority`** *(uint32, required)* - priority applied to the interface
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"network": {
|
||||
"classID": 1048577,
|
||||
"priorities": [
|
||||
{
|
||||
"name": "eth0",
|
||||
"priority": 500
|
||||
},
|
||||
{
|
||||
"name": "eth1",
|
||||
"priority": 1000
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### PIDs
|
||||
|
||||
`pids` represents the cgroup subsystem `pids`.
|
||||
For more information, see [the pids cgroup man page][cgroup-v1-pids].
|
||||
|
||||
The following paramters can be specified to setup the controller:
|
||||
|
||||
* **`limit`** *(int64, required)* - specifies the maximum number of tasks in the cgroup
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"pids": {
|
||||
"limit": 32771
|
||||
}
|
||||
```
|
||||
|
||||
## Sysctl
|
||||
|
||||
sysctl allows kernel parameters to be modified at runtime for the container.
|
||||
For more information, see [the man page](http://man7.org/linux/man-pages/man8/sysctl.8.html)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"sysctl": {
|
||||
"net.ipv4.ip_forward": "1",
|
||||
"net.core.somaxconn": "256"
|
||||
}
|
||||
```
|
||||
|
||||
## Rlimits
|
||||
|
||||
rlimits allow setting resource limits.
|
||||
`type` is a string with a value from those defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html).
|
||||
The kernel enforces the `soft` limit for a resource while the `hard` limit acts as a ceiling for that value that could be set by an unprivileged process.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"rlimits": [
|
||||
{
|
||||
"type": "RLIMIT_NPROC",
|
||||
"soft": 1024,
|
||||
"hard": 102400
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## SELinux process label
|
||||
|
||||
SELinux process label specifies the label with which the processes in a container are run.
|
||||
For more information about SELinux, see [Selinux documentation](http://selinuxproject.org/page/Main_Page)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"selinuxProcessLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675"
|
||||
```
|
||||
|
||||
## Apparmor profile
|
||||
|
||||
Apparmor profile specifies the name of the apparmor profile that will be used for the container.
|
||||
For more information about Apparmor, see [Apparmor documentation](https://wiki.ubuntu.com/AppArmor)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"apparmorProfile": "acme_secure_profile"
|
||||
```
|
||||
|
||||
## seccomp
|
||||
|
||||
Seccomp provides application sandboxing mechanism in the Linux kernel.
|
||||
Seccomp configuration allows one to configure actions to take for matched syscalls and furthermore also allows matching on values passed as arguments to syscalls.
|
||||
For more information about Seccomp, see [Seccomp kernel documentation](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt)
|
||||
The actions, architectures, and operators are strings that match the definitions in seccomp.h from [libseccomp](https://github.com/seccomp/libseccomp) and are translated to corresponding values.
|
||||
A valid list of constants as of Libseccomp v2.2.3 is contained below.
|
||||
|
||||
Architecture Constants
|
||||
* `SCMP_ARCH_X86`
|
||||
* `SCMP_ARCH_X86_64`
|
||||
* `SCMP_ARCH_X32`
|
||||
* `SCMP_ARCH_ARM`
|
||||
* `SCMP_ARCH_AARCH64`
|
||||
* `SCMP_ARCH_MIPS`
|
||||
* `SCMP_ARCH_MIPS64`
|
||||
* `SCMP_ARCH_MIPS64N32`
|
||||
* `SCMP_ARCH_MIPSEL`
|
||||
* `SCMP_ARCH_MIPSEL64`
|
||||
* `SCMP_ARCH_MIPSEL64N32`
|
||||
|
||||
Action Constants:
|
||||
* `SCMP_ACT_KILL`
|
||||
* `SCMP_ACT_TRAP`
|
||||
* `SCMP_ACT_ERRNO`
|
||||
* `SCMP_ACT_TRACE`
|
||||
* `SCMP_ACT_ALLOW`
|
||||
|
||||
Operator Constants:
|
||||
* `SCMP_CMP_NE`
|
||||
* `SCMP_CMP_LT`
|
||||
* `SCMP_CMP_LE`
|
||||
* `SCMP_CMP_EQ`
|
||||
* `SCMP_CMP_GE`
|
||||
* `SCMP_CMP_GT`
|
||||
* `SCMP_CMP_MASKED_EQ`
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"seccomp": {
|
||||
"defaultAction": "SCMP_ACT_ALLOW",
|
||||
"architectures": [
|
||||
"SCMP_ARCH_X86"
|
||||
],
|
||||
"syscalls": [
|
||||
{
|
||||
"name": "getcwd",
|
||||
"action": "SCMP_ACT_ERRNO"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Rootfs Mount Propagation
|
||||
|
||||
rootfsPropagation sets the rootfs's mount propagation.
|
||||
Its value is either slave, private, or shared.
|
||||
[The kernel doc](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt) has more information about mount propagation.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"rootfsPropagation": "slave",
|
||||
```
|
||||
|
||||
## No new privileges
|
||||
|
||||
Setting `noNewPrivileges` to true prevents the processes in the container from gaining additional privileges.
|
||||
[The kernel doc](https://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt) has more information on how this is achieved using a prctl system call.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"noNewPrivileges": true,
|
||||
```
|
||||
|
||||
[cgroup-v1]: https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
[cgroup-v1-blkio]: https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt
|
||||
[cgroup-v1-cpusets]: https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
|
||||
[cgroup-v1-devices]: https://www.kernel.org/doc/Documentation/cgroup-v1/devices.txt
|
||||
[cgroup-v1-hugetlb]: https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt
|
||||
[cgroup-v1-memory]: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
|
||||
[cgroup-v1-net-cls]: https://www.kernel.org/doc/Documentation/cgroup-v1/net_cls.txt
|
||||
[cgroup-v1-net-prio]: https://www.kernel.org/doc/Documentation/cgroup-v1/net_prio.txt
|
||||
[cgroup-v1-pids]: https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt
|
||||
[cgroup-v2]: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
[devices]: https://www.kernel.org/doc/Documentation/devices.txt
|
||||
[devpts]: https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
|
||||
|
||||
[mknod.1]: http://man7.org/linux/man-pages/man1/mknod.1.html
|
||||
[mknod.2]: http://man7.org/linux/man-pages/man2/mknod.2.html
|
||||
[console.4]: http://man7.org/linux/man-pages/man4/console.4.html
|
||||
[full.4]: http://man7.org/linux/man-pages/man4/full.4.html
|
||||
[null.4]: http://man7.org/linux/man-pages/man4/null.4.html
|
||||
[pts.4]: http://man7.org/linux/man-pages/man4/pts.4.html
|
||||
[random.4]: http://man7.org/linux/man-pages/man4/random.4.html
|
||||
[tty.4]: http://man7.org/linux/man-pages/man4/tty.4.html
|
||||
[zero.4]: http://man7.org/linux/man-pages/man4/zero.4.html
|
||||
|
|
|
@ -15,7 +15,9 @@ type Spec struct {
|
|||
// Hostname is the container's host name.
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
// Mounts profile configuration for adding mounts to the container's filesystem.
|
||||
Mounts []MountPoint `json:"mounts"`
|
||||
Mounts []Mount `json:"mounts"`
|
||||
// Hooks are the commands run at various lifecycle events of the container.
|
||||
Hooks Hooks `json:"hooks"`
|
||||
}
|
||||
|
||||
// Process contains information to start a specific application inside the container.
|
||||
|
@ -50,10 +52,33 @@ type Platform struct {
|
|||
Arch string `json:"arch"`
|
||||
}
|
||||
|
||||
// MountPoint describes a directory that may be fullfilled by a mount in the runtime.json.
|
||||
type MountPoint struct {
|
||||
// Name is a unique descriptive identifier for this mount point.
|
||||
Name string `json:"name"`
|
||||
// Path specifies the path of the mount. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point.
|
||||
Path string `json:"path"`
|
||||
// Mount specifies a mount for a container.
|
||||
type Mount struct {
|
||||
// Destination is the path where the mount will be placed relative to the container's root. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point.
|
||||
Destination string `json:"destination"`
|
||||
// Type specifies the mount kind.
|
||||
Type string `json:"type"`
|
||||
// Source specifies the source path of the mount. In the case of bind mounts on
|
||||
// linux based systems this would be the file on the host.
|
||||
Source string `json:"source"`
|
||||
// Options are fstab style mount options.
|
||||
Options []string `json:"options,omitempty"`
|
||||
}
|
||||
|
||||
// Hook specifies a command that is run at a particular event in the lifecycle of a container
|
||||
type Hook struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args,omitempty"`
|
||||
Env []string `json:"env,omitempty"`
|
||||
}
|
||||
|
||||
// Hooks for container setup and teardown
|
||||
type Hooks struct {
|
||||
// Prestart is a list of hooks to be run before the container process is executed.
|
||||
// On Linux, they are run after the container namespaces are created.
|
||||
Prestart []Hook `json:"prestart,omitempty"`
|
||||
// Poststart is a list of hooks to be run after the container process is started.
|
||||
Poststart []Hook `json:"poststart,omitempty"`
|
||||
// Poststop is a list of hooks to be run after the container process exits.
|
||||
Poststop []Hook `json:"poststop,omitempty"`
|
||||
}
|
||||
|
|
|
@ -37,38 +37,52 @@ Each container has exactly one *root filesystem*, specified in the *root* object
|
|||
}
|
||||
```
|
||||
|
||||
## Mount Points
|
||||
## Mounts
|
||||
|
||||
You can add array of mount points inside container as `mounts`.
|
||||
Each record in this array must have configuration in [runtime config](runtime-config.md#mount-configuration).
|
||||
The runtime MUST mount entries in the listed order.
|
||||
The parameters are similar to the ones in [the Linux mount system call](http://man7.org/linux/man-pages/man2/mount.2.html).
|
||||
|
||||
* **`name`** (string, required) Name of mount point. Used for config lookup.
|
||||
* **`path`** (string, required) Destination of mount point: path inside container.
|
||||
* **`destination`** (string, required) Destination of mount point: path inside container.
|
||||
* **`type`** (string, required) Linux, *filesystemtype* argument supported by the kernel are listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). Windows: ntfs
|
||||
* **`source`** (string, required) a device name, but can also be a directory name or a dummy. Windows, the volume name that is the target of the mount point. \\?\Volume\{GUID}\ (on Windows source is called target)
|
||||
* **`options`** (list of strings, optional) in the fstab format [https://wiki.archlinux.org/index.php/Fstab](https://wiki.archlinux.org/index.php/Fstab).
|
||||
|
||||
*Example*
|
||||
### Linux Example
|
||||
|
||||
```json
|
||||
"mounts": [
|
||||
{
|
||||
"name": "proc",
|
||||
"path": "/proc"
|
||||
"destination": "/tmp",
|
||||
"type": "tmpfs",
|
||||
"source": "tmpfs",
|
||||
"options": ["nosuid","strictatime","mode=755","size=65536k"]
|
||||
},
|
||||
{
|
||||
"name": "dev",
|
||||
"path": "/dev"
|
||||
},
|
||||
{
|
||||
"name": "devpts",
|
||||
"path": "/dev/pts"
|
||||
},
|
||||
{
|
||||
"name": "data",
|
||||
"path": "/data"
|
||||
"destination": "/data",
|
||||
"type": "bind",
|
||||
"source": "/volumes/testing",
|
||||
"options": ["rbind","rw"]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Windows Example
|
||||
|
||||
```json
|
||||
"mounts": [
|
||||
"myfancymountpoint": {
|
||||
"destination": "C:\\Users\\crosbymichael\\My Fancy Mount Point\\",
|
||||
"type": "ntfs",
|
||||
"source": "\\\\?\\Volume\\{2eca078d-5cbc-43d3-aff8-7e8511f60d0e}\\",
|
||||
"options": []
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
See links for details about [mountvol](http://ss64.com/nt/mountvol.html) and [SetVolumeMountPoint](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365561(v=vs.85).aspx) in Windows.
|
||||
|
||||
|
||||
## Process configuration
|
||||
|
||||
* **`terminal`** (bool, optional) specifies whether you want a terminal attached to that process. Defaults to false.
|
||||
|
@ -130,4 +144,72 @@ For Linux-based systems the user structure has the following fields:
|
|||
Interpretation of the platform section of the JSON file is used to find which platform-specific sections may be available in the document.
|
||||
For example, if `os` is set to `linux`, then a JSON object conforming to the [Linux-specific schema](config-linux.md) SHOULD be found at the key `linux` in the `config.json`.
|
||||
|
||||
## Hooks
|
||||
|
||||
Lifecycle hooks allow custom events for different points in a container's runtime.
|
||||
Presently there are `Prestart`, `Poststart` and `Poststop`.
|
||||
|
||||
* [`Prestart`](#prestart) is a list of hooks to be run before the container process is executed
|
||||
* [`Poststart`](#poststart) is a list of hooks to be run immediately after the container process is started
|
||||
* [`Poststop`](#poststop) is a list of hooks to be run after the container process exits
|
||||
|
||||
Hooks allow one to run code before/after various lifecycle events of the container.
|
||||
Hooks MUST be called in the listed order.
|
||||
The state of the container is passed to the hooks over stdin, so the hooks could get the information they need to do their work.
|
||||
|
||||
Hook paths are absolute and are executed from the host's filesystem.
|
||||
|
||||
### Prestart
|
||||
|
||||
The pre-start hooks are called after the container process is spawned, but before the user supplied command is executed.
|
||||
They are called after the container namespaces are created on Linux, so they provide an opportunity to customize the container.
|
||||
In Linux, for e.g., the network namespace could be configured in this hook.
|
||||
|
||||
If a hook returns a non-zero exit code, then an error including the exit code and the stderr is returned to the caller and the container is torn down.
|
||||
|
||||
### Poststart
|
||||
|
||||
The post-start hooks are called after the user process is started.
|
||||
For example this hook can notify user that real process is spawned.
|
||||
|
||||
If a hook returns a non-zero exit code, then an error is logged and the remaining hooks are executed.
|
||||
|
||||
### Poststop
|
||||
|
||||
The post-stop hooks are called after the container process is stopped.
|
||||
Cleanup or debugging could be performed in such a hook.
|
||||
If a hook returns a non-zero exit code, then an error is logged and the remaining hooks are executed.
|
||||
|
||||
*Example*
|
||||
|
||||
```json
|
||||
"hooks" : {
|
||||
"prestart": [
|
||||
{
|
||||
"path": "/usr/bin/fix-mounts",
|
||||
"args": ["fix-mounts", "arg1", "arg2"],
|
||||
"env": [ "key1=value1"]
|
||||
},
|
||||
{
|
||||
"path": "/usr/bin/setup-network"
|
||||
}
|
||||
],
|
||||
"poststart": [
|
||||
{
|
||||
"path": "/usr/bin/notify-start"
|
||||
}
|
||||
],
|
||||
"poststop": [
|
||||
{
|
||||
"path": "/usr/sbin/cleanup.sh",
|
||||
"args": ["cleanup.sh", "-f"]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
`path` is required for a hook.
|
||||
`args` and `env` are optional.
|
||||
The semantics are the same as `Path`, `Args` and `Env` in [golang Cmd](https://golang.org/pkg/os/exec/#Cmd).
|
||||
|
||||
[uts-namespace]: http://man7.org/linux/man-pages/man7/namespaces.7.html
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
package specs
|
||||
|
||||
import "os"
|
||||
|
||||
// LinuxStateDirectory holds the container's state information
|
||||
const LinuxStateDirectory = "/run/opencontainer/containers"
|
||||
|
||||
// LinuxSpec is the full specification for linux containers.
|
||||
type LinuxSpec struct {
|
||||
Spec
|
||||
|
@ -11,6 +16,35 @@ type LinuxSpec struct {
|
|||
type Linux struct {
|
||||
// Capabilities are linux capabilities that are kept for the container.
|
||||
Capabilities []string `json:"capabilities"`
|
||||
// UIDMapping specifies user mappings for supporting user namespaces on linux.
|
||||
UIDMappings []IDMapping `json:"uidMappings,omitempty"`
|
||||
// GIDMapping specifies group mappings for supporting user namespaces on linux.
|
||||
GIDMappings []IDMapping `json:"gidMappings,omitempty"`
|
||||
// Rlimits specifies rlimit options to apply to the container's process.
|
||||
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||
// Sysctl are a set of key value pairs that are set for the container on start
|
||||
Sysctl map[string]string `json:"sysctl,omitempty"`
|
||||
// Resources contain cgroup information for handling resource constraints
|
||||
// for the container
|
||||
Resources *Resources `json:"resources,omitempty"`
|
||||
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is expected to be relative to the cgroups mountpoint.
|
||||
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
|
||||
CgroupsPath *string `json:"cgroupsPath,omitempty"`
|
||||
// Namespaces contains the namespaces that are created and/or joined by the container
|
||||
Namespaces []Namespace `json:"namespaces"`
|
||||
// Devices are a list of device nodes that are created for the container
|
||||
Devices []Device `json:"devices"`
|
||||
// ApparmorProfile specified the apparmor profile for the container.
|
||||
ApparmorProfile string `json:"apparmorProfile"`
|
||||
// SelinuxProcessLabel specifies the selinux context that the container process is run as.
|
||||
SelinuxProcessLabel string `json:"selinuxProcessLabel"`
|
||||
// Seccomp specifies the seccomp security settings for the container.
|
||||
Seccomp Seccomp `json:"seccomp"`
|
||||
// RootfsPropagation is the rootfs mount propagation mode for the container.
|
||||
RootfsPropagation string `json:"rootfsPropagation,omitempty"`
|
||||
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
|
||||
NoNewPrivileges bool `json:"noNewPrivileges,omitempty"`
|
||||
}
|
||||
|
||||
// User specifies linux specific user and group information for the container's
|
||||
|
@ -23,3 +57,277 @@ type User struct {
|
|||
// AdditionalGids are additional group ids set for the container's process.
|
||||
AdditionalGids []uint32 `json:"additionalGids,omitempty"`
|
||||
}
|
||||
|
||||
// Namespace is the configuration for a linux namespace
|
||||
type Namespace struct {
|
||||
// Type is the type of Linux namespace
|
||||
Type NamespaceType `json:"type"`
|
||||
// Path is a path to an existing namespace persisted on disk that can be joined
|
||||
// and is of the same type
|
||||
Path string `json:"path,omitempty"`
|
||||
}
|
||||
|
||||
// NamespaceType is one of the linux namespaces
|
||||
type NamespaceType string
|
||||
|
||||
const (
|
||||
// PIDNamespace for isolating process IDs
|
||||
PIDNamespace NamespaceType = "pid"
|
||||
// NetworkNamespace for isolating network devices, stacks, ports, etc
|
||||
NetworkNamespace = "network"
|
||||
// MountNamespace for isolating mount points
|
||||
MountNamespace = "mount"
|
||||
// IPCNamespace for isolating System V IPC, POSIX message queues
|
||||
IPCNamespace = "ipc"
|
||||
// UTSNamespace for isolating hostname and NIS domain name
|
||||
UTSNamespace = "uts"
|
||||
// UserNamespace for isolating user and group IDs
|
||||
UserNamespace = "user"
|
||||
)
|
||||
|
||||
// IDMapping specifies UID/GID mappings
|
||||
type IDMapping struct {
|
||||
// HostID is the UID/GID of the host user or group
|
||||
HostID uint32 `json:"hostID"`
|
||||
// ContainerID is the UID/GID of the container's user or group
|
||||
ContainerID uint32 `json:"containerID"`
|
||||
// Size is the length of the range of IDs mapped between the two namespaces
|
||||
Size uint32 `json:"size"`
|
||||
}
|
||||
|
||||
// Rlimit type and restrictions
|
||||
type Rlimit struct {
|
||||
// Type of the rlimit to set
|
||||
Type string `json:"type"`
|
||||
// Hard is the hard limit for the specified type
|
||||
Hard uint64 `json:"hard"`
|
||||
// Soft is the soft limit for the specified type
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
// HugepageLimit structure corresponds to limiting kernel hugepages
|
||||
type HugepageLimit struct {
|
||||
// Pagesize is the hugepage size
|
||||
Pagesize *string `json:"pageSize,omitempty"`
|
||||
// Limit is the limit of "hugepagesize" hugetlb usage
|
||||
Limit *uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
// InterfacePriority for network interfaces
|
||||
type InterfacePriority struct {
|
||||
// Name is the name of the network interface
|
||||
Name string `json:"name"`
|
||||
// Priority for the interface
|
||||
Priority uint32 `json:"priority"`
|
||||
}
|
||||
|
||||
// blockIODevice holds major:minor format supported in blkio cgroup
|
||||
type blockIODevice struct {
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
}
|
||||
|
||||
// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice
|
||||
type WeightDevice struct {
|
||||
blockIODevice
|
||||
// Weight is the bandwidth rate for the device, range is from 10 to 1000
|
||||
Weight *uint16 `json:"weight,omitempty"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
||||
LeafWeight *uint16 `json:"leafWeight,omitempty"`
|
||||
}
|
||||
|
||||
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type ThrottleDevice struct {
|
||||
blockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate *uint64 `json:"rate,omitempty"`
|
||||
}
|
||||
|
||||
// BlockIO for Linux cgroup 'blkio' resource management
|
||||
type BlockIO struct {
|
||||
// Specifies per cgroup weight, range is from 10 to 1000
|
||||
Weight *uint16 `json:"blkioWeight,omitempty"`
|
||||
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
||||
LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"`
|
||||
// Weight per cgroup per device, can override BlkioWeight
|
||||
WeightDevice []WeightDevice `json:"blkioWeightDevice,omitempty"`
|
||||
// IO read rate limit per cgroup per device, bytes per second
|
||||
ThrottleReadBpsDevice []ThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"`
|
||||
// IO write rate limit per cgroup per device, bytes per second
|
||||
ThrottleWriteBpsDevice []ThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"`
|
||||
// IO read rate limit per cgroup per device, IO per second
|
||||
ThrottleReadIOPSDevice []ThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"`
|
||||
// IO write rate limit per cgroup per device, IO per second
|
||||
ThrottleWriteIOPSDevice []ThrottleDevice `json:"blkioThrottleWriteIOPSDevice,omitempty"`
|
||||
}
|
||||
|
||||
// Memory for Linux cgroup 'memory' resource management
|
||||
type Memory struct {
|
||||
// Memory limit (in bytes).
|
||||
Limit *uint64 `json:"limit,omitempty"`
|
||||
// Memory reservation or soft_limit (in bytes).
|
||||
Reservation *uint64 `json:"reservation,omitempty"`
|
||||
// Total memory limit (memory + swap).
|
||||
Swap *uint64 `json:"swap,omitempty"`
|
||||
// Kernel memory limit (in bytes).
|
||||
Kernel *uint64 `json:"kernel,omitempty"`
|
||||
// Kernel memory limit for tcp (in bytes)
|
||||
KernelTCP *uint64 `json:"kernelTCP"`
|
||||
// How aggressive the kernel will swap memory pages. Range from 0 to 100.
|
||||
Swappiness *uint64 `json:"swappiness,omitempty"`
|
||||
}
|
||||
|
||||
// CPU for Linux cgroup 'cpu' resource management
|
||||
type CPU struct {
|
||||
// CPU shares (relative weight (ratio) vs. other cgroups with cpu shares).
|
||||
Shares *uint64 `json:"shares,omitempty"`
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
Quota *uint64 `json:"quota,omitempty"`
|
||||
// CPU period to be used for hardcapping (in usecs).
|
||||
Period *uint64 `json:"period,omitempty"`
|
||||
// How much time realtime scheduling may use (in usecs).
|
||||
RealtimeRuntime *uint64 `json:"realtimeRuntime,omitempty"`
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"`
|
||||
// CPUs to use within the cpuset. Default is to use any CPU available.
|
||||
Cpus *string `json:"cpus,omitempty"`
|
||||
// List of memory nodes in the cpuset. Default is to use any available memory node.
|
||||
Mems *string `json:"mems,omitempty"`
|
||||
}
|
||||
|
||||
// Pids for Linux cgroup 'pids' resource management (Linux 4.3)
|
||||
type Pids struct {
|
||||
// Maximum number of PIDs. Default is "no limit".
|
||||
Limit *int64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
// Network identification and priority configuration
|
||||
type Network struct {
|
||||
// Set class identifier for container's network packets
|
||||
ClassID *uint32 `json:"classID"`
|
||||
// Set priority of network traffic for container
|
||||
Priorities []InterfacePriority `json:"priorities,omitempty"`
|
||||
}
|
||||
|
||||
// Resources has container runtime resource constraints
|
||||
type Resources struct {
|
||||
// Devices are a list of device rules for the whitelist controller
|
||||
Devices []DeviceCgroup `json:"devices"`
|
||||
// DisableOOMKiller disables the OOM killer for out of memory conditions
|
||||
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
|
||||
// Specify an oom_score_adj for the container.
|
||||
OOMScoreAdj *int `json:"oomScoreAdj,omitempty"`
|
||||
// Memory restriction configuration
|
||||
Memory *Memory `json:"memory,omitempty"`
|
||||
// CPU resource restriction configuration
|
||||
CPU *CPU `json:"cpu,omitempty"`
|
||||
// Task resource restriction configuration.
|
||||
Pids *Pids `json:"pids,omitempty"`
|
||||
// BlockIO restriction configuration
|
||||
BlockIO *BlockIO `json:"blockIO,omitempty"`
|
||||
// Hugetlb limit (in bytes)
|
||||
HugepageLimits []HugepageLimit `json:"hugepageLimits,omitempty"`
|
||||
// Network restriction configuration
|
||||
Network *Network `json:"network,omitempty"`
|
||||
}
|
||||
|
||||
// Device represents the mknod information for a Linux special device file
|
||||
type Device struct {
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
// Device type, block, char, etc.
|
||||
Type rune `json:"type"`
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
// FileMode permission bits for the device.
|
||||
FileMode *os.FileMode `json:"fileMode,omitempty"`
|
||||
// UID of the device.
|
||||
UID *uint32 `json:"uid,omitempty"`
|
||||
// Gid of the device.
|
||||
GID *uint32 `json:"gid,omitempty"`
|
||||
}
|
||||
|
||||
// DeviceCgroup represents a device rule for the whitelist controller
|
||||
type DeviceCgroup struct {
|
||||
// Allow or deny
|
||||
Allow bool `json:"allow"`
|
||||
// Device type, block, char, etc.
|
||||
Type *rune `json:"type,omitempty"`
|
||||
// Major is the device's major number.
|
||||
Major *int64 `json:"major,omitempty"`
|
||||
// Minor is the device's minor number.
|
||||
Minor *int64 `json:"minor,omitempty"`
|
||||
// Cgroup access permissions format, rwm.
|
||||
Access *string `json:"access,omitempty"`
|
||||
}
|
||||
|
||||
// Seccomp represents syscall restrictions
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"defaultAction"`
|
||||
Architectures []Arch `json:"architectures"`
|
||||
Syscalls []Syscall `json:"syscalls,omitempty"`
|
||||
}
|
||||
|
||||
// Arch used for additional architectures
|
||||
type Arch string
|
||||
|
||||
// Additional architectures permitted to be used for system calls
|
||||
// By default only the native architecture of the kernel is permitted
|
||||
const (
|
||||
ArchX86 Arch = "SCMP_ARCH_X86"
|
||||
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
|
||||
ArchX32 Arch = "SCMP_ARCH_X32"
|
||||
ArchARM Arch = "SCMP_ARCH_ARM"
|
||||
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
|
||||
ArchMIPS Arch = "SCMP_ARCH_MIPS"
|
||||
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
|
||||
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
|
||||
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
|
||||
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
|
||||
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
|
||||
)
|
||||
|
||||
// Action taken upon Seccomp rule match
|
||||
type Action string
|
||||
|
||||
// Define actions for Seccomp rules
|
||||
const (
|
||||
ActKill Action = "SCMP_ACT_KILL"
|
||||
ActTrap Action = "SCMP_ACT_TRAP"
|
||||
ActErrno Action = "SCMP_ACT_ERRNO"
|
||||
ActTrace Action = "SCMP_ACT_TRACE"
|
||||
ActAllow Action = "SCMP_ACT_ALLOW"
|
||||
)
|
||||
|
||||
// Operator used to match syscall arguments in Seccomp
|
||||
type Operator string
|
||||
|
||||
// Define operators for syscall arguments in Seccomp
|
||||
const (
|
||||
OpNotEqual Operator = "SCMP_CMP_NE"
|
||||
OpLessThan Operator = "SCMP_CMP_LT"
|
||||
OpLessEqual Operator = "SCMP_CMP_LE"
|
||||
OpEqualTo Operator = "SCMP_CMP_EQ"
|
||||
OpGreaterEqual Operator = "SCMP_CMP_GE"
|
||||
OpGreaterThan Operator = "SCMP_CMP_GT"
|
||||
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
|
||||
)
|
||||
|
||||
// Arg used for matching specific syscall arguments in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"valueTwo"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// Syscall is used to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
Args []Arg `json:"args,omitempty"`
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ A [directory structure](bundle.md) that is written ahead of time, distributed, a
|
|||
|
||||
## Configuration
|
||||
|
||||
The [`config.json`](config.md) and [`runtime.json`](runtime-config.md) files in a [bundle](#bundle) which define the intended [container](#container) and container process.
|
||||
The [`config.json`](config.md) file in a [bundle](#bundle) which defines the intended [container](#container) and container process.
|
||||
|
||||
## Container
|
||||
|
||||
|
|
553
Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md
generated
vendored
553
Godeps/_workspace/src/github.com/opencontainers/specs/runtime-config-linux.md
generated
vendored
|
@ -1,553 +0,0 @@
|
|||
# Linux-specific Runtime Configuration
|
||||
|
||||
## Namespaces
|
||||
|
||||
A namespace wraps a global system resource in an abstraction that makes it appear to the processes within the namespace that they have their own isolated instance of the global resource.
|
||||
Changes to the global resource are visible to other processes that are members of the namespace, but are invisible to other processes.
|
||||
For more information, see [the man page](http://man7.org/linux/man-pages/man7/namespaces.7.html).
|
||||
|
||||
Namespaces are specified as an array of entries inside the `namespaces` root field.
|
||||
The following parameters can be specified to setup namespaces:
|
||||
|
||||
* **`type`** *(string, required)* - namespace type. The following namespaces types are supported:
|
||||
* **`pid`** processes inside the container will only be able to see other processes inside the same container
|
||||
* **`network`** the container will have its own network stack
|
||||
* **`mount`** the container will have an isolated mount table
|
||||
* **`ipc`** processes inside the container will only be able to communicate to other processes inside the same container via system level IPC
|
||||
* **`uts`** the container will be able to have its own hostname and domain name
|
||||
* **`user`** the container will be able to remap user and group IDs from the host to local users and groups within the container
|
||||
|
||||
* **`path`** *(string, optional)* - path to namespace file
|
||||
|
||||
If a path is specified, that particular file is used to join that type of namespace.
|
||||
Also, when a path is specified, a runtime MUST assume that the setup for that particular namespace has already been done and error out if the config specifies anything else related to that namespace.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"namespaces": [
|
||||
{
|
||||
"type": "pid",
|
||||
"path": "/proc/1234/ns/pid"
|
||||
},
|
||||
{
|
||||
"type": "network",
|
||||
"path": "/var/run/netns/neta"
|
||||
},
|
||||
{
|
||||
"type": "mount"
|
||||
},
|
||||
{
|
||||
"type": "ipc"
|
||||
},
|
||||
{
|
||||
"type": "uts"
|
||||
},
|
||||
{
|
||||
"type": "user"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## User namespace mappings
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"uidMappings": [
|
||||
{
|
||||
"hostID": 1000,
|
||||
"containerID": 0,
|
||||
"size": 10
|
||||
}
|
||||
],
|
||||
"gidMappings": [
|
||||
{
|
||||
"hostID": 1000,
|
||||
"containerID": 0,
|
||||
"size": 10
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
uid/gid mappings describe the user namespace mappings from the host to the container.
|
||||
The mappings represent how the bundle `rootfs` expects the user namespace to be setup and the runtime SHOULD NOT modify the permissions on the rootfs to realize the mapping.
|
||||
*hostID* is the starting uid/gid on the host to be mapped to *containerID* which is the starting uid/gid in the container and *size* refers to the number of ids to be mapped.
|
||||
There is a limit of 5 mappings which is the Linux kernel hard limit.
|
||||
|
||||
## Devices
|
||||
|
||||
`devices` is an array specifying the list of devices to be created in the container.
|
||||
|
||||
The following parameters can be specified:
|
||||
|
||||
* **`type`** *(char, required)* - type of device: `c`, `b`, `u` or `p`. More info in `man mknod`.
|
||||
|
||||
* **`path`** *(string, optional)* - full path to device inside container
|
||||
|
||||
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`. There is a special value: `-1`, which means `*` for `device` cgroup setup.
|
||||
|
||||
* **`permissions`** *(string, optional)* - cgroup permissions for device. A composition of `r` (*read*), `w` (*write*), and `m` (*mknod*).
|
||||
|
||||
* **`fileMode`** *(uint32, optional)* - file mode for device file
|
||||
|
||||
* **`uid`** *(uint32, optional)* - uid of device owner
|
||||
|
||||
* **`gid`** *(uint32, optional)* - gid of device owner
|
||||
|
||||
**`fileMode`**, **`uid`** and **`gid`** are required if **`path`** is given and are otherwise not allowed.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"devices": [
|
||||
{
|
||||
"path": "/dev/random",
|
||||
"type": "c",
|
||||
"major": 1,
|
||||
"minor": 8,
|
||||
"permissions": "rwm",
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
{
|
||||
"path": "/dev/urandom",
|
||||
"type": "c",
|
||||
"major": 1,
|
||||
"minor": 9,
|
||||
"permissions": "rwm",
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
{
|
||||
"path": "/dev/null",
|
||||
"type": "c",
|
||||
"major": 1,
|
||||
"minor": 3,
|
||||
"permissions": "rwm",
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
{
|
||||
"path": "/dev/zero",
|
||||
"type": "c",
|
||||
"major": 1,
|
||||
"minor": 5,
|
||||
"permissions": "rwm",
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
{
|
||||
"path": "/dev/tty",
|
||||
"type": "c",
|
||||
"major": 5,
|
||||
"minor": 0,
|
||||
"permissions": "rwm",
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
{
|
||||
"path": "/dev/full",
|
||||
"type": "c",
|
||||
"major": 1,
|
||||
"minor": 7,
|
||||
"permissions": "rwm",
|
||||
"fileMode": 0666,
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Control groups
|
||||
|
||||
Also known as cgroups, they are used to restrict resource usage for a container and handle device access.
|
||||
cgroups provide controls to restrict cpu, memory, IO, pids and network for the container.
|
||||
For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt).
|
||||
|
||||
The path to the cgroups can be specified in the Spec via `cgroupsPath`.
|
||||
`cgroupsPath` is expected to be relative to the cgroups mount point.
|
||||
If `cgroupsPath` is not specified, implementations can define the default cgroup path.
|
||||
Implementations of the Spec can choose to name cgroups in any manner.
|
||||
The Spec does not include naming schema for cgroups.
|
||||
The Spec does not support [split hierarchy](https://www.kernel.org/doc/Documentation/cgroups/unified-hierarchy.txt).
|
||||
The cgroups will be created if they don't exist.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"cgroupsPath": "/myRuntime/myContainer"
|
||||
```
|
||||
|
||||
`cgroupsPath` can be used to either control the cgroups hierarchy for containers or to run a new process in an existing container.
|
||||
|
||||
You can configure a container's cgroups via the `resources` field of the Linux configuration.
|
||||
Do not specify `resources` unless limits have to be updated.
|
||||
For example, to run a new process in an existing container without updating limits, `resources` need not be specified.
|
||||
|
||||
#### Disable out-of-memory killer
|
||||
|
||||
`disableOOMKiller` contains a boolean (`true` or `false`) that enables or disables the Out of Memory killer for a cgroup.
|
||||
If enabled (`false`), tasks that attempt to consume more memory than they are allowed are immediately killed by the OOM killer.
|
||||
The OOM killer is enabled by default in every cgroup using the `memory` subsystem.
|
||||
To disable it, specify a value of `true`.
|
||||
For more information, see [the memory cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/memory.txt).
|
||||
|
||||
* **`disableOOMKiller`** *(bool, optional)* - enables or disables the OOM killer
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"disableOOMKiller": false
|
||||
```
|
||||
|
||||
#### Set oom_score_adj
|
||||
|
||||
`oomScoreAdj` sets heuristic regarding how the process is evaluated by the kernel during memory pressure.
|
||||
For more information, see [the proc filesystem documentation section 3.1](https://www.kernel.org/doc/Documentation/filesystems/proc.txt).
|
||||
This is a kernel/system level setting, where as `disableOOMKiller` is scoped for a memory cgroup.
|
||||
For more information on how these two settings work together, see [the memory cgroup documentation section 10. OOM Contol](https://www.kernel.org/doc/Documentation/cgroups/memory.txt).
|
||||
|
||||
* **`oomScoreAdj`** *(int, optional)* - adjust the oom-killer score
|
||||
|
||||
###### Example
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"oomScoreAdj": 0
|
||||
```
|
||||
|
||||
#### Memory
|
||||
|
||||
`memory` represents the cgroup subsystem `memory` and it's used to set limits on the container's memory usage.
|
||||
For more information, see [the memory cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/memory.txt).
|
||||
|
||||
The following parameters can be specified to setup the controller:
|
||||
|
||||
* **`limit`** *(uint64, optional)* - sets limit of memory usage
|
||||
|
||||
* **`reservation`** *(uint64, optional)* - sets soft limit of memory usage
|
||||
|
||||
* **`swap`** *(uint64, optional)* - sets limit of memory+Swap usage
|
||||
|
||||
* **`kernel`** *(uint64, optional)* - sets hard limit for kernel memory
|
||||
|
||||
* **`kernelTCP`** *(uint64, optional)* - sets hard limit for kernel memory in tcp using
|
||||
|
||||
* **`swappiness`** *(uint64, optional)* - sets swappiness parameter of vmscan (See sysctl's vm.swappiness)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"memory": {
|
||||
"limit": 0,
|
||||
"reservation": 0,
|
||||
"swap": 0,
|
||||
"kernel": 0,
|
||||
"kernelTCP": 0,
|
||||
"swappiness": 0
|
||||
}
|
||||
```
|
||||
|
||||
#### CPU
|
||||
|
||||
`cpu` represents the cgroup subsystems `cpu` and `cpusets`.
|
||||
For more information, see [the cpusets cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/cpusets.txt).
|
||||
|
||||
The following parameters can be specified to setup the controller:
|
||||
|
||||
* **`shares`** *(uint64, optional)* - specifies a relative share of CPU time available to the tasks in a cgroup
|
||||
|
||||
* **`quota`** *(uint64, optional)* - specifies the total amount of time in microseconds for which all tasks in a cgroup can run during one period (as defined by **`period`** below)
|
||||
|
||||
* **`period`** *(uint64, optional)* - specifies a period of time in microseconds for how regularly a cgroup's access to CPU resources should be reallocated (CFS scheduler only)
|
||||
|
||||
* **`realtimeRuntime`** *(uint64, optional)* - specifies a period of time in microseconds for the longest continuous period in which the tasks in a cgroup have access to CPU resources
|
||||
|
||||
* **`realtimePeriod`** *(uint64, optional)* - same as **`period`** but applies to realtime scheduler only
|
||||
|
||||
* **`cpus`** *(string, optional)* - list of CPUs the container will run in
|
||||
|
||||
* **`mems`** *(string, optional)* - list of Memory Nodes the container will run in
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"cpu": {
|
||||
"shares": 0,
|
||||
"quota": 0,
|
||||
"period": 0,
|
||||
"realtimeRuntime": 0,
|
||||
"realtimePeriod": 0,
|
||||
"cpus": "",
|
||||
"mems": ""
|
||||
}
|
||||
```
|
||||
|
||||
#### Block IO Controller
|
||||
|
||||
`blockIO` represents the cgroup subsystem `blkio` which implements the block io controller.
|
||||
For more information, see [the kernel cgroups documentation about blkio](https://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt).
|
||||
|
||||
The following parameters can be specified to setup the controller:
|
||||
|
||||
* **`blkioWeight`** *(uint16, optional)* - specifies per-cgroup weight. This is default weight of the group on all devices until and unless overridden by per-device rules. The range is from 10 to 1000.
|
||||
|
||||
* **`blkioLeafWeight`** *(uint16, optional)* - equivalents of `blkioWeight` for the purpose of deciding how much weight tasks in the given cgroup has while competing with the cgroup's child cgroups. The range is from 10 to 1000.
|
||||
|
||||
* **`blkioWeightDevice`** *(array, optional)* - specifies the list of devices which will be bandwidth rate limited. The following parameters can be specified per-device:
|
||||
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`.
|
||||
* **`weight`** *(uint16, optional)* - bandwidth rate for the device, range is from 10 to 1000
|
||||
* **`leafWeight`** *(uint16, optional)* - bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
||||
|
||||
You must specify at least one of `weight` or `leafWeight` in a given entry, and can specify both.
|
||||
|
||||
* **`blkioThrottleReadBpsDevice`**, **`blkioThrottleWriteBpsDevice`**, **`blkioThrottleReadIOPSDevice`**, **`blkioThrottleWriteIOPSDevice`** *(array, optional)* - specify the list of devices which will be IO rate limited. The following parameters can be specified per-device:
|
||||
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`.
|
||||
* **`rate`** *(uint64, required)* - IO rate limit for the device
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"blockIO": {
|
||||
"blkioWeight": 0,
|
||||
"blkioLeafWeight": 0,
|
||||
"blkioWeightDevice": [
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 0,
|
||||
"weight": 500,
|
||||
"leafWeight": 300
|
||||
},
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 16,
|
||||
"weight": 500
|
||||
}
|
||||
],
|
||||
"blkioThrottleReadBpsDevice": [
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 0,
|
||||
"rate": 600
|
||||
}
|
||||
],
|
||||
"blkioThrottleWriteIOPSDevice": [
|
||||
{
|
||||
"major": 8,
|
||||
"minor": 16,
|
||||
"rate": 300
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Huge page limits
|
||||
|
||||
`hugepageLimits` represents the `hugetlb` controller which allows to limit the
|
||||
HugeTLB usage per control group and enforces the controller limit during page fault.
|
||||
For more information, see the [kernel cgroups documentation about HugeTLB](https://www.kernel.org/doc/Documentation/cgroups/hugetlb.txt).
|
||||
|
||||
`hugepageLimits` is an array of entries, each having the following structure:
|
||||
|
||||
* **`pageSize`** *(string, required)* - hugepage size
|
||||
|
||||
* **`limit`** *(uint64, required)* - limit in bytes of *hugepagesize* HugeTLB usage
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"hugepageLimits": [
|
||||
{
|
||||
"pageSize": "2MB",
|
||||
"limit": 9223372036854771712
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
#### Network
|
||||
|
||||
`network` represents the cgroup subsystems `net_cls` and `net_prio`.
|
||||
For more information, see [the net\_cls cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/net_cls.txt) and [the net\_prio cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/net_prio.txt).
|
||||
|
||||
The following parameters can be specified to setup these cgroup controllers:
|
||||
|
||||
* **`classID`** *(uint32, optional)* - is the network class identifier the cgroup's network packets will be tagged with
|
||||
|
||||
* **`priorities`** *(array, optional)* - specifies a list of objects of the priorities assigned to traffic originating from
|
||||
processes in the group and egressing the system on various interfaces. The following parameters can be specified per-priority:
|
||||
* **`name`** *(string, required)* - interface name
|
||||
* **`priority`** *(uint32, required)* - priority applied to the interface
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"network": {
|
||||
"classID": 1048577,
|
||||
"priorities": [
|
||||
{
|
||||
"name": "eth0",
|
||||
"priority": 500
|
||||
},
|
||||
{
|
||||
"name": "eth1",
|
||||
"priority": 1000
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### PIDs
|
||||
|
||||
`pids` represents the cgroup subsystem `pids`.
|
||||
For more information, see [the pids cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/pids.txt
|
||||
).
|
||||
|
||||
The following paramters can be specified to setup the controller:
|
||||
|
||||
* **`limit`** *(int64, required)* - specifies the maximum number of tasks in the cgroup
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"pids": {
|
||||
"limit": 32771
|
||||
}
|
||||
```
|
||||
|
||||
## Sysctl
|
||||
|
||||
sysctl allows kernel parameters to be modified at runtime for the container.
|
||||
For more information, see [the man page](http://man7.org/linux/man-pages/man8/sysctl.8.html)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"sysctl": {
|
||||
"net.ipv4.ip_forward": "1",
|
||||
"net.core.somaxconn": "256"
|
||||
}
|
||||
```
|
||||
|
||||
## Rlimits
|
||||
|
||||
rlimits allow setting resource limits.
|
||||
`type` is a string with a value from those defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html).
|
||||
The kernel enforces the `soft` limit for a resource while the `hard` limit acts as a ceiling for that value that could be set by an unprivileged process.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"rlimits": [
|
||||
{
|
||||
"type": "RLIMIT_NPROC",
|
||||
"soft": 1024,
|
||||
"hard": 102400
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## SELinux process label
|
||||
|
||||
SELinux process label specifies the label with which the processes in a container are run.
|
||||
For more information about SELinux, see [Selinux documentation](http://selinuxproject.org/page/Main_Page)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"selinuxProcessLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675"
|
||||
```
|
||||
|
||||
## Apparmor profile
|
||||
|
||||
Apparmor profile specifies the name of the apparmor profile that will be used for the container.
|
||||
For more information about Apparmor, see [Apparmor documentation](https://wiki.ubuntu.com/AppArmor)
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"apparmorProfile": "acme_secure_profile"
|
||||
```
|
||||
|
||||
## seccomp
|
||||
|
||||
Seccomp provides application sandboxing mechanism in the Linux kernel.
|
||||
Seccomp configuration allows one to configure actions to take for matched syscalls and furthermore also allows matching on values passed as arguments to syscalls.
|
||||
For more information about Seccomp, see [Seccomp kernel documentation](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt)
|
||||
The actions, architectures, and operators are strings that match the definitions in seccomp.h from [libseccomp](https://github.com/seccomp/libseccomp) and are translated to corresponding values.
|
||||
A valid list of constants as of Libseccomp v2.2.3 is contained below.
|
||||
|
||||
Architecture Constants
|
||||
* `SCMP_ARCH_X86`
|
||||
* `SCMP_ARCH_X86_64`
|
||||
* `SCMP_ARCH_X32`
|
||||
* `SCMP_ARCH_ARM`
|
||||
* `SCMP_ARCH_AARCH64`
|
||||
* `SCMP_ARCH_MIPS`
|
||||
* `SCMP_ARCH_MIPS64`
|
||||
* `SCMP_ARCH_MIPS64N32`
|
||||
* `SCMP_ARCH_MIPSEL`
|
||||
* `SCMP_ARCH_MIPSEL64`
|
||||
* `SCMP_ARCH_MIPSEL64N32`
|
||||
|
||||
Action Constants:
|
||||
* `SCMP_ACT_KILL`
|
||||
* `SCMP_ACT_TRAP`
|
||||
* `SCMP_ACT_ERRNO`
|
||||
* `SCMP_ACT_TRACE`
|
||||
* `SCMP_ACT_ALLOW`
|
||||
|
||||
Operator Constants:
|
||||
* `SCMP_CMP_NE`
|
||||
* `SCMP_CMP_LT`
|
||||
* `SCMP_CMP_LE`
|
||||
* `SCMP_CMP_EQ`
|
||||
* `SCMP_CMP_GE`
|
||||
* `SCMP_CMP_GT`
|
||||
* `SCMP_CMP_MASKED_EQ`
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"seccomp": {
|
||||
"defaultAction": "SCMP_ACT_ALLOW",
|
||||
"architectures": [
|
||||
"SCMP_ARCH_X86"
|
||||
],
|
||||
"syscalls": [
|
||||
{
|
||||
"name": "getcwd",
|
||||
"action": "SCMP_ACT_ERRNO"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Rootfs Mount Propagation
|
||||
|
||||
rootfsPropagation sets the rootfs's mount propagation.
|
||||
Its value is either slave, private, or shared.
|
||||
[The kernel doc](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt) has more information about mount propagation.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"rootfsPropagation": "slave",
|
||||
```
|
||||
|
||||
## No new privileges
|
||||
|
||||
Setting `noNewPrivileges` to true prevents the processes in the container from gaining additional privileges.
|
||||
[The kernel doc](https://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt) has more information on how this is achieved using a prctl system call.
|
||||
|
||||
###### Example
|
||||
|
||||
```json
|
||||
"noNewPrivileges": true,
|
||||
```
|
|
@ -1,122 +0,0 @@
|
|||
# Runtime Configuration
|
||||
|
||||
## Hooks
|
||||
|
||||
Lifecycle hooks allow custom events for different points in a container's runtime.
|
||||
Presently there are `Prestart`, `Poststart` and `Poststop`.
|
||||
|
||||
* [`Prestart`](#prestart) is a list of hooks to be run before the container process is executed
|
||||
* [`Poststart`](#poststart) is a list of hooks to be run immediately after the container process is started
|
||||
* [`Poststop`](#poststop) is a list of hooks to be run after the container process exits
|
||||
|
||||
Hooks allow one to run code before/after various lifecycle events of the container.
|
||||
Hooks MUST be called in the listed order.
|
||||
The state of the container is passed to the hooks over stdin, so the hooks could get the information they need to do their work.
|
||||
|
||||
Hook paths are absolute and are executed from the host's filesystem.
|
||||
|
||||
### Prestart
|
||||
|
||||
The pre-start hooks are called after the container process is spawned, but before the user supplied command is executed.
|
||||
They are called after the container namespaces are created on Linux, so they provide an opportunity to customize the container.
|
||||
In Linux, for e.g., the network namespace could be configured in this hook.
|
||||
|
||||
If a hook returns a non-zero exit code, then an error including the exit code and the stderr is returned to the caller and the container is torn down.
|
||||
|
||||
### Poststart
|
||||
|
||||
The post-start hooks are called after the user process is started.
|
||||
For example this hook can notify user that real process is spawned.
|
||||
|
||||
If a hook returns a non-zero exit code, then an error is logged and the remaining hooks are executed.
|
||||
|
||||
### Poststop
|
||||
|
||||
The post-stop hooks are called after the container process is stopped.
|
||||
Cleanup or debugging could be performed in such a hook.
|
||||
If a hook returns a non-zero exit code, then an error is logged and the remaining hooks are executed.
|
||||
|
||||
*Example*
|
||||
|
||||
```json
|
||||
"hooks" : {
|
||||
"prestart": [
|
||||
{
|
||||
"path": "/usr/bin/fix-mounts",
|
||||
"args": ["fix-mounts", "arg1", "arg2"],
|
||||
"env": [ "key1=value1"]
|
||||
},
|
||||
{
|
||||
"path": "/usr/bin/setup-network"
|
||||
}
|
||||
],
|
||||
"poststart": [
|
||||
{
|
||||
"path": "/usr/bin/notify-start"
|
||||
}
|
||||
],
|
||||
"poststop": [
|
||||
{
|
||||
"path": "/usr/sbin/cleanup.sh",
|
||||
"args": ["cleanup.sh", "-f"]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
`path` is required for a hook.
|
||||
`args` and `env` are optional.
|
||||
The semantics are the same as `Path`, `Args` and `Env` in [golang Cmd](https://golang.org/pkg/os/exec/#Cmd).
|
||||
|
||||
## Mount Configuration
|
||||
|
||||
Additional filesystems can be declared as "mounts", specified in the *mounts* object.
|
||||
Keys in this object are names of mount points from portable config.
|
||||
Values are objects with configuration of mount points.
|
||||
The parameters are similar to the ones in [the Linux mount system call](http://man7.org/linux/man-pages/man2/mount.2.html).
|
||||
Only [mounts from the portable config](config.md#mount-points) will be mounted.
|
||||
|
||||
* **`type`** (string, required) Linux, *filesystemtype* argument supported by the kernel are listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). Windows: ntfs
|
||||
* **`source`** (string, required) a device name, but can also be a directory name or a dummy. Windows, the volume name that is the target of the mount point. \\?\Volume\{GUID}\ (on Windows source is called target)
|
||||
* **`options`** (list of strings, optional) in the fstab format [https://wiki.archlinux.org/index.php/Fstab](https://wiki.archlinux.org/index.php/Fstab).
|
||||
|
||||
*Example (Linux)*
|
||||
|
||||
```json
|
||||
"mounts": {
|
||||
"proc": {
|
||||
"type": "proc",
|
||||
"source": "proc",
|
||||
"options": []
|
||||
},
|
||||
"dev": {
|
||||
"type": "tmpfs",
|
||||
"source": "tmpfs",
|
||||
"options": ["nosuid","strictatime","mode=755","size=65536k"]
|
||||
},
|
||||
"devpts": {
|
||||
"type": "devpts",
|
||||
"source": "devpts",
|
||||
"options": ["nosuid","noexec","newinstance","ptmxmode=0666","mode=0620","gid=5"]
|
||||
},
|
||||
"data": {
|
||||
"type": "bind",
|
||||
"source": "/volumes/testing",
|
||||
"options": ["rbind","rw"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
*Example (Windows)*
|
||||
|
||||
```json
|
||||
"mounts": {
|
||||
"myfancymountpoint": {
|
||||
"type": "ntfs",
|
||||
"source": "\\\\?\\Volume\\{2eca078d-5cbc-43d3-aff8-7e8511f60d0e}\\",
|
||||
"options": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
See links for details about [mountvol](http://ss64.com/nt/mountvol.html) and [SetVolumeMountPoint](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365561(v=vs.85).aspx) in Windows.
|
|
@ -38,8 +38,8 @@ This is provided so that consumers can find the container's configuration and ro
|
|||
The lifecycle describes the timeline of events that happen from when a container is created to when it ceases to exist.
|
||||
|
||||
1. OCI compliant runtime is invoked by passing the bundle path as argument.
|
||||
2. The container's runtime environment is created according to the configuration in `config.json` and `runtime.json`.
|
||||
Any updates to `config.json` or `runtime.json` after container is running do not affect the container.
|
||||
2. The container's runtime environment is created according to the configuration in [`config.json`](config.md).
|
||||
Any updates to `config.json` after container is running do not affect the container.
|
||||
3. The container's state.json file is written to the filesystem.
|
||||
4. The prestart hooks are invoked by the runtime.
|
||||
If any prestart hook fails, then the container is stopped and the lifecycle continues at step 8.
|
||||
|
@ -56,4 +56,4 @@ Note: The lifecycle is a WIP and it will evolve as we have more use cases and mo
|
|||
|
||||
## Hooks
|
||||
|
||||
See [runtime configuration for hooks](./runtime-config.md)
|
||||
See [runtime configuration for hooks](./config.md#hooks)
|
||||
|
|
|
@ -1,42 +0,0 @@
|
|||
package specs
|
||||
|
||||
// RuntimeSpec contains host-specific configuration information for
|
||||
// a container. This information must not be included when the bundle
|
||||
// is packaged for distribution.
|
||||
type RuntimeSpec struct {
|
||||
// Mounts is a mapping of names to mount configurations.
|
||||
// Which mounts will be mounted and where should be chosen with MountPoints
|
||||
// in Spec.
|
||||
Mounts map[string]Mount `json:"mounts"`
|
||||
// Hooks are the commands run at various lifecycle events of the container.
|
||||
Hooks Hooks `json:"hooks"`
|
||||
}
|
||||
|
||||
// Hook specifies a command that is run at a particular event in the lifecycle of a container
|
||||
type Hook struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args,omitempty"`
|
||||
Env []string `json:"env,omitempty"`
|
||||
}
|
||||
|
||||
// Hooks for container setup and teardown
|
||||
type Hooks struct {
|
||||
// Prestart is a list of hooks to be run before the container process is executed.
|
||||
// On Linux, they are run after the container namespaces are created.
|
||||
Prestart []Hook `json:"prestart,omitempty"`
|
||||
// Poststart is a list of hooks to be run after the container process is started.
|
||||
Poststart []Hook `json:"poststart,omitempty"`
|
||||
// Poststop is a list of hooks to be run after the container process exits.
|
||||
Poststop []Hook `json:"poststop,omitempty"`
|
||||
}
|
||||
|
||||
// Mount specifies a mount for a container
|
||||
type Mount struct {
|
||||
// Type specifies the mount kind.
|
||||
Type string `json:"type"`
|
||||
// Source specifies the source path of the mount. In the case of bind mounts on
|
||||
// linux based systems this would be the file on the host.
|
||||
Source string `json:"source"`
|
||||
// Options are fstab style mount options.
|
||||
Options []string `json:"options,omitempty"`
|
||||
}
|
306
Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go
generated
vendored
306
Godeps/_workspace/src/github.com/opencontainers/specs/runtime_config_linux.go
generated
vendored
|
@ -1,306 +0,0 @@
|
|||
package specs
|
||||
|
||||
import "os"
|
||||
|
||||
// LinuxStateDirectory holds the container's state information
|
||||
const LinuxStateDirectory = "/run/opencontainer/containers"
|
||||
|
||||
// LinuxRuntimeSpec is the full specification for linux containers.
|
||||
type LinuxRuntimeSpec struct {
|
||||
RuntimeSpec
|
||||
// LinuxRuntime is platform specific configuration for linux based containers.
|
||||
Linux LinuxRuntime `json:"linux"`
|
||||
}
|
||||
|
||||
// LinuxRuntime hosts the Linux-only runtime information
|
||||
type LinuxRuntime struct {
|
||||
// UIDMapping specifies user mappings for supporting user namespaces on linux.
|
||||
UIDMappings []IDMapping `json:"uidMappings,omitempty"`
|
||||
// GIDMapping specifies group mappings for supporting user namespaces on linux.
|
||||
GIDMappings []IDMapping `json:"gidMappings,omitempty"`
|
||||
// Rlimits specifies rlimit options to apply to the container's process.
|
||||
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||
// Sysctl are a set of key value pairs that are set for the container on start
|
||||
Sysctl map[string]string `json:"sysctl,omitempty"`
|
||||
// Resources contain cgroup information for handling resource constraints
|
||||
// for the container
|
||||
Resources *Resources `json:"resources,omitempty"`
|
||||
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is expected to be relative to the cgroups mountpoint.
|
||||
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
|
||||
CgroupsPath *string `json:"cgroupsPath,omitempty"`
|
||||
// Namespaces contains the namespaces that are created and/or joined by the container
|
||||
Namespaces []Namespace `json:"namespaces"`
|
||||
// Devices are a list of device nodes that are created and enabled for the container
|
||||
Devices []Device `json:"devices"`
|
||||
// ApparmorProfile specified the apparmor profile for the container.
|
||||
ApparmorProfile string `json:"apparmorProfile"`
|
||||
// SelinuxProcessLabel specifies the selinux context that the container process is run as.
|
||||
SelinuxProcessLabel string `json:"selinuxProcessLabel"`
|
||||
// Seccomp specifies the seccomp security settings for the container.
|
||||
Seccomp Seccomp `json:"seccomp"`
|
||||
// RootfsPropagation is the rootfs mount propagation mode for the container.
|
||||
RootfsPropagation string `json:"rootfsPropagation,omitempty"`
|
||||
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
|
||||
NoNewPrivileges bool `json:"noNewPrivileges,omitempty"`
|
||||
}
|
||||
|
||||
// Namespace is the configuration for a linux namespace
|
||||
type Namespace struct {
|
||||
// Type is the type of Linux namespace
|
||||
Type NamespaceType `json:"type"`
|
||||
// Path is a path to an existing namespace persisted on disk that can be joined
|
||||
// and is of the same type
|
||||
Path string `json:"path,omitempty"`
|
||||
}
|
||||
|
||||
// NamespaceType is one of the linux namespaces
|
||||
type NamespaceType string
|
||||
|
||||
const (
|
||||
// PIDNamespace for isolating process IDs
|
||||
PIDNamespace NamespaceType = "pid"
|
||||
// NetworkNamespace for isolating network devices, stacks, ports, etc
|
||||
NetworkNamespace = "network"
|
||||
// MountNamespace for isolating mount points
|
||||
MountNamespace = "mount"
|
||||
// IPCNamespace for isolating System V IPC, POSIX message queues
|
||||
IPCNamespace = "ipc"
|
||||
// UTSNamespace for isolating hostname and NIS domain name
|
||||
UTSNamespace = "uts"
|
||||
// UserNamespace for isolating user and group IDs
|
||||
UserNamespace = "user"
|
||||
)
|
||||
|
||||
// IDMapping specifies UID/GID mappings
|
||||
type IDMapping struct {
|
||||
// HostID is the UID/GID of the host user or group
|
||||
HostID uint32 `json:"hostID"`
|
||||
// ContainerID is the UID/GID of the container's user or group
|
||||
ContainerID uint32 `json:"containerID"`
|
||||
// Size is the length of the range of IDs mapped between the two namespaces
|
||||
Size uint32 `json:"size"`
|
||||
}
|
||||
|
||||
// Rlimit type and restrictions
|
||||
type Rlimit struct {
|
||||
// Type of the rlimit to set
|
||||
Type string `json:"type"`
|
||||
// Hard is the hard limit for the specified type
|
||||
Hard uint64 `json:"hard"`
|
||||
// Soft is the soft limit for the specified type
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
// HugepageLimit structure corresponds to limiting kernel hugepages
|
||||
type HugepageLimit struct {
|
||||
// Pagesize is the hugepage size
|
||||
Pagesize *string `json:"pageSize,omitempty"`
|
||||
// Limit is the limit of "hugepagesize" hugetlb usage
|
||||
Limit *uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
// InterfacePriority for network interfaces
|
||||
type InterfacePriority struct {
|
||||
// Name is the name of the network interface
|
||||
Name string `json:"name"`
|
||||
// Priority for the interface
|
||||
Priority uint32 `json:"priority"`
|
||||
}
|
||||
|
||||
// blockIODevice holds major:minor format supported in blkio cgroup
|
||||
type blockIODevice struct {
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
}
|
||||
|
||||
// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice
|
||||
type WeightDevice struct {
|
||||
blockIODevice
|
||||
// Weight is the bandwidth rate for the device, range is from 10 to 1000
|
||||
Weight *uint16 `json:"weight,omitempty"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
||||
LeafWeight *uint16 `json:"leafWeight,omitempty"`
|
||||
}
|
||||
|
||||
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type ThrottleDevice struct {
|
||||
blockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate *uint64 `json:"rate,omitempty"`
|
||||
}
|
||||
|
||||
// BlockIO for Linux cgroup 'blkio' resource management
|
||||
type BlockIO struct {
|
||||
// Specifies per cgroup weight, range is from 10 to 1000
|
||||
Weight *uint16 `json:"blkioWeight,omitempty"`
|
||||
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
||||
LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"`
|
||||
// Weight per cgroup per device, can override BlkioWeight
|
||||
WeightDevice []*WeightDevice `json:"blkioWeightDevice,omitempty"`
|
||||
// IO read rate limit per cgroup per device, bytes per second
|
||||
ThrottleReadBpsDevice []*ThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"`
|
||||
// IO write rate limit per cgroup per device, bytes per second
|
||||
ThrottleWriteBpsDevice []*ThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"`
|
||||
// IO read rate limit per cgroup per device, IO per second
|
||||
ThrottleReadIOPSDevice []*ThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"`
|
||||
// IO write rate limit per cgroup per device, IO per second
|
||||
ThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkioThrottleWriteIOPSDevice,omitempty"`
|
||||
}
|
||||
|
||||
// Memory for Linux cgroup 'memory' resource management
|
||||
type Memory struct {
|
||||
// Memory limit (in bytes).
|
||||
Limit *uint64 `json:"limit,omitempty"`
|
||||
// Memory reservation or soft_limit (in bytes).
|
||||
Reservation *uint64 `json:"reservation,omitempty"`
|
||||
// Total memory limit (memory + swap).
|
||||
Swap *uint64 `json:"swap,omitempty"`
|
||||
// Kernel memory limit (in bytes).
|
||||
Kernel *uint64 `json:"kernel,omitempty"`
|
||||
// Kernel memory limit for tcp (in bytes)
|
||||
KernelTCP *uint64 `json:"kernelTCP"`
|
||||
// How aggressive the kernel will swap memory pages. Range from 0 to 100.
|
||||
Swappiness *uint64 `json:"swappiness,omitempty"`
|
||||
}
|
||||
|
||||
// CPU for Linux cgroup 'cpu' resource management
|
||||
type CPU struct {
|
||||
// CPU shares (relative weight (ratio) vs. other cgroups with cpu shares).
|
||||
Shares *uint64 `json:"shares,omitempty"`
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
Quota *uint64 `json:"quota,omitempty"`
|
||||
// CPU period to be used for hardcapping (in usecs).
|
||||
Period *uint64 `json:"period,omitempty"`
|
||||
// How much time realtime scheduling may use (in usecs).
|
||||
RealtimeRuntime *uint64 `json:"realtimeRuntime,omitempty"`
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"`
|
||||
// CPUs to use within the cpuset. Default is to use any CPU available.
|
||||
Cpus *string `json:"cpus,omitempty"`
|
||||
// List of memory nodes in the cpuset. Default is to use any available memory node.
|
||||
Mems *string `json:"mems,omitempty"`
|
||||
}
|
||||
|
||||
// Pids for Linux cgroup 'pids' resource management (Linux 4.3)
|
||||
type Pids struct {
|
||||
// Maximum number of PIDs. Default is "no limit".
|
||||
Limit *int64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
// Network identification and priority configuration
|
||||
type Network struct {
|
||||
// Set class identifier for container's network packets
|
||||
ClassID *uint32 `json:"classID"`
|
||||
// Set priority of network traffic for container
|
||||
Priorities []InterfacePriority `json:"priorities"`
|
||||
}
|
||||
|
||||
// Resources has container runtime resource constraints
|
||||
type Resources struct {
|
||||
// DisableOOMKiller disables the OOM killer for out of memory conditions
|
||||
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
|
||||
// Specify an oom_score_adj for the container.
|
||||
OOMScoreAdj *int `json:"oomScoreAdj,omitempty"`
|
||||
// Memory restriction configuration
|
||||
Memory *Memory `json:"memory,omitempty"`
|
||||
// CPU resource restriction configuration
|
||||
CPU *CPU `json:"cpu,omitempty"`
|
||||
// Task resource restriction configuration.
|
||||
Pids *Pids `json:"pids,omitempty"`
|
||||
// BlockIO restriction configuration
|
||||
BlockIO *BlockIO `json:"blockIO,omitempty"`
|
||||
// Hugetlb limit (in bytes)
|
||||
HugepageLimits []HugepageLimit `json:"hugepageLimits,omitempty"`
|
||||
// Network restriction configuration
|
||||
Network *Network `json:"network,omitempty"`
|
||||
}
|
||||
|
||||
// Device represents the information on a Linux special device file
|
||||
type Device struct {
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
// Device type, block, char, etc.
|
||||
Type rune `json:"type"`
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
// Cgroup permissions format, rwm.
|
||||
Permissions string `json:"permissions"`
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode `json:"fileMode"`
|
||||
// UID of the device.
|
||||
UID uint32 `json:"uid"`
|
||||
// Gid of the device.
|
||||
GID uint32 `json:"gid"`
|
||||
}
|
||||
|
||||
// Seccomp represents syscall restrictions
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"defaultAction"`
|
||||
Architectures []Arch `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
}
|
||||
|
||||
// Arch used for additional architectures
|
||||
type Arch string
|
||||
|
||||
// Additional architectures permitted to be used for system calls
|
||||
// By default only the native architecture of the kernel is permitted
|
||||
const (
|
||||
ArchX86 Arch = "SCMP_ARCH_X86"
|
||||
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
|
||||
ArchX32 Arch = "SCMP_ARCH_X32"
|
||||
ArchARM Arch = "SCMP_ARCH_ARM"
|
||||
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
|
||||
ArchMIPS Arch = "SCMP_ARCH_MIPS"
|
||||
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
|
||||
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
|
||||
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
|
||||
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
|
||||
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
|
||||
)
|
||||
|
||||
// Action taken upon Seccomp rule match
|
||||
type Action string
|
||||
|
||||
// Define actions for Seccomp rules
|
||||
const (
|
||||
ActKill Action = "SCMP_ACT_KILL"
|
||||
ActTrap Action = "SCMP_ACT_TRAP"
|
||||
ActErrno Action = "SCMP_ACT_ERRNO"
|
||||
ActTrace Action = "SCMP_ACT_TRACE"
|
||||
ActAllow Action = "SCMP_ACT_ALLOW"
|
||||
)
|
||||
|
||||
// Operator used to match syscall arguments in Seccomp
|
||||
type Operator string
|
||||
|
||||
// Define operators for syscall arguments in Seccomp
|
||||
const (
|
||||
OpNotEqual Operator = "SCMP_CMP_NE"
|
||||
OpLessThan Operator = "SCMP_CMP_LT"
|
||||
OpLessEqual Operator = "SCMP_CMP_LE"
|
||||
OpEqualTo Operator = "SCMP_CMP_EQ"
|
||||
OpGreaterEqual Operator = "SCMP_CMP_GE"
|
||||
OpGreaterThan Operator = "SCMP_CMP_GT"
|
||||
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
|
||||
)
|
||||
|
||||
// Arg used for matching specific syscall arguments in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"valueTwo"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// Syscall is used to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
Args []*Arg `json:"args"`
|
||||
}
|
|
@ -15,7 +15,7 @@ The redundancy reduction from removing the namespacing prefix is not useful enou
|
|||
So we have a consistent way to identify unset values ([source][optional-pointer]).
|
||||
|
||||
[capabilities]: config-linux.md#capabilities
|
||||
[class-id]: runtime-config-linux.md#network
|
||||
[class-id]: config-linux.md#network
|
||||
[integer-over-hex]: https://github.com/opencontainers/specs/pull/267#discussion_r48360013
|
||||
[keep-prefix]: https://github.com/opencontainers/specs/pull/159#issuecomment-138728337
|
||||
[optional-pointer]: https://github.com/opencontainers/specs/pull/233#discussion_r47829711
|
||||
|
|
|
@ -11,7 +11,7 @@ const (
|
|||
VersionPatch = 0
|
||||
|
||||
// VersionDev indicates development branch. Releases will be empty string.
|
||||
VersionDev = "-dev"
|
||||
VersionDev = ""
|
||||
)
|
||||
|
||||
// Version is the specification version that the package types support.
|
||||
|
|
2
exec.go
2
exec.go
|
@ -123,7 +123,7 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
|
|||
if err := os.Chdir(bundle); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
spec, _, err := loadSpec(specConfig, runtimeConfig)
|
||||
spec, err := loadSpec(specConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -38,7 +38,13 @@ type Device struct {
|
|||
}
|
||||
|
||||
func (d *Device) CgroupString() string {
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
|
||||
var p string
|
||||
if d.Permissions == "" {
|
||||
p = "rwm" // empty permissions is invalid... causes a write invalid argument error upon saving to cgroups
|
||||
} else {
|
||||
p = d.Permissions
|
||||
}
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), p)
|
||||
}
|
||||
|
||||
func (d *Device) Mkdev() int {
|
||||
|
|
9
main.go
9
main.go
|
@ -10,10 +10,9 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
version = "0.0.7"
|
||||
specConfig = "config.json"
|
||||
runtimeConfig = "runtime.json"
|
||||
usage = `Open Container Initiative runtime
|
||||
version = "0.0.7"
|
||||
specConfig = "config.json"
|
||||
usage = `Open Container Initiative runtime
|
||||
|
||||
runc is a command line client for running applications packaged according to
|
||||
the Open Container Format (OCF) and is a compliant implementation of the
|
||||
|
@ -31,7 +30,7 @@ a container in your shell by running:
|
|||
# runc start [ -b bundle ]
|
||||
|
||||
If not specified, the default value for the 'bundle' is the current directory.
|
||||
'Bundle' is the directory where '` + specConfig + `' and '` + runtimeConfig + `' must be located.`
|
||||
'Bundle' is the directory where '` + specConfig + `' must be located.`
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
|
|
@ -75,11 +75,11 @@ var restoreCommand = cli.Command{
|
|||
fatal(err)
|
||||
}
|
||||
}
|
||||
spec, rspec, err := loadSpec(specConfig, runtimeConfig)
|
||||
spec, err := loadSpec(specConfig)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
config, err := createLibcontainerConfig(context.GlobalString("id"), spec, rspec)
|
||||
config, err := createLibcontainerConfig(context.GlobalString("id"), spec)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
|
332
spec.go
332
spec.go
|
@ -56,34 +56,48 @@ var specCommand = cli.Command{
|
|||
Cwd: "/",
|
||||
},
|
||||
Hostname: "shell",
|
||||
Mounts: []specs.MountPoint{
|
||||
Mounts: []specs.Mount{
|
||||
{
|
||||
Name: "proc",
|
||||
Path: "/proc",
|
||||
Destination: "/proc",
|
||||
Type: "proc",
|
||||
Source: "proc",
|
||||
Options: nil,
|
||||
},
|
||||
{
|
||||
Name: "dev",
|
||||
Path: "/dev",
|
||||
Destination: "/dev",
|
||||
Type: "tmpfs",
|
||||
Source: "tmpfs",
|
||||
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Name: "devpts",
|
||||
Path: "/dev/pts",
|
||||
Destination: "/dev/pts",
|
||||
Type: "devpts",
|
||||
Source: "devpts",
|
||||
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
|
||||
},
|
||||
{
|
||||
Name: "shm",
|
||||
Path: "/dev/shm",
|
||||
Destination: "/dev/shm",
|
||||
Type: "tmpfs",
|
||||
Source: "shm",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Name: "mqueue",
|
||||
Path: "/dev/mqueue",
|
||||
Destination: "/dev/mqueue",
|
||||
Type: "mqueue",
|
||||
Source: "mqueue",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
{
|
||||
Name: "sysfs",
|
||||
Path: "/sys",
|
||||
Destination: "/sys",
|
||||
Type: "sysfs",
|
||||
Source: "sysfs",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
{
|
||||
Name: "cgroup",
|
||||
Path: "/sys/fs/cgroup",
|
||||
Destination: "/sys/fs/cgroup",
|
||||
Type: "cgroup",
|
||||
Source: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -93,49 +107,7 @@ var specCommand = cli.Command{
|
|||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
},
|
||||
}
|
||||
rspec := specs.LinuxRuntimeSpec{
|
||||
RuntimeSpec: specs.RuntimeSpec{
|
||||
Mounts: map[string]specs.Mount{
|
||||
"proc": {
|
||||
Type: "proc",
|
||||
Source: "proc",
|
||||
Options: nil,
|
||||
},
|
||||
"dev": {
|
||||
Type: "tmpfs",
|
||||
Source: "tmpfs",
|
||||
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
||||
},
|
||||
"devpts": {
|
||||
Type: "devpts",
|
||||
Source: "devpts",
|
||||
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
|
||||
},
|
||||
"shm": {
|
||||
Type: "tmpfs",
|
||||
Source: "shm",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
||||
},
|
||||
"mqueue": {
|
||||
Type: "mqueue",
|
||||
Source: "mqueue",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
"sysfs": {
|
||||
Type: "sysfs",
|
||||
Source: "sysfs",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
"cgroup": {
|
||||
Type: "cgroup",
|
||||
Source: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
||||
},
|
||||
},
|
||||
},
|
||||
Linux: specs.LinuxRuntime{
|
||||
|
||||
Namespaces: []specs.Namespace{
|
||||
{
|
||||
Type: "pid",
|
||||
|
@ -153,6 +125,7 @@ var specCommand = cli.Command{
|
|||
Type: "mount",
|
||||
},
|
||||
},
|
||||
|
||||
Rlimits: []specs.Rlimit{
|
||||
{
|
||||
Type: "RLIMIT_NOFILE",
|
||||
|
@ -160,77 +133,93 @@ var specCommand = cli.Command{
|
|||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/null",
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
UID: 0,
|
||||
GID: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/random",
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
UID: 0,
|
||||
GID: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/full",
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
UID: 0,
|
||||
GID: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/tty",
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
UID: 0,
|
||||
GID: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/zero",
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
UID: 0,
|
||||
GID: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/urandom",
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
UID: 0,
|
||||
GID: 0,
|
||||
},
|
||||
},
|
||||
Resources: &specs.Resources{
|
||||
Devices: []specs.DeviceCgroup{
|
||||
{
|
||||
Allow: false,
|
||||
Access: sPtr("rwm"),
|
||||
},
|
||||
{
|
||||
Allow: true,
|
||||
Type: rPtr('c'),
|
||||
Major: iPtr(8),
|
||||
Minor: iPtr(229),
|
||||
Access: sPtr("rw"),
|
||||
},
|
||||
{
|
||||
Allow: true,
|
||||
Type: rPtr('b'),
|
||||
Major: iPtr(8),
|
||||
Minor: iPtr(0),
|
||||
Access: sPtr("r"),
|
||||
},
|
||||
},
|
||||
Memory: &specs.Memory{},
|
||||
},
|
||||
Devices: []specs.Device{
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/null",
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
FileMode: fmPtr(0666),
|
||||
UID: u32Ptr(0),
|
||||
GID: u32Ptr(0),
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/random",
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
FileMode: fmPtr(0666),
|
||||
UID: u32Ptr(0),
|
||||
GID: u32Ptr(0),
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/full",
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
FileMode: fmPtr(0666),
|
||||
UID: u32Ptr(0),
|
||||
GID: u32Ptr(0),
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/tty",
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
FileMode: fmPtr(0666),
|
||||
UID: u32Ptr(0),
|
||||
GID: u32Ptr(0),
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/zero",
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
FileMode: fmPtr(0666),
|
||||
UID: u32Ptr(0),
|
||||
GID: u32Ptr(0),
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/urandom",
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
FileMode: fmPtr(0666),
|
||||
UID: u32Ptr(0),
|
||||
GID: u32Ptr(0),
|
||||
},
|
||||
},
|
||||
|
||||
Seccomp: specs.Seccomp{
|
||||
DefaultAction: "SCMP_ACT_ALLOW",
|
||||
Syscalls: []*specs.Syscall{},
|
||||
Syscalls: []specs.Syscall{},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
checkNoFile := func(name string) error {
|
||||
_, err := os.Stat(name)
|
||||
if err == nil {
|
||||
|
@ -250,9 +239,6 @@ var specCommand = cli.Command{
|
|||
if err := checkNoFile(specConfig); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
if err := checkNoFile(runtimeConfig); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
data, err := json.MarshalIndent(&spec, "", "\t")
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
|
@ -260,16 +246,15 @@ var specCommand = cli.Command{
|
|||
if err := ioutil.WriteFile(specConfig, data, 0666); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
rdata, err := json.MarshalIndent(&rspec, "", "\t")
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
if err := ioutil.WriteFile(runtimeConfig, rdata, 0666); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
func sPtr(s string) *string { return &s }
|
||||
func rPtr(r rune) *rune { return &r }
|
||||
func iPtr(i int64) *int64 { return &i }
|
||||
func u32Ptr(i int64) *uint32 { u := uint32(i); return &u }
|
||||
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
|
||||
|
||||
var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{
|
||||
specs.PIDNamespace: configs.NEWPID,
|
||||
specs.NetworkNamespace: configs.NEWNET,
|
||||
|
@ -291,7 +276,7 @@ var mountPropagationMapping = map[string]int{
|
|||
|
||||
// validateSpec validates the fields in the spec
|
||||
// TODO: Add validation for other fields where applicable
|
||||
func validateSpec(spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) error {
|
||||
func validateSpec(spec *specs.LinuxSpec) error {
|
||||
if spec.Process.Cwd == "" {
|
||||
return fmt.Errorf("Cwd property must not be empty")
|
||||
}
|
||||
|
@ -303,35 +288,23 @@ func validateSpec(spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) error {
|
|||
|
||||
// loadSpec loads the specification from the provided path.
|
||||
// If the path is empty then the default path will be "config.json"
|
||||
func loadSpec(cPath, rPath string) (spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, err error) {
|
||||
func loadSpec(cPath string) (spec *specs.LinuxSpec, err error) {
|
||||
cf, err := os.Open(cPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil, fmt.Errorf("JSON specification file %s not found", cPath)
|
||||
return nil, fmt.Errorf("JSON specification file %s not found", cPath)
|
||||
}
|
||||
return spec, rspec, err
|
||||
return spec, err
|
||||
}
|
||||
defer cf.Close()
|
||||
|
||||
rf, err := os.Open(rPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil, fmt.Errorf("JSON runtime config file %s not found", rPath)
|
||||
}
|
||||
return spec, rspec, err
|
||||
}
|
||||
defer rf.Close()
|
||||
|
||||
if err = json.NewDecoder(cf).Decode(&spec); err != nil {
|
||||
return spec, rspec, err
|
||||
return spec, err
|
||||
}
|
||||
if err = json.NewDecoder(rf).Decode(&rspec); err != nil {
|
||||
return spec, rspec, err
|
||||
}
|
||||
return spec, rspec, validateSpec(spec, rspec)
|
||||
return spec, validateSpec(spec)
|
||||
}
|
||||
|
||||
func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (*configs.Config, error) {
|
||||
func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec) (*configs.Config, error) {
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -348,11 +321,11 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *s
|
|||
}
|
||||
|
||||
exists := false
|
||||
if config.RootPropagation, exists = mountPropagationMapping[rspec.Linux.RootfsPropagation]; !exists {
|
||||
return nil, fmt.Errorf("rootfsPropagation=%v is not supported", rspec.Linux.RootfsPropagation)
|
||||
if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists {
|
||||
return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation)
|
||||
}
|
||||
|
||||
for _, ns := range rspec.Linux.Namespaces {
|
||||
for _, ns := range spec.Linux.Namespaces {
|
||||
t, exists := namespaceMapping[ns.Type]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("namespace %q does not exist", ns)
|
||||
|
@ -366,27 +339,23 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *s
|
|||
},
|
||||
}
|
||||
}
|
||||
for _, mp := range spec.Mounts {
|
||||
m, ok := rspec.Mounts[mp.Name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Mount with Name %q not found in runtime config", mp.Name)
|
||||
}
|
||||
config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, mp.Path, m))
|
||||
for _, m := range spec.Mounts {
|
||||
config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m))
|
||||
}
|
||||
if err := createDevices(rspec, config); err != nil {
|
||||
if err := createDevices(spec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := setupUserNamespace(rspec, config); err != nil {
|
||||
if err := setupUserNamespace(spec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, rlimit := range rspec.Linux.Rlimits {
|
||||
for _, rlimit := range spec.Linux.Rlimits {
|
||||
rl, err := createLibContainerRlimit(rlimit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Rlimits = append(config.Rlimits, rl)
|
||||
}
|
||||
c, err := createCgroupConfig(cgroupName, rspec, config.Devices)
|
||||
c, err := createCgroupConfig(cgroupName, spec, config.Devices)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -400,23 +369,23 @@ func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *s
|
|||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
}
|
||||
}
|
||||
seccomp, err := setupSeccomp(&rspec.Linux.Seccomp)
|
||||
seccomp, err := setupSeccomp(&spec.Linux.Seccomp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Seccomp = seccomp
|
||||
config.Sysctl = rspec.Linux.Sysctl
|
||||
config.ProcessLabel = rspec.Linux.SelinuxProcessLabel
|
||||
config.AppArmorProfile = rspec.Linux.ApparmorProfile
|
||||
config.Sysctl = spec.Linux.Sysctl
|
||||
config.ProcessLabel = spec.Linux.SelinuxProcessLabel
|
||||
config.AppArmorProfile = spec.Linux.ApparmorProfile
|
||||
for _, g := range spec.Process.User.AdditionalGids {
|
||||
config.AdditionalGroups = append(config.AdditionalGroups, strconv.FormatUint(uint64(g), 10))
|
||||
}
|
||||
createHooks(rspec, config)
|
||||
createHooks(spec, config)
|
||||
config.Version = specs.Version
|
||||
return config, nil
|
||||
}
|
||||
|
||||
func createLibcontainerMount(cwd, dest string, m specs.Mount) *configs.Mount {
|
||||
func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
|
||||
flags, pgflags, data := parseMountOptions(m.Options)
|
||||
source := m.Source
|
||||
if m.Type == "bind" {
|
||||
|
@ -427,14 +396,14 @@ func createLibcontainerMount(cwd, dest string, m specs.Mount) *configs.Mount {
|
|||
return &configs.Mount{
|
||||
Device: m.Type,
|
||||
Source: source,
|
||||
Destination: dest,
|
||||
Destination: m.Destination,
|
||||
Data: data,
|
||||
Flags: flags,
|
||||
PropagationFlags: pgflags,
|
||||
}
|
||||
}
|
||||
|
||||
func createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*configs.Device) (*configs.Cgroup, error) {
|
||||
func createCgroupConfig(name string, spec *specs.LinuxSpec, devices []*configs.Device) (*configs.Cgroup, error) {
|
||||
myCgroupPath, err := cgroups.GetThisCgroupDir("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -553,17 +522,16 @@ func createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*co
|
|||
return c, nil
|
||||
}
|
||||
|
||||
func createDevices(spec *specs.LinuxRuntimeSpec, config *configs.Config) error {
|
||||
func createDevices(spec *specs.LinuxSpec, config *configs.Config) error {
|
||||
for _, d := range spec.Linux.Devices {
|
||||
device := &configs.Device{
|
||||
Type: d.Type,
|
||||
Path: d.Path,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
Permissions: d.Permissions,
|
||||
FileMode: d.FileMode,
|
||||
Uid: d.UID,
|
||||
Gid: d.GID,
|
||||
Type: d.Type,
|
||||
Path: d.Path,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
FileMode: *d.FileMode,
|
||||
Uid: *d.UID,
|
||||
Gid: *d.GID,
|
||||
}
|
||||
config.Devices = append(config.Devices, device)
|
||||
}
|
||||
|
@ -578,7 +546,7 @@ func setReadonly(config *configs.Config) {
|
|||
}
|
||||
}
|
||||
|
||||
func setupUserNamespace(spec *specs.LinuxRuntimeSpec, config *configs.Config) error {
|
||||
func setupUserNamespace(spec *specs.LinuxSpec, config *configs.Config) error {
|
||||
if len(spec.Linux.UIDMappings) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
@ -760,7 +728,7 @@ func setupSeccomp(config *specs.Seccomp) (*configs.Seccomp, error) {
|
|||
return newConfig, nil
|
||||
}
|
||||
|
||||
func createHooks(rspec *specs.LinuxRuntimeSpec, config *configs.Config) {
|
||||
func createHooks(rspec *specs.LinuxSpec, config *configs.Config) {
|
||||
config.Hooks = &configs.Hooks{}
|
||||
for _, h := range rspec.Hooks.Prestart {
|
||||
cmd := configs.Command{
|
||||
|
|
10
start.go
10
start.go
|
@ -49,14 +49,14 @@ var startCommand = cli.Command{
|
|||
fatal(err)
|
||||
}
|
||||
}
|
||||
spec, rspec, err := loadSpec(specConfig, runtimeConfig)
|
||||
spec, err := loadSpec(specConfig)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
notifySocket := os.Getenv("NOTIFY_SOCKET")
|
||||
if notifySocket != "" {
|
||||
setupSdNotify(spec, rspec, notifySocket)
|
||||
setupSdNotify(spec, notifySocket)
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -70,7 +70,7 @@ var startCommand = cli.Command{
|
|||
if os.Geteuid() != 0 {
|
||||
logrus.Fatal("runc should be run as root")
|
||||
}
|
||||
status, err := startContainer(context, spec, rspec)
|
||||
status, err := startContainer(context, spec)
|
||||
if err != nil {
|
||||
logrus.Fatalf("Container start failed: %v", err)
|
||||
}
|
||||
|
@ -92,8 +92,8 @@ func init() {
|
|||
}
|
||||
}
|
||||
|
||||
func startContainer(context *cli.Context, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (int, error) {
|
||||
config, err := createLibcontainerConfig(context.GlobalString("id"), spec, rspec)
|
||||
func startContainer(context *cli.Context, spec *specs.LinuxSpec) (int, error) {
|
||||
config, err := createLibcontainerConfig(context.GlobalString("id"), spec)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
|
6
utils.go
6
utils.go
|
@ -175,11 +175,9 @@ func dupStdio(process *libcontainer.Process, rootuid int) error {
|
|||
|
||||
// If systemd is supporting sd_notify protocol, this function will add support
|
||||
// for sd_notify protocol from within the container.
|
||||
func setupSdNotify(spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, notifySocket string) {
|
||||
mountName := "sdNotify"
|
||||
spec.Mounts = append(spec.Mounts, specs.MountPoint{Name: mountName, Path: notifySocket})
|
||||
func setupSdNotify(spec *specs.LinuxSpec, notifySocket string) {
|
||||
spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}})
|
||||
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
|
||||
rspec.Mounts[mountName] = specs.Mount{Type: "bind", Source: notifySocket, Options: []string{"bind"}}
|
||||
}
|
||||
|
||||
// If systemd is supporting on-demand socket activation, this function will add support
|
||||
|
|
Loading…
Reference in New Issue