2015-07-31 03:17:04 +08:00
|
|
|
package specs
|
|
|
|
|
config: Single, unified config file
Reverting 7232e4b1 (specs: introduce the concept of a runtime.json,
2015-07-30, #88) after discussion on the mailing list [1]. The main
reason is that it's hard to draw a clear line around "inherently
runtime-specific" or "non-portable", so we shouldn't try to do that in
the spec. Folks who want to flag settings as non-portable for their
own system (e.g. "we will clobber 'hooks' in bundles we run") are
welcome to do so, but we don't have to split the config into multiple
files to do that.
There have been a number of additional changes since #88, so this
isn't a pure Git reversion. Besides copy-pasting and the associated
link-target updates, I've:
* Restored path -> destination, now that the mount type contains both
source and target paths again. I'd prefer 'target' to 'destination'
to match mount(2), but the pre-7232e4b1 phrasing was 'destination'
(possibly due to Windows using 'target' for the source?).
* Restored the Windows mount example to its pre-7232e4b1 content.
* Removed required mounts from the config example (requirements landed
in 3848a238, config-linux: specify the default devices/filesystems
available, 2015-09-09, #164), because specifying those mounts in the
config is now redundant.
* Used headers (vs. bold paragraphs) to set off mount examples so we
get link anchors in the rendered Markdown.
* Replaced references to runtime.json with references to config.json.
[1]: https://groups.google.com/a/opencontainers.org/forum/#!topic/dev/0QbyJDM9fWY
Subject: Single, unified config file (i.e. rolling back specs#88)
Date: Wed, 4 Nov 2015 09:53:20 -0800
Message-ID: <20151104175320.GC24652@odin.tremily.us>
Signed-off-by: W. Trevor King <wking@tremily.us>
2015-12-29 02:06:40 +08:00
|
|
|
import "os"
|
|
|
|
|
|
|
|
// LinuxStateDirectory is the directory that holds the container's state
// information.
const LinuxStateDirectory = "/run/opencontainer/containers"
|
|
|
|
|
2015-07-31 03:17:04 +08:00
|
|
|
// LinuxSpec is the full specification for linux containers.
|
|
|
|
type LinuxSpec struct {
|
|
|
|
Spec
|
|
|
|
// Linux is platform specific configuration for linux based containers.
|
|
|
|
Linux Linux `json:"linux"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// Linux contains platform specific configuration for linux based containers.
|
|
|
|
type Linux struct {
|
|
|
|
// Capabilities are linux capabilities that are kept for the container.
|
|
|
|
Capabilities []string `json:"capabilities"`
|
config: Single, unified config file
Reverting 7232e4b1 (specs: introduce the concept of a runtime.json,
2015-07-30, #88) after discussion on the mailing list [1]. The main
reason is that it's hard to draw a clear line around "inherently
runtime-specific" or "non-portable", so we shouldn't try to do that in
the spec. Folks who want to flag settings as non-portable for their
own system are welcome to do so (e.g. "we will clobber 'hooks' in
bundles we run") are welcome to do so, but we don't have to have
to split the config into multiple files to do that.
There have been a number of additional changes since #88, so this
isn't a pure Git reversion. Besides copy-pasting and the associated
link-target updates, I've:
* Restored path -> destination, now that the mount type contains both
source and target paths again. I'd prefer 'target' to 'destination'
to match mount(2), but the pre-7232e4b1 phrasing was 'destination'
(possibly due to Windows using 'target' for the source?).
* Restored the Windows mount example to its pre-7232e4b1 content.
* Removed required mounts from the config example (requirements landed
in 3848a238, config-linux: specify the default devices/filesystems
available, 2015-09-09, #164), because specifying those mounts in the
config is now redundant.
* Used headers (vs. bold paragraphs) to set off mount examples so we
get link anchors in the rendered Markdown.
* Replaced references to runtime.json with references to config.json.
[1]: https://groups.google.com/a/opencontainers.org/forum/#!topic/dev/0QbyJDM9fWY
Subject: Single, unified config file (i.e. rolling back specs#88)
Date: Wed, 4 Nov 2015 09:53:20 -0800
Message-ID: <20151104175320.GC24652@odin.tremily.us>
Signed-off-by: W. Trevor King <wking@tremily.us>
2015-12-29 02:06:40 +08:00
|
|
|
// UIDMapping specifies user mappings for supporting user namespaces on linux.
|
|
|
|
UIDMappings []IDMapping `json:"uidMappings,omitempty"`
|
|
|
|
// GIDMapping specifies group mappings for supporting user namespaces on linux.
|
|
|
|
GIDMappings []IDMapping `json:"gidMappings,omitempty"`
|
|
|
|
// Rlimits specifies rlimit options to apply to the container's process.
|
|
|
|
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
|
|
|
// Sysctl are a set of key value pairs that are set for the container on start
|
|
|
|
Sysctl map[string]string `json:"sysctl,omitempty"`
|
|
|
|
// Resources contain cgroup information for handling resource constraints
|
|
|
|
// for the container
|
|
|
|
Resources *Resources `json:"resources,omitempty"`
|
|
|
|
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
|
|
|
|
// The path is expected to be relative to the cgroups mountpoint.
|
|
|
|
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
|
|
|
|
CgroupsPath *string `json:"cgroupsPath,omitempty"`
|
|
|
|
// Namespaces contains the namespaces that are created and/or joined by the container
|
|
|
|
Namespaces []Namespace `json:"namespaces"`
|
|
|
|
// Devices are a list of device nodes that are created and enabled for the container
|
|
|
|
Devices []Device `json:"devices"`
|
|
|
|
// ApparmorProfile specified the apparmor profile for the container.
|
|
|
|
ApparmorProfile string `json:"apparmorProfile"`
|
|
|
|
// SelinuxProcessLabel specifies the selinux context that the container process is run as.
|
|
|
|
SelinuxProcessLabel string `json:"selinuxProcessLabel"`
|
|
|
|
// Seccomp specifies the seccomp security settings for the container.
|
|
|
|
Seccomp Seccomp `json:"seccomp"`
|
|
|
|
// RootfsPropagation is the rootfs mount propagation mode for the container.
|
|
|
|
RootfsPropagation string `json:"rootfsPropagation,omitempty"`
|
|
|
|
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
|
|
|
|
NoNewPrivileges bool `json:"noNewPrivileges,omitempty"`
|
2015-07-31 03:17:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// User specifies linux specific user and group information for the
// container's main process.
type User struct {
	// UID is the user id.
	UID uint32 `json:"uid"`
	// GID is the group id.
	GID uint32 `json:"gid"`
	// AdditionalGids are additional group ids set for the container's process.
	// The key is omitted from the JSON encoding when the slice is empty.
	AdditionalGids []uint32 `json:"additionalGids,omitempty"`
}
|
config: Single, unified config file
Reverting 7232e4b1 (specs: introduce the concept of a runtime.json,
2015-07-30, #88) after discussion on the mailing list [1]. The main
reason is that it's hard to draw a clear line around "inherently
runtime-specific" or "non-portable", so we shouldn't try to do that in
the spec. Folks who want to flag settings as non-portable for their
own system (e.g. "we will clobber 'hooks' in bundles we run") are
welcome to do so, but we don't have to split the config into multiple
files to do that.
There have been a number of additional changes since #88, so this
isn't a pure Git reversion. Besides copy-pasting and the associated
link-target updates, I've:
* Restored path -> destination, now that the mount type contains both
source and target paths again. I'd prefer 'target' to 'destination'
to match mount(2), but the pre-7232e4b1 phrasing was 'destination'
(possibly due to Windows using 'target' for the source?).
* Restored the Windows mount example to its pre-7232e4b1 content.
* Removed required mounts from the config example (requirements landed
in 3848a238, config-linux: specify the default devices/filesystems
available, 2015-09-09, #164), because specifying those mounts in the
config is now redundant.
* Used headers (vs. bold paragraphs) to set off mount examples so we
get link anchors in the rendered Markdown.
* Replaced references to runtime.json with references to config.json.
[1]: https://groups.google.com/a/opencontainers.org/forum/#!topic/dev/0QbyJDM9fWY
Subject: Single, unified config file (i.e. rolling back specs#88)
Date: Wed, 4 Nov 2015 09:53:20 -0800
Message-ID: <20151104175320.GC24652@odin.tremily.us>
Signed-off-by: W. Trevor King <wking@tremily.us>
2015-12-29 02:06:40 +08:00
|
|
|
|
|
|
|
// Namespace is the configuration for a linux namespace
|
|
|
|
type Namespace struct {
|
|
|
|
// Type is the type of Linux namespace
|
|
|
|
Type NamespaceType `json:"type"`
|
|
|
|
// Path is a path to an existing namespace persisted on disk that can be joined
|
|
|
|
// and is of the same type
|
|
|
|
Path string `json:"path,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// NamespaceType is one of the linux namespaces.
type NamespaceType string

// Supported namespace types.
//
// Every constant carries an explicit NamespaceType. In a const block the type
// is only inherited when the "= value" expression is omitted as well, so a
// bare `Name = "value"` would declare an untyped string constant instead.
const (
	// PIDNamespace for isolating process IDs.
	PIDNamespace NamespaceType = "pid"
	// NetworkNamespace for isolating network devices, stacks, ports, etc.
	NetworkNamespace NamespaceType = "network"
	// MountNamespace for isolating mount points.
	MountNamespace NamespaceType = "mount"
	// IPCNamespace for isolating System V IPC, POSIX message queues.
	IPCNamespace NamespaceType = "ipc"
	// UTSNamespace for isolating hostname and NIS domain name.
	UTSNamespace NamespaceType = "uts"
	// UserNamespace for isolating user and group IDs.
	UserNamespace NamespaceType = "user"
)
|
|
|
|
|
|
|
|
// IDMapping specifies UID/GID mappings.
type IDMapping struct {
	// HostID is the UID/GID of the host user or group.
	HostID uint32 `json:"hostID"`
	// ContainerID is the UID/GID of the container's user or group.
	ContainerID uint32 `json:"containerID"`
	// Size is the length of the range of IDs mapped between the two namespaces.
	Size uint32 `json:"size"`
}
|
|
|
|
|
|
|
|
// Rlimit describes a resource limit: its type and its hard and soft values.
type Rlimit struct {
	// Type of the rlimit to set.
	Type string `json:"type"`
	// Hard is the hard limit for the specified type.
	Hard uint64 `json:"hard"`
	// Soft is the soft limit for the specified type.
	Soft uint64 `json:"soft"`
}
|
|
|
|
|
|
|
|
// HugepageLimit structure corresponds to limiting kernel hugepages.
// Both fields are optional pointers and are omitted from JSON when nil.
type HugepageLimit struct {
	// Pagesize is the hugepage size.
	Pagesize *string `json:"pageSize,omitempty"`
	// Limit is the limit of "hugepagesize" hugetlb usage.
	Limit *uint64 `json:"limit,omitempty"`
}
|
|
|
|
|
|
|
|
// InterfacePriority pairs a network interface with a priority.
type InterfacePriority struct {
	// Name is the name of the network interface.
	Name string `json:"name"`
	// Priority for the interface.
	Priority uint32 `json:"priority"`
}
|
|
|
|
|
|
|
|
// blockIODevice holds major:minor format supported in blkio cgroup.
// It is unexported and embedded by WeightDevice and ThrottleDevice.
type blockIODevice struct {
	// Major is the device's major number.
	Major int64 `json:"major"`
	// Minor is the device's minor number.
	Minor int64 `json:"minor"`
}
|
|
|
|
|
|
|
|
// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice
|
|
|
|
type WeightDevice struct {
|
|
|
|
blockIODevice
|
|
|
|
// Weight is the bandwidth rate for the device, range is from 10 to 1000
|
|
|
|
Weight *uint16 `json:"weight,omitempty"`
|
|
|
|
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
|
|
|
LeafWeight *uint16 `json:"leafWeight,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
|
|
|
|
type ThrottleDevice struct {
|
|
|
|
blockIODevice
|
|
|
|
// Rate is the IO rate limit per cgroup per device
|
|
|
|
Rate *uint64 `json:"rate,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// BlockIO for Linux cgroup 'blkio' resource management
|
|
|
|
type BlockIO struct {
|
|
|
|
// Specifies per cgroup weight, range is from 10 to 1000
|
|
|
|
Weight *uint16 `json:"blkioWeight,omitempty"`
|
|
|
|
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
|
|
|
|
LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"`
|
|
|
|
// Weight per cgroup per device, can override BlkioWeight
|
|
|
|
WeightDevice []WeightDevice `json:"blkioWeightDevice,omitempty"`
|
|
|
|
// IO read rate limit per cgroup per device, bytes per second
|
|
|
|
ThrottleReadBpsDevice []ThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"`
|
|
|
|
// IO write rate limit per cgroup per device, bytes per second
|
|
|
|
ThrottleWriteBpsDevice []ThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"`
|
|
|
|
// IO read rate limit per cgroup per device, IO per second
|
|
|
|
ThrottleReadIOPSDevice []ThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"`
|
|
|
|
// IO write rate limit per cgroup per device, IO per second
|
|
|
|
ThrottleWriteIOPSDevice []ThrottleDevice `json:"blkioThrottleWriteIOPSDevice,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// Memory for Linux cgroup 'memory' resource management.
// Every field is an optional pointer; nil fields are omitted from JSON.
type Memory struct {
	// Limit is the memory limit (in bytes).
	Limit *uint64 `json:"limit,omitempty"`
	// Reservation is the memory reservation or soft_limit (in bytes).
	Reservation *uint64 `json:"reservation,omitempty"`
	// Swap is the total memory limit (memory + swap).
	Swap *uint64 `json:"swap,omitempty"`
	// Kernel is the kernel memory limit (in bytes).
	Kernel *uint64 `json:"kernel,omitempty"`
	// KernelTCP is the kernel memory limit for tcp (in bytes).
	// It carries omitempty like its siblings so that an unset value is left
	// out of the encoding instead of being emitted as "kernelTCP": null.
	KernelTCP *uint64 `json:"kernelTCP,omitempty"`
	// Swappiness is how aggressive the kernel will swap memory pages. Range from 0 to 100.
	Swappiness *uint64 `json:"swappiness,omitempty"`
}
|
|
|
|
|
|
|
|
// CPU for Linux cgroup 'cpu' resource management.
// Every field is an optional pointer; nil fields are omitted from JSON.
type CPU struct {
	// Shares is the CPU shares (relative weight (ratio) vs. other cgroups with cpu shares).
	Shares *uint64 `json:"shares,omitempty"`
	// Quota is the CPU hardcap limit (in usecs). Allowed cpu time in a given period.
	Quota *uint64 `json:"quota,omitempty"`
	// Period is the CPU period to be used for hardcapping (in usecs).
	Period *uint64 `json:"period,omitempty"`
	// RealtimeRuntime is how much time realtime scheduling may use (in usecs).
	RealtimeRuntime *uint64 `json:"realtimeRuntime,omitempty"`
	// RealtimePeriod is the CPU period to be used for realtime scheduling (in usecs).
	RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"`
	// Cpus are the CPUs to use within the cpuset. Default is to use any CPU available.
	Cpus *string `json:"cpus,omitempty"`
	// Mems is the list of memory nodes in the cpuset. Default is to use any available memory node.
	Mems *string `json:"mems,omitempty"`
}
|
|
|
|
|
|
|
|
// Pids for Linux cgroup 'pids' resource management (Linux 4.3).
type Pids struct {
	// Limit is the maximum number of PIDs. Default is "no limit".
	Limit *int64 `json:"limit,omitempty"`
}
|
|
|
|
|
|
|
|
// Network identification and priority configuration
|
|
|
|
type Network struct {
|
|
|
|
// Set class identifier for container's network packets
|
|
|
|
ClassID *uint32 `json:"classID"`
|
|
|
|
// Set priority of network traffic for container
|
|
|
|
Priorities []InterfacePriority `json:"priorities,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// Resources has container runtime resource constraints
|
|
|
|
type Resources struct {
|
|
|
|
// DisableOOMKiller disables the OOM killer for out of memory conditions
|
|
|
|
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
|
|
|
|
// Specify an oom_score_adj for the container.
|
|
|
|
OOMScoreAdj *int `json:"oomScoreAdj,omitempty"`
|
|
|
|
// Memory restriction configuration
|
|
|
|
Memory *Memory `json:"memory,omitempty"`
|
|
|
|
// CPU resource restriction configuration
|
|
|
|
CPU *CPU `json:"cpu,omitempty"`
|
|
|
|
// Task resource restriction configuration.
|
|
|
|
Pids *Pids `json:"pids,omitempty"`
|
|
|
|
// BlockIO restriction configuration
|
|
|
|
BlockIO *BlockIO `json:"blockIO,omitempty"`
|
|
|
|
// Hugetlb limit (in bytes)
|
|
|
|
HugepageLimits []HugepageLimit `json:"hugepageLimits,omitempty"`
|
|
|
|
// Network restriction configuration
|
|
|
|
Network *Network `json:"network,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// Device represents the information on a Linux special device file.
type Device struct {
	// Path to the device.
	Path string `json:"path"`
	// Type is the device type, block, char, etc.
	// NOTE(review): rune is an alias for int32, so encoding/json emits this
	// as a number (e.g. 99 for 'c') rather than a one-character string;
	// confirm that is the intended wire format.
	Type rune `json:"type"`
	// Major is the device's major number.
	Major int64 `json:"major"`
	// Minor is the device's minor number.
	Minor int64 `json:"minor"`
	// Permissions is the cgroup permissions format, rwm.
	Permissions string `json:"permissions"`
	// FileMode permission bits for the device.
	FileMode os.FileMode `json:"fileMode"`
	// UID of the device.
	UID uint32 `json:"uid"`
	// GID of the device.
	GID uint32 `json:"gid"`
}
|
|
|
|
|
|
|
|
// Seccomp represents syscall restrictions
|
|
|
|
type Seccomp struct {
|
|
|
|
DefaultAction Action `json:"defaultAction"`
|
|
|
|
Architectures []Arch `json:"architectures"`
|
|
|
|
Syscalls []Syscall `json:"syscalls,omitempty"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// Arch used for additional architectures.
type Arch string

// Additional architectures permitted to be used for system calls.
// By default only the native architecture of the kernel is permitted.
const (
	ArchX86         Arch = "SCMP_ARCH_X86"
	ArchX86_64      Arch = "SCMP_ARCH_X86_64"
	ArchX32         Arch = "SCMP_ARCH_X32"
	ArchARM         Arch = "SCMP_ARCH_ARM"
	ArchAARCH64     Arch = "SCMP_ARCH_AARCH64"
	ArchMIPS        Arch = "SCMP_ARCH_MIPS"
	ArchMIPS64      Arch = "SCMP_ARCH_MIPS64"
	ArchMIPS64N32   Arch = "SCMP_ARCH_MIPS64N32"
	ArchMIPSEL      Arch = "SCMP_ARCH_MIPSEL"
	ArchMIPSEL64    Arch = "SCMP_ARCH_MIPSEL64"
	ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
)
|
|
|
|
|
|
|
|
// Action taken upon Seccomp rule match.
type Action string

// Define actions for Seccomp rules.
const (
	ActKill  Action = "SCMP_ACT_KILL"
	ActTrap  Action = "SCMP_ACT_TRAP"
	ActErrno Action = "SCMP_ACT_ERRNO"
	ActTrace Action = "SCMP_ACT_TRACE"
	ActAllow Action = "SCMP_ACT_ALLOW"
)
|
|
|
|
|
|
|
|
// Operator used to match syscall arguments in Seccomp.
type Operator string

// Define operators for syscall arguments in Seccomp.
const (
	OpNotEqual     Operator = "SCMP_CMP_NE"
	OpLessThan     Operator = "SCMP_CMP_LT"
	OpLessEqual    Operator = "SCMP_CMP_LE"
	OpEqualTo      Operator = "SCMP_CMP_EQ"
	OpGreaterEqual Operator = "SCMP_CMP_GE"
	OpGreaterThan  Operator = "SCMP_CMP_GT"
	OpMaskedEqual  Operator = "SCMP_CMP_MASKED_EQ"
)
|
|
|
|
|
|
|
|
// Arg used for matching specific syscall arguments in Seccomp
|
|
|
|
type Arg struct {
|
|
|
|
Index uint `json:"index"`
|
|
|
|
Value uint64 `json:"value"`
|
|
|
|
ValueTwo uint64 `json:"valueTwo"`
|
|
|
|
Op Operator `json:"op"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// Syscall is used to match a syscall in Seccomp
|
|
|
|
type Syscall struct {
|
|
|
|
Name string `json:"name"`
|
|
|
|
Action Action `json:"action"`
|
|
|
|
Args []Arg `json:"args,omitempty"`
|
|
|
|
}
|