2014-12-17 17:12:23 +08:00
package configs
2014-07-09 01:17:05 +08:00
2015-09-11 08:57:31 +08:00
import (
"bytes"
"encoding/json"
2016-03-30 02:14:59 +08:00
"fmt"
2015-09-11 08:57:31 +08:00
"os/exec"
2016-03-30 02:14:59 +08:00
"time"
2016-03-03 23:32:59 +08:00
"github.com/Sirupsen/logrus"
2015-09-11 08:57:31 +08:00
)
2015-02-01 11:56:27 +08:00
// Rlimit describes a single resource limit: the resource it applies to
// (Type) and the hard and soft values to set for it.
type Rlimit struct {
	Type int    `json:"type"`
	Hard uint64 `json:"hard"`
	Soft uint64 `json:"soft"`
}
2015-02-01 11:56:27 +08:00
// IDMap represents UID/GID Mappings for User Namespaces.
type IDMap struct {
	ContainerID int `json:"container_id"`
	HostID      int `json:"host_id"`
	Size        int `json:"size"`
}
2015-06-30 02:12:54 +08:00
// Seccomp represents syscall restrictions
2015-09-23 22:52:53 +08:00
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
2015-05-30 06:24:18 +08:00
type Seccomp struct {
2015-06-30 02:12:54 +08:00
DefaultAction Action ` json:"default_action" `
2015-09-23 22:52:53 +08:00
Architectures [ ] string ` json:"architectures" `
2015-06-30 02:12:54 +08:00
Syscalls [ ] * Syscall ` json:"syscalls" `
2015-05-30 06:24:18 +08:00
}
2016-04-12 16:12:23 +08:00
// Action is taken upon rule match in Seccomp.
type Action int

// Seccomp actions. Numbering starts at 1, so the zero value of Action does
// not correspond to any of these constants.
const (
	Kill Action = iota + 1
	Errno
	Trap
	Allow
	Trace
)
2016-04-12 16:12:23 +08:00
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp.
type Operator int

// Seccomp argument comparison operators. Numbering starts at 1, so the zero
// value of Operator does not correspond to any of these constants.
const (
	EqualTo Operator = iota + 1
	NotEqualTo
	GreaterThan
	GreaterThanOrEqualTo
	LessThan
	LessThanOrEqualTo
	MaskEqualTo
)
2016-04-12 16:12:23 +08:00
// Arg is a rule to match a specific syscall argument in Seccomp
2015-05-30 06:24:18 +08:00
type Arg struct {
2015-06-30 02:12:54 +08:00
Index uint ` json:"index" `
Value uint64 ` json:"value" `
ValueTwo uint64 ` json:"value_two" `
Op Operator ` json:"op" `
2015-05-30 06:24:18 +08:00
}
2016-04-12 16:12:23 +08:00
// Syscall is a rule to match a syscall in Seccomp
2015-05-30 06:24:18 +08:00
type Syscall struct {
2015-06-30 02:12:54 +08:00
Name string ` json:"name" `
2015-05-30 06:24:18 +08:00
Action Action ` json:"action" `
Args [ ] * Arg ` json:"args" `
2015-05-23 07:10:20 +08:00
}
2015-05-14 06:42:16 +08:00
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.
2014-07-09 01:17:05 +08:00
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
2015-02-01 11:56:27 +08:00
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
2015-02-12 08:45:23 +08:00
NoPivotRoot bool ` json:"no_pivot_root" `
2015-02-01 11:56:27 +08:00
2015-02-07 10:50:11 +08:00
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
// that the parent process dies.
2015-02-12 08:45:23 +08:00
ParentDeathSignal int ` json:"parent_death_signal" `
2015-02-07 10:50:11 +08:00
2015-02-01 11:56:27 +08:00
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
2015-02-12 08:45:23 +08:00
PivotDir string ` json:"pivot_dir" `
2015-02-01 11:56:27 +08:00
2015-02-04 09:44:58 +08:00
// Path to a directory containing the container's root filesystem.
2015-02-12 08:45:23 +08:00
Rootfs string ` json:"rootfs" `
2015-02-04 09:44:58 +08:00
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writtable.
2015-02-12 08:45:23 +08:00
Readonlyfs bool ` json:"readonlyfs" `
2015-02-01 11:56:27 +08:00
2015-10-02 05:03:02 +08:00
// Specifies the mount propagation flags to be applied to /.
RootPropagation int ` json:"rootPropagation" `
2015-04-10 22:45:04 +08:00
2015-02-01 11:56:27 +08:00
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
2015-02-12 08:45:23 +08:00
Mounts [ ] * Mount ` json:"mounts" `
2015-02-01 11:56:27 +08:00
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
2015-02-12 08:45:23 +08:00
Devices [ ] * Device ` json:"devices" `
2015-02-01 11:56:27 +08:00
2015-02-12 08:45:23 +08:00
MountLabel string ` json:"mount_label" `
2014-07-09 01:17:05 +08:00
// Hostname optionally sets the container's hostname if provided
2015-02-12 08:45:23 +08:00
Hostname string ` json:"hostname" `
2014-07-09 01:17:05 +08:00
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
2015-02-12 08:45:23 +08:00
Namespaces Namespaces ` json:"namespaces" `
2014-07-09 01:17:05 +08:00
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
2015-02-12 08:45:23 +08:00
Capabilities [ ] string ` json:"capabilities" `
2014-07-09 01:17:05 +08:00
// Networks specifies the container's network setup to be created
2015-02-12 08:45:23 +08:00
Networks [ ] * Network ` json:"networks" `
2014-07-09 01:17:05 +08:00
// Routes can be specified to create entries in the route table as the container is started
2015-02-12 08:45:23 +08:00
Routes [ ] * Route ` json:"routes" `
2014-07-09 01:17:05 +08:00
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
2015-02-12 08:45:23 +08:00
Cgroups * Cgroup ` json:"cgroups" `
2014-07-09 01:17:05 +08:00
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// change at the time the process is execed
2016-03-04 02:44:33 +08:00
AppArmorProfile string ` json:"apparmor_profile,omitempty" `
2014-07-09 01:17:05 +08:00
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
2016-03-04 02:44:33 +08:00
ProcessLabel string ` json:"process_label,omitempty" `
2014-07-09 01:17:05 +08:00
2014-11-27 02:16:53 +08:00
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
2016-03-11 06:35:16 +08:00
Rlimits [ ] Rlimit ` json:"rlimits,omitempty" `
2015-01-27 20:54:19 +08:00
2015-08-27 07:37:24 +08:00
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
// for a process. Valid values are between the range [-1000, '1000'], where processes with
// higher scores are preferred for being killed.
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj int ` json:"oom_score_adj" `
2015-01-27 20:54:19 +08:00
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
2015-05-01 07:02:31 +08:00
AdditionalGroups [ ] string ` json:"additional_groups" `
2015-02-01 11:56:27 +08:00
2015-01-27 20:54:19 +08:00
// UidMappings is an array of User ID mappings for User Namespaces
2015-02-12 08:45:23 +08:00
UidMappings [ ] IDMap ` json:"uid_mappings" `
2015-01-27 20:54:19 +08:00
// GidMappings is an array of Group ID mappings for User Namespaces
2015-02-12 08:45:23 +08:00
GidMappings [ ] IDMap ` json:"gid_mappings" `
2015-02-13 08:23:05 +08:00
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
// mount pointing to /dev/null as to prevent reads of the file.
MaskPaths [ ] string ` json:"mask_paths" `
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
// so that these files prevent any writes.
ReadonlyPaths [ ] string ` json:"readonly_paths" `
2015-04-23 10:17:30 +08:00
2015-07-07 07:18:08 +08:00
// Sysctl is a map of properties and their values. It is the equivalent of using
2015-04-23 10:17:30 +08:00
// sysctl -w my.property.name value in Linux.
2015-07-07 07:18:08 +08:00
Sysctl map [ string ] string ` json:"sysctl" `
2015-05-23 07:10:20 +08:00
2015-05-30 06:24:18 +08:00
// Seccomp allows actions to be taken whenever a syscall is made within the container.
2015-06-30 02:12:54 +08:00
// A number of rules are given, each having an action to be taken if a syscall matches it.
// A default action to be taken if no rules match is also given.
2015-05-30 06:24:18 +08:00
Seccomp * Seccomp ` json:"seccomp" `
2015-09-11 08:57:31 +08:00
2016-02-16 19:54:58 +08:00
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
2016-03-04 02:44:33 +08:00
NoNewPrivileges bool ` json:"no_new_privileges,omitempty" `
2016-02-16 19:54:58 +08:00
2015-09-11 08:57:31 +08:00
// Hooks are a collection of actions to perform at various container lifecycle events.
2016-03-03 23:32:59 +08:00
// CommandHooks are serialized to JSON, but other hooks are not.
Hooks * Hooks
2015-09-24 08:13:00 +08:00
// Version is the version of opencontainer specification that is supported.
Version string ` json:"version" `
2016-02-25 02:45:20 +08:00
// Labels are user defined metadata that is stored in the config and populated on the state
Labels [ ] string ` json:"labels" `
2015-09-11 08:57:31 +08:00
}
type Hooks struct {
// Prestart commands are executed after the container namespaces are created,
// but before the user supplied command is executed from init.
Prestart [ ] Hook
2015-11-07 07:02:50 +08:00
// Poststart commands are executed after the container init process starts.
Poststart [ ] Hook
2015-09-12 01:28:25 +08:00
// Poststop commands are executed after the container init process exits.
2015-09-11 08:57:31 +08:00
Poststop [ ] Hook
}
2016-03-03 23:32:59 +08:00
// UnmarshalJSON decodes the prestart, poststart and poststop hook lists from
// b, reading every entry as a CommandHook, and replaces the receiver's three
// lists with the result. A stage that is absent or empty in b ends up as a
// nil slice. On a decoding error the receiver is left untouched.
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
	var decoded struct {
		Prestart  []CommandHook
		Poststart []CommandHook
		Poststop  []CommandHook
	}
	err := json.Unmarshal(b, &decoded)
	if err != nil {
		return err
	}

	// asHooks widens the concrete command hooks to the Hook interface.
	asHooks := func(commands []CommandHook) []Hook {
		var widened []Hook
		for i := range commands {
			widened = append(widened, commands[i])
		}
		return widened
	}

	hooks.Prestart = asHooks(decoded.Prestart)
	hooks.Poststart = asHooks(decoded.Poststart)
	hooks.Poststop = asHooks(decoded.Poststop)
	return nil
}
// MarshalJSON encodes the hooks as a JSON object with the keys "prestart",
// "poststart" and "poststop". Only hooks whose dynamic type is CommandHook are
// written; every other hook is dropped from the output after logging a
// warning. A stage with no serializable hooks is encoded as null.
func (hooks Hooks) MarshalJSON() ([]byte, error) {
	commandsOnly := func(all []Hook) []CommandHook {
		var kept []CommandHook
		for _, candidate := range all {
			command, ok := candidate.(CommandHook)
			if !ok {
				logrus.Warnf("cannot serialize hook of type %T, skipping", candidate)
				continue
			}
			kept = append(kept, command)
		}
		return kept
	}

	payload := map[string]interface{}{
		"prestart":  commandsOnly(hooks.Prestart),
		"poststart": commandsOnly(hooks.Poststart),
		"poststop":  commandsOnly(hooks.Poststop),
	}
	return json.Marshal(payload)
}
2015-09-11 08:57:31 +08:00
// HookState is the payload provided to a hook on execution.
type HookState struct {
	Version    string `json:"ociVersion"`
	ID         string `json:"id"`
	Pid        int    `json:"pid"`
	Root       string `json:"root"`
	BundlePath string `json:"bundlePath"`
}
type Hook interface {
// Run executes the hook with the provided state.
2015-09-12 01:28:25 +08:00
Run ( HookState ) error
2015-09-11 08:57:31 +08:00
}
2016-04-12 16:12:23 +08:00
// NewFunctionHook will call the provided function when the hook is run.
2015-09-12 01:28:25 +08:00
func NewFunctionHook ( f func ( HookState ) error ) FuncHook {
return FuncHook {
2015-09-11 09:15:00 +08:00
run : f ,
2015-09-11 08:57:31 +08:00
}
}
type FuncHook struct {
2015-09-12 01:28:25 +08:00
run func ( HookState ) error
2015-09-11 09:15:00 +08:00
}
2015-09-12 01:28:25 +08:00
func ( f FuncHook ) Run ( s HookState ) error {
2015-09-11 09:15:00 +08:00
return f . run ( s )
2015-09-11 08:57:31 +08:00
}
// Command describes an external program to execute as a hook.
type Command struct {
	Path string   `json:"path"`
	Args []string `json:"args"`
	Env  []string `json:"env"`
	Dir  string   `json:"dir"`
	// Timeout, when non-nil, bounds how long the command may run.
	Timeout *time.Duration `json:"timeout"`
}
2016-04-12 16:12:23 +08:00
// NewCommandHook will execute the provided command when the hook is run.
2015-09-12 01:28:25 +08:00
func NewCommandHook ( cmd Command ) CommandHook {
return CommandHook {
2015-09-11 08:57:31 +08:00
Command : cmd ,
}
}
// CommandHook is a hook backed by an external command. It embeds Command, so
// Command's fields and methods are promoted onto it.
type CommandHook struct {
	Command
}
2015-09-12 01:28:25 +08:00
func ( c Command ) Run ( s HookState ) error {
2015-09-11 08:57:31 +08:00
b , err := json . Marshal ( s )
if err != nil {
return err
}
cmd := exec . Cmd {
Path : c . Path ,
Args : c . Args ,
Env : c . Env ,
Stdin : bytes . NewReader ( b ) ,
}
2016-03-30 02:14:59 +08:00
errC := make ( chan error , 1 )
go func ( ) {
2016-04-09 02:02:44 +08:00
out , err := cmd . CombinedOutput ( )
if err != nil {
err = fmt . Errorf ( "%s: %s" , err , out )
}
errC <- err
2016-03-30 02:14:59 +08:00
} ( )
if c . Timeout != nil {
select {
case err := <- errC :
return err
case <- time . After ( * c . Timeout ) :
cmd . Process . Kill ( )
cmd . Wait ( )
return fmt . Errorf ( "hook ran past specified timeout of %.1fs" , c . Timeout . Seconds ( ) )
}
}
return <- errC
2014-07-09 01:17:05 +08:00
}