commit
9f0cca11d0
|
@ -8,7 +8,7 @@ In the design and development of libcontainer we try to follow these principles:
|
|||
* Less code is better.
|
||||
* Fewer components are better. Do you really need to add one more class?
|
||||
* 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand.
|
||||
* Don't do later what you can do now. "//FIXME: refactor" is not acceptable in new code.
|
||||
* Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code.
|
||||
* When hesitating between two options, choose the one that is easier to reverse.
|
||||
* "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later.
|
||||
* Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable.
|
||||
|
|
159
README.md
159
README.md
|
@ -1,48 +1,169 @@
|
|||
## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer)
|
||||
|
||||
### Note on API changes:
|
||||
|
||||
Please bear with us while we work on making the libcontainer API stable and something that we can support long term. We are currently discussing the API with the community, therefore, if you currently depend on libcontainer please pin your dependency at a specific tag or commit id. Please join the discussion and help shape the API.
|
||||
|
||||
#### Background
|
||||
|
||||
libcontainer specifies configuration options for what a container is. It provides a native Go implementation for using Linux namespaces with no external dependencies. libcontainer provides many convenience functions for working with namespaces, networking, and management.
|
||||
Libcontainer provides a native Go implementation for creating containers
|
||||
with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||
It allows you to manage the lifecycle of the container performing additional operations
|
||||
after the container is created.
|
||||
|
||||
|
||||
#### Container
|
||||
A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system.
|
||||
A container is a self contained execution environment that shares the kernel of the
|
||||
host system and which is (optionally) isolated from other containers in the system.
|
||||
|
||||
libcontainer may be used to execute a process in a container. If a user tries to run a new process inside an existing container, the new process is added to the processes executing in the container.
|
||||
#### Using libcontainer
|
||||
|
||||
To create a container you first have to initialize an instance of a factory
|
||||
that will handle the creation and initialization for a container.
|
||||
|
||||
Because containers are spawned in a two step process you will need to provide
|
||||
arguments to a binary that will be executed as the init process for the container.
|
||||
To use the current binary that is spawning the containers and acting as the parent
|
||||
you can use `os.Args[0]` and we have a command called `init` setup.
|
||||
|
||||
```go
|
||||
initArgs := []string{os.Args[0], "init"}
|
||||
|
||||
root, err := libcontainer.New("/var/lib/container", initArgs)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
```
|
||||
|
||||
Once you have an instance of the factory created we can create a configuration
|
||||
struct describing how the container is to be created. A sample would look similar to this:
|
||||
|
||||
```go
|
||||
config := &configs.Config{
|
||||
Rootfs: rootfs,
|
||||
Capabilities: []string{
|
||||
"CHOWN",
|
||||
"DAC_OVERRIDE",
|
||||
"FSETID",
|
||||
"FOWNER",
|
||||
"MKNOD",
|
||||
"NET_RAW",
|
||||
"SETGID",
|
||||
"SETUID",
|
||||
"SETFCAP",
|
||||
"SETPCAP",
|
||||
"NET_BIND_SERVICE",
|
||||
"SYS_CHROOT",
|
||||
"KILL",
|
||||
"AUDIT_WRITE",
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
{Type: configs.NEWIPC},
|
||||
{Type: configs.NEWPID},
|
||||
{Type: configs.NEWNET},
|
||||
}),
|
||||
Cgroups: &configs.Cgroup{
|
||||
Name: "test-container",
|
||||
Parent: "system",
|
||||
AllowAllDevices: false,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
},
|
||||
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "testing",
|
||||
Networks: []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
Address: "127.0.0.1/0",
|
||||
Gateway: "localhost",
|
||||
},
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: uint64(1024),
|
||||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Once you have the configuration populated you can create a container:
|
||||
|
||||
```go
|
||||
container, err := root.Create("container-id", config)
|
||||
```
|
||||
|
||||
To spawn bash as the initial process inside the container and have the
|
||||
processes pid returned in order to wait, signal, or kill the process:
|
||||
|
||||
```go
|
||||
process := &libcontainer.Process{
|
||||
Args: []string{"/bin/bash"},
|
||||
Env: []string{"PATH=/bin"},
|
||||
User: "daemon",
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
}
|
||||
|
||||
pid, err := container.Start(process)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
|
||||
#### Root file system
|
||||
// wait for the process to finish.
|
||||
wait(pid)
|
||||
|
||||
A container runs with a directory known as its *root file system*, or *rootfs*, mounted as the file system root. The rootfs is usually a full system tree.
|
||||
// destroy the container.
|
||||
container.Destroy()
|
||||
```
|
||||
|
||||
Additional ways to interact with a running container are:
|
||||
|
||||
```go
|
||||
// return all the pids for all processes running inside the container.
|
||||
processes, err := container.Processes()
|
||||
|
||||
// get detailed cpu, memory, io, and network statistics for the container and
|
||||
// it's processes.
|
||||
stats, err := container.Stats()
|
||||
|
||||
|
||||
#### Configuration
|
||||
// pause all processes inside the container.
|
||||
container.Pause()
|
||||
|
||||
A container is initially configured by supplying configuration data when the container is created.
|
||||
// resume all paused processes.
|
||||
container.Resume()
|
||||
```
|
||||
|
||||
|
||||
#### nsinit
|
||||
|
||||
`nsinit` is a cli application which demonstrates the use of libcontainer. It is able to spawn new containers or join existing containers, based on the current directory.
|
||||
`nsinit` is a cli application which demonstrates the use of libcontainer.
|
||||
It is able to spawn new containers or join existing containers. A root
|
||||
filesystem must be provided for use along with a container configuration file.
|
||||
|
||||
To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into the directory with your specified configuration. Environment, networking, and different capabilities for the container are specified in this file. The configuration is used for each process executed inside the container.
|
||||
To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into
|
||||
the directory with your specified configuration. Environment, networking,
|
||||
and different capabilities for the container are specified in this file.
|
||||
The configuration is used for each process executed inside the container.
|
||||
|
||||
See the `sample_configs` folder for examples of what the container configuration should look like.
|
||||
|
||||
To execute `/bin/bash` in the current directory as a container just run the following **as root**:
|
||||
```bash
|
||||
nsinit exec /bin/bash
|
||||
nsinit exec --tty /bin/bash
|
||||
```
|
||||
|
||||
If you wish to spawn another process inside the container while your current bash session is running, run the same command again to get another bash shell (or change the command). If the original process (PID 1) dies, all other processes spawned inside the container will be killed and the namespace will be removed.
|
||||
If you wish to spawn another process inside the container while your
|
||||
current bash session is running, run the same command again to
|
||||
get another bash shell (or change the command). If the original
|
||||
process (PID 1) dies, all other processes spawned inside the container
|
||||
will be killed and the namespace will be removed.
|
||||
|
||||
You can identify if a process is running in a container by looking to see if `state.json` is in the root of the directory.
|
||||
You can identify if a process is running in a container by
|
||||
looking to see if `state.json` is in the root of the directory.
|
||||
|
||||
You may also specify an alternate root place where the `container.json` file is read and where the `state.json` file will be saved.
|
||||
You may also specify an alternate root place where
|
||||
the `container.json` file is read and where the `state.json` file will be saved.
|
||||
|
||||
#### Future
|
||||
See the [roadmap](ROADMAP.md).
|
||||
|
|
|
@ -83,7 +83,7 @@ func (m *Manager) Apply(pid int) error {
|
|||
if err := sys.Set(d); err != nil {
|
||||
return err
|
||||
}
|
||||
// FIXME: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
p, err := d.path(name)
|
||||
|
|
|
@ -21,41 +21,34 @@ type Config struct {
|
|||
// This is a common option when the container is running in ramdisk
|
||||
NoPivotRoot bool `json:"no_pivot_root,omitempty"`
|
||||
|
||||
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
|
||||
// that the parent process dies.
|
||||
ParentDeathSignal int `json:"parent_death_signal,omitempty"`
|
||||
|
||||
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
|
||||
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
|
||||
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
|
||||
PivotDir string `json:"pivot_dir,omitempty"`
|
||||
|
||||
// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted
|
||||
// bind mounts are writtable
|
||||
ReadonlyFs bool `json:"readonly_fs,omitempty"`
|
||||
// Path to a directory containing the container's root filesystem.
|
||||
Rootfs string `json:"rootfs,omitempty"`
|
||||
|
||||
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
|
||||
// bind mounts are writtable.
|
||||
Readonlyfs bool `json:"readonlyfs,omitempty"`
|
||||
|
||||
// Mounts specify additional source and destination paths that will be mounted inside the container's
|
||||
// rootfs and mount namespace if specified
|
||||
Mounts []*Mount `json:"mounts,omitempty"`
|
||||
|
||||
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
|
||||
DeviceNodes []*Device `json:"device_nodes,omitempty"`
|
||||
Devices []*Device `json:"devices,omitempty"`
|
||||
|
||||
MountLabel string `json:"mount_label,omitempty"`
|
||||
|
||||
// Pathname to container's root filesystem
|
||||
RootFs string `json:"root_fs,omitempty"`
|
||||
|
||||
// Hostname optionally sets the container's hostname if provided
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
|
||||
// User will set the uid and gid of the executing process running inside the container
|
||||
User string `json:"user,omitempty"`
|
||||
|
||||
// WorkingDir will change the processes current working directory inside the container's rootfs
|
||||
WorkingDir string `json:"working_dir,omitempty"`
|
||||
|
||||
// Env will populate the processes environment with the provided values
|
||||
// Any values from the parent processes will be cleared before the values
|
||||
// provided in Env are provided to the process
|
||||
Env []string `json:"environment,omitempty"`
|
||||
|
||||
// Console is the path to the console allocated to the container.
|
||||
Console string `json:"console,omitempty"`
|
||||
|
||||
|
|
|
@ -94,11 +94,6 @@ func TestConfigJsonFormat(t *testing.T) {
|
|||
t.Fail()
|
||||
}
|
||||
|
||||
if n.VethPrefix != "veth" {
|
||||
t.Logf("veth prefix should be veth but received %q", n.VethPrefix)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if n.Gateway != "172.17.42.1" {
|
||||
t.Logf("veth gateway should be 172.17.42.1 but received %q", n.Gateway)
|
||||
t.Fail()
|
||||
|
@ -114,7 +109,7 @@ func TestConfigJsonFormat(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, d := range DefaultSimpleDevices {
|
||||
if !containsDevice(d, container.DeviceNodes) {
|
||||
if !containsDevice(d, container.Devices) {
|
||||
t.Logf("expected device configuration for %s", d.Path)
|
||||
t.Fail()
|
||||
}
|
||||
|
@ -163,3 +158,69 @@ func TestRemoveNamespace(t *testing.T) {
|
|||
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostUIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 0 {
|
||||
t.Fatal("expected uid 0 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostUIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
UidMappings: []IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 1000 {
|
||||
t.Fatal("expected uid 1000 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostGIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 0 {
|
||||
t.Fatal("expected gid 0 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostGIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
GidMappings: []IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 1000 {
|
||||
t.Fatal("expected gid 1000 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,19 +10,29 @@ const (
|
|||
)
|
||||
|
||||
type Device struct {
|
||||
// Device type, block, char, etc.
|
||||
Type rune `json:"type,omitempty"`
|
||||
// It is fine if this is an empty string in the case that you are using Wildcards
|
||||
|
||||
// Path to the device.
|
||||
Path string `json:"path,omitempty"`
|
||||
// Use the wildcard constant for wildcards.
|
||||
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major,omitempty"`
|
||||
// Use the wildcard constant for wildcards.
|
||||
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor,omitempty"`
|
||||
// Typically just "rwm"
|
||||
|
||||
// Cgroup permissions format, rwm.
|
||||
Permissions string `json:"permissions,omitempty"`
|
||||
// The permission bits of the file's mode
|
||||
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode `json:"file_mode,omitempty"`
|
||||
Uid uint32 `json:"uid,omitempty"`
|
||||
Gid uint32 `json:"gid,omitempty"`
|
||||
|
||||
// Uid of the device.
|
||||
Uid uint32 `json:"uid,omitempty"`
|
||||
|
||||
// Gid of the device.
|
||||
Gid uint32 `json:"gid,omitempty"`
|
||||
}
|
||||
|
||||
func (d *Device) CgroupString() string {
|
||||
|
|
122
configs/mount.go
122
configs/mount.go
|
@ -1,17 +1,5 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/docker/pkg/symlink"
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
const DefaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
type Mount struct {
|
||||
Type string `json:"type,omitempty"`
|
||||
Source string `json:"source,omitempty"` // Source path, in the host namespace
|
||||
|
@ -21,113 +9,3 @@ type Mount struct {
|
|||
Private bool `json:"private,omitempty"`
|
||||
Slave bool `json:"slave,omitempty"`
|
||||
}
|
||||
|
||||
func (m *Mount) Mount(rootfs, mountLabel string) error {
|
||||
switch m.Type {
|
||||
case "bind":
|
||||
return m.bindMount(rootfs, mountLabel)
|
||||
case "tmpfs":
|
||||
return m.tmpfsMount(rootfs, mountLabel)
|
||||
default:
|
||||
return fmt.Errorf("unsupported mount type %s for %s", m.Type, m.Destination)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Mount) bindMount(rootfs, mountLabel string) error {
|
||||
var (
|
||||
flags = syscall.MS_BIND | syscall.MS_REC
|
||||
dest = filepath.Join(rootfs, m.Destination)
|
||||
)
|
||||
|
||||
if !m.Writable {
|
||||
flags = flags | syscall.MS_RDONLY
|
||||
}
|
||||
|
||||
if m.Slave {
|
||||
flags = flags | syscall.MS_SLAVE
|
||||
}
|
||||
|
||||
stat, err := os.Stat(m.Source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// FIXME: (crosbymichael) This does not belong here and should be done a layer above
|
||||
dest, err = symlink.FollowSymlinkInScope(dest, rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return fmt.Errorf("creating new bind mount target %s", err)
|
||||
}
|
||||
|
||||
if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
||||
return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
||||
}
|
||||
|
||||
if !m.Writable {
|
||||
if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
|
||||
return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
|
||||
}
|
||||
}
|
||||
|
||||
if m.Relabel != "" {
|
||||
if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil {
|
||||
return fmt.Errorf("relabeling %s to %s %s", m.Source, mountLabel, err)
|
||||
}
|
||||
}
|
||||
|
||||
if m.Private {
|
||||
if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
||||
return fmt.Errorf("mounting %s private %s", dest, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Mount) tmpfsMount(rootfs, mountLabel string) error {
|
||||
var (
|
||||
err error
|
||||
l = label.FormatMountLabel("", mountLabel)
|
||||
dest = filepath.Join(rootfs, m.Destination)
|
||||
)
|
||||
|
||||
// FIXME: (crosbymichael) This does not belong here and should be done a layer above
|
||||
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := createIfNotExists(dest, true); err != nil {
|
||||
return fmt.Errorf("creating new tmpfs mount target %s", err)
|
||||
}
|
||||
|
||||
if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(DefaultMountFlags), l); err != nil {
|
||||
return fmt.Errorf("%s mounting %s in tmpfs", err, dest)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createIfNotExists(path string, isDir bool) error {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if isDir {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := os.OpenFile(path, os.O_CREATE, 0755)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -8,24 +8,24 @@ type Network struct {
|
|||
// Type sets the networks type, commonly veth and loopback
|
||||
Type string `json:"type,omitempty"`
|
||||
|
||||
// Name of the network interface
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// The bridge to use.
|
||||
Bridge string `json:"bridge,omitempty"`
|
||||
|
||||
// Prefix for the veth interfaces.
|
||||
VethPrefix string `json:"veth_prefix,omitempty"`
|
||||
|
||||
// MacAddress contains the MAC address to set on the network interface
|
||||
MacAddress string `json:"mac_address,omitempty"`
|
||||
|
||||
// Address contains the IPv4 and mask to set on the network interface
|
||||
Address string `json:"address,omitempty"`
|
||||
|
||||
// IPv6Address contains the IPv6 and mask to set on the network interface
|
||||
IPv6Address string `json:"ipv6_address,omitempty"`
|
||||
|
||||
// Gateway sets the gateway address that is used as the default for the interface
|
||||
Gateway string `json:"gateway,omitempty"`
|
||||
|
||||
// IPv6Address contains the IPv6 and mask to set on the network interface
|
||||
IPv6Address string `json:"ipv6_address,omitempty"`
|
||||
|
||||
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
|
||||
IPv6Gateway string `json:"ipv6_gateway,omitempty"`
|
||||
|
||||
|
@ -38,6 +38,10 @@ type Network struct {
|
|||
// container's interfaces if a pair is created, specifically in the case of type veth
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
TxQueueLen int `json:"txqueuelen,omitempty"`
|
||||
|
||||
// HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the
|
||||
// container.
|
||||
HostInterfaceName string `json:"host_interface_name,omitempty"`
|
||||
}
|
||||
|
||||
// Routes can be specified to create entries in the route table as the container is started
|
||||
|
|
|
@ -1,32 +1,5 @@
|
|||
package configs
|
||||
|
||||
// State represents a running container's state
|
||||
type State struct {
|
||||
// InitPid is the init process id in the parent namespace
|
||||
InitPid int `json:"init_pid,omitempty"`
|
||||
|
||||
// InitStartTime is the init process start time
|
||||
InitStartTime string `json:"init_start_time,omitempty"`
|
||||
|
||||
// Network runtime state.
|
||||
NetworkState NetworkState `json:"network_state,omitempty"`
|
||||
|
||||
// Path to all the cgroups setup for a container. Key is cgroup subsystem name.
|
||||
CgroupPaths map[string]string `json:"cgroup_paths,omitempty"`
|
||||
|
||||
Status Status `json:"status,omitempty"`
|
||||
}
|
||||
|
||||
// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers
|
||||
// Do not depend on it outside of libcontainer.
|
||||
// TODO: move veth names to config time
|
||||
type NetworkState struct {
|
||||
// The name of the veth interface on the Host.
|
||||
VethHost string `json:"veth_host,omitempty"`
|
||||
// The name of the veth interface created inside the container for the child.
|
||||
VethChild string `json:"veth_child,omitempty"`
|
||||
}
|
||||
|
||||
// The status of a container.
|
||||
type Status int
|
||||
|
||||
|
@ -43,3 +16,15 @@ const (
|
|||
// The container does not exist.
|
||||
Destroyed
|
||||
)
|
||||
|
||||
// State represents a running container's state
|
||||
type State struct {
|
||||
// InitPid is the init process id in the parent namespace
|
||||
InitPid int `json:"init_pid,omitempty"`
|
||||
|
||||
// InitStartTime is the init process start time
|
||||
InitStartTime string `json:"init_start_time,omitempty"`
|
||||
|
||||
// Path to all the cgroups setup for a container. Key is cgroup subsystem name.
|
||||
CgroupPaths map[string]string `json:"cgroup_paths,omitempty"`
|
||||
}
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Validator interface {
|
||||
Validate(*configs.Config) error
|
||||
}
|
||||
|
||||
func New() Validator {
|
||||
return &ConfigValidator{}
|
||||
}
|
||||
|
||||
type ConfigValidator struct {
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates the the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootfs != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if config.RestrictSys && !config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
package libcontainer
|
||||
|
||||
import "io"
|
||||
|
||||
// Console is a psuedo TTY.
|
||||
type Console interface {
|
||||
io.ReadWriter
|
||||
io.Closer
|
||||
|
||||
// Path returns the filesystem path to the slave side of the pty.
|
||||
Path() string
|
||||
|
||||
// Fd returns the fd for the master of the pty.
|
||||
Fd() uintptr
|
||||
}
|
|
@ -1,128 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package console
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
// Setup initializes the proper /dev/console inside the rootfs path
|
||||
func Setup(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error {
|
||||
oldMask := syscall.Umask(0000)
|
||||
defer syscall.Umask(oldMask)
|
||||
|
||||
if err := os.Chmod(consolePath, 0600); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.Chown(consolePath, hostRootUid, hostRootGid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := label.SetFileLabel(consolePath, mountLabel); err != nil {
|
||||
return fmt.Errorf("set file label %s %s", consolePath, err)
|
||||
}
|
||||
|
||||
dest := filepath.Join(rootfs, "dev/console")
|
||||
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("create %s %s", dest, err)
|
||||
}
|
||||
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
|
||||
if err := syscall.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil {
|
||||
return fmt.Errorf("bind %s to %s %s", consolePath, dest, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func OpenAndDup(consolePath string) error {
|
||||
slave, err := OpenTerminal(consolePath, syscall.O_RDWR)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open terminal %s", err)
|
||||
}
|
||||
|
||||
if err := syscall.Dup2(int(slave.Fd()), 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := syscall.Dup2(int(slave.Fd()), 1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return syscall.Dup2(int(slave.Fd()), 2)
|
||||
}
|
||||
|
||||
// Unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
|
||||
// Unlockpt should be called before opening the slave side of a pseudoterminal.
|
||||
func Unlockpt(f *os.File) error {
|
||||
var u int32
|
||||
|
||||
return Ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
|
||||
}
|
||||
|
||||
// Ptsname retrieves the name of the first available pts for the given master.
|
||||
func Ptsname(f *os.File) (string, error) {
|
||||
var n int32
|
||||
|
||||
if err := Ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return fmt.Sprintf("/dev/pts/%d", n), nil
|
||||
}
|
||||
|
||||
// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the
|
||||
// pts name for use as the pty slave inside the container
|
||||
func CreateMasterAndConsole() (*os.File, string, error) {
|
||||
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
console, err := Ptsname(master)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
if err := Unlockpt(master); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
return master, console, nil
|
||||
}
|
||||
|
||||
// OpenPtmx opens /dev/ptmx, i.e. the PTY master.
|
||||
func OpenPtmx() (*os.File, error) {
|
||||
// O_NOCTTY and O_CLOEXEC are not present in os package so we use the syscall's one for all.
|
||||
return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
|
||||
}
|
||||
|
||||
// OpenTerminal is a clone of os.OpenFile without the O_CLOEXEC
|
||||
// used to open the pty slave inside the container namespace
|
||||
func OpenTerminal(name string, flag int) (*os.File, error) {
|
||||
r, e := syscall.Open(name, flag, 0)
|
||||
if e != nil {
|
||||
return nil, &os.PathError{Op: "open", Path: name, Err: e}
|
||||
}
|
||||
return os.NewFile(uintptr(r), name), nil
|
||||
}
|
||||
|
||||
func Ioctl(fd uintptr, flag, data uintptr) error {
|
||||
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -6,16 +6,9 @@ package libcontainer
|
|||
import (
|
||||
"os"
|
||||
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/network"
|
||||
)
|
||||
|
||||
type Stats struct {
|
||||
NetworkStats *network.NetworkStats `json:"network_stats,omitempty"`
|
||||
CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"`
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
|
|
|
@ -60,10 +60,10 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
|
|||
}
|
||||
|
||||
func HostDevices() ([]*configs.Device, error) {
|
||||
return getDeviceNodes("/dev")
|
||||
return getDevices("/dev")
|
||||
}
|
||||
|
||||
func getDeviceNodes(path string) ([]*configs.Device, error) {
|
||||
func getDevices(path string) ([]*configs.Device, error) {
|
||||
files, err := ioutilReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -76,7 +76,7 @@ func getDeviceNodes(path string) ([]*configs.Device, error) {
|
|||
case "pts", "shm", "fd", "mqueue":
|
||||
continue
|
||||
default:
|
||||
sub, err := getDeviceNodes(filepath.Join(path, f.Name()))
|
||||
sub, err := getDevices(filepath.Join(path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
8
error.go
8
error.go
|
@ -54,3 +54,11 @@ type Error interface {
|
|||
// Returns the error code for this error.
|
||||
Code() ErrorCode
|
||||
}
|
||||
|
||||
type initError struct {
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func (i initError) Error() string {
|
||||
return i.Message
|
||||
}
|
||||
|
|
|
@ -26,13 +26,11 @@ func testExecPS(t *testing.T, userns bool) {
|
|||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer remove(rootfs)
|
||||
|
||||
config := newTemplateConfig(rootfs)
|
||||
if userns {
|
||||
config.UidMappings = []configs.IDMap{{0, 0, 1000}}
|
||||
|
@ -42,13 +40,11 @@ func testExecPS(t *testing.T, userns bool) {
|
|||
|
||||
buffers, exitCode, err := runContainer(config, "", "ps")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
t.Fatalf("%s: %s", buffers, err)
|
||||
}
|
||||
|
||||
if exitCode != 0 {
|
||||
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
|
||||
}
|
||||
|
||||
lines := strings.Split(buffers.Stdout.String(), "\n")
|
||||
if len(lines) < 2 {
|
||||
t.Fatalf("more than one process running for output %q", buffers.Stdout.String())
|
||||
|
@ -65,7 +61,7 @@ func TestIPCPrivate(t *testing.T) {
|
|||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -96,7 +92,7 @@ func TestIPCHost(t *testing.T) {
|
|||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -128,7 +124,7 @@ func TestIPCJoinPath(t *testing.T) {
|
|||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -161,7 +157,7 @@ func TestIPCBadPath(t *testing.T) {
|
|||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -181,7 +177,7 @@ func TestRlimit(t *testing.T) {
|
|||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -232,7 +228,7 @@ func TestEnter(t *testing.T) {
|
|||
}
|
||||
defer os.RemoveAll(root)
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -261,6 +257,7 @@ func TestEnter(t *testing.T) {
|
|||
|
||||
pconfig := libcontainer.Process{
|
||||
Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"},
|
||||
Env: standardEnvironment,
|
||||
Stdin: stdinR,
|
||||
Stdout: &stdout,
|
||||
}
|
||||
|
@ -335,7 +332,7 @@ func TestFreeze(t *testing.T) {
|
|||
}
|
||||
defer os.RemoveAll(root)
|
||||
|
||||
rootfs, err := newRootFs()
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -361,6 +358,7 @@ func TestFreeze(t *testing.T) {
|
|||
|
||||
pconfig := libcontainer.Process{
|
||||
Args: []string{"cat"},
|
||||
Env: standardEnvironment,
|
||||
Stdin: stdinR,
|
||||
}
|
||||
pid, err := container.Start(&pconfig)
|
||||
|
|
|
@ -15,14 +15,12 @@ func init() {
|
|||
if len(os.Args) < 2 || os.Args[1] != "init" {
|
||||
return
|
||||
}
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
|
||||
factory, err := libcontainer.New("", nil)
|
||||
if err != nil {
|
||||
log.Fatalf("unable to initialize for container: %s", err)
|
||||
}
|
||||
|
||||
factory.StartInitialization(3)
|
||||
|
||||
os.Exit(1)
|
||||
}
|
||||
|
|
|
@ -6,13 +6,20 @@ import (
|
|||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
var standardEnvironment = []string{
|
||||
"HOME=/root",
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"HOSTNAME=integration",
|
||||
"TERM=xterm",
|
||||
}
|
||||
|
||||
// newTemplateConfig returns a base template for running a container
|
||||
//
|
||||
// it uses a network strategy of just setting a loopback interface
|
||||
// and the default setup for devices
|
||||
func newTemplateConfig(rootfs string) *configs.Config {
|
||||
return &configs.Config{
|
||||
RootFs: rootfs,
|
||||
Rootfs: rootfs,
|
||||
Capabilities: []string{
|
||||
"CHOWN",
|
||||
"DAC_OVERRIDE",
|
||||
|
@ -43,14 +50,8 @@ func newTemplateConfig(rootfs string) *configs.Config {
|
|||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
},
|
||||
|
||||
DeviceNodes: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "integration",
|
||||
Env: []string{
|
||||
"HOME=/root",
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"HOSTNAME=integration",
|
||||
"TERM=xterm",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "integration",
|
||||
Networks: []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
|
|
|
@ -2,12 +2,11 @@ package integration
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer"
|
||||
|
@ -28,31 +27,19 @@ type stdBuffers struct {
|
|||
Stderr *bytes.Buffer
|
||||
}
|
||||
|
||||
func writeConfig(config *configs.Config) error {
|
||||
f, err := os.OpenFile(filepath.Join(config.RootFs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700)
|
||||
if err != nil {
|
||||
return err
|
||||
func (b *stdBuffers) String() string {
|
||||
s := []string{}
|
||||
if b.Stderr != nil {
|
||||
s = append(s, b.Stderr.String())
|
||||
}
|
||||
defer f.Close()
|
||||
return json.NewEncoder(f).Encode(config)
|
||||
if b.Stdout != nil {
|
||||
s = append(s, b.Stdout.String())
|
||||
}
|
||||
return strings.Join(s, "|")
|
||||
}
|
||||
|
||||
func loadConfig() (*configs.Config, error) {
|
||||
f, err := os.Open(filepath.Join(os.Getenv("data_path"), "container.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var container *configs.Config
|
||||
if err := json.NewDecoder(f).Decode(&container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return container, nil
|
||||
}
|
||||
|
||||
// newRootFs creates a new tmp directory and copies the busybox root filesystem
|
||||
func newRootFs() (string, error) {
|
||||
// newRootfs creates a new tmp directory and copies the busybox root filesystem
|
||||
func newRootfs() (string, error) {
|
||||
dir, err := ioutil.TempDir("", "")
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -85,14 +72,10 @@ func copyBusybox(dest string) error {
|
|||
// buffers are returned containing the STDOUT and STDERR output for the run
|
||||
// along with the exit code and any go error
|
||||
func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) {
|
||||
if err := writeConfig(config); err != nil {
|
||||
return nil, -1, err
|
||||
}
|
||||
|
||||
buffers = newStdBuffers()
|
||||
|
||||
process := &libcontainer.Process{
|
||||
Args: args,
|
||||
Env: standardEnvironment,
|
||||
Stdin: buffers.Stdin,
|
||||
Stdout: buffers.Stdout,
|
||||
Stderr: buffers.Stderr,
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
|
||||
|
||||
var capabilityList = map[string]capability.Cap{
|
||||
"SETPCAP": capability.CAP_SETPCAP,
|
||||
"SYS_MODULE": capability.CAP_SYS_MODULE,
|
||||
"SYS_RAWIO": capability.CAP_SYS_RAWIO,
|
||||
"SYS_PACCT": capability.CAP_SYS_PACCT,
|
||||
"SYS_ADMIN": capability.CAP_SYS_ADMIN,
|
||||
"SYS_NICE": capability.CAP_SYS_NICE,
|
||||
"SYS_RESOURCE": capability.CAP_SYS_RESOURCE,
|
||||
"SYS_TIME": capability.CAP_SYS_TIME,
|
||||
"SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG,
|
||||
"MKNOD": capability.CAP_MKNOD,
|
||||
"AUDIT_WRITE": capability.CAP_AUDIT_WRITE,
|
||||
"AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL,
|
||||
"MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE,
|
||||
"MAC_ADMIN": capability.CAP_MAC_ADMIN,
|
||||
"NET_ADMIN": capability.CAP_NET_ADMIN,
|
||||
"SYSLOG": capability.CAP_SYSLOG,
|
||||
"CHOWN": capability.CAP_CHOWN,
|
||||
"NET_RAW": capability.CAP_NET_RAW,
|
||||
"DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE,
|
||||
"FOWNER": capability.CAP_FOWNER,
|
||||
"DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH,
|
||||
"FSETID": capability.CAP_FSETID,
|
||||
"KILL": capability.CAP_KILL,
|
||||
"SETGID": capability.CAP_SETGID,
|
||||
"SETUID": capability.CAP_SETUID,
|
||||
"LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE,
|
||||
"NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
|
||||
"NET_BROADCAST": capability.CAP_NET_BROADCAST,
|
||||
"IPC_LOCK": capability.CAP_IPC_LOCK,
|
||||
"IPC_OWNER": capability.CAP_IPC_OWNER,
|
||||
"SYS_CHROOT": capability.CAP_SYS_CHROOT,
|
||||
"SYS_PTRACE": capability.CAP_SYS_PTRACE,
|
||||
"SYS_BOOT": capability.CAP_SYS_BOOT,
|
||||
"LEASE": capability.CAP_LEASE,
|
||||
"SETFCAP": capability.CAP_SETFCAP,
|
||||
"WAKE_ALARM": capability.CAP_WAKE_ALARM,
|
||||
"BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND,
|
||||
}
|
||||
|
||||
func newCapWhitelist(caps []string) (*whitelist, error) {
|
||||
l := []capability.Cap{}
|
||||
for _, c := range caps {
|
||||
v, ok := capabilityList[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
l = append(l, v)
|
||||
}
|
||||
pid, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &whitelist{
|
||||
keep: l,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type whitelist struct {
|
||||
pid capability.Capabilities
|
||||
keep []capability.Cap
|
||||
}
|
||||
|
||||
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
|
||||
func (w *whitelist) dropBoundingSet() error {
|
||||
w.pid.Clear(capability.BOUNDS)
|
||||
w.pid.Set(capability.BOUNDS, w.keep...)
|
||||
return w.pid.Apply(capability.BOUNDS)
|
||||
}
|
||||
|
||||
// drop drops all capabilities for the current process except those specified in the whitelist.
|
||||
func (w *whitelist) drop() error {
|
||||
w.pid.Clear(allCapabilityTypes)
|
||||
w.pid.Set(allCapabilityTypes, w.keep...)
|
||||
return w.pid.Apply(allCapabilityTypes)
|
||||
}
|
|
@ -0,0 +1,145 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
func NewConsole() (Console, error) {
|
||||
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
console, err := ptsname(master)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := unlockpt(master); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &linuxConsole{
|
||||
slavePath: console,
|
||||
master: master,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// newConsoleFromPath is an internal fucntion returning an initialzied console for use inside
|
||||
// a container's MNT namespace.
|
||||
func newConsoleFromPath(slavePath string) *linuxConsole {
|
||||
return &linuxConsole{
|
||||
slavePath: slavePath,
|
||||
}
|
||||
}
|
||||
|
||||
// linuxConsole is a linux psuedo TTY for use within a container.
|
||||
type linuxConsole struct {
|
||||
master *os.File
|
||||
slavePath string
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Fd() uintptr {
|
||||
return c.master.Fd()
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Path() string {
|
||||
return c.slavePath
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Read(b []byte) (int, error) {
|
||||
return c.master.Read(b)
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Write(b []byte) (int, error) {
|
||||
return c.master.Write(b)
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Close() error {
|
||||
if m := c.master; m != nil {
|
||||
return m.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount initializes the console inside the rootfs mounting with the specified mount label
|
||||
// and applying the correct ownership of the console.
|
||||
func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
|
||||
oldMask := syscall.Umask(0000)
|
||||
defer syscall.Umask(oldMask)
|
||||
if err := os.Chmod(c.slavePath, 0600); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Chown(c.slavePath, uid, gid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
dest := filepath.Join(rootfs, "dev/console")
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
|
||||
}
|
||||
|
||||
// dupStdio opens the slavePath for the console and dup2s the fds to the current
|
||||
// processes stdio, fd 0,1,2.
|
||||
func (c *linuxConsole) dupStdio() error {
|
||||
slave, err := c.open(syscall.O_RDWR)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd := int(slave.Fd())
|
||||
for _, i := range []int{0, 1, 2} {
|
||||
if err := syscall.Dup2(fd, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
|
||||
func (c *linuxConsole) open(flag int) (*os.File, error) {
|
||||
r, e := syscall.Open(c.slavePath, flag, 0)
|
||||
if e != nil {
|
||||
return nil, &os.PathError{
|
||||
Op: "open",
|
||||
Path: c.slavePath,
|
||||
Err: e,
|
||||
}
|
||||
}
|
||||
return os.NewFile(uintptr(r), c.slavePath), nil
|
||||
}
|
||||
|
||||
func ioctl(fd uintptr, flag, data uintptr) error {
|
||||
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
|
||||
// unlockpt should be called before opening the slave side of a pty.
|
||||
func unlockpt(f *os.File) error {
|
||||
var u int32
|
||||
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
|
||||
}
|
||||
|
||||
// ptsname retrieves the name of the first available pts for the given master.
|
||||
func ptsname(f *os.File) (string, error) {
|
||||
var n int32
|
||||
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return fmt.Sprintf("/dev/pts/%d", n), nil
|
||||
}
|
|
@ -3,44 +3,23 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
const (
|
||||
EXIT_SIGNAL_OFFSET = 128
|
||||
)
|
||||
|
||||
type initError struct {
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func (i initError) Error() string {
|
||||
return i.Message
|
||||
}
|
||||
|
||||
type linuxContainer struct {
|
||||
id string
|
||||
root string
|
||||
config *configs.Config
|
||||
state *configs.State
|
||||
cgroupManager cgroups.Manager
|
||||
initArgs []string
|
||||
initProcess parentProcess
|
||||
}
|
||||
|
||||
// ID returns the container's unique ID
|
||||
|
@ -54,12 +33,11 @@ func (c *linuxContainer) Config() configs.Config {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) Status() (configs.Status, error) {
|
||||
if c.state.InitPid <= 0 {
|
||||
if c.initProcess == nil {
|
||||
return configs.Destroyed, nil
|
||||
}
|
||||
// return Running if the init process is alive
|
||||
err := syscall.Kill(c.state.InitPid, 0)
|
||||
if err != nil {
|
||||
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return configs.Destroyed, nil
|
||||
}
|
||||
|
@ -90,8 +68,15 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
|||
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
|
||||
return stats, newGenericError(err, SystemError)
|
||||
}
|
||||
if stats.NetworkStats, err = network.GetStats(&c.state.NetworkState); err != nil {
|
||||
return stats, newGenericError(err, SystemError)
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
|
||||
if err != nil {
|
||||
return stats, newGenericError(err, SystemError)
|
||||
}
|
||||
stats.Interfaces = append(stats.Interfaces, istats)
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
@ -101,174 +86,125 @@ func (c *linuxContainer) Start(process *Process) (int, error) {
|
|||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
doInit := status == configs.Destroyed
|
||||
parent, err := c.newParentProcess(process, doInit)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if err := parent.start(); err != nil {
|
||||
// terminate the process to ensure that it properly is reaped.
|
||||
if err := parent.terminate(); err != nil {
|
||||
glog.Warning(err)
|
||||
}
|
||||
return -1, err
|
||||
}
|
||||
if doInit {
|
||||
c.initProcess = parent
|
||||
}
|
||||
return parent.pid(), nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
|
||||
parentPipe, childPipe, err := newPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !doInit {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil
|
||||
}
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe), nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||||
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
|
||||
cmd.Stdin = process.Stdin
|
||||
cmd.Stdout = process.Stdout
|
||||
cmd.Stderr = process.Stderr
|
||||
cmd.Env = c.config.Env
|
||||
cmd.Dir = c.config.RootFs
|
||||
cmd.Stdin = p.Stdin
|
||||
cmd.Stdout = p.Stdout
|
||||
cmd.Stderr = p.Stderr
|
||||
cmd.Dir = c.config.Rootfs
|
||||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
// TODO: add pdeath to config for a container
|
||||
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
|
||||
if status != configs.Destroyed {
|
||||
glog.Info("start new container process")
|
||||
// TODO: (crosbymichael) check out console use for execin
|
||||
//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
|
||||
return c.startNewProcess(cmd, process.Args)
|
||||
}
|
||||
if err := c.startInitProcess(cmd, process.Args); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return c.state.InitPid, nil
|
||||
cmd.ExtraFiles = []*os.File{childPipe}
|
||||
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
|
||||
return cmd, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) {
|
||||
var err error
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
defer parent.Close()
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid))
|
||||
if err := cmd.Start(); err != nil {
|
||||
child.Close()
|
||||
return -1, err
|
||||
}
|
||||
child.Close()
|
||||
s, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if !s.Success() {
|
||||
return -1, &exec.ExitError{s}
|
||||
}
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
if err := decoder.Decode(&pid); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
p, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
terminate := func(terr error) (int, error) {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
return -1, terr
|
||||
}
|
||||
// Enter cgroups.
|
||||
if err := enterCgroups(c.state, pid.Pid); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
encoder := json.NewEncoder(parent)
|
||||
if err := encoder.Encode(c.config); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
process := processArgs{
|
||||
Config: c.config,
|
||||
Args: args,
|
||||
}
|
||||
if err := encoder.Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
return pid.Pid, nil
|
||||
}
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *initProcess {
|
||||
t := "_LIBCONTAINER_INITTYPE=standard"
|
||||
|
||||
func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
||||
// create a pipe so that we can syncronize with the namespaced process and
|
||||
// pass the state and configuration to the child process
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer parent.Close()
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags()
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
addUidGidMappings(cmd.SysProcAttr, c.config)
|
||||
cloneFlags := c.config.Namespaces.CloneFlags()
|
||||
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
|
||||
c.addUidGidMappings(cmd.SysProcAttr)
|
||||
// Default to root user when user namespaces are enabled.
|
||||
if cmd.SysProcAttr.Credential == nil {
|
||||
cmd.SysProcAttr.Credential = &syscall.Credential{}
|
||||
}
|
||||
t = "_LIBCONTAINER_INITTYPE=userns"
|
||||
}
|
||||
glog.Info("starting container init process")
|
||||
err = cmd.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
wait := func() (*os.ProcessState, error) {
|
||||
ps, err := cmd.Process.Wait()
|
||||
// we should kill all processes in cgroup when init is died if we use
|
||||
// host PID namespace
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
c.killAllPids()
|
||||
}
|
||||
return ps, newGenericError(err, SystemError)
|
||||
}
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
cmd.Process.Kill()
|
||||
wait()
|
||||
return terr
|
||||
}
|
||||
started, err := system.GetProcessStartTime(cmd.Process.Pid)
|
||||
if err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
c.cgroupManager.Destroy()
|
||||
}
|
||||
}()
|
||||
var networkState configs.NetworkState
|
||||
if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
process := processArgs{
|
||||
Args: args,
|
||||
Config: c.config,
|
||||
NetworkState: &networkState,
|
||||
}
|
||||
// Start the setup process to setup the init process
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
}
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return terminate(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
return terminate(ierr)
|
||||
cmd.Env = append(cmd.Env, t)
|
||||
cmd.SysProcAttr.Cloneflags = cloneFlags
|
||||
return &initProcess{
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
manager: c.cgroupManager,
|
||||
config: c.newInitConfig(p),
|
||||
}
|
||||
}
|
||||
|
||||
c.state.InitPid = cmd.Process.Pid
|
||||
c.state.InitStartTime = started
|
||||
c.state.NetworkState = networkState
|
||||
c.state.CgroupPaths = c.cgroupManager.GetPaths()
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess {
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
|
||||
"_LIBCONTAINER_INITTYPE=setns",
|
||||
)
|
||||
// TODO: set on container for process management
|
||||
return &setnsProcess{
|
||||
cmd: cmd,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
config: c.newInitConfig(p),
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
return &initConfig{
|
||||
Config: c.config,
|
||||
Args: process.Args,
|
||||
Env: process.Env,
|
||||
}
|
||||
}
|
||||
|
||||
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
|
||||
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) {
|
||||
if c.config.UidMappings != nil {
|
||||
sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings))
|
||||
for i, um := range c.config.UidMappings {
|
||||
sys.UidMappings[i].ContainerID = um.ContainerID
|
||||
sys.UidMappings[i].HostID = um.HostID
|
||||
sys.UidMappings[i].Size = um.Size
|
||||
}
|
||||
}
|
||||
if c.config.GidMappings != nil {
|
||||
sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings))
|
||||
for i, gm := range c.config.GidMappings {
|
||||
sys.GidMappings[i].ContainerID = gm.ContainerID
|
||||
sys.GidMappings[i].HostID = gm.HostID
|
||||
sys.GidMappings[i].Size = gm.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newPipe() (parent *os.File, child *os.File, err error) {
|
||||
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Destroy() error {
|
||||
|
@ -279,6 +215,7 @@ func (c *linuxContainer) Destroy() error {
|
|||
if status != configs.Destroyed {
|
||||
return newGenericError(nil, ContainerNotStopped)
|
||||
}
|
||||
// TODO: remove cgroups
|
||||
return os.RemoveAll(c.root)
|
||||
}
|
||||
|
||||
|
@ -291,289 +228,11 @@ func (c *linuxContainer) Resume() error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) Signal(signal os.Signal) error {
|
||||
glog.Infof("sending signal %d to pid %d", signal, c.state.InitPid)
|
||||
panic("not implemented")
|
||||
glog.Infof("sending signal %d to pid %d", signal, c.initProcess.pid())
|
||||
return c.initProcess.signal(signal)
|
||||
}
|
||||
|
||||
// TODO: rename to be more descriptive
|
||||
func (c *linuxContainer) OOM() (<-chan struct{}, error) {
|
||||
return NotifyOnOOM(c.state)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) updateStateFile() error {
|
||||
fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename))
|
||||
f, err := os.Create(fnew)
|
||||
if err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := json.NewEncoder(f).Encode(c.state); err != nil {
|
||||
f.Close()
|
||||
os.Remove(fnew)
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
fname := filepath.Join(c.root, stateFilename)
|
||||
if err := os.Rename(fnew, fname); err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// New returns a newly initialized Pipe for communication between processes
|
||||
func newInitPipe() (parent *os.File, child *os.File, err error) {
|
||||
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
|
||||
}
|
||||
|
||||
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
|
||||
func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
|
||||
if container.UidMappings != nil {
|
||||
sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
|
||||
for i, um := range container.UidMappings {
|
||||
sys.UidMappings[i].ContainerID = um.ContainerID
|
||||
sys.UidMappings[i].HostID = um.HostID
|
||||
sys.UidMappings[i].Size = um.Size
|
||||
}
|
||||
}
|
||||
|
||||
if container.GidMappings != nil {
|
||||
sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
|
||||
for i, gm := range container.GidMappings {
|
||||
sys.GidMappings[i].ContainerID = gm.ContainerID
|
||||
sys.GidMappings[i].HostID = gm.HostID
|
||||
sys.GidMappings[i].Size = gm.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// killAllPids iterates over all of the container's processes
|
||||
// sending a SIGKILL to each process.
|
||||
func (c *linuxContainer) killAllPids() error {
|
||||
glog.Info("killing all processes in container")
|
||||
var procs []*os.Process
|
||||
c.cgroupManager.Freeze(configs.Frozen)
|
||||
pids, err := c.cgroupManager.GetPids()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
// TODO: log err without aborting if we are unable to find
|
||||
// a single PID
|
||||
if p, err := os.FindProcess(pid); err == nil {
|
||||
procs = append(procs, p)
|
||||
p.Kill()
|
||||
}
|
||||
}
|
||||
c.cgroupManager.Freeze(configs.Thawed)
|
||||
for _, p := range procs {
|
||||
p.Wait()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// initializeNetworking creates the container's network stack outside of the namespace and moves
|
||||
// interfaces into the container's net namespaces if necessary
|
||||
func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error {
|
||||
glog.Info("initailzing container's network stack")
|
||||
for _, config := range c.config.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.Create(config, nspid, networkState); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error {
|
||||
command := exec.Command(args[0], args[1:]...)
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer parent.Close()
|
||||
command.ExtraFiles = []*os.File{child}
|
||||
command.Dir = container.RootFs
|
||||
command.Env = append(command.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
|
||||
fmt.Sprintf("_LIBCONTAINER_USERNS=1"))
|
||||
err = command.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s, err := command.Process.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !s.Success() {
|
||||
return &exec.ExitError{s}
|
||||
}
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
if err := decoder.Decode(&pid); err != nil {
|
||||
return err
|
||||
}
|
||||
p, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
return terr
|
||||
}
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
|
||||
return terminate(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
return ierr
|
||||
}
|
||||
s, err = p.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !s.Success() {
|
||||
return &exec.ExitError{s}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type pid struct {
|
||||
Pid int `json:"Pid"`
|
||||
}
|
||||
|
||||
// Finalize entering into a container and execute a specified command
|
||||
func InitIn(pipe *os.File) (err error) {
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if err := json.NewEncoder(pipe).Encode(initError{
|
||||
Message: err.Error(),
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
decoder := json.NewDecoder(pipe)
|
||||
var config *configs.Config
|
||||
if err := decoder.Decode(&config); err != nil {
|
||||
return err
|
||||
}
|
||||
var process *processArgs
|
||||
if err := decoder.Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeSetns(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil {
|
||||
return err
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// finalize expects that the setns calls have been setup and that is has joined an
|
||||
// existing namespace
|
||||
func finalizeSetns(container *configs.Config) error {
|
||||
// clear the current processes env and replace it with the environment defined on the container
|
||||
if err := loadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupRlimits(container); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
|
||||
if err := finalizeNamespace(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||
}
|
||||
|
||||
if container.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupContainer is run to setup mounts and networking related operations
|
||||
// for a user namespace enabled process as a user namespace root doesn't
|
||||
// have permissions to perform these operations.
|
||||
// The setup process joins all the namespaces of user namespace enabled init
|
||||
// except the user namespace, so it run as root in the root user namespace
|
||||
// to perform these operations.
|
||||
func SetupContainer(process *processArgs) error {
|
||||
container := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO : move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(container.RootFs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := loadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cloneFlags := container.Namespaces.CloneFlags()
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(container.Networks) != 0 || len(container.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(container, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(container); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
label.Init()
|
||||
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(container); err != nil {
|
||||
return fmt.Errorf("setup mount namespace %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func enterCgroups(state *configs.State, pid int) error {
|
||||
return cgroups.EnterPid(state.CgroupPaths, pid)
|
||||
return NotifyOnOOM(c.cgroupManager.GetPaths())
|
||||
}
|
||||
|
|
|
@ -69,7 +69,6 @@ func TestGetContainerStats(t *testing.T) {
|
|||
},
|
||||
},
|
||||
},
|
||||
state: &configs.State{},
|
||||
}
|
||||
|
||||
stats, err := container.Stats()
|
||||
|
|
419
linux_factory.go
419
linux_factory.go
|
@ -9,24 +9,12 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
cgroups "github.com/docker/libcontainer/cgroups/manager"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/security/capabilities"
|
||||
"github.com/docker/libcontainer/security/restrict"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
"github.com/docker/libcontainer/configs/validate"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -39,13 +27,6 @@ var (
|
|||
maxIdLen = 1024
|
||||
)
|
||||
|
||||
// Process is used for transferring parameters from Exec() to Init()
|
||||
type processArgs struct {
|
||||
Args []string `json:"args,omitempty"`
|
||||
Config *configs.Config `json:"config,omitempty"`
|
||||
NetworkState *configs.NetworkState `json:"network_state,omitempty"`
|
||||
}
|
||||
|
||||
// New returns a linux based container factory based in the root directory.
|
||||
func New(root string, initArgs []string) (Factory, error) {
|
||||
if root != "" {
|
||||
|
@ -54,16 +35,18 @@ func New(root string, initArgs []string) (Factory, error) {
|
|||
}
|
||||
}
|
||||
return &linuxFactory{
|
||||
root: root,
|
||||
initArgs: initArgs,
|
||||
root: root,
|
||||
initArgs: initArgs,
|
||||
validator: validate.New(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// linuxFactory implements the default factory interface for linux based systems.
|
||||
type linuxFactory struct {
|
||||
// root is the root directory
|
||||
root string
|
||||
initArgs []string
|
||||
root string
|
||||
initArgs []string
|
||||
validator validate.Validator
|
||||
}
|
||||
|
||||
func (l *linuxFactory) Create(id string, config *configs.Config) (Container, error) {
|
||||
|
@ -73,6 +56,9 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := l.validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.root, id)
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse)
|
||||
|
@ -96,14 +82,12 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
os.RemoveAll(containerRoot)
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
cgroupManager := cgroups.NewCgroupManager(config.Cgroups)
|
||||
return &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
initArgs: l.initArgs,
|
||||
state: &configs.State{},
|
||||
cgroupManager: cgroupManager,
|
||||
cgroupManager: cgroups.NewCgroupManager(config.Cgroups),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -122,27 +106,32 @@ func (l *linuxFactory) Load(id string) (Container, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r := &restoredProcess{
|
||||
processPid: state.InitPid,
|
||||
processStartTime: state.InitStartTime,
|
||||
}
|
||||
cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths)
|
||||
glog.Infof("using %s as cgroup manager", cgroupManager)
|
||||
return &linuxContainer{
|
||||
initProcess: r,
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
state: state,
|
||||
cgroupManager: cgroupManager,
|
||||
initArgs: l.initArgs,
|
||||
cgroupManager: cgroupManager,
|
||||
root: containerRoot,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
||||
pipe := os.NewFile(uintptr(pipefd), "pipe")
|
||||
setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != ""
|
||||
pid := os.Getenv("_LIBCONTAINER_INITPID")
|
||||
if pid != "" && !setupUserns {
|
||||
return InitIn(pipe)
|
||||
}
|
||||
var (
|
||||
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||||
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
|
||||
)
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
|
@ -159,27 +148,11 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
|||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
uncleanRootfs, err := os.Getwd()
|
||||
i, err := newContainerInit(it, pipe)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var process *processArgs
|
||||
// We always read this as it is a way to sync with the parent as well
|
||||
if err := json.NewDecoder(pipe).Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
if setupUserns {
|
||||
err = SetupContainer(process)
|
||||
if err == nil {
|
||||
os.Exit(0)
|
||||
} else {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if process.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return l.initUserNs(uncleanRootfs, process)
|
||||
}
|
||||
return l.initDefault(uncleanRootfs, process)
|
||||
return i.Init()
|
||||
}
|
||||
|
||||
func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) {
|
||||
|
@ -224,336 +197,34 @@ func (l *linuxFactory) validateID(id string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) {
|
||||
config := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO: move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := loadContainerEnvironment(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := console.OpenAndDup(config.Console); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
cloneFlags := config.Namespaces.CloneFlags()
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(config.Networks) != 0 || len(config.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(config, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(config); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
if err := setupRlimits(config); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.Hostname != "" {
|
||||
// TODO: (crosbymichael) move this to pre spawn validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
|
||||
}
|
||||
if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
// TODO: (crosbymichael) make this configurable at the Config level
|
||||
if config.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if err := finalizeNamespace(config); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := restoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
return system.Execv(process.Args[0], process.Args[0:], config.Env)
|
||||
// restoredProcess represents a process where the calling process may or may not be
|
||||
// the parent process. This process is created when a factory loads a container from
|
||||
// a persisted state.
|
||||
type restoredProcess struct {
|
||||
processPid int
|
||||
processStartTime string
|
||||
}
|
||||
|
||||
func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) {
|
||||
config := process.Config
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the config
|
||||
if err := loadContainerEnvironment(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := console.OpenAndDup("/dev/console"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
if config.WorkingDir == "" {
|
||||
config.WorkingDir = "/"
|
||||
}
|
||||
|
||||
if err := setupRlimits(config); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
cloneFlags := config.Namespaces.CloneFlags()
|
||||
if config.Hostname != "" {
|
||||
// TODO: move validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
|
||||
}
|
||||
if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
if config.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if err := finalizeNamespace(config); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := restoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
return system.Execv(process.Args[0], process.Args[0:], config.Env)
|
||||
func (p *restoredProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
// restoreParentDeathSignal sets the parent death signal to old.
|
||||
func restoreParentDeathSignal(old int) error {
|
||||
if old == 0 {
|
||||
return nil
|
||||
}
|
||||
current, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if old == current {
|
||||
return nil
|
||||
}
|
||||
if err := system.ParentDeathSignal(uintptr(old)); err != nil {
|
||||
return fmt.Errorf("set parent death signal %s", err)
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return nil
|
||||
func (p *restoredProcess) pid() int {
|
||||
return p.processPid
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *configs.Config) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get supplementary groups %s", err)
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, config.AdditionalGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return fmt.Errorf("setgroups %s", err)
|
||||
}
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("setgid %s", err)
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("setuid %s", err)
|
||||
}
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return fmt.Errorf("set HOME %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
func (p *restoredProcess) terminate() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
|
||||
}
|
||||
|
||||
// setupVethNetwork uses the Network config if it is not nil to initialize
|
||||
// the new veth interface inside the container for use by changing the name to eth0
|
||||
// setting the MTU and IP address along with the default gateway
|
||||
func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err1 := strategy.Initialize(config, networkState)
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
func (p *restoredProcess) startTime() (string, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func setupRlimits(config *configs.Config) error {
|
||||
for _, rlimit := range config.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaky file descriptors
|
||||
// before execing the command inside the namespace
|
||||
func finalizeNamespace(config *configs.Config) error {
|
||||
// Ensure that all non-standard fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(3); err != nil {
|
||||
return fmt.Errorf("close open file descriptors %s", err)
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := capabilities.DropBoundingSet(config.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop bounding set %s", err)
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return fmt.Errorf("set keep caps %s", err)
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return fmt.Errorf("setup user %s", err)
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return fmt.Errorf("clear keep caps %s", err)
|
||||
}
|
||||
// drop all other capabilities
|
||||
if err := capabilities.DropCapabilities(config.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop capabilities %s", err)
|
||||
}
|
||||
if config.WorkingDir != "" {
|
||||
if err := syscall.Chdir(config.WorkingDir); err != nil {
|
||||
return fmt.Errorf("chdir to %s %s", config.WorkingDir, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadContainerEnvironment(config *configs.Config) error {
|
||||
os.Clearenv()
|
||||
for _, pair := range config.Env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
for _, ns := range namespaces {
|
||||
if ns.Path != "" {
|
||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
func (p *restoredProcess) signal(s os.Signal) error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError)
|
||||
}
|
||||
|
|
|
@ -29,21 +29,17 @@ func TestFactoryNew(t *testing.T) {
|
|||
t.Fatal(rerr)
|
||||
}
|
||||
defer os.RemoveAll(root)
|
||||
|
||||
factory, err := New(root, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if factory == nil {
|
||||
t.Fatal("factory should not be nil")
|
||||
}
|
||||
|
||||
lfactory, ok := factory.(*linuxFactory)
|
||||
if !ok {
|
||||
t.Fatal("expected linux factory returned on linux based systems")
|
||||
}
|
||||
|
||||
if lfactory.root != root {
|
||||
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.root)
|
||||
}
|
||||
|
@ -55,17 +51,14 @@ func TestFactoryLoadNotExists(t *testing.T) {
|
|||
t.Fatal(rerr)
|
||||
}
|
||||
defer os.RemoveAll(root)
|
||||
|
||||
factory, err := New(root, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = factory.Load("nocontainer")
|
||||
if err == nil {
|
||||
t.Fatal("expected nil error loading non-existing container")
|
||||
}
|
||||
|
||||
lerr, ok := err.(Error)
|
||||
if !ok {
|
||||
t.Fatal("expected libcontainer error type")
|
||||
|
@ -81,18 +74,16 @@ func TestFactoryLoadContainer(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(root)
|
||||
|
||||
// setup default container config and state for mocking
|
||||
var (
|
||||
id = "1"
|
||||
expectedConfig = &configs.Config{
|
||||
RootFs: "/mycontainer/root",
|
||||
Rootfs: "/mycontainer/root",
|
||||
}
|
||||
expectedState = &configs.State{
|
||||
InitPid: 1024,
|
||||
}
|
||||
)
|
||||
|
||||
if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -102,34 +93,27 @@ func TestFactoryLoadContainer(t *testing.T) {
|
|||
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
factory, err := New(root, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
container, err := factory.Load(id)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if container.ID() != id {
|
||||
t.Fatalf("expected container id %q but received %q", id, container.ID())
|
||||
}
|
||||
|
||||
config := container.Config()
|
||||
|
||||
if config.RootFs != expectedConfig.RootFs {
|
||||
t.Fatalf("expected rootfs %q but received %q", expectedConfig.RootFs, config.RootFs)
|
||||
if config.Rootfs != expectedConfig.Rootfs {
|
||||
t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs)
|
||||
}
|
||||
|
||||
lcontainer, ok := container.(*linuxContainer)
|
||||
if !ok {
|
||||
t.Fatal("expected linux container on linux based systems")
|
||||
}
|
||||
|
||||
if lcontainer.state.InitPid != expectedState.InitPid {
|
||||
t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.state.InitPid)
|
||||
if lcontainer.initProcess.pid() != expectedState.InitPid {
|
||||
t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.initProcess.pid())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,228 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
type initType string
|
||||
|
||||
const (
|
||||
initSetns initType = "setns"
|
||||
initStandard initType = "standard"
|
||||
initUserns initType = "userns"
|
||||
initUsernsSetup initType = "userns_setup"
|
||||
)
|
||||
|
||||
type pid struct {
|
||||
Pid int `json:"pid"`
|
||||
}
|
||||
|
||||
// network is an internal struct used to setup container networks.
|
||||
type network struct {
|
||||
configs.Network
|
||||
|
||||
// TempVethPeerName is a unique tempory veth peer name that was placed into
|
||||
// the container's namespace.
|
||||
TempVethPeerName string `json:"temp_veth_peer_name"`
|
||||
}
|
||||
|
||||
// Process is used for transferring parameters from Exec() to Init()
|
||||
type initConfig struct {
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Cwd string `json:"cwd"`
|
||||
User string `json:"user"`
|
||||
Config *configs.Config `json:"config"`
|
||||
Networks []*network `json:"network"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
Init() error
|
||||
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := populateProcessEnvironment(config.Env); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch t {
|
||||
case initSetns:
|
||||
return &linuxSetnsInit{
|
||||
config: config,
|
||||
}, nil
|
||||
case initUserns:
|
||||
return &linuxUsernsInit{
|
||||
config: config,
|
||||
}, nil
|
||||
case initUsernsSetup:
|
||||
return &linuxUsernsSideCar{
|
||||
config: config,
|
||||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
config: config,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
}
|
||||
|
||||
// populateProcessEnvironment loads the provided environment variables into the
|
||||
// current processes's environment.
|
||||
func populateProcessEnvironment(env []string) error {
|
||||
for _, pair := range env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaky file descriptors
|
||||
// before execing the command inside the namespace
|
||||
func finalizeNamespace(config *initConfig) error {
|
||||
// Ensure that all non-standard fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(3); err != nil {
|
||||
return err
|
||||
}
|
||||
w, err := newCapWhitelist(config.Config.Capabilities)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := w.dropBoundingSet(); err != nil {
|
||||
return err
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
// drop all other capabilities
|
||||
if err := w.drop(); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Cwd != "" {
|
||||
if err := syscall.Chdir(config.Cwd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
for _, ns := range namespaces {
|
||||
if ns.Path != "" {
|
||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *initConfig) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return err
|
||||
}
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupNetwork sets up and initializes any network interface inside the container.
|
||||
func setupNetwork(config *initConfig) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.initialize(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(config *configs.Config) error {
|
||||
for _, rlimit := range config.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,214 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotValidStrategyType = errors.New("not a valid network strategy type")
|
||||
)
|
||||
|
||||
var strategies = map[string]networkStrategy{
|
||||
"veth": &veth{},
|
||||
"loopback": &loopback{},
|
||||
}
|
||||
|
||||
// networkStrategy represents a specific network configuration for
|
||||
// a container's networking stack
|
||||
type networkStrategy interface {
|
||||
create(*network, int) error
|
||||
initialize(*network) error
|
||||
}
|
||||
|
||||
// getStrategy returns the specific network strategy for the
|
||||
// provided type. If no strategy is registered for the type an
|
||||
// ErrNotValidStrategyType is returned.
|
||||
func getStrategy(tpe string) (networkStrategy, error) {
|
||||
s, exists := strategies[tpe]
|
||||
if !exists {
|
||||
return nil, ErrNotValidStrategyType
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
|
||||
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
|
||||
out := &NetworkInterface{Name: interfaceName}
|
||||
// This can happen if the network runtime information is missing - possible if the
|
||||
// container was created by an old version of libcontainer.
|
||||
if interfaceName == "" {
|
||||
return out, nil
|
||||
}
|
||||
type netStatsPair struct {
|
||||
// Where to write the output.
|
||||
Out *uint64
|
||||
// The network stats file to read.
|
||||
File string
|
||||
}
|
||||
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
|
||||
netStats := []netStatsPair{
|
||||
{Out: &out.RxBytes, File: "tx_bytes"},
|
||||
{Out: &out.RxPackets, File: "tx_packets"},
|
||||
{Out: &out.RxErrors, File: "tx_errors"},
|
||||
{Out: &out.RxDropped, File: "tx_dropped"},
|
||||
|
||||
{Out: &out.TxBytes, File: "rx_bytes"},
|
||||
{Out: &out.TxPackets, File: "rx_packets"},
|
||||
{Out: &out.TxErrors, File: "rx_errors"},
|
||||
{Out: &out.TxDropped, File: "rx_dropped"},
|
||||
}
|
||||
for _, netStat := range netStats {
|
||||
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*(netStat.Out) = data
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
|
||||
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
|
||||
}
|
||||
|
||||
// loopback is a network strategy that provides a basic loopback device
|
||||
type loopback struct {
|
||||
}
|
||||
|
||||
func (l *loopback) create(n *network, nspid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) initialize(config *network) error {
|
||||
iface, err := net.InterfaceByName("lo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkLinkUp(iface)
|
||||
}
|
||||
|
||||
// veth is a network strategy that uses a bridge and creates
|
||||
// a veth pair, one that stays outside on the host and the other
|
||||
// is placed inside the container's namespace
|
||||
type veth struct {
|
||||
}
|
||||
|
||||
func (v *veth) create(n *network, nspid int) (err error) {
|
||||
tmpName, err := v.generateTempPeerName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.TempVethPeerName = tmpName
|
||||
defer func() {
|
||||
if err != nil {
|
||||
netlink.NetworkLinkDel(n.HostInterfaceName)
|
||||
netlink.NetworkLinkDel(n.TempVethPeerName)
|
||||
}
|
||||
}()
|
||||
if n.Bridge == "" {
|
||||
return fmt.Errorf("bridge is not specified")
|
||||
}
|
||||
bridge, err := net.InterfaceByName(n.Bridge)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
|
||||
return err
|
||||
}
|
||||
host, err := net.InterfaceByName(n.HostInterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddToBridge(host, bridge); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkLinkUp(host); err != nil {
|
||||
return err
|
||||
}
|
||||
child, err := net.InterfaceByName(n.TempVethPeerName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkSetNsPid(child, nspid)
|
||||
}
|
||||
|
||||
func (v *veth) generateTempPeerName() (string, error) {
|
||||
return utils.GenerateRandomName("veth", 7)
|
||||
}
|
||||
|
||||
func (v *veth) initialize(config *network) error {
|
||||
peer := config.TempVethPeerName
|
||||
if peer == "" {
|
||||
return fmt.Errorf("peer is not specified")
|
||||
}
|
||||
child, err := net.InterfaceByName(peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkLinkDown(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkChangeName(child, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
// get the interface again after we changed the name as the index also changes.
|
||||
if child, err = net.InterfaceByName(config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.MacAddress != "" {
|
||||
if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ip, ipNet, err := net.ParseCIDR(config.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.IPv6Address != "" {
|
||||
if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.NetworkLinkUp(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Gateway != "" {
|
||||
if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.IPv6Gateway != "" {
|
||||
if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,275 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
type parentProcess interface {
|
||||
// pid returns the pid for the running process.
|
||||
pid() int
|
||||
|
||||
// start starts the process execution.
|
||||
start() error
|
||||
|
||||
// send a SIGKILL to the process and wait for the exit.
|
||||
terminate() error
|
||||
|
||||
// wait waits on the process returning the process state.
|
||||
wait() (*os.ProcessState, error)
|
||||
|
||||
// startTime return's the process start time.
|
||||
startTime() (string, error)
|
||||
|
||||
signal(os.Signal) error
|
||||
}
|
||||
|
||||
type setnsProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
forkedProcess *os.Process
|
||||
cgroupPaths map[string]string
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (string, error) {
|
||||
return system.GetProcessStartTime(p.pid())
|
||||
}
|
||||
|
||||
func (p *setnsProcess) signal(s os.Signal) error {
|
||||
return p.forkedProcess.Signal(s)
|
||||
}
|
||||
|
||||
func (p *setnsProcess) start() (err error) {
|
||||
defer p.parentPipe.Close()
|
||||
if p.forkedProcess, err = p.execSetns(); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.forkedProcess.Pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
func (p *setnsProcess) execSetns() (*os.Process, error) {
|
||||
err := p.cmd.Start()
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !status.Success() {
|
||||
return nil, &exec.ExitError{status}
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return os.FindProcess(pid.Pid)
|
||||
}
|
||||
|
||||
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
|
||||
// avoid the process becomming a zombie.
|
||||
func (p *setnsProcess) terminate() error {
|
||||
if p.forkedProcess == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.forkedProcess.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
||||
return p.forkedProcess.Wait()
|
||||
}
|
||||
|
||||
func (p *setnsProcess) pid() int {
|
||||
return p.forkedProcess.Pid
|
||||
}
|
||||
|
||||
type initProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
config *initConfig
|
||||
manager cgroups.Manager
|
||||
}
|
||||
|
||||
func (p *initProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *initProcess) start() error {
|
||||
defer p.parentPipe.Close()
|
||||
err := p.cmd.Start()
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
}
|
||||
}()
|
||||
if err := p.createNetworkInterfaces(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Start the setup process to setup the init process
|
||||
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 {
|
||||
parent, err := p.newUsernsSetupProcess()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := parent.start(); err != nil {
|
||||
if err := parent.terminate(); err != nil {
|
||||
glog.Warning(err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
if _, err := parent.wait(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := p.sendConfig(); err != nil {
|
||||
return err
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
if ierr != nil {
|
||||
return ierr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) wait() (*os.ProcessState, error) {
|
||||
state, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
||||
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 {
|
||||
// TODO: this will not work for the success path because libcontainer
|
||||
// does not wait on the process. This needs to be moved to destroy or add a Wait()
|
||||
// method back onto the container.
|
||||
var procs []*os.Process
|
||||
p.manager.Freeze(configs.Frozen)
|
||||
pids, err := p.manager.GetPids()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
// TODO: log err without aborting if we are unable to find
|
||||
// a single PID
|
||||
if p, err := os.FindProcess(pid); err == nil {
|
||||
procs = append(procs, p)
|
||||
p.Kill()
|
||||
}
|
||||
}
|
||||
p.manager.Freeze(configs.Thawed)
|
||||
for _, p := range procs {
|
||||
p.Wait()
|
||||
}
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (p *initProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *initProcess) startTime() (string, error) {
|
||||
return system.GetProcessStartTime(p.pid())
|
||||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
|
||||
}
|
||||
|
||||
func (p *initProcess) createNetworkInterfaces() error {
|
||||
for _, config := range p.config.Config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n := &network{
|
||||
Network: *config,
|
||||
}
|
||||
if err := strategy.create(n, p.pid()); err != nil {
|
||||
return err
|
||||
}
|
||||
p.config.Networks = append(p.config.Networks, n)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) {
|
||||
parentPipe, childPipe, err := newPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...)
|
||||
cmd.ExtraFiles = []*os.File{childPipe}
|
||||
cmd.Dir = p.cmd.Dir
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", p.pid()),
|
||||
fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_setup"),
|
||||
)
|
||||
return &setnsProcess{
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
config: p.config,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *initProcess) signal(s os.Signal) error {
|
||||
return p.cmd.Process.Signal(s)
|
||||
}
|
|
@ -0,0 +1,404 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/symlink"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
type mount struct {
|
||||
source string
|
||||
path string
|
||||
device string
|
||||
flags int
|
||||
data string
|
||||
}
|
||||
|
||||
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
|
||||
// new mount namespace.
|
||||
func setupRootfs(config *configs.Config) (err error) {
|
||||
if err := prepareRoot(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountSystem(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// apply any user specified mounts within the new mount namespace
|
||||
for _, m := range config.Mounts {
|
||||
if err := mountUserMount(m, config.Rootfs, config.MountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := createDevices(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupPtmx(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
|
||||
// Re-open them inside this namespace.
|
||||
// FIXME: Need to fix this for user namespaces.
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err := reOpenDevNull(config.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Chdir(config.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.Rootfs)
|
||||
} else {
|
||||
err = pivotRoot(config.Rootfs, config.PivotDir)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Readonlyfs {
|
||||
if err := setReadonly(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
syscall.Umask(0022)
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts
|
||||
// inside the mount namespace
|
||||
func mountSystem(config *configs.Config) error {
|
||||
for _, m := range newSystemMounts(config.Rootfs, config.MountLabel, config.RestrictSys) {
|
||||
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupDevSymlinks(rootfs string) error {
|
||||
var links = [][2]string{
|
||||
{"/proc/self/fd", "/dev/fd"},
|
||||
{"/proc/self/fd/0", "/dev/stdin"},
|
||||
{"/proc/self/fd/1", "/dev/stdout"},
|
||||
{"/proc/self/fd/2", "/dev/stderr"},
|
||||
}
|
||||
|
||||
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
|
||||
// in /dev if it exists in /proc.
|
||||
if _, err := os.Stat("/proc/kcore"); err == nil {
|
||||
links = append(links, [2]string{"/proc/kcore", "/dev/kcore"})
|
||||
}
|
||||
|
||||
for _, link := range links {
|
||||
var (
|
||||
src = link[0]
|
||||
dst = filepath.Join(rootfs, link[1])
|
||||
)
|
||||
|
||||
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("symlink %s %s %s", src, dst, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
|
||||
// standard bind mounts allowing them to be more dynamic
|
||||
func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount {
|
||||
systemMounts := []mount{
|
||||
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
||||
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
|
||||
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||
{source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags},
|
||||
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
||||
}
|
||||
|
||||
sysMountFlags := defaultMountFlags
|
||||
if sysReadonly {
|
||||
sysMountFlags |= syscall.MS_RDONLY
|
||||
}
|
||||
|
||||
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: sysMountFlags})
|
||||
|
||||
return systemMounts
|
||||
}
|
||||
|
||||
// Is stdin, stdout or stderr were to be pointing to '/dev/null',
|
||||
// this method will make them point to '/dev/null' from within this namespace.
|
||||
func reOpenDevNull(rootfs string) error {
|
||||
var stat, devNullStat syscall.Stat_t
|
||||
file, err := os.Open(filepath.Join(rootfs, "/dev/null"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
||||
}
|
||||
defer file.Close()
|
||||
if err = syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
|
||||
return fmt.Errorf("Failed to stat /dev/null - %s", err)
|
||||
}
|
||||
for fd := 0; fd < 3; fd++ {
|
||||
if err = syscall.Fstat(fd, &stat); err != nil {
|
||||
return fmt.Errorf("Failed to stat fd %d - %s", fd, err)
|
||||
}
|
||||
if stat.Rdev == devNullStat.Rdev {
|
||||
// Close and re-open the fd.
|
||||
if err = syscall.Dup2(int(file.Fd()), fd); err != nil {
|
||||
return fmt.Errorf("Failed to dup fd %d to fd %d - %s", file.Fd(), fd, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create the device nodes in the container.
|
||||
func createDevices(config *configs.Config) error {
|
||||
oldMask := syscall.Umask(0000)
|
||||
for _, node := range config.Devices {
|
||||
if err := createDeviceNode(config.Rootfs, node); err != nil {
|
||||
syscall.Umask(oldMask)
|
||||
return err
|
||||
}
|
||||
}
|
||||
syscall.Umask(oldMask)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device) error {
|
||||
var (
|
||||
dest = filepath.Join(rootfs, node.Path)
|
||||
parent = filepath.Dir(dest)
|
||||
)
|
||||
if err := os.MkdirAll(parent, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c':
|
||||
fileMode |= syscall.S_IFCHR
|
||||
case 'b':
|
||||
fileMode |= syscall.S_IFBLK
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("mknod %s %s", node.Path, err)
|
||||
}
|
||||
if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
|
||||
return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareRoot(config *configs.Config) error {
|
||||
flag := syscall.MS_PRIVATE | syscall.MS_REC
|
||||
if config.NoPivotRoot {
|
||||
flag = syscall.MS_SLAVE | syscall.MS_REC
|
||||
}
|
||||
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
}
|
||||
|
||||
func setReadonly() error {
|
||||
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
|
||||
}
|
||||
|
||||
func setupPtmx(config *configs.Config) error {
|
||||
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
gid, err := config.HostGID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
console := newConsoleFromPath(config.Console)
|
||||
return console.mount(config.Rootfs, config.MountLabel, uid, gid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func pivotRoot(rootfs, pivotBaseDir string) error {
|
||||
if pivotBaseDir == "" {
|
||||
pivotBaseDir = "/"
|
||||
}
|
||||
tmpDir := filepath.Join(rootfs, pivotBaseDir)
|
||||
if err := os.MkdirAll(tmpDir, 0755); err != nil {
|
||||
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
|
||||
}
|
||||
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
|
||||
}
|
||||
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
if err := syscall.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
|
||||
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
}
|
||||
return os.Remove(pivotDir)
|
||||
}
|
||||
|
||||
func msMoveRoot(rootfs string) error {
|
||||
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Chroot("."); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Chdir("/")
|
||||
}
|
||||
|
||||
func mountUserMount(m *configs.Mount, rootfs, mountLabel string) error {
|
||||
switch m.Type {
|
||||
case "bind":
|
||||
return bindMount(m, rootfs, mountLabel)
|
||||
case "tmpfs":
|
||||
return tmpfsMount(m, rootfs, mountLabel)
|
||||
default:
|
||||
return fmt.Errorf("unsupported mount type %s for %s", m.Type, m.Destination)
|
||||
}
|
||||
}
|
||||
|
||||
func bindMount(m *configs.Mount, rootfs, mountLabel string) error {
|
||||
var (
|
||||
flags = syscall.MS_BIND | syscall.MS_REC
|
||||
dest = filepath.Join(rootfs, m.Destination)
|
||||
)
|
||||
if !m.Writable {
|
||||
flags = flags | syscall.MS_RDONLY
|
||||
}
|
||||
if m.Slave {
|
||||
flags = flags | syscall.MS_SLAVE
|
||||
}
|
||||
stat, err := os.Stat(m.Source)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO: (crosbymichael) This does not belong here and should be done a layer above
|
||||
dest, err = symlink.FollowSymlinkInScope(dest, rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return fmt.Errorf("creating new bind mount target %s", err)
|
||||
}
|
||||
if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
if !m.Writable {
|
||||
if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if m.Relabel != "" {
|
||||
if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if m.Private {
|
||||
if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tmpfsMount(m *configs.Mount, rootfs, mountLabel string) error {
|
||||
var (
|
||||
err error
|
||||
l = label.FormatMountLabel("", mountLabel)
|
||||
dest = filepath.Join(rootfs, m.Destination)
|
||||
)
|
||||
// TODO: (crosbymichael) This does not belong here and should be done a layer above
|
||||
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := createIfNotExists(dest, true); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l)
|
||||
}
|
||||
|
||||
// createIfNotExists creates a file or a directory only if it does not already exist.
|
||||
func createIfNotExists(path string, isDir bool) error {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if isDir {
|
||||
return os.MkdirAll(path, 0755)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := os.OpenFile(path, os.O_CREATE, 0755)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// remountReadonly will bind over the top of an existing path and ensure that it is read-only.
|
||||
func remountReadonly(path string) error {
|
||||
for i := 0; i < 5; i++ {
|
||||
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
|
||||
switch err {
|
||||
case syscall.EINVAL:
|
||||
// Probably not a mountpoint, use bind-mount
|
||||
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
|
||||
case syscall.EBUSY:
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
|
||||
}
|
||||
|
||||
// maskProckcore bind mounts /dev/null over the top of /proc/kcore inside a container to avoid security
|
||||
// issues from processes reading memory information.
|
||||
func maskProckcore() error {
|
||||
if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
// linuxSetnsInit performs the container's initialization for running a new process
|
||||
// inside an existing container.
|
||||
type linuxSetnsInit struct {
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env)
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
consolePath := l.config.Config.Console
|
||||
if consolePath != "" {
|
||||
console := newConsoleFromPath(consolePath)
|
||||
if err := console.dupStdio(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return err
|
||||
}
|
||||
if consolePath != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := setupNetwork(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRoute(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
if err := setupRootfs(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.RestrictSys {
|
||||
for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} {
|
||||
if err := remountReadonly(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := maskProckcore(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env)
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
type linuxUsernsInit struct {
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxUsernsInit) Init() error {
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
consolePath := l.config.Config.Console
|
||||
if consolePath != "" {
|
||||
console := newConsoleFromPath(consolePath)
|
||||
if err := console.dupStdio(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return err
|
||||
}
|
||||
if consolePath != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if l.config.Cwd == "" {
|
||||
l.config.Cwd = "/"
|
||||
}
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.RestrictSys {
|
||||
for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} {
|
||||
if err := remountReadonly(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := maskProckcore(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env)
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
// linuxUsernsSideCar is run to setup mounts and networking related operations
|
||||
// for a user namespace enabled process as a user namespace root doesn't
|
||||
// have permissions to perform these operations.
|
||||
// The setup process joins all the namespaces of user namespace enabled init
|
||||
// except the user namespace, so it run as root in the root user namespace
|
||||
// to perform these operations.
|
||||
type linuxUsernsSideCar struct {
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxUsernsSideCar) Init() error {
|
||||
if err := setupNetwork(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRoute(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
if err := setupRootfs(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
215
mount/init.go
215
mount/init.go
|
@ -1,215 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
)
|
||||
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
type mount struct {
|
||||
source string
|
||||
path string
|
||||
device string
|
||||
flags int
|
||||
data string
|
||||
}
|
||||
|
||||
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
|
||||
// new mount namespace.
|
||||
func InitializeMountNamespace(config *configs.Config) (err error) {
|
||||
if err := prepareRoot(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountSystem(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// apply any user specified mounts within the new mount namespace
|
||||
for _, m := range config.Mounts {
|
||||
if err := m.Mount(config.RootFs, config.MountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := createDeviceNodes(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupPtmx(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
|
||||
// Re-open them inside this namespace.
|
||||
// FIXME: Need to fix this for user namespaces.
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err := reOpenDevNull(config.RootFs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := setupDevSymlinks(config.RootFs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Chdir(config.RootFs); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.RootFs)
|
||||
} else {
|
||||
err = pivotRoot(config.RootFs, config.PivotDir)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if config.ReadonlyFs {
|
||||
if err := setReadonly(); err != nil {
|
||||
return fmt.Errorf("set readonly %s", err)
|
||||
}
|
||||
}
|
||||
syscall.Umask(0022)
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts
|
||||
// inside the mount namespace
|
||||
func mountSystem(config *configs.Config) error {
|
||||
for _, m := range newSystemMounts(config.RootFs, config.MountLabel, config.RestrictSys) {
|
||||
if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("mkdirall %s %s", m.path, err)
|
||||
}
|
||||
if err := syscall.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil {
|
||||
return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupDevSymlinks(rootfs string) error {
|
||||
var links = [][2]string{
|
||||
{"/proc/self/fd", "/dev/fd"},
|
||||
{"/proc/self/fd/0", "/dev/stdin"},
|
||||
{"/proc/self/fd/1", "/dev/stdout"},
|
||||
{"/proc/self/fd/2", "/dev/stderr"},
|
||||
}
|
||||
|
||||
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
|
||||
// in /dev if it exists in /proc.
|
||||
if _, err := os.Stat("/proc/kcore"); err == nil {
|
||||
links = append(links, [2]string{"/proc/kcore", "/dev/kcore"})
|
||||
}
|
||||
|
||||
for _, link := range links {
|
||||
var (
|
||||
src = link[0]
|
||||
dst = filepath.Join(rootfs, link[1])
|
||||
)
|
||||
|
||||
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("symlink %s %s %s", src, dst, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: this is crappy right now and should be cleaned up with a better way of handling system and
|
||||
// standard bind mounts allowing them to be more dynamic
|
||||
func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount {
|
||||
systemMounts := []mount{
|
||||
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
||||
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
|
||||
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
||||
{source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags},
|
||||
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
||||
}
|
||||
|
||||
sysMountFlags := defaultMountFlags
|
||||
if sysReadonly {
|
||||
sysMountFlags |= syscall.MS_RDONLY
|
||||
}
|
||||
|
||||
systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: sysMountFlags})
|
||||
|
||||
return systemMounts
|
||||
}
|
||||
|
||||
// Is stdin, stdout or stderr were to be pointing to '/dev/null',
|
||||
// this method will make them point to '/dev/null' from within this namespace.
|
||||
func reOpenDevNull(rootfs string) error {
|
||||
var stat, devNullStat syscall.Stat_t
|
||||
file, err := os.Open(filepath.Join(rootfs, "/dev/null"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
||||
}
|
||||
defer file.Close()
|
||||
if err = syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
|
||||
return fmt.Errorf("Failed to stat /dev/null - %s", err)
|
||||
}
|
||||
for fd := 0; fd < 3; fd++ {
|
||||
if err = syscall.Fstat(fd, &stat); err != nil {
|
||||
return fmt.Errorf("Failed to stat fd %d - %s", fd, err)
|
||||
}
|
||||
if stat.Rdev == devNullStat.Rdev {
|
||||
// Close and re-open the fd.
|
||||
if err = syscall.Dup2(int(file.Fd()), fd); err != nil {
|
||||
return fmt.Errorf("Failed to dup fd %d to fd %d - %s", file.Fd(), fd, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create the device nodes in the container.
|
||||
func createDeviceNodes(config *configs.Config) error {
|
||||
oldMask := syscall.Umask(0000)
|
||||
for _, node := range config.DeviceNodes {
|
||||
if err := createDeviceNode(config.RootFs, node); err != nil {
|
||||
syscall.Umask(oldMask)
|
||||
return err
|
||||
}
|
||||
}
|
||||
syscall.Umask(oldMask)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device) error {
|
||||
var (
|
||||
dest = filepath.Join(rootfs, node.Path)
|
||||
parent = filepath.Dir(dest)
|
||||
)
|
||||
if err := os.MkdirAll(parent, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c':
|
||||
fileMode |= syscall.S_IFCHR
|
||||
case 'b':
|
||||
fileMode |= syscall.S_IFBLK
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("mknod %s %s", node.Path, err)
|
||||
}
|
||||
if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
|
||||
return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareRoot(config *configs.Config) error {
|
||||
flag := syscall.MS_PRIVATE | syscall.MS_REC
|
||||
if config.NoPivotRoot {
|
||||
flag = syscall.MS_SLAVE | syscall.MS_REC
|
||||
}
|
||||
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Mount(config.RootFs, config.RootFs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import "syscall"
|
||||
|
||||
func msMoveRoot(rootfs string) error {
|
||||
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Chroot("."); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Chdir("/")
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func pivotRoot(rootfs, pivotBaseDir string) error {
|
||||
if pivotBaseDir == "" {
|
||||
pivotBaseDir = "/"
|
||||
}
|
||||
tmpDir := filepath.Join(rootfs, pivotBaseDir)
|
||||
if err := os.MkdirAll(tmpDir, 0755); err != nil {
|
||||
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
|
||||
}
|
||||
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
|
||||
}
|
||||
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
if err := syscall.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
|
||||
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
}
|
||||
return os.Remove(pivotDir)
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/console"
|
||||
)
|
||||
|
||||
func setupPtmx(config *configs.Config) error {
|
||||
ptmx := filepath.Join(config.RootFs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
gid, err := config.HostGID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return console.Setup(config.RootFs, config.Console, config.MountLabel, uid, gid)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func setReadonly() error {
|
||||
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package network
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
// Loopback is a network strategy that provides a basic loopback device
|
||||
type Loopback struct {
|
||||
}
|
||||
|
||||
func (l *Loopback) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *Loopback) Initialize(config *configs.Network, networkState *configs.NetworkState) error {
|
||||
// Do not set the MTU on the loopback interface - use the default.
|
||||
if err := InterfaceUp("lo"); err != nil {
|
||||
return fmt.Errorf("lo up %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,117 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package network
|
||||
|
||||
import (
|
||||
"net"
|
||||
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
)
|
||||
|
||||
func InterfaceUp(name string) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkLinkUp(iface)
|
||||
}
|
||||
|
||||
func InterfaceDown(name string) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkLinkDown(iface)
|
||||
}
|
||||
|
||||
func ChangeInterfaceName(old, newName string) error {
|
||||
iface, err := net.InterfaceByName(old)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkChangeName(iface, newName)
|
||||
}
|
||||
|
||||
func CreateVethPair(name1, name2 string, txQueueLen int) error {
|
||||
return netlink.NetworkCreateVethPair(name1, name2, txQueueLen)
|
||||
}
|
||||
|
||||
func SetInterfaceInNamespacePid(name string, nsPid int) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkSetNsPid(iface, nsPid)
|
||||
}
|
||||
|
||||
func SetInterfaceInNamespaceFd(name string, fd uintptr) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkSetNsFd(iface, int(fd))
|
||||
}
|
||||
|
||||
func SetInterfaceMaster(name, master string) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
masterIface, err := net.InterfaceByName(master)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.AddToBridge(iface, masterIface)
|
||||
}
|
||||
|
||||
func SetDefaultGateway(ip, ifaceName string) error {
|
||||
return netlink.AddDefaultGw(ip, ifaceName)
|
||||
}
|
||||
|
||||
func SetInterfaceMac(name string, macaddr string) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkSetMacAddress(iface, macaddr)
|
||||
}
|
||||
|
||||
func SetInterfaceIp(name string, rawIp string) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ip, ipNet, err := net.ParseCIDR(rawIp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkLinkAddIp(iface, ip, ipNet)
|
||||
}
|
||||
|
||||
func DeleteInterfaceIp(name string, rawIp string) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ip, ipNet, err := net.ParseCIDR(rawIp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkLinkDelIp(iface, ip, ipNet)
|
||||
}
|
||||
|
||||
func SetMtu(name string, mtu int) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.NetworkSetMTU(iface, mtu)
|
||||
}
|
||||
|
||||
func SetHairpinMode(name string, enabled bool) error {
|
||||
iface, err := net.InterfaceByName(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.SetHairpinMode(iface, enabled)
|
||||
}
|
|
@ -1,76 +0,0 @@
|
|||
package network
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetworkStats struct {
|
||||
RxBytes uint64 `json:"rx_bytes"`
|
||||
RxPackets uint64 `json:"rx_packets"`
|
||||
RxErrors uint64 `json:"rx_errors"`
|
||||
RxDropped uint64 `json:"rx_dropped"`
|
||||
TxBytes uint64 `json:"tx_bytes"`
|
||||
TxPackets uint64 `json:"tx_packets"`
|
||||
TxErrors uint64 `json:"tx_errors"`
|
||||
TxDropped uint64 `json:"tx_dropped"`
|
||||
}
|
||||
|
||||
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
|
||||
func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) {
|
||||
// This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer.
|
||||
if networkState.VethHost == "" {
|
||||
return &NetworkStats{}, nil
|
||||
}
|
||||
|
||||
out := &NetworkStats{}
|
||||
|
||||
type netStatsPair struct {
|
||||
// Where to write the output.
|
||||
Out *uint64
|
||||
|
||||
// The network stats file to read.
|
||||
File string
|
||||
}
|
||||
|
||||
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
|
||||
netStats := []netStatsPair{
|
||||
{Out: &out.RxBytes, File: "tx_bytes"},
|
||||
{Out: &out.RxPackets, File: "tx_packets"},
|
||||
{Out: &out.RxErrors, File: "tx_errors"},
|
||||
{Out: &out.RxDropped, File: "tx_dropped"},
|
||||
|
||||
{Out: &out.TxBytes, File: "rx_bytes"},
|
||||
{Out: &out.TxPackets, File: "rx_packets"},
|
||||
{Out: &out.TxErrors, File: "rx_errors"},
|
||||
{Out: &out.TxDropped, File: "rx_dropped"},
|
||||
}
|
||||
for _, netStat := range netStats {
|
||||
data, err := readSysfsNetworkStats(networkState.VethHost, netStat.File)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*(netStat.Out) = data
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
|
||||
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
|
||||
fullPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
|
||||
data, err := ioutil.ReadFile(fullPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
value, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package network
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotValidStrategyType = errors.New("not a valid network strategy type")
|
||||
)
|
||||
|
||||
var strategies = map[string]NetworkStrategy{
|
||||
"veth": &Veth{},
|
||||
"loopback": &Loopback{},
|
||||
}
|
||||
|
||||
// NetworkStrategy represents a specific network configuration for
|
||||
// a container's networking stack
|
||||
type NetworkStrategy interface {
|
||||
Create(*configs.Network, int, *configs.NetworkState) error
|
||||
Initialize(*configs.Network, *configs.NetworkState) error
|
||||
}
|
||||
|
||||
// GetStrategy returns the specific network strategy for the
|
||||
// provided type. If no strategy is registered for the type an
|
||||
// ErrNotValidStrategyType is returned.
|
||||
func GetStrategy(tpe string) (NetworkStrategy, error) {
|
||||
s, exists := strategies[tpe]
|
||||
if !exists {
|
||||
return nil, ErrNotValidStrategyType
|
||||
}
|
||||
return s, nil
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
package network
|
123
network/veth.go
123
network/veth.go
|
@ -1,123 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package network
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
// Veth is a network strategy that uses a bridge and creates
|
||||
// a veth pair, one that stays outside on the host and the other
|
||||
// is placed inside the container's namespace
|
||||
type Veth struct {
|
||||
}
|
||||
|
||||
const defaultDevice = "eth0"
|
||||
|
||||
func (v *Veth) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error {
|
||||
var (
|
||||
bridge = n.Bridge
|
||||
prefix = n.VethPrefix
|
||||
txQueueLen = n.TxQueueLen
|
||||
)
|
||||
if bridge == "" {
|
||||
return fmt.Errorf("bridge is not specified")
|
||||
}
|
||||
if prefix == "" {
|
||||
return fmt.Errorf("veth prefix is not specified")
|
||||
}
|
||||
name1, name2, err := createVethPair(prefix, txQueueLen)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := SetInterfaceMaster(name1, bridge); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := SetMtu(name1, n.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := InterfaceUp(name1); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := SetInterfaceInNamespacePid(name2, nspid); err != nil {
|
||||
return err
|
||||
}
|
||||
networkState.VethHost = name1
|
||||
networkState.VethChild = name2
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *Veth) Initialize(config *configs.Network, networkState *configs.NetworkState) error {
|
||||
var vethChild = networkState.VethChild
|
||||
if vethChild == "" {
|
||||
return fmt.Errorf("vethChild is not specified")
|
||||
}
|
||||
if err := InterfaceDown(vethChild); err != nil {
|
||||
return fmt.Errorf("interface down %s %s", vethChild, err)
|
||||
}
|
||||
if err := ChangeInterfaceName(vethChild, defaultDevice); err != nil {
|
||||
return fmt.Errorf("change %s to %s %s", vethChild, defaultDevice, err)
|
||||
}
|
||||
if config.MacAddress != "" {
|
||||
if err := SetInterfaceMac(defaultDevice, config.MacAddress); err != nil {
|
||||
return fmt.Errorf("set %s mac %s", defaultDevice, err)
|
||||
}
|
||||
}
|
||||
if err := SetInterfaceIp(defaultDevice, config.Address); err != nil {
|
||||
return fmt.Errorf("set %s ip %s", defaultDevice, err)
|
||||
}
|
||||
if config.IPv6Address != "" {
|
||||
if err := SetInterfaceIp(defaultDevice, config.IPv6Address); err != nil {
|
||||
return fmt.Errorf("set %s ipv6 %s", defaultDevice, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := SetMtu(defaultDevice, config.Mtu); err != nil {
|
||||
return fmt.Errorf("set %s mtu to %d %s", defaultDevice, config.Mtu, err)
|
||||
}
|
||||
if err := InterfaceUp(defaultDevice); err != nil {
|
||||
return fmt.Errorf("%s up %s", defaultDevice, err)
|
||||
}
|
||||
if config.Gateway != "" {
|
||||
if err := SetDefaultGateway(config.Gateway, defaultDevice); err != nil {
|
||||
return fmt.Errorf("set gateway to %s on device %s failed with %s", config.Gateway, defaultDevice, err)
|
||||
}
|
||||
}
|
||||
if config.IPv6Gateway != "" {
|
||||
if err := SetDefaultGateway(config.IPv6Gateway, defaultDevice); err != nil {
|
||||
return fmt.Errorf("set gateway for ipv6 to %s on device %s failed with %s", config.IPv6Gateway, defaultDevice, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// createVethPair will automatically generage two random names for
|
||||
// the veth pair and ensure that they have been created
|
||||
func createVethPair(prefix string, txQueueLen int) (name1 string, name2 string, err error) {
|
||||
for i := 0; i < 10; i++ {
|
||||
if name1, err = utils.GenerateRandomName(prefix, 7); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if name2, err = utils.GenerateRandomName(prefix, 7); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err = CreateVethPair(name1, name2, txQueueLen); err != nil {
|
||||
if err == netlink.ErrInterfaceExists {
|
||||
continue
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
return
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package network
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
)
|
||||
|
||||
func TestGenerateVethNames(t *testing.T) {
|
||||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
|
||||
prefix := "veth"
|
||||
|
||||
name1, name2, err := createVethPair(prefix, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if name1 == "" {
|
||||
t.Fatal("name1 should not be empty")
|
||||
}
|
||||
|
||||
if name2 == "" {
|
||||
t.Fatal("name2 should not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateDuplicateVethPair(t *testing.T) {
|
||||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
|
||||
prefix := "veth"
|
||||
|
||||
name1, name2, err := createVethPair(prefix, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// retry to create the name interfaces and make sure that we get the correct error
|
||||
err = CreateVethPair(name1, name2, 0)
|
||||
if err == nil {
|
||||
t.Fatal("expected error to not be nil with duplicate interface")
|
||||
}
|
||||
|
||||
if err != netlink.ErrInterfaceExists {
|
||||
t.Fatalf("expected error to be ErrInterfaceExists but received %q", err)
|
||||
}
|
||||
}
|
|
@ -4,7 +4,6 @@ package libcontainer
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
@ -16,8 +15,8 @@ const oomCgroupName = "memory"
|
|||
// NotifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
// s is current *libcontainer.State for container.
|
||||
func NotifyOnOOM(s *configs.State) (<-chan struct{}, error) {
|
||||
dir := s.CgroupPaths[oomCgroupName]
|
||||
func NotifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
|
||||
}
|
||||
|
|
|
@ -11,8 +11,6 @@ import (
|
|||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestNotifyOnOOM(t *testing.T) {
|
||||
|
@ -29,12 +27,10 @@ func TestNotifyOnOOM(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
var eventFd, oomControlFd int
|
||||
st := &configs.State{
|
||||
CgroupPaths: map[string]string{
|
||||
"memory": memoryPath,
|
||||
},
|
||||
paths := map[string]string{
|
||||
"memory": memoryPath,
|
||||
}
|
||||
ooms, err := NotifyOnOOM(st)
|
||||
ooms, err := NotifyOnOOM(paths)
|
||||
if err != nil {
|
||||
t.Fatal("expected no error, got:", err)
|
||||
}
|
||||
|
|
|
@ -22,14 +22,14 @@ struct clone_arg {
|
|||
* Reserve some space for clone() to locate arguments
|
||||
* and retcode in this place
|
||||
*/
|
||||
char stack[4096] __attribute__((aligned (8)));
|
||||
char stack[4096] __attribute__ ((aligned(8)));
|
||||
char stack_ptr[0];
|
||||
jmp_buf *env;
|
||||
};
|
||||
|
||||
static int child_func(void *_arg)
|
||||
{
|
||||
struct clone_arg *arg = (struct clone_arg *) _arg;
|
||||
struct clone_arg *arg = (struct clone_arg *)_arg;
|
||||
longjmp(*arg->env, 1);
|
||||
}
|
||||
|
||||
|
@ -47,8 +47,8 @@ int setns(int fd, int nstype)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
static int clone_parent(jmp_buf *env) __attribute__ ((noinline));
|
||||
static int clone_parent(jmp_buf *env)
|
||||
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
|
||||
static int clone_parent(jmp_buf * env)
|
||||
{
|
||||
struct clone_arg ca;
|
||||
int child;
|
||||
|
@ -100,7 +100,8 @@ void nsexec()
|
|||
|
||||
fd = openat(tfd, namespaces[i], O_RDONLY);
|
||||
if (fd == -1) {
|
||||
pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
|
||||
pr_perror("Failed to open ns file %s for ns %s", buf,
|
||||
namespaces[i]);
|
||||
exit(1);
|
||||
}
|
||||
// Set the namespace.
|
||||
|
|
|
@ -1,55 +1,19 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/docker/docker/pkg/term"
|
||||
"github.com/docker/libcontainer"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
consolepkg "github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
type tty struct {
|
||||
master *os.File
|
||||
console string
|
||||
state *term.State
|
||||
}
|
||||
|
||||
func (t *tty) Close() error {
|
||||
if t.master != nil {
|
||||
t.master.Close()
|
||||
}
|
||||
if t.state != nil {
|
||||
term.RestoreTerminal(os.Stdin.Fd(), t.state)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *tty) set(config *configs.Config) {
|
||||
config.Console = t.console
|
||||
}
|
||||
|
||||
func (t *tty) attach(process *libcontainer.Process) {
|
||||
if t.master != nil {
|
||||
process.Stderr = nil
|
||||
process.Stdout = nil
|
||||
process.Stdin = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tty) resize() error {
|
||||
if t.master == nil {
|
||||
return nil
|
||||
}
|
||||
ws, err := term.GetWinsize(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return term.SetWinsize(t.master.Fd(), ws)
|
||||
var standardEnvironment = &cli.StringSlice{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"HOSTNAME=nsinit",
|
||||
"TERM=xterm",
|
||||
}
|
||||
|
||||
var execCommand = cli.Command{
|
||||
|
@ -57,9 +21,11 @@ var execCommand = cli.Command{
|
|||
Usage: "execute a new command inside a container",
|
||||
Action: execAction,
|
||||
Flags: []cli.Flag{
|
||||
cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the container"},
|
||||
cli.BoolFlag{Name: "tty,t", Usage: "allocate a TTY to the container"},
|
||||
cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"},
|
||||
cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"},
|
||||
cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"},
|
||||
cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -81,7 +47,7 @@ func execAction(context *cli.Context) {
|
|||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
tty.set(config)
|
||||
config.Console = tty.console.Path()
|
||||
if container, err = factory.Create(context.String("id"), config); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
@ -89,6 +55,8 @@ func execAction(context *cli.Context) {
|
|||
go handleSignals(container, tty)
|
||||
process := &libcontainer.Process{
|
||||
Args: context.Args(),
|
||||
Env: context.StringSlice("env"),
|
||||
User: context.String("user"),
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
|
@ -109,19 +77,7 @@ func execAction(context *cli.Context) {
|
|||
if err := container.Destroy(); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
exit(status.Sys().(syscall.WaitStatus))
|
||||
}
|
||||
|
||||
func exit(status syscall.WaitStatus) {
|
||||
var exitCode int
|
||||
if status.Exited() {
|
||||
exitCode = status.ExitStatus()
|
||||
} else if status.Signaled() {
|
||||
exitCode = -int(status.Signal())
|
||||
} else {
|
||||
fatalf("Unexpected status")
|
||||
}
|
||||
os.Exit(exitCode)
|
||||
os.Exit(utils.ExitStatus(status.Sys().(syscall.WaitStatus)))
|
||||
}
|
||||
|
||||
func handleSignals(container libcontainer.Container, tty *tty) {
|
||||
|
@ -137,24 +93,3 @@ func handleSignals(container libcontainer.Container, tty *tty) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newTty(context *cli.Context) (*tty, error) {
|
||||
if context.Bool("tty") {
|
||||
master, console, err := consolepkg.CreateMasterAndConsole()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go io.Copy(master, os.Stdin)
|
||||
go io.Copy(os.Stdout, master)
|
||||
state, err := term.SetRawTerminal(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &tty{
|
||||
master: master,
|
||||
console: console,
|
||||
state: state,
|
||||
}, nil
|
||||
}
|
||||
return &tty{}, nil
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"log"
|
||||
"runtime"
|
||||
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/docker/libcontainer"
|
||||
|
@ -15,6 +16,8 @@ var initCommand = cli.Command{
|
|||
cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"},
|
||||
},
|
||||
Action: func(context *cli.Context) {
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
factory, err := libcontainer.New("", nil)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/docker/docker/pkg/term"
|
||||
"github.com/docker/libcontainer"
|
||||
)
|
||||
|
||||
func newTty(context *cli.Context) (*tty, error) {
|
||||
if context.Bool("tty") {
|
||||
console, err := libcontainer.NewConsole()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go io.Copy(console, os.Stdin)
|
||||
go io.Copy(os.Stdout, console)
|
||||
state, err := term.SetRawTerminal(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &tty{
|
||||
console: console,
|
||||
state: state,
|
||||
}, nil
|
||||
}
|
||||
return &tty{}, nil
|
||||
}
|
||||
|
||||
type tty struct {
|
||||
console libcontainer.Console
|
||||
state *term.State
|
||||
}
|
||||
|
||||
func (t *tty) Close() error {
|
||||
if t.console != nil {
|
||||
t.console.Close()
|
||||
}
|
||||
if t.state != nil {
|
||||
term.RestoreTerminal(os.Stdin.Fd(), t.state)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *tty) attach(process *libcontainer.Process) {
|
||||
if t.console != nil {
|
||||
process.Stderr = nil
|
||||
process.Stdout = nil
|
||||
process.Stdin = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tty) resize() error {
|
||||
if t.console == nil {
|
||||
return nil
|
||||
}
|
||||
ws, err := term.GetWinsize(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return term.SetWinsize(t.console.Fd(), ws)
|
||||
}
|
25
process.go
25
process.go
|
@ -2,19 +2,28 @@ package libcontainer
|
|||
|
||||
import "io"
|
||||
|
||||
// Configuration for a process to be run inside a container.
|
||||
// Process specifies the configuration and IO for a process inside
|
||||
// a container.
|
||||
type Process struct {
|
||||
// The command to be run followed by any arguments.
|
||||
Args []string
|
||||
|
||||
// Env specifies the environment variables for the process.
|
||||
Env []string
|
||||
|
||||
// User will set the uid and gid of the executing process running inside the container
|
||||
// local to the contaienr's user and group configuration.
|
||||
User string
|
||||
|
||||
// Cwd will change the processes current working directory inside the container's rootfs.
|
||||
Cwd string
|
||||
|
||||
// Stdin is a pointer to a reader which provides the standard input stream.
|
||||
Stdin io.Reader
|
||||
|
||||
// Stdout is a pointer to a writer which receives the standard output stream.
|
||||
// Stderr is a pointer to a writer which receives the standard error stream.
|
||||
//
|
||||
// If a reader or writer is nil, the input stream is assumed to be empty and the output is
|
||||
// discarded.
|
||||
//
|
||||
// Stdout and Stderr may refer to the same writer in which case the output is interspersed.
|
||||
Stdin io.Reader
|
||||
Stdout io.Writer
|
||||
|
||||
// Stderr is a pointer to a writer which receives the standard error stream.
|
||||
Stderr io.Writer
|
||||
}
|
||||
|
|
|
@ -118,7 +118,7 @@
|
|||
},
|
||||
"restrict_sys": true,
|
||||
"apparmor_profile": "docker-default",
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
"parent": "docker"
|
||||
},
|
||||
"restrict_sys": true,
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
"parent": "docker"
|
||||
},
|
||||
"restrict_sys": true,
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
"parent": "docker"
|
||||
},
|
||||
"restrict_sys": true,
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
"parent": "docker"
|
||||
},
|
||||
"restrict_sys": true,
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -119,7 +119,7 @@
|
|||
"restrict_sys": true,
|
||||
"process_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475",
|
||||
"mount_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475",
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
"parent": "docker"
|
||||
},
|
||||
"restrict_sys": true,
|
||||
"device_nodes": [
|
||||
"devices": [
|
||||
{
|
||||
"permissions": "rwm",
|
||||
"file_mode": 438,
|
||||
|
|
|
@ -1,56 +0,0 @@
|
|||
package capabilities
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
|
||||
|
||||
// DropBoundingSet drops the capability bounding set to those specified in the
|
||||
// container configuration.
|
||||
func DropBoundingSet(capabilities []string) error {
|
||||
c, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
keep := getEnabledCapabilities(capabilities)
|
||||
c.Clear(capability.BOUNDS)
|
||||
c.Set(capability.BOUNDS, keep...)
|
||||
|
||||
if err := c.Apply(capability.BOUNDS); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DropCapabilities drops all capabilities for the current process except those specified in the container configuration.
|
||||
func DropCapabilities(capList []string) error {
|
||||
c, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
keep := getEnabledCapabilities(capList)
|
||||
c.Clear(allCapabilityTypes)
|
||||
c.Set(allCapabilityTypes, keep...)
|
||||
|
||||
if err := c.Apply(allCapabilityTypes); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getEnabledCapabilities returns the capabilities that should not be dropped by the container.
|
||||
func getEnabledCapabilities(capList []string) []capability.Cap {
|
||||
keep := []capability.Cap{}
|
||||
for _, capability := range capList {
|
||||
if c := GetCapability(capability); c != nil {
|
||||
keep = append(keep, c.Value)
|
||||
}
|
||||
}
|
||||
return keep
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package capabilities
|
||||
|
||||
import "github.com/syndtr/gocapability/capability"
|
||||
|
||||
type (
|
||||
CapabilityMapping struct {
|
||||
Key string `json:"key,omitempty"`
|
||||
Value capability.Cap `json:"value,omitempty"`
|
||||
}
|
||||
Capabilities []*CapabilityMapping
|
||||
)
|
||||
|
||||
func (c *CapabilityMapping) String() string {
|
||||
return c.Key
|
||||
}
|
||||
|
||||
func GetCapability(key string) *CapabilityMapping {
|
||||
for _, capp := range capabilityList {
|
||||
if capp.Key == key {
|
||||
cpy := *capp
|
||||
return &cpy
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetAllCapabilities() []string {
|
||||
output := make([]string, len(capabilityList))
|
||||
for i, capability := range capabilityList {
|
||||
output[i] = capability.String()
|
||||
}
|
||||
return output
|
||||
}
|
||||
|
||||
// Contains returns true if the specified Capability is
|
||||
// in the slice
|
||||
func (c Capabilities) contains(capp string) bool {
|
||||
return c.get(capp) != nil
|
||||
}
|
||||
|
||||
func (c Capabilities) get(capp string) *CapabilityMapping {
|
||||
for _, cap := range c {
|
||||
if cap.Key == capp {
|
||||
return cap
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var capabilityList = Capabilities{
|
||||
{Key: "SETPCAP", Value: capability.CAP_SETPCAP},
|
||||
{Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE},
|
||||
{Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO},
|
||||
{Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT},
|
||||
{Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN},
|
||||
{Key: "SYS_NICE", Value: capability.CAP_SYS_NICE},
|
||||
{Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE},
|
||||
{Key: "SYS_TIME", Value: capability.CAP_SYS_TIME},
|
||||
{Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG},
|
||||
{Key: "MKNOD", Value: capability.CAP_MKNOD},
|
||||
{Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE},
|
||||
{Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL},
|
||||
{Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE},
|
||||
{Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN},
|
||||
{Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN},
|
||||
{Key: "SYSLOG", Value: capability.CAP_SYSLOG},
|
||||
{Key: "CHOWN", Value: capability.CAP_CHOWN},
|
||||
{Key: "NET_RAW", Value: capability.CAP_NET_RAW},
|
||||
{Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE},
|
||||
{Key: "FOWNER", Value: capability.CAP_FOWNER},
|
||||
{Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH},
|
||||
{Key: "FSETID", Value: capability.CAP_FSETID},
|
||||
{Key: "KILL", Value: capability.CAP_KILL},
|
||||
{Key: "SETGID", Value: capability.CAP_SETGID},
|
||||
{Key: "SETUID", Value: capability.CAP_SETUID},
|
||||
{Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE},
|
||||
{Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE},
|
||||
{Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST},
|
||||
{Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK},
|
||||
{Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER},
|
||||
{Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT},
|
||||
{Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE},
|
||||
{Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT},
|
||||
{Key: "LEASE", Value: capability.CAP_LEASE},
|
||||
{Key: "SETFCAP", Value: capability.CAP_SETFCAP},
|
||||
{Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM},
|
||||
{Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND},
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
package capabilities
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCapabilitiesContains(t *testing.T) {
|
||||
caps := Capabilities{
|
||||
GetCapability("MKNOD"),
|
||||
GetCapability("SETPCAP"),
|
||||
}
|
||||
|
||||
if caps.contains("SYS_ADMIN") {
|
||||
t.Fatal("capabilities should not contain SYS_ADMIN")
|
||||
}
|
||||
if !caps.contains("MKNOD") {
|
||||
t.Fatal("capabilities should contain MKNOD but does not")
|
||||
}
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package restrict
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
func mountReadonly(path string) error {
|
||||
for i := 0; i < 5; i++ {
|
||||
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
|
||||
switch err {
|
||||
case syscall.EINVAL:
|
||||
// Probably not a mountpoint, use bind-mount
|
||||
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
|
||||
case syscall.EBUSY:
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
|
||||
}
|
||||
|
||||
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts).
|
||||
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes).
|
||||
func Restrict(mounts ...string) error {
|
||||
for _, dest := range mounts {
|
||||
if err := mountReadonly(dest); err != nil {
|
||||
return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
// +build !linux
|
||||
|
||||
package restrict
|
||||
|
||||
import "fmt"
|
||||
|
||||
func Restrict() error {
|
||||
return fmt.Errorf("not supported")
|
||||
}
|
|
@ -10,7 +10,6 @@ func Capture(userSkip int) Stacktrace {
|
|||
skip = userSkip + 1 // add one for our own function
|
||||
frames []Frame
|
||||
)
|
||||
|
||||
for i := skip; ; i++ {
|
||||
pc, file, line, ok := runtime.Caller(i)
|
||||
if !ok {
|
||||
|
@ -18,7 +17,6 @@ func Capture(userSkip int) Stacktrace {
|
|||
}
|
||||
frames = append(frames, NewFrame(pc, file, line))
|
||||
}
|
||||
|
||||
return Stacktrace{
|
||||
Frames: frames,
|
||||
}
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
package libcontainer
|
||||
|
||||
import "github.com/docker/libcontainer/cgroups"
|
||||
|
||||
type NetworkInterface struct {
|
||||
// Name is the name of the network interface.
|
||||
Name string
|
||||
|
||||
RxBytes uint64
|
||||
RxPackets uint64
|
||||
RxErrors uint64
|
||||
RxDropped uint64
|
||||
TxBytes uint64
|
||||
TxPackets uint64
|
||||
TxErrors uint64
|
||||
TxDropped uint64
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
Interfaces []*NetworkInterface
|
||||
CgroupStats *cgroups.Stats
|
||||
}
|
|
@ -8,6 +8,26 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
type ParentDeathSignal int
|
||||
|
||||
func (p ParentDeathSignal) Restore() error {
|
||||
if p == 0 {
|
||||
return nil
|
||||
}
|
||||
current, err := GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p == current {
|
||||
return nil
|
||||
}
|
||||
return p.Set()
|
||||
}
|
||||
|
||||
func (p ParentDeathSignal) Set() error {
|
||||
return SetParentDeathSignal(uintptr(p))
|
||||
}
|
||||
|
||||
func Execv(cmd string, args []string, env []string) error {
|
||||
name, err := exec.LookPath(cmd)
|
||||
if err != nil {
|
||||
|
@ -17,23 +37,20 @@ func Execv(cmd string, args []string, env []string) error {
|
|||
return syscall.Exec(name, args, env)
|
||||
}
|
||||
|
||||
func ParentDeathSignal(sig uintptr) error {
|
||||
func SetParentDeathSignal(sig uintptr) error {
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetParentDeathSignal() (int, error) {
|
||||
func GetParentDeathSignal() (ParentDeathSignal, error) {
|
||||
var sig int
|
||||
|
||||
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
|
||||
|
||||
if err != 0 {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return sig, nil
|
||||
return ParentDeathSignal(sig), nil
|
||||
}
|
||||
|
||||
func SetKeepCaps() error {
|
||||
|
|
|
@ -10,6 +10,10 @@ import (
|
|||
"syscall"
|
||||
)
|
||||
|
||||
const (
|
||||
exitSignalOffset = 128
|
||||
)
|
||||
|
||||
// GenerateRandomName returns a new name joined with a prefix. This size
|
||||
// specified is used to truncate the randomly generated value
|
||||
func GenerateRandomName(prefix string, size int) (string, error) {
|
||||
|
@ -53,3 +57,12 @@ func CloseExecFrom(minFd int) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExitStatus returns the correct exit status for a process based on if it
|
||||
// was signaled or existed cleanly.
|
||||
func ExitStatus(status syscall.WaitStatus) int {
|
||||
if status.Signaled() {
|
||||
return exitSignalOffset + int(status.Signal())
|
||||
}
|
||||
return status.ExitStatus()
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue