Merge pull request #351 from avagin/api-rebase-2

Merge remote-tracking branch 'origin/master' into api-rebase
This commit is contained in:
Mrunal Patel 2015-01-29 19:20:09 -08:00
commit e31ef02610
45 changed files with 1239 additions and 209 deletions

View File

@ -1,6 +1,5 @@
FROM crosbymichael/golang
FROM golang:1.4
RUN apt-get update && apt-get install -y gcc make
RUN go get golang.org/x/tools/cmd/cover
ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor

View File

@ -2,4 +2,5 @@ Michael Crosby <michael@docker.com> (@crosbymichael)
Rohit Jnagal <jnagal@google.com> (@rjnagal)
Victor Marmol <vmarmol@google.com> (@vmarmol)
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
Alexandr Morozov <lk4d4@docker.com> (@LK4D4)
update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon)

View File

@ -1,13 +1,13 @@
all:
docker build -t docker/libcontainer .
docker build -t dockercore/libcontainer .
test:
# we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting
docker run --rm -it --privileged docker/libcontainer
docker run --rm -it --privileged dockercore/libcontainer
sh:
docker run --rm -it --privileged -w /busybox docker/libcontainer nsinit exec sh
docker run --rm -it --privileged -w /busybox dockercore/libcontainer nsinit exec sh
GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u)
@ -23,3 +23,5 @@ direct-build:
direct-install:
go install -v $(GO_PACKAGES)
local:
go test -v

View File

@ -13,4 +13,8 @@ Our goal is to make libcontainer run everywhere, but currently libcontainer requ
## Cross-architecture support
Our goal is to make libcontainer run everywhere. However currently libcontainer only runs on x86_64 systems. We plan on expanding architecture support, so that libcontainer containers can be created and used on more architectures.
Our goal is to make libcontainer run everywhere. Recently libcontainer has
expanded from its initial support for x86_64 systems to include POWER (ppc64
little and big endian variants), IBM System z (s390x 64-bit), and ARM. We plan
to continue expanding architecture support such that libcontainer containers
can be created and used on more architectures.

25
SPEC.md
View File

@ -318,4 +318,29 @@ a container.
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |
### Execute a new process inside of a running container.
A user can execute a new process inside of a running container. Any binaries to be
executed must be accessible within the container's rootfs.
The started process will run inside the container's rootfs. Any changes
made by the process to the container's filesystem will persist after the
process has finished executing.
The started process will join all the container's existing namespaces. When the
container is paused, the process will also be paused and will resume when
the container is unpaused. The started process will only run when the container's
primary process (PID 1) is running, and will not be restarted when the container
is restarted.
#### Planned additions
The started process will have its own cgroups nested inside the container's
cgroups. This is used for process tracking and optionally resource allocation
handling for the new process. The freezer cgroup is required; the rest of the
cgroups are optional. The process executor must place its pid inside the correct
cgroups before starting the process. This is done so that no child processes or
threads can escape the cgroups.
When the process is stopped, the process executor will try (in a best-effort way)
to stop all its children and remove the sub-cgroups.

View File

@ -77,6 +77,8 @@ type Cgroup struct {
CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use
BlkioWeight int64 `json:"blkio_weight,omitempty"` // Specifies per cgroup weight, range is from 10 to 1000.
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process
Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
}

View File

@ -124,7 +124,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, ok := subsystems[name]
if !ok {
if !ok || !cgroups.PathExists(path) {
continue
}
if err := sys.GetStats(path, stats); err != nil {

View File

@ -15,11 +15,17 @@ type BlkioGroup struct {
}
func (s *BlkioGroup) Set(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("blkio"); err != nil && !cgroups.IsNotFound(err) {
dir, err := d.join("blkio")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if d.c.BlkioWeight != 0 {
if err := writeFile(dir, "blkio.weight", strconv.FormatInt(d.c.BlkioWeight, 10)); err != nil {
return err
}
}
return nil
}

View File

@ -18,7 +18,7 @@ func (s *CpusetGroup) Set(d *data) error {
if err != nil {
return err
}
return s.SetDir(dir, d.c.CpusetCpus, d.pid)
return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid)
}
func (s *CpusetGroup) Remove(d *data) error {
@ -29,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
if err := s.ensureParent(dir); err != nil {
return err
}
@ -40,10 +40,15 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
return err
}
// If we don't use --cpuset, the default cpuset.cpus is set in
// s.ensureParent, otherwise, use the value we set
if value != "" {
if err := writeFile(dir, "cpuset.cpus", value); err != nil {
// If we don't use --cpuset-xxx, the default value inherit from parent cgroup
// is set in s.ensureParent, otherwise, use the value we set
if cpus != "" {
if err := writeFile(dir, "cpuset.cpus", cpus); err != nil {
return err
}
}
if mems != "" {
if err := writeFile(dir, "cpuset.mems", mems); err != nil {
return err
}
}

View File

@ -38,12 +38,17 @@ func (s *MemoryGroup) Set(d *data) error {
}
}
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if d.c.MemorySwap != -1 {
// If you want to omit MemorySwap, set it to '-1'.
if d.c.MemorySwap == 0 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil {
return err
}
}
if d.c.MemorySwap > 0 {
if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.MemorySwap, 10)); err != nil {
return err
}
}
}
return nil
}

View File

@ -53,7 +53,7 @@ func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
}
if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
log.Printf("blkio IoMergedRecursive do not match - %s vs %s\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
log.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
t.Fail()
}
@ -90,4 +90,8 @@ func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats)
t.Fail()
}
}
if expected.Failcnt != actual.Failcnt {
log.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
t.Fail()
}
}

View File

@ -118,6 +118,11 @@ func (m *Manager) Apply(pid int) error {
newProp("CPUShares", uint64(c.CpuShares)))
}
if c.BlkioWeight != 0 {
properties = append(properties,
newProp("BlockIOWeight", uint64(c.BlkioWeight)))
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
return err
}
@ -330,5 +335,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {
s := &fs.CpusetGroup{}
return s.SetDir(path, c.CpusetCpus, pid)
return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid)
}

View File

@ -9,6 +9,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/docker/docker/pkg/mount"
)
@ -173,7 +174,7 @@ func ParseCgroupFile(subsystem string, r io.Reader) (string, error) {
return "", NewNotFoundError(subsystem)
}
func pathExists(path string) bool {
func PathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
return false
}
@ -182,7 +183,7 @@ func pathExists(path string) bool {
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if pathExists(path) {
if PathExists(path) {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
return err
@ -193,13 +194,30 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
}
// RemovePaths iterates over the provided paths removing them.
// If an error is encountered the removal proceeds and the first error is
// returned to ensure a partial removal is not possible.
// We try to remove all paths five times with increasing delay between tries.
// If some cgroups still have not been removed after all tries, an appropriate
// error is returned.
func RemovePaths(paths map[string]string) (err error) {
for _, path := range paths {
if rerr := os.RemoveAll(path); err == nil {
err = rerr
delay := 10 * time.Millisecond
for i := 0; i < 5; i++ {
if i != 0 {
time.Sleep(delay)
delay *= 2
}
for s, p := range paths {
os.RemoveAll(p)
// TODO: here probably should be logging
_, err := os.Stat(p)
// We need this strange way of checking cgroups existence because
// RemoveAll almost always returns error, even on already removed
// cgroups
if os.IsNotExist(err) {
delete(paths, s)
}
}
if len(paths) == 0 {
return nil
}
}
return err
return fmt.Errorf("Failed to remove paths: %s", paths)
}

View File

@ -10,11 +10,55 @@ type MountConfig mount.MountConfig
type Network network.Network
type NamespaceType string
// Identifiers for the kinds of namespaces a container configuration can
// reference through the Type field of a Namespace entry.
const (
NEWNET NamespaceType = "NEWNET" // network namespace
NEWPID NamespaceType = "NEWPID" // PID namespace
NEWNS NamespaceType = "NEWNS" // mount namespace
NEWUTS NamespaceType = "NEWUTS" // UTS (hostname) namespace
NEWIPC NamespaceType = "NEWIPC" // IPC namespace
NEWUSER NamespaceType = "NEWUSER" // user namespace
)
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Name string `json:"name"`
Path string `json:"path,omitempty"`
Type NamespaceType `json:"type"`
Path string `json:"path,omitempty"`
}
type Namespaces []Namespace
// Remove deletes the entry whose Type equals t from the list and reports
// whether such an entry was present. The relative order of the remaining
// entries is preserved.
func (n *Namespaces) Remove(t NamespaceType) bool {
	if pos := n.index(t); pos != -1 {
		list := *n
		*n = append(list[:pos], list[pos+1:]...)
		return true
	}
	return false
}
// Add records path for the namespace type t: when the list already has an
// entry of that type its Path is overwritten, otherwise a new entry is
// appended.
func (n *Namespaces) Add(t NamespaceType, path string) {
	if pos := n.index(t); pos != -1 {
		(*n)[pos].Path = path
		return
	}
	*n = append(*n, Namespace{Type: t, Path: path})
}
// index returns the position of the first entry whose Type equals t, or -1
// when the list has no such entry.
func (n *Namespaces) index(t NamespaceType) int {
	list := *n
	for pos := 0; pos < len(list); pos++ {
		if list[pos].Type == t {
			return pos
		}
	}
	return -1
}
// Contains reports whether the list has an entry whose Type equals t.
func (n *Namespaces) Contains(t NamespaceType) bool {
	pos := n.index(t)
	return pos != -1
}
// Config defines configuration options for executing a process inside a contained environment.
@ -45,7 +89,7 @@ type Config struct {
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces []Namespace `json:"namespaces,omitempty"`
Namespaces Namespaces `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
@ -76,6 +120,15 @@ type Config struct {
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits,omitempty"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []int `json:"additional_groups,omitempty"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings,omitempty"`
// GidMappings is an array of Group ID mappings for User Namespaces
GidMappings []IDMap `json:"gid_mappings,omitempty"`
}
// Routes can be specified to create entries in the route table as the container is started
@ -104,3 +157,10 @@ type Rlimit struct {
Hard uint64 `json:"hard,omitempty"`
Soft uint64 `json:"soft,omitempty"`
}
// IDMap represents UID/GID Mappings for User Namespaces.
// One entry describes a contiguous range: the Size IDs starting at
// ContainerID correspond, in order, to the IDs starting at HostID.
type IDMap struct {
ContainerID int `json:"container_id,omitempty"` // first ID of the range as seen inside the container
HostID int `json:"host_id,omitempty"` // host ID that ContainerID corresponds to
Size int `json:"size,omitempty"` // number of consecutive IDs covered by the range
}

View File

@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail()
}
if getNamespaceIndex(container, "NEWNET") == -1 {
if !container.Namespaces.Contains(NEWNET) {
t.Log("namespaces should contain NEWNET")
t.Fail()
}
if getNamespaceIndex(container, "NEWUSER") != -1 {
if container.Namespaces.Contains(NEWUSER) {
t.Log("namespaces should not contain NEWUSER")
t.Fail()
}
@ -159,11 +159,14 @@ func TestSelinuxLabels(t *testing.T) {
}
}
func getNamespaceIndex(config *Config, name string) int {
for i, v := range config.Namespaces {
if v.Name == name {
return i
}
func TestRemoveNamespace(t *testing.T) {
ns := Namespaces{
{Type: NEWNET},
}
if !ns.Remove(NEWNET) {
t.Fatal("NEWNET was not removed")
}
if len(ns) != 0 {
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
}
return -1
}

View File

@ -13,7 +13,7 @@ import (
)
// Setup initializes the proper /dev/console inside the rootfs path
func Setup(rootfs, consolePath, mountLabel string) error {
func Setup(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
@ -21,7 +21,7 @@ func Setup(rootfs, consolePath, mountLabel string) error {
return err
}
if err := os.Chown(consolePath, 0, 0); err != nil {
if err := os.Chown(consolePath, hostRootUid, hostRootGid); err != nil {
return err
}

View File

@ -71,7 +71,7 @@ func TestIPCPrivate(t *testing.T) {
}
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
t.Fatalf("ipc link should be private to the conatiner but equals host %q %q", actual, l)
t.Fatalf("ipc link should be private to the container but equals host %q %q", actual, l)
}
}
@ -92,8 +92,7 @@ func TestIPCHost(t *testing.T) {
}
config := newTemplateConfig(rootfs)
i := getNamespaceIndex(config, "NEWIPC")
config.Namespaces = append(config.Namespaces[:i], config.Namespaces[i+1:]...)
config.Namespaces.Remove(configs.NEWIPC)
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
t.Fatal(err)
@ -125,8 +124,7 @@ func TestIPCJoinPath(t *testing.T) {
}
config := newTemplateConfig(rootfs)
i := getNamespaceIndex(config, "NEWIPC")
config.Namespaces[i].Path = "/proc/1/ns/ipc"
config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc")
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
@ -154,12 +152,11 @@ func TestIPCBadPath(t *testing.T) {
defer remove(rootfs)
config := newTemplateConfig(rootfs)
i := getNamespaceIndex(config, "NEWIPC")
config.Namespaces[i].Path = "/proc/1/ns/ipcc"
config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc")
_, _, err = runContainer(config, "", "true")
if err == nil {
t.Fatal("container succeded with bad ipc path")
t.Fatal("container succeeded with bad ipc path")
}
}
@ -184,15 +181,6 @@ func TestRlimit(t *testing.T) {
}
}
func getNamespaceIndex(config *configs.Config, name string) int {
for i, v := range config.Namespaces {
if v.Name == name {
return i
}
}
return -1
}
func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {

View File

@ -32,13 +32,13 @@ func newTemplateConfig(rootfs string) *configs.Config {
"KILL",
"AUDIT_WRITE",
},
Namespaces: []configs.Namespace{
{Name: "NEWNS"},
{Name: "NEWUTS"},
{Name: "NEWIPC"},
{Name: "NEWPID"},
{Name: "NEWNET"},
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWNET},
}),
Cgroups: &cgroups.Cgroup{
Name: "test",
Parent: "integration",

View File

@ -168,10 +168,11 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) {
func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
pipe := os.NewFile(uintptr(pipefd), "pipe")
setupUserns := os.Getenv("_LIBCONTAINER_USERNS")
pid := os.Getenv("_LIBCONTAINER_INITPID")
if pid != "" {
if pid != "" && setupUserns == "" {
return namespaces.InitIn(pipe)
}
return namespaces.Init(pipe)
return namespaces.Init(pipe, setupUserns != "")
}

View File

@ -25,7 +25,7 @@ type mount struct {
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountConfig *MountConfig) error {
func InitializeMountNamespace(rootfs, console string, sysReadonly bool, hostRootUid, hostRootGid int, mountConfig *MountConfig) error {
var (
err error
flag = syscall.MS_PRIVATE
@ -58,14 +58,17 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon
return fmt.Errorf("create device nodes %s", err)
}
if err := SetupPtmx(rootfs, console, mountConfig.MountLabel); err != nil {
if err := SetupPtmx(rootfs, console, mountConfig.MountLabel, hostRootUid, hostRootGid); err != nil {
return err
}
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
// Re-open them inside this namespace.
if err := reOpenDevNull(rootfs); err != nil {
return fmt.Errorf("Failed to reopen /dev/null %s", err)
// FIXME: Need to fix this for user namespaces.
if hostRootUid == 0 {
if err := reOpenDevNull(rootfs); err != nil {
return fmt.Errorf("Failed to reopen /dev/null %s", err)
}
}
if err := setupDevSymlinks(rootfs); err != nil {
@ -79,7 +82,7 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon
if mountConfig.NoPivotRoot {
err = MsMoveRoot(rootfs)
} else {
err = PivotRoot(rootfs)
err = PivotRoot(rootfs, mountConfig.PivotDir)
}
if err != nil {

View File

@ -13,6 +13,11 @@ type MountConfig struct {
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root,omitempty"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir,omitempty"`
// ReadonlyFs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writable
ReadonlyFs bool `json:"readonly_fs,omitempty"`

View File

@ -10,8 +10,15 @@ import (
"syscall"
)
func PivotRoot(rootfs string) error {
pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root")
func PivotRoot(rootfs, pivotBaseDir string) error {
if pivotBaseDir == "" {
pivotBaseDir = "/"
}
tmpDir := filepath.Join(rootfs, pivotBaseDir)
if err := os.MkdirAll(tmpDir, 0755); err != nil {
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
}
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
}
@ -25,7 +32,7 @@ func PivotRoot(rootfs string) error {
}
// path to pivot dir now changed, update
pivotDir = filepath.Join("/", filepath.Base(pivotDir))
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
}

View File

@ -10,7 +10,7 @@ import (
"github.com/docker/libcontainer/console"
)
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
func SetupPtmx(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error {
ptmx := filepath.Join(rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
@ -21,7 +21,7 @@ func SetupPtmx(rootfs, consolePath, mountLabel string) error {
}
if consolePath != "" {
if err := console.Setup(rootfs, consolePath, mountLabel); err != nil {
if err := console.Setup(rootfs, consolePath, mountLabel, hostRootUid, hostRootGid); err != nil {
return err
}
}

View File

@ -1,10 +0,0 @@
package namespaces
import (
"os"
"os/exec"
"github.com/docker/libcontainer/configs"
)
type CreateCommand func(container *configs.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd

View File

@ -4,6 +4,7 @@ package namespaces
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
@ -15,6 +16,99 @@ import (
"github.com/docker/libcontainer/system"
)
const (
// NOTE(review): presumably the offset added to a signal number to build an
// exit status for a signal-terminated process (the shell's 128+N
// convention). It is not referenced in the visible code — confirm at the
// use sites.
EXIT_SIGNAL_OFFSET = 128
)
// executeSetupCmd starts a separate setup process and hands it the container
// configuration over a freshly created init pipe.
//
// The command described by args is started in the container's rootfs with
// _LIBCONTAINER_INITPID set to ppid and _LIBCONTAINER_USERNS set to 1 in its
// environment, and with the child end of the pipe as an extra file. The
// function then:
//
//  1. waits for that command to exit and fails if it was unsuccessful;
//  2. reads a pid message from the pipe and looks up the process it names;
//  3. sends container, process and networkState as JSON and shuts down the
//     write side of the pipe;
//  4. reads an optional initError back from the pipe, returning it if present;
//  5. waits for the process from step 2 and fails if it was unsuccessful.
//
// Once the process from step 2 is known, a failure to encode, shut down or
// decode kills and reaps that process before the error is returned.
func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *network.NetworkState) error {
	command := exec.Command(args[0], args[1:]...)

	parent, child, err := newInitPipe()
	if err != nil {
		return err
	}
	defer parent.Close()

	command.ExtraFiles = []*os.File{child}
	command.Dir = container.RootFs
	command.Env = append(command.Env,
		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
		"_LIBCONTAINER_USERNS=1")

	err = command.Start()
	// The child end is only needed by the started command; close our copy
	// whether or not the start succeeded.
	child.Close()
	if err != nil {
		return err
	}

	s, err := command.Process.Wait()
	if err != nil {
		return err
	}
	if !s.Success() {
		// Keyed literal: ExitError is a standard library struct, so an
		// unkeyed literal breaks as soon as the struct has another field.
		return &exec.ExitError{ProcessState: s}
	}

	decoder := json.NewDecoder(parent)
	var setupPid *pid
	if err := decoder.Decode(&setupPid); err != nil {
		return err
	}

	p, err := os.FindProcess(setupPid.Pid)
	if err != nil {
		return err
	}

	// terminate kills and reaps the setup process, then passes terr through.
	terminate := func(terr error) error {
		// TODO: log the errors for kill and wait
		p.Kill()
		p.Wait()
		return terr
	}

	encoder := json.NewEncoder(parent)

	if err := encoder.Encode(container); err != nil {
		return terminate(err)
	}

	if err := encoder.Encode(process); err != nil {
		return terminate(err)
	}

	// send the state to the container's init process then shutdown writes for the parent
	if err := encoder.Encode(networkState); err != nil {
		return terminate(err)
	}

	// shutdown writes for the parent side of the pipe
	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
		return terminate(err)
	}

	// wait for the child process to fully complete and receive an error message
	// if one was encountered; io.EOF means nothing was sent back
	var ierr *initError
	if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
		return terminate(err)
	}
	if ierr != nil {
		return ierr
	}

	s, err = p.Wait()
	if err != nil {
		return err
	}
	if !s.Success() {
		return &exec.ExitError{ProcessState: s}
	}
	return nil
}
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
// Move this to libcontainer package.
// Exec performs setup outside of a namespace so that a container can be
@ -32,16 +126,35 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai
command.Dir = container.RootFs
command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces))
if container.Namespaces.Contains(configs.NEWUSER) {
AddUidGidMappings(command.SysProcAttr, container)
// Default to root user when user namespaces are enabled.
if command.SysProcAttr.Credential == nil {
command.SysProcAttr.Credential = &syscall.Credential{}
}
}
if err := command.Start(); err != nil {
child.Close()
return err
}
child.Close()
wait := func() (*os.ProcessState, error) {
ps, err := command.Process.Wait()
// we should kill all processes in the cgroup when init has died if we use
// the host PID namespace
if !container.Namespaces.Contains(configs.NEWPID) {
killAllPids(cgroupManager)
}
return ps, err
}
terminate := func(terr error) error {
// TODO: log the errors for kill and wait
command.Process.Kill()
command.Wait()
wait()
return terr
}
@ -81,6 +194,14 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai
if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
return terminate(err)
}
// Start the setup process to setup the init process
if container.Namespaces.Contains(configs.NEWUSER) {
if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil {
return terminate(err)
}
}
// send the state to the container's init process then shutdown writes for the parent
if err := encoder.Encode(networkState); err != nil {
return terminate(err)
@ -108,6 +229,101 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai
return nil
}
// killAllPids iterates over all of the container's processes, as reported by
// the cgroup manager, sending a SIGKILL to each process.
//
// The cgroup is frozen while the pids are collected and killed, then thawed
// again, and every killed process is waited on afterwards. Errors from
// freezing, thawing, killing and waiting are ignored (best effort); the only
// error returned is a failure to list the pids.
func killAllPids(m cgroups.Manager) error {
	var procs []*os.Process

	m.Freeze(cgroups.Frozen)
	pids, err := m.GetPids()
	if err != nil {
		// Do not leave the cgroup frozen when we bail out early.
		m.Freeze(cgroups.Thawed)
		return err
	}
	for _, pid := range pids {
		// TODO: log err without aborting if we are unable to find
		// a single PID
		p, err := os.FindProcess(pid)
		if err != nil {
			continue
		}
		procs = append(procs, p)
		p.Kill()
	}
	m.Freeze(cgroups.Thawed)

	for _, p := range procs {
		p.Wait()
	}
	return nil
}
// hostIDFromMapping translates containerID to the matching host ID using the
// given user namespace map. The first entry whose range
// [ContainerID, ContainerID+Size-1] includes containerID wins; the boolean
// result is false (and the ID -1) when no entry covers it.
func hostIDFromMapping(containerID int, uMap []configs.IDMap) (int, bool) {
	for i := range uMap {
		entry := uMap[i]
		last := entry.ContainerID + entry.Size - 1
		if containerID < entry.ContainerID || containerID > last {
			continue
		}
		offset := containerID - entry.ContainerID
		return entry.HostID + offset, true
	}
	return -1, false
}
// GetHostRootGid returns the gid on the host that the container's root group
// (gid 0) corresponds to. This can be non-zero when user namespaces are
// enabled.
//
// With the NEWUSER namespace configured, the gid is looked up in
// container.GidMappings; an error is returned when there are no gid mappings
// or when none of them covers gid 0. Without it, the result is 0.
func GetHostRootGid(container *configs.Config) (int, error) {
	if !container.Namespaces.Contains(configs.NEWUSER) {
		// Return default root gid 0
		return 0, nil
	}
	if container.GidMappings == nil {
		return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
	}
	hostRootGid, found := hostIDFromMapping(0, container.GidMappings)
	if !found {
		return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
	}
	return hostRootGid, nil
}
// GetHostRootUid returns the uid on the host that the container's root user
// (uid 0) corresponds to. This can be non-zero when user namespaces are
// enabled.
//
// With the NEWUSER namespace configured, the uid is looked up in
// container.UidMappings; an error is returned when there are no uid mappings
// or when none of them covers uid 0. Without it, the result is 0.
func GetHostRootUid(container *configs.Config) (int, error) {
	if !container.Namespaces.Contains(configs.NEWUSER) {
		// Return default root uid 0
		return 0, nil
	}
	if container.UidMappings == nil {
		return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
	}
	if hostRootUid, found := hostIDFromMapping(0, container.UidMappings); found {
		return hostRootUid, nil
	}
	return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
// AddUidGidMappings copies the container's uid and gid mappings into sys,
// converting each IDMap entry to a syscall.SysProcIDMap. A nil mapping list
// on the container leaves the corresponding field of sys untouched.
func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
	if uids := container.UidMappings; uids != nil {
		converted := make([]syscall.SysProcIDMap, len(uids))
		for i := range uids {
			converted[i].ContainerID = uids[i].ContainerID
			converted[i].HostID = uids[i].HostID
			converted[i].Size = uids[i].Size
		}
		sys.UidMappings = converted
	}
	if gids := container.GidMappings; gids != nil {
		converted := make([]syscall.SysProcIDMap, len(gids))
		for i := range gids {
			converted[i].ContainerID = gids[i].ContainerID
			converted[i].HostID = gids[i].HostID
			converted[i].Size = gids[i].Size
		}
		sys.GidMappings = converted
	}
}
// InitializeNetworking creates the container's network stack outside of the namespace and moves
// interfaces into the container's net namespaces if necessary
func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error {

View File

@ -8,12 +8,16 @@ import (
"io/ioutil"
"os"
"os/exec"
"syscall"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/mount"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/utils"
)
type pid struct {
@ -140,6 +144,10 @@ func FinalizeSetns(container *configs.Config) error {
return err
}
if err := setupRlimits(container); err != nil {
return fmt.Errorf("setup rlimits %s", err)
}
if err := FinalizeNamespace(container); err != nil {
return err
}
@ -157,6 +165,68 @@ func FinalizeSetns(container *configs.Config) error {
return nil
}
// SetupContainer is run to setup mounts and networking related operations
// for a user namespace enabled process as a user namespace root doesn't
// have permissions to perform these operations.
// The setup process joins all the namespaces of user namespace enabled init
// except the user namespace, so it runs as root in the root user namespace
// to perform these operations.
//
// The steps run in this order: resolve the rootfs, load the container
// environment, set up networking and routes (only with a network namespace),
// initialise labels, resolve the host uid/gid of the container's root, and
// initialise the mount namespace (only with a mount namespace). The first
// failing step aborts the setup and its error is returned.
func SetupContainer(container *configs.Config, networkState *network.NetworkState, consolePath string) error {
rootfs, err := utils.ResolveRootfs(container.RootFs)
if err != nil {
return err
}
// clear the current processes env and replace it with the environment
// defined on the container
if err := LoadContainerEnvironment(container); err != nil {
return err
}
cloneFlags := GetNamespaceFlags(container.Namespaces)
// Network settings are only valid together with a new network namespace:
// reject a configuration that asks for networks or routes without one.
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
if len(container.Networks) != 0 || len(container.Routes) != 0 {
return fmt.Errorf("unable to apply network parameters without network namespace")
}
} else {
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := setupRoute(container); err != nil {
return fmt.Errorf("setup route %s", err)
}
}
label.Init()
// The host-side uid/gid of the container's root are passed on to the mount
// namespace initialisation below.
hostRootUid, err := GetHostRootUid(container)
if err != nil {
return fmt.Errorf("failed to get hostRootUid %s", err)
}
hostRootGid, err := GetHostRootGid(container)
if err != nil {
return fmt.Errorf("failed to get hostRootGid %s", err)
}
// InitializeMountNamespace() can be executed only for a new mount namespace
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
// A mount config without a mount namespace is a configuration error.
if container.MountConfig != nil {
return fmt.Errorf("mount config is set without mount namespace")
}
} else if err := mount.InitializeMountNamespace(rootfs,
consolePath,
container.RestrictSys,
hostRootUid,
hostRootGid,
(*mount.MountConfig)(container.MountConfig)); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
return nil
}
// EnterCgroups adds the process identified by pid to the cgroup paths
// recorded in state, returning whatever error cgroups.EnterPid reports.
func EnterCgroups(state *configs.State, pid int) error {
	paths := state.CgroupPaths
	return cgroups.EnterPid(paths, pid)
}

View File

@ -37,7 +37,7 @@ type processArgs struct {
// and other options required for the new container.
// The caller of Init function has to ensure that the go runtime is locked to an OS thread
// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
func Init(pipe *os.File) (err error) {
func Init(pipe *os.File, setupUserns bool) (err error) {
defer func() {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
@ -72,6 +72,29 @@ func Init(pipe *os.File) (err error) {
return err
}
// We always read this as it is a way to sync with the parent as well
var networkState *network.NetworkState
if err := decoder.Decode(&networkState); err != nil {
return err
}
if setupUserns {
err = SetupContainer(container, networkState, process.ConsolePath)
if err == nil {
os.Exit(0)
} else {
os.Exit(1)
}
}
if container.Namespaces.Contains(configs.NEWUSER) {
return initUserNs(container, uncleanRootfs, process, networkState)
} else {
return initDefault(container, uncleanRootfs, process, networkState)
}
}
func initDefault(container *configs.Config, uncleanRootfs string, process *processArgs, networkState *network.NetworkState) (err error) {
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
if err != nil {
return err
@ -83,11 +106,6 @@ func Init(pipe *os.File) (err error) {
return err
}
// We always read this as it is a way to sync with the parent as well
var networkState *network.NetworkState
if err := decoder.Decode(&networkState); err != nil {
return err
}
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(container.Namespaces); err != nil {
return err
@ -106,11 +124,19 @@ func Init(pipe *os.File) (err error) {
}
}
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := setupRoute(container); err != nil {
return fmt.Errorf("setup route %s", err)
cloneFlags := GetNamespaceFlags(container.Namespaces)
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
if len(container.Networks) != 0 || len(container.Routes) != 0 {
return fmt.Errorf("unable to apply network parameters without network namespace")
}
} else {
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
if err := setupRoute(container); err != nil {
return fmt.Errorf("setup route %s", err)
}
}
if err := setupRlimits(container); err != nil {
@ -119,14 +145,24 @@ func Init(pipe *os.File) (err error) {
label.Init()
if err := mount.InitializeMountNamespace(rootfs,
// InitializeMountNamespace() can be executed only for a new mount namespace
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
if container.MountConfig != nil {
return fmt.Errorf("mount config is set without mount namespace")
}
} else if err := mount.InitializeMountNamespace(rootfs,
process.ConsolePath,
container.RestrictSys,
0, // Default Root Uid
0, // Default Root Gid
(*mount.MountConfig)(container.MountConfig)); err != nil {
return fmt.Errorf("setup mount namespace %s", err)
}
if container.Hostname != "" {
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
return fmt.Errorf("unable to set the hostname without UTS namespace")
}
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
}
@ -142,6 +178,88 @@ func Init(pipe *os.File) (err error) {
// TODO: (crosbymichael) make this configurable at the Config level
if container.RestrictSys {
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
}
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
return err
}
}
pdeathSignal, err := system.GetParentDeathSignal()
if err != nil {
return fmt.Errorf("get parent death signal %s", err)
}
if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
// FinalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
return fmt.Errorf("restore parent death signal %s", err)
}
return system.Execv(process.Args[0], process.Args[0:], process.Env)
}
func initUserNs(container *configs.Config, uncleanRootfs string, process *processArgs, networkState *network.NetworkState) (err error) {
// clear the current processes env and replace it with the environment
// defined on the container
if err := LoadContainerEnvironment(container); err != nil {
return err
}
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(container.Namespaces); err != nil {
return err
}
if process.ConsolePath != "" {
if err := console.OpenAndDup("/dev/console"); err != nil {
return err
}
}
if _, err := syscall.Setsid(); err != nil {
return fmt.Errorf("setsid %s", err)
}
if process.ConsolePath != "" {
if err := system.Setctty(); err != nil {
return fmt.Errorf("setctty %s", err)
}
}
if container.WorkingDir == "" {
container.WorkingDir = "/"
}
if err := setupRlimits(container); err != nil {
return fmt.Errorf("setup rlimits %s", err)
}
cloneFlags := GetNamespaceFlags(container.Namespaces)
if container.Hostname != "" {
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
return fmt.Errorf("unable to set the hostname without UTS namespace")
}
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
}
}
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
}
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
return fmt.Errorf("set process label %s", err)
}
if container.RestrictSys {
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
}
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
return err
}
@ -194,7 +312,7 @@ func RestoreParentDeathSignal(old int) error {
}
// SetupUser changes the groups, gid, and uid for the user inside the container
func SetupUser(u string) error {
func SetupUser(container *configs.Config) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
@ -202,22 +320,24 @@ func SetupUser(u string) error {
Home: "/",
}
passwdFile, err := user.GetPasswdFile()
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupFile, err := user.GetGroupFile()
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile)
execUser, err := user.GetExecUserPath(container.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return fmt.Errorf("get supplementary groups %s", err)
}
if err := syscall.Setgroups(execUser.Sgids); err != nil {
suppGroups := append(execUser.Sgids, container.AdditionalGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return fmt.Errorf("setgroups %s", err)
}
@ -297,7 +417,7 @@ func FinalizeNamespace(container *configs.Config) error {
return fmt.Errorf("set keep caps %s", err)
}
if err := SetupUser(container.User); err != nil {
if err := SetupUser(container); err != nil {
return fmt.Errorf("setup user %s", err)
}
@ -342,7 +462,7 @@ func joinExistingNamespaces(namespaces []configs.Namespace) error {
if err != nil {
return err
}
err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Name]))
err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type]))
f.Close()
if err != nil {
return err

View File

@ -17,13 +17,13 @@ func (i initError) Error() string {
return i.Message
}
var namespaceInfo = map[string]int{
"NEWNET": syscall.CLONE_NEWNET,
"NEWNS": syscall.CLONE_NEWNS,
"NEWUSER": syscall.CLONE_NEWUSER,
"NEWIPC": syscall.CLONE_NEWIPC,
"NEWUTS": syscall.CLONE_NEWUTS,
"NEWPID": syscall.CLONE_NEWPID,
var namespaceInfo = map[configs.NamespaceType]int{
configs.NEWNET: syscall.CLONE_NEWNET,
configs.NEWNS: syscall.CLONE_NEWNS,
configs.NEWUSER: syscall.CLONE_NEWUSER,
configs.NEWIPC: syscall.CLONE_NEWIPC,
configs.NEWUTS: syscall.CLONE_NEWUTS,
configs.NEWPID: syscall.CLONE_NEWPID,
}
// New returns a newly initialized Pipe for communication between processes
@ -36,10 +36,13 @@ func newInitPipe() (parent *os.File, child *os.File, err error) {
}
// GetNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func GetNamespaceFlags(namespaces []configs.Namespace) (flag int) {
// flags on clone and unshare. This function returns flags only for new namespaces.
func GetNamespaceFlags(namespaces configs.Namespaces) (flag int) {
for _, v := range namespaces {
flag |= namespaceInfo[v.Name]
if v.Path != "" {
continue
}
flag |= namespaceInfo[v.Type]
}
return flag
}

View File

@ -522,11 +522,10 @@ func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
var (
MULTICAST byte = 0x1
LOCALOUI byte = 0x2
)
if hwaddr[0]&0x1 == MULTICAST || hwaddr[0]&0x2 != LOCALOUI {
return fmt.Errorf("Incorrect Local MAC Address specified: %s", macaddr)
if hwaddr[0]&0x1 == MULTICAST {
return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr)
}
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)

View File

@ -88,6 +88,18 @@ func SetInterfaceIp(name string, rawIp string) error {
return netlink.NetworkLinkAddIp(iface, ip, ipNet)
}
// DeleteInterfaceIp looks up the network interface called name, parses rawIp
// as a CIDR string (for example "192.0.2.1/24"), and passes the interface,
// address and network to netlink.NetworkLinkDelIp.
//
// The interface lookup is performed before the CIDR parse, so when both would
// fail the lookup error is the one returned.
func DeleteInterfaceIp(name string, rawIp string) error {
	var (
		link    *net.Interface
		addr    net.IP
		network *net.IPNet
		err     error
	)
	if link, err = net.InterfaceByName(name); err != nil {
		return err
	}
	if addr, network, err = net.ParseCIDR(rawIp); err != nil {
		return err
	}
	return netlink.NetworkLinkDelIp(link, addr, network)
}
func SetMtu(name string, mtu int) error {
iface, err := net.InterfaceByName(name)
if err != nil {

View File

@ -1,33 +1,30 @@
// +build linux
package fs
package libcontainer
import (
"fmt"
"github.com/docker/libcontainer/configs"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"github.com/docker/libcontainer/cgroups"
)
// NotifyOnOOM sends signals on the returned channel when the cgroup reaches
// its memory limit. The channel is closed when the cgroup is removed.
func NotifyOnOOM(c *cgroups.Cgroup) (<-chan struct{}, error) {
d, err := getCgroupData(c, 0)
const oomCgroupName = "memory"
// NotifyOnOOM returns a channel on which OOM events are delivered.
// If the process dies without an OOM, the channel is closed.
// s is the current *configs.State for the container.
func NotifyOnOOM(s *configs.State) (<-chan struct{}, error) {
dir := s.CgroupPaths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
}
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
return nil, err
}
return notifyOnOOM(d)
}
func notifyOnOOM(d *data) (<-chan struct{}, error) {
dir, err := d.path("memory")
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
return nil, syserr
@ -35,48 +32,32 @@ func notifyOnOOM(d *data) (<-chan struct{}, error) {
eventfd := os.NewFile(fd, "eventfd")
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
eventfd.Close()
return nil, err
}
var (
eventControlPath = filepath.Join(dir, "cgroup.event_control")
data = fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
)
if err := writeFile(dir, "cgroup.event_control", data); err != nil {
eventControlPath := filepath.Join(dir, "cgroup.event_control")
data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
oomControl.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
oomControl.Close()
}()
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}

View File

@ -1,38 +1,50 @@
// +build linux
package fs
package libcontainer
import (
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"testing"
"time"
"github.com/docker/libcontainer/configs"
)
func TestNotifyOnOOM(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.oom_control": "",
"cgroup.event_control": "",
})
memoryPath, err := ioutil.TempDir("", "testnotifyoom-")
if err != nil {
t.Fatal(err)
}
oomPath := filepath.Join(memoryPath, "memory.oom_control")
eventPath := filepath.Join(memoryPath, "cgroup.event_control")
if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
var eventFd, oomControlFd int
ooms, err := notifyOnOOM(helper.CgroupData)
st := &configs.State{
CgroupPaths: map[string]string{
"memory": memoryPath,
},
}
ooms, err := NotifyOnOOM(st)
if err != nil {
t.Fatal("expected no error, got:", err)
}
memoryPath, _ := helper.CgroupData.path("memory")
data, err := readFile(memoryPath, "cgroup.event_control")
data, err := ioutil.ReadFile(eventPath)
if err != nil {
t.Fatal("couldn't read event control file:", err)
}
if _, err := fmt.Sscanf(data, "%d %d", &eventFd, &oomControlFd); err != nil {
if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil {
t.Fatalf("invalid control data %q: %s", data, err)
}
@ -62,7 +74,9 @@ func TestNotifyOnOOM(t *testing.T) {
// simulate what happens when a cgroup is destroyed by cleaning up and then
// writing to the eventfd.
helper.cleanup()
if err := os.RemoveAll(memoryPath); err != nil {
t.Fatal(err)
}
if _, err := syscall.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}

View File

@ -26,11 +26,12 @@ func main() {
app.Before = preload
app.Commands = []cli.Command{
configCommand,
execCommand,
initCommand,
statsCommand,
configCommand,
oomCommand,
pauseCommand,
statsCommand,
unpauseCommand,
}

29
nsinit/oom.go Normal file
View File

@ -0,0 +1,29 @@
package main
import (
"log"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/configs"
)
// oomCommand describes the "oom" subcommand: it is registered under the name
// "oom" and runs oomAction to display OOM notifications for a container.
var oomCommand = cli.Command{
Name: "oom",
Usage: "display oom notifications for a container",
Action: oomAction,
}
// oomAction is the handler for the "oom" command. It loads the container
// state from dataPath, subscribes to OOM notifications for that state, and
// logs one line per notification until the notification channel is closed.
// Any failure while loading the state or subscribing terminates the program
// through log.Fatal.
func oomAction(context *cli.Context) {
	state, stateErr := configs.GetState(dataPath)
	if stateErr != nil {
		log.Fatal(stateErr)
	}

	notifications, notifyErr := libcontainer.NotifyOnOOM(state)
	if notifyErr != nil {
		log.Fatal(notifyErr)
	}

	for {
		// A failed receive means the channel was closed; stop listening.
		if _, open := <-notifications; !open {
			return
		}
		log.Printf("OOM notification received")
	}
}

View File

@ -177,11 +177,11 @@
],
"hostname": "koye",
"namespaces": [
{"name":"NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
{"type":"NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{

View File

@ -176,11 +176,11 @@
],
"hostname": "koye",
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{

View File

@ -0,0 +1,200 @@
{
"capabilities": [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL"
],
"cgroups": {
"allowed_devices": [
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 98
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 1,
"path": "/dev/console",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"path": "/dev/tty0",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"minor_number": 1,
"path": "/dev/tty1",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 136,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 2,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 10,
"minor_number": 200,
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"name": "docker-koye",
"parent": "docker"
},
"restrict_sys": true,
"mount_config": {
"device_nodes": [
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"mounts": [
{
"type": "tmpfs",
"destination": "/tmp"
}
]
},
"environment": [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=koye",
"TERM=xterm"
],
"hostname": "koye",
"namespaces": [
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",
"gateway": "localhost",
"mtu": 1500,
"type": "loopback"
}
],
"tty": true,
"user": "daemon"
}

View File

@ -182,11 +182,11 @@
],
"hostname": "koye",
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{

View File

@ -176,11 +176,11 @@
],
"hostname": "koye",
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{

View File

@ -178,11 +178,11 @@
],
"hostname": "koye",
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"}
],
"networks": [
{

251
sample_configs/userns.json Normal file
View File

@ -0,0 +1,251 @@
{
"capabilities": [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL"
],
"cgroups": {
"allowed_devices": [
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 98
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 1,
"path": "/dev/console",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"path": "/dev/tty0",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"minor_number": 1,
"path": "/dev/tty1",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 136,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 2,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 10,
"minor_number": 200,
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"name": "docker-koye",
"parent": "docker"
},
"restrict_sys": true,
"mount_config": {
"device_nodes": [
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"mounts": [
{
"type": "tmpfs",
"destination": "/tmp"
}
]
},
"environment": [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=koye",
"TERM=xterm"
],
"hostname": "koye",
"namespaces": [
{"type": "NEWIPC"},
{"type": "NEWNET"},
{"type": "NEWNS"},
{"type": "NEWPID"},
{"type": "NEWUTS"},
{"type": "NEWUSER"}
],
"networks": [
{
"address": "127.0.0.1/0",
"gateway": "localhost",
"mtu": 1500,
"type": "loopback"
},
{
"address": "172.17.0.9/16",
"gateway": "172.17.42.1",
"bridge": "docker0",
"veth_prefix": "veth",
"mtu": 1500,
"type": "veth"
}
],
"tty": true,
"user": "root",
"uid_mappings": [
{
"container_id": 0,
"host_id": 1000,
"size": 1
},
{
"container_id": 1,
"host_id": 1,
"size": 999
},
{
"container_id": 1001,
"host_id": 1001,
"size": 9000
}
],
"gid_mappings": [
{
"container_id": 0,
"host_id": 1000,
"size": 1
},
{
"container_id": 1,
"host_id": 1,
"size": 999
},
{
"container_id": 1001,
"host_id": 1001,
"size": 9000
}
],
"rlimits": [
{
"type": 7,
"hard": 999,
"soft": 999
}
]
}

View File

@ -1 +1,2 @@
Tianon Gravi <admwiggin@gmail.com> (@tianon)
Aleksa Sarai <cyphar@cyphar.com> (@cyphar)

View File

@ -9,22 +9,22 @@ import (
// Unix-specific path to the passwd and group formatted files.
const (
unixPasswdFile = "/etc/passwd"
unixGroupFile = "/etc/group"
unixPasswdPath = "/etc/passwd"
unixGroupPath = "/etc/group"
)
func GetPasswdFile() (string, error) {
return unixPasswdFile, nil
func GetPasswdPath() (string, error) {
return unixPasswdPath, nil
}
func GetPasswd() (io.ReadCloser, error) {
return os.Open(unixPasswdFile)
return os.Open(unixPasswdPath)
}
func GetGroupFile() (string, error) {
return unixGroupFile, nil
func GetGroupPath() (string, error) {
return unixGroupPath, nil
}
func GetGroup() (io.ReadCloser, error) {
return os.Open(unixGroupFile)
return os.Open(unixGroupPath)
}

View File

@ -4,7 +4,7 @@ package user
import "io"
func GetPasswdFile() (string, error) {
func GetPasswdPath() (string, error) {
return "", ErrUnsupported
}
@ -12,7 +12,7 @@ func GetPasswd() (io.ReadCloser, error) {
return nil, ErrUnsupported
}
func GetGroupFile() (string, error) {
func GetGroupPath() (string, error) {
return "", ErrUnsupported
}

View File

@ -197,11 +197,11 @@ type ExecUser struct {
Home string
}
// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the
// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
// given file paths and uses that data as the arguments to GetExecUser. If the
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil