diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..4c2914fc --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +nsinit/nsinit diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 60b1135a..7ed9be81 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -3,7 +3,7 @@ package cgroups import ( "fmt" - "github.com/docker/libcontainer/devices" + "github.com/docker/libcontainer/configs" ) type Manager interface { @@ -17,7 +17,7 @@ type Manager interface { GetStats() (*Stats, error) // Toggles the freezer cgroup according with specified state - Freeze(state FreezerState) error + Freeze(state configs.FreezerState) error // Destroys the cgroup set Destroy() error @@ -33,14 +33,6 @@ type Manager interface { GetPaths() map[string]string } -type FreezerState string - -const ( - Undefined FreezerState = "" - Frozen FreezerState = "FROZEN" - Thawed FreezerState = "THAWED" -) - type NotFoundError struct { Subsystem string } @@ -59,26 +51,6 @@ func IsNotFound(err error) bool { if err == nil { return false } - _, ok := err.(*NotFoundError) return ok } - -type Cgroup struct { - Name string `json:"name,omitempty"` - Parent string `json:"parent,omitempty"` // name of parent cgroup or slice - - AllowAllDevices bool `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - AllowedDevices []*devices.Device `json:"allowed_devices,omitempty"` - Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) - MemoryReservation int64 `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes) - MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap - CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) - CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). 
Allowed cpu time in a given period. - CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. - CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use - CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use - BlkioWeight int64 `json:"blkio_weight,omitempty"` // Specifies per cgroup weight, range is from 10 to 1000. - Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process - Slice string `json:"slice,omitempty"` // Parent slice to use for systemd -} diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 11d35d7a..4a3a8864 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -8,6 +8,7 @@ import ( "strconv" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) var ( @@ -24,8 +25,17 @@ var ( CgroupProcesses = "cgroup.procs" ) +type subsystem interface { + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'data'. + Remove(*data) error + // Creates and joins the cgroup represented by data. + Set(*data) error +} + type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -46,19 +56,10 @@ func init() { } } -type subsystem interface { - // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. - GetStats(path string, stats *cgroups.Stats) error - // Removes the cgroup represented by 'data'. - Remove(*data) error - // Creates and joins the cgroup represented by data. - Set(*data) error -} - type data struct { root string cgroup string - c *cgroups.Cgroup + c *configs.Cgroup pid int } @@ -109,7 +110,7 @@ func (m *Manager) GetPaths() map[string]string { // Symmetrical public function to update device based cgroups. Also available // in the systemd implementation. 
-func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { d, err := getCgroupData(c, pid) if err != nil { return err @@ -137,7 +138,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { // Freeze toggles the container's freezer cgroup depending on the state // provided -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { d, err := getCgroupData(m.Cgroups, 0) if err != nil { return err @@ -170,7 +171,7 @@ func (m *Manager) GetPids() ([]int, error) { return cgroups.ReadProcsFile(dir) } -func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) { +func getCgroupData(c *configs.Cgroup, pid int) (*data, error) { if cgroupRoot == "" { return nil, fmt.Errorf("failed to find the cgroup root") } diff --git a/cgroups/fs/devices.go b/cgroups/fs/devices.go index 98d5d2d7..e904e10c 100644 --- a/cgroups/fs/devices.go +++ b/cgroups/fs/devices.go @@ -17,7 +17,7 @@ func (s *DevicesGroup) Set(d *data) error { } for _, dev := range d.c.AllowedDevices { - if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { + if err := writeFile(dir, "devices.allow", dev.CgroupString()); err != nil { return err } } diff --git a/cgroups/fs/freezer.go b/cgroups/fs/freezer.go index c6b677fa..b881d0d4 100644 --- a/cgroups/fs/freezer.go +++ b/cgroups/fs/freezer.go @@ -5,6 +5,7 @@ import ( "time" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type FreezerGroup struct { @@ -12,7 +13,7 @@ type FreezerGroup struct { func (s *FreezerGroup) Set(d *data) error { switch d.c.Freezer { - case cgroups.Frozen, cgroups.Thawed: + case configs.Frozen, configs.Thawed: dir, err := d.path("freezer") if err != nil { return err diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go index bd5fd48a..b8e2010e 100644 --- a/cgroups/manager/manager.go +++ b/cgroups/manager/manager.go @@ -4,13 +4,14 @@ import ( 
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" + "github.com/docker/libcontainer/configs" ) // Create a new cgroup manager with specified configuration // TODO this object is not really initialized until Apply() is called. // Maybe make this to the equivalent of Apply() at some point? // @vmarmol -func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { +func NewCgroupManager(cgroups *configs.Cgroup) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ Cgroups: cgroups, @@ -23,7 +24,7 @@ func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { } // Restore a cgroup manager with specified configuration and state -func LoadCgroupManager(cgroups *cgroups.Cgroup, paths map[string]string) cgroups.Manager { +func LoadCgroupManager(cgroups *configs.Cgroup, paths map[string]string) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ Cgroups: cgroups, diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index 62928b82..8a46ea8b 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -6,10 +6,11 @@ import ( "fmt" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -37,14 +38,14 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { return fmt.Errorf("Systemd not supported") } -func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { return fmt.Errorf("Systemd not supported") } -func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { +func Freeze(c *configs.Cgroup, state configs.FreezerState) error { 
return fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 7143a595..f46067b4 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -16,11 +16,12 @@ import ( systemd "github.com/coreos/go-systemd/dbus" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/configs" "github.com/godbus/dbus" ) type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -190,7 +191,7 @@ func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } -func joinFreezer(c *cgroups.Cgroup, pid int) error { +func joinFreezer(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "freezer") if err != nil { return err @@ -203,7 +204,7 @@ func joinFreezer(c *cgroups.Cgroup, pid int) error { return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700) } -func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { +func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) if err != nil { return "", err @@ -222,7 +223,7 @@ func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil } -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { path, err := getSubsystemPath(m.Cgroups, "freezer") if err != nil { return err @@ -260,7 +261,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { panic("not implemented") } -func getUnitName(c *cgroups.Cgroup) string { +func getUnitName(c *configs.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) } @@ -275,7 +276,7 @@ func getUnitName(c *cgroups.Cgroup) string { // Note: we 
can't use systemd to set up the initial limits, and then change the cgroup // because systemd will re-write the device settings if it needs to re-apply the cgroup context. // This happens at least for v208 when any sibling unit is started. -func joinDevices(c *cgroups.Cgroup, pid int) error { +func joinDevices(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "devices") if err != nil { return err @@ -294,7 +295,7 @@ func joinDevices(c *cgroups.Cgroup, pid int) error { } for _, dev := range c.AllowedDevices { - if err := writeFile(path, "devices.allow", dev.GetCgroupAllowString()); err != nil { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { return err } } @@ -304,11 +305,11 @@ func joinDevices(c *cgroups.Cgroup, pid int) error { // Symmetrical public function to update device based cgroups. Also available // in the fs implementation. -func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { return joinDevices(c, pid) } -func joinMemory(c *cgroups.Cgroup, pid int) error { +func joinMemory(c *configs.Cgroup, pid int) error { memorySwap := c.MemorySwap if memorySwap == 0 { @@ -327,7 +328,7 @@ func joinMemory(c *cgroups.Cgroup, pid int) error { // systemd does not atm set up the cpuset controller, so we must manually // join it. 
Additionally that is a very finicky controller where each // level must have a full setup as the default for a new directory is "no cpus" -func joinCpuset(c *cgroups.Cgroup, pid int) error { +func joinCpuset(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "cpuset") if err != nil { return err diff --git a/configs/cgroup.go b/configs/cgroup.go new file mode 100644 index 00000000..0dffc640 --- /dev/null +++ b/configs/cgroup.go @@ -0,0 +1,54 @@ +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +type Cgroup struct { + Name string `json:"name,omitempty"` + + // name of parent cgroup or slice + Parent string `json:"parent,omitempty"` + + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. + AllowAllDevices bool `json:"allow_all_devices,omitempty"` + + AllowedDevices []*Device `json:"allowed_devices,omitempty"` + + // Memory limit (in bytes) + Memory int64 `json:"memory,omitempty"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation,omitempty"` + + // Total memory usage (memory + swap); set `-1' to disable swap + MemorySwap int64 `json:"memory_swap,omitempty"` + + // CPU shares (relative weight vs. other containers) + CpuShares int64 `json:"cpu_shares,omitempty"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota,omitempty"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod int64 `json:"cpu_period,omitempty"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus,omitempty"` + + // MEM to use + CpusetMems string `json:"cpuset_mems,omitempty"` + + // Specifies per cgroup weight, range is from 10 to 1000. 
+ BlkioWeight int64 `json:"blkio_weight,omitempty"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer,omitempty"` + + // Parent slice to use for systemd TODO: remove in favor of parent + Slice string `json:"slice,omitempty"` +} diff --git a/configs/config.go b/configs/config.go index d1e03f61..844a9cad 100644 --- a/configs/config.go +++ b/configs/config.go @@ -1,70 +1,43 @@ package configs -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/network" -) +import "fmt" -type MountConfig mount.MountConfig - -type Network network.Network - -type NamespaceType string - -const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" -) - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { - Type NamespaceType `json:"type"` - Path string `json:"path,omitempty"` +type Rlimit struct { + Type int `json:"type,omitempty"` + Hard uint64 `json:"hard,omitempty"` + Soft uint64 `json:"soft,omitempty"` } -type Namespaces []Namespace - -func (n *Namespaces) Remove(t NamespaceType) bool { - i := n.index(t) - if i == -1 { - return false - } - *n = append((*n)[:i], (*n)[i+1:]...) - return true -} - -func (n *Namespaces) Add(t NamespaceType, path string) { - i := n.index(t) - if i == -1 { - *n = append(*n, Namespace{Type: t, Path: path}) - return - } - (*n)[i].Path = path -} - -func (n *Namespaces) index(t NamespaceType) int { - for i, ns := range *n { - if ns.Type == t { - return i - } - } - return -1 -} - -func (n *Namespaces) Contains(t NamespaceType) bool { - return n.index(t) != -1 +// IDMap represents UID/GID Mappings for User Namespaces.
+type IDMap struct { + ContainerID int `json:"container_id,omitempty"` + HostID int `json:"host_id,omitempty"` + Size int `json:"size,omitempty"` } // Config defines configuration options for executing a process inside a contained environment. type Config struct { - // Mount specific options. - MountConfig *MountConfig `json:"mount_config,omitempty"` + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root,omitempty"` + + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. + // When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. + // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. + PivotDir string `json:"pivot_dir,omitempty"` + + // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writable + ReadonlyFs bool `json:"readonly_fs,omitempty"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts,omitempty"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
+ DeviceNodes []*Device `json:"device_nodes,omitempty"` + + MountLabel string `json:"mount_label,omitempty"` // Pathname to container's root filesystem RootFs string `json:"root_fs,omitempty"` @@ -83,9 +56,8 @@ type Config struct { // provided in Env are provided to the process Env []string `json:"environment,omitempty"` - // Tty when true will allocate a pty slave on the host for access by the container's process - // and ensure that it is mounted inside the container's rootfs - Tty bool `json:"tty,omitempty"` + // Console is the path to the console allocated to the container. + Console string `json:"console,omitempty"` // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process @@ -103,7 +75,7 @@ type Config struct { // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available - Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` + Cgroups *Cgroup `json:"cgroups,omitempty"` // AppArmorProfile specifies the profile to apply to the process running in the container and is // change at the time the process is execed @@ -124,6 +96,7 @@ type Config struct { // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. AdditionalGroups []int `json:"additional_groups,omitempty"` + // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings,omitempty"` @@ -131,36 +104,48 @@ type Config struct { GidMappings []IDMap `json:"gid_mappings,omitempty"` } -// Routes can be specified to create entries in the route table as the container is started -// -// All of destination, source, and gateway should be either IPv4 or IPv6. 
-// One of the three options must be present, and ommitted entries will use their -// IP family default for the route table. For IPv4 for example, setting the -// gateway to 1.2.3.4 and the interface to eth0 will set up a standard -// destination of 0.0.0.0(or *) when viewed in the route table. -type Route struct { - // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 - Destination string `json:"destination,omitempty"` - - // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 - Source string `json:"source,omitempty"` - - // Sets the gateway. Accepts IPv4 and IPv6 - Gateway string `json:"gateway,omitempty"` - - // The device to set this route up for, for example: eth0 - InterfaceName string `json:"interface_name,omitempty"` +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c *Config) HostUID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + } + id, found := c.hostIDFromMapping(0, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil } -type Rlimit struct { - Type int `json:"type,omitempty"` - Hard uint64 `json:"hard,omitempty"` - Soft uint64 `json:"soft,omitempty"` +// Gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled.
+func (c *Config) HostGID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(0, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root gid 0 + return 0, nil } -// IDMap represents UID/GID Mappings for User Namespaces. -type IDMap struct { - ContainerID int `json:"container_id,omitempty"` - HostID int `json:"host_id,omitempty"` - Size int `json:"size,omitempty"` +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c *Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false } diff --git a/configs/config_test.go b/configs/config_test.go index d64066c4..a74ccd40 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -5,8 +5,6 @@ import ( "os" "path/filepath" "testing" - - "github.com/docker/libcontainer/devices" ) // Checks whether the expected capability is specified in the capabilities.
@@ -19,13 +17,13 @@ func contains(expected string, values []string) bool { return false } -func containsDevice(expected *devices.Device, values []*devices.Device) bool { +func containsDevice(expected *Device, values []*Device) bool { for _, d := range values { if d.Path == expected.Path && - d.CgroupPermissions == expected.CgroupPermissions && + d.Permissions == expected.Permissions && d.FileMode == expected.FileMode && - d.MajorNumber == expected.MajorNumber && - d.MinorNumber == expected.MinorNumber && + d.Major == expected.Major && + d.Minor == expected.Minor && d.Type == expected.Type { return true } @@ -59,11 +57,6 @@ func TestConfigJsonFormat(t *testing.T) { t.Fail() } - if !container.Tty { - t.Log("tty should be set to true") - t.Fail() - } - if !container.Namespaces.Contains(NEWNET) { t.Log("namespaces should contain NEWNET") t.Fail() @@ -120,8 +113,8 @@ func TestConfigJsonFormat(t *testing.T) { } } - for _, d := range devices.DefaultSimpleDevices { - if !containsDevice(d, container.MountConfig.DeviceNodes) { + for _, d := range DefaultSimpleDevices { + if !containsDevice(d, container.DeviceNodes) { t.Logf("expected device configuration for %s", d.Path) t.Fail() } @@ -154,8 +147,8 @@ func TestSelinuxLabels(t *testing.T) { if container.ProcessLabel != label { t.Fatalf("expected process label %q but received %q", label, container.ProcessLabel) } - if container.MountConfig.MountLabel != label { - t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel) + if container.MountLabel != label { + t.Fatalf("expected mount label %q but received %q", label, container.MountLabel) } } diff --git a/configs/device.go b/configs/device.go new file mode 100644 index 00000000..18d73232 --- /dev/null +++ b/configs/device.go @@ -0,0 +1,42 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +type Device struct { + Type rune `json:"type,omitempty"` + // It is fine if this is an empty string in the case that you are 
using Wildcards + Path string `json:"path,omitempty"` + // Use the wildcard constant for wildcards. + Major int64 `json:"major,omitempty"` + // Use the wildcard constant for wildcards. + Minor int64 `json:"minor,omitempty"` + // Typically just "rwm" + Permissions string `json:"permissions,omitempty"` + // The permission bits of the file's mode + FileMode os.FileMode `json:"file_mode,omitempty"` + Uid uint32 `json:"uid,omitempty"` + Gid uint32 `json:"gid,omitempty"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/configs/device_defaults.go b/configs/device_defaults.go new file mode 100644 index 00000000..70fa4af0 --- /dev/null +++ b/configs/device_defaults.go @@ -0,0 +1,137 @@ +package configs + +var ( + // These are devices that are to be both allowed and created. 
+ DefaultSimpleDevices = []*Device{ + // /dev/null and zero + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + + { + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, + }, + + // consoles and ttys + { + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, + }, + + // /dev/urandom,/dev/random + { + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, + }, + } + DefaultAllowedDevices = append([]*Device{ + // allow mknod for any device + { + Type: 'c', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + { + Type: 'b', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + + { + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", + }, + { + Path: "/dev/tty0", + Type: 'c', + Major: 4, + Minor: 0, + Permissions: "rwm", + }, + { + Path: "/dev/tty1", + Type: 'c', + Major: 4, + Minor: 1, + Permissions: "rwm", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Path: "", + Type: 'c', + Major: 136, + Minor: Wildcard, + Permissions: "rwm", + }, + { + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", + }, + + // tuntap + { + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) + DefaultAutoCreatedDevices = append([]*Device{ + { + // /dev/fuse is created but not allowed. + // This is to allow java to work. 
Because java + // Insists on there being a /dev/fuse + // https://github.com/docker/docker/issues/514 + // https://github.com/docker/docker/issues/2393 + // + Path: "/dev/fuse", + Type: 'c', + Major: 10, + Minor: 229, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) +) diff --git a/mount/mount.go b/configs/mount.go similarity index 82% rename from mount/mount.go rename to configs/mount.go index c1b42421..f6f39992 100644 --- a/mount/mount.go +++ b/configs/mount.go @@ -1,4 +1,4 @@ -package mount +package configs import ( "fmt" @@ -10,6 +10,8 @@ import ( "github.com/docker/libcontainer/label" ) +const DefaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV + type Mount struct { Type string `json:"type,omitempty"` Source string `json:"source,omitempty"` // Source path, in the host namespace @@ -101,9 +103,31 @@ func (m *Mount) tmpfsMount(rootfs, mountLabel string) error { return fmt.Errorf("creating new tmpfs mount target %s", err) } - if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l); err != nil { + if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(DefaultMountFlags), l); err != nil { return fmt.Errorf("%s mounting %s in tmpfs", err, dest) } return nil } + +func createIfNotExists(path string, isDir bool) error { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + if isDir { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + } + return nil +} diff --git a/configs/namespaces.go b/configs/namespaces.go new file mode 100644 index 00000000..a227f1ba --- /dev/null +++ b/configs/namespaces.go @@ -0,0 +1,82 @@ +package configs + +import ( + "syscall" +) + +type NamespaceType string + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = 
"NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path,omitempty"` +} + +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + +type Namespaces []Namespace + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: syscall.CLONE_NEWNET, + NEWNS: syscall.CLONE_NEWNS, + NEWUSER: syscall.CLONE_NEWUSER, + NEWIPC: syscall.CLONE_NEWIPC, + NEWUTS: syscall.CLONE_NEWUTS, + NEWPID: syscall.CLONE_NEWPID, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This functions returns flags only for new namespaces. 
+func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} diff --git a/configs/network.go b/configs/network.go new file mode 100644 index 00000000..54218363 --- /dev/null +++ b/configs/network.go @@ -0,0 +1,62 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omitted from a container causing the +// container to be set up with the host's networking stack +type Network struct { + // Type sets the network's type, commonly veth and loopback + Type string `json:"type,omitempty"` + + // The bridge to use. + Bridge string `json:"bridge,omitempty"` + + // Prefix for the veth interfaces. + VethPrefix string `json:"veth_prefix,omitempty"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address,omitempty"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address,omitempty"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address,omitempty"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway,omitempty"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway,omitempty"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces.
+ Mtu int `json:"mtu,omitempty"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen,omitempty"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and omitted entries will use their +// IP family default for the route table. For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 + Destination string `json:"destination,omitempty"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source,omitempty"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway,omitempty"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name,omitempty"` +} diff --git a/configs/state.go b/configs/state.go index 9dc77006..27122c44 100644 --- a/configs/state.go +++ b/configs/state.go @@ -1,13 +1,5 @@ package configs -import ( - "encoding/json" - "os" - "path/filepath" - - "github.com/docker/libcontainer/network" -) - // State represents a running container's state type State struct { // InitPid is the init process id in the parent namespace @@ -17,21 +9,30 @@ type State struct { InitStartTime string `json:"init_start_time,omitempty"` // Network runtime state. - NetworkState network.NetworkState `json:"network_state,omitempty"` + NetworkState NetworkState `json:"network_state,omitempty"` // Path to all the cgroups setup for a container. Key is cgroup subsystem name.
CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` + + Status Status `json:"status,omitempty"` } -// The running state of the container. -type RunState int +// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers +// Do not depend on it outside of libcontainer. +// TODO: move veth names to config time +type NetworkState struct { + // The name of the veth interface on the Host. + VethHost string `json:"veth_host,omitempty"` + // The name of the veth interface created inside the container for the child. + VethChild string `json:"veth_child,omitempty"` +} + +// The status of a container. +type Status int const ( - // The name of the runtime state file - stateFile = "state.json" - // The container exists and is running. - Running RunState = iota + 1 + Running Status = iota + 1 // The container exists, it is in the process of being paused. Pausing @@ -42,36 +43,3 @@ const ( // The container does not exist. Destroyed ) - -// SaveState writes the container's runtime state to a state.json file -// in the specified path -func SaveState(basePath string, state *State) error { - f, err := os.Create(filepath.Join(basePath, stateFile)) - if err != nil { - return err - } - defer f.Close() - - return json.NewEncoder(f).Encode(state) -} - -// GetState reads the state.json file for a running container -func GetState(basePath string) (*State, error) { - f, err := os.Open(filepath.Join(basePath, stateFile)) - if err != nil { - return nil, err - } - defer f.Close() - - var state *State - if err := json.NewDecoder(f).Decode(&state); err != nil { - return nil, err - } - - return state, nil -} - -// DeleteState deletes the state.json file -func DeleteState(basePath string) error { - return os.Remove(filepath.Join(basePath, stateFile)) -} diff --git a/container.go b/container.go index e04a43df..9db1e297 100644 --- a/container.go +++ b/container.go @@ -4,9 +4,18 @@ NOTE: The API is in flux and mainly not 
implemented. Proceed with caution until package libcontainer import ( + "os" + + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/network" ) +type Stats struct { + NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` + CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` +} + // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can @@ -16,15 +25,14 @@ type Container interface { // Returns the ID of the container ID() string - // Returns the current run state of the container. + // Returns the current status of the container. // // errors: - // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - RunState() (configs.RunState, error) + Status() (configs.Status, error) // Returns the current config of the container. - Config() *configs.Config + Config() configs.Config // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. // @@ -41,7 +49,7 @@ type Container interface { // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - Stats() (*ContainerStats, error) + Stats() (*Stats, error) // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // @@ -50,7 +58,7 @@ type Container interface { // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, // Systemerror - System error. - StartProcess(config *ProcessConfig) (pid int, err error) + Start(process *Process) (pid int, err error) // Destroys the container after killing all running processes. // @@ -80,25 +88,17 @@ type Container interface { // Systemerror - System error. Resume() error - // Signal sends the specified signal to a process owned by the container. 
+ // Signal sends the specified signal to the init process of the container. // // errors: // ContainerDestroyed - Container no longer exists, // ContainerPaused - Container is paused, // Systemerror - System error. - Signal(pid, signal int) error + Signal(signal os.Signal) error - // Wait waits for the init process of the conatiner to die and returns it's exit status. + // OOM returns a read-only channel signaling when the container receives an OOM notification. // // errors: - // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - Wait() (exitStatus int, err error) - - // WaitProcess waits on a process owned by the container. - // - // errors: - // ContainerDestroyed - Container no longer exists, - // Systemerror - System error. - WaitProcess(pid int) (exitStatus int, err error) + OOM() (<-chan struct{}, error) } diff --git a/devices/defaults.go b/devices/defaults.go deleted file mode 100644 index e0ad0b08..00000000 --- a/devices/defaults.go +++ /dev/null @@ -1,159 +0,0 @@ -package devices - -var ( - // These are devices that are to be both allowed and created. 
- - DefaultSimpleDevices = []*Device{ - // /dev/null and zero - { - Path: "/dev/null", - Type: 'c', - MajorNumber: 1, - MinorNumber: 3, - CgroupPermissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/zero", - Type: 'c', - MajorNumber: 1, - MinorNumber: 5, - CgroupPermissions: "rwm", - FileMode: 0666, - }, - - { - Path: "/dev/full", - Type: 'c', - MajorNumber: 1, - MinorNumber: 7, - CgroupPermissions: "rwm", - FileMode: 0666, - }, - - // consoles and ttys - { - Path: "/dev/tty", - Type: 'c', - MajorNumber: 5, - MinorNumber: 0, - CgroupPermissions: "rwm", - FileMode: 0666, - }, - - // /dev/urandom,/dev/random - { - Path: "/dev/urandom", - Type: 'c', - MajorNumber: 1, - MinorNumber: 9, - CgroupPermissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/random", - Type: 'c', - MajorNumber: 1, - MinorNumber: 8, - CgroupPermissions: "rwm", - FileMode: 0666, - }, - } - - DefaultAllowedDevices = append([]*Device{ - // allow mknod for any device - { - Type: 'c', - MajorNumber: Wildcard, - MinorNumber: Wildcard, - CgroupPermissions: "m", - }, - { - Type: 'b', - MajorNumber: Wildcard, - MinorNumber: Wildcard, - CgroupPermissions: "m", - }, - - { - Path: "/dev/console", - Type: 'c', - MajorNumber: 5, - MinorNumber: 1, - CgroupPermissions: "rwm", - }, - { - Path: "/dev/tty0", - Type: 'c', - MajorNumber: 4, - MinorNumber: 0, - CgroupPermissions: "rwm", - }, - { - Path: "/dev/tty1", - Type: 'c', - MajorNumber: 4, - MinorNumber: 1, - CgroupPermissions: "rwm", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Path: "", - Type: 'c', - MajorNumber: 136, - MinorNumber: Wildcard, - CgroupPermissions: "rwm", - }, - { - Path: "", - Type: 'c', - MajorNumber: 5, - MinorNumber: 2, - CgroupPermissions: "rwm", - }, - - // tuntap - { - Path: "", - Type: 'c', - MajorNumber: 10, - MinorNumber: 200, - CgroupPermissions: "rwm", - }, - - /*// fuse - { - Path: "", - Type: 'c', - MajorNumber: 10, - MinorNumber: 229, - CgroupPermissions: "rwm", - }, - - // rtc - { - Path: "", - Type: 
'c', - MajorNumber: 254, - MinorNumber: 0, - CgroupPermissions: "rwm", - }, - */ - }, DefaultSimpleDevices...) - - DefaultAutoCreatedDevices = append([]*Device{ - { - // /dev/fuse is created but not allowed. - // This is to allow java to work. Because java - // Insists on there being a /dev/fuse - // https://github.com/docker/docker/issues/514 - // https://github.com/docker/docker/issues/2393 - // - Path: "/dev/fuse", - Type: 'c', - MajorNumber: 10, - MinorNumber: 229, - CgroupPermissions: "rwm", - }, - }, DefaultSimpleDevices...) -) diff --git a/devices/devices.go b/devices/devices.go index 8e86d952..b3f67aa3 100644 --- a/devices/devices.go +++ b/devices/devices.go @@ -7,14 +7,12 @@ import ( "os" "path/filepath" "syscall" -) -const ( - Wildcard = -1 + "github.com/docker/libcontainer/configs" ) var ( - ErrNotADeviceNode = errors.New("not a device node") + ErrNotADevice = errors.New("not a device node") ) // Testing dependencies @@ -23,45 +21,20 @@ var ( ioutilReadDir = ioutil.ReadDir ) -type Device struct { - Type rune `json:"type,omitempty"` - Path string `json:"path,omitempty"` // It is fine if this is an empty string in the case that you are using Wildcards - MajorNumber int64 `json:"major_number,omitempty"` // Use the wildcard constant for wildcards. - MinorNumber int64 `json:"minor_number,omitempty"` // Use the wildcard constant for wildcards. 
- CgroupPermissions string `json:"cgroup_permissions,omitempty"` // Typically just "rwm" - FileMode os.FileMode `json:"file_mode,omitempty"` // The permission bits of the file's mode - Uid uint32 `json:"uid,omitempty"` - Gid uint32 `json:"gid,omitempty"` -} - -func GetDeviceNumberString(deviceNumber int64) string { - if deviceNumber == Wildcard { - return "*" - } else { - return fmt.Sprintf("%d", deviceNumber) - } -} - -func (device *Device) GetCgroupAllowString() string { - return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions) -} - // Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct. -func GetDevice(path, cgroupPermissions string) (*Device, error) { +func DeviceFromPath(path, permissions string) (*configs.Device, error) { fileInfo, err := osLstat(path) if err != nil { return nil, err } - var ( devType rune mode = fileInfo.Mode() fileModePermissionBits = os.FileMode.Perm(mode) ) - switch { case mode&os.ModeDevice == 0: - return nil, ErrNotADeviceNode + return nil, ErrNotADevice case mode&os.ModeCharDevice != 0: fileModePermissionBits |= syscall.S_IFCHR devType = 'c' @@ -69,36 +42,33 @@ func GetDevice(path, cgroupPermissions string) (*Device, error) { fileModePermissionBits |= syscall.S_IFBLK devType = 'b' } - stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) if !ok { return nil, fmt.Errorf("cannot determine the device number for device %s", path) } devNumber := int(stat_t.Rdev) - - return &Device{ - Type: devType, - Path: path, - MajorNumber: Major(devNumber), - MinorNumber: Minor(devNumber), - CgroupPermissions: cgroupPermissions, - FileMode: fileModePermissionBits, - Uid: stat_t.Uid, - Gid: stat_t.Gid, + return &configs.Device{ + Type: devType, + Path: path, + Major: Major(devNumber), + Minor: Minor(devNumber), + Permissions: 
permissions, + FileMode: fileModePermissionBits, + Uid: stat_t.Uid, + Gid: stat_t.Gid, }, nil } -func GetHostDeviceNodes() ([]*Device, error) { +func HostDevices() ([]*configs.Device, error) { return getDeviceNodes("/dev") } -func getDeviceNodes(path string) ([]*Device, error) { +func getDeviceNodes(path string) ([]*configs.Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err } - - out := []*Device{} + out := []*configs.Device{} for _, f := range files { switch { case f.IsDir(): @@ -117,16 +87,14 @@ func getDeviceNodes(path string) ([]*Device, error) { case f.Name() == "console": continue } - - device, err := GetDevice(filepath.Join(path, f.Name()), "rwm") + device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") if err != nil { - if err == ErrNotADeviceNode { + if err == ErrNotADevice { continue } return nil, err } out = append(out, device) } - return out, nil } diff --git a/devices/devices_test.go b/devices/devices_test.go index fec40022..9e52fc4e 100644 --- a/devices/devices_test.go +++ b/devices/devices_test.go @@ -6,7 +6,7 @@ import ( "testing" ) -func TestGetDeviceLstatFailure(t *testing.T) { +func TestDeviceFromPathLstatFailure(t *testing.T) { testError := errors.New("test error") // Override os.Lstat to inject error. @@ -14,13 +14,13 @@ func TestGetDeviceLstatFailure(t *testing.T) { return nil, testError } - _, err := GetDevice("", "") + _, err := DeviceFromPath("", "") if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } -func TestGetHostDeviceNodesIoutilReadDirFailure(t *testing.T) { +func TestHostDevicesIoutilReadDirFailure(t *testing.T) { testError := errors.New("test error") // Override ioutil.ReadDir to inject error. 
@@ -28,13 +28,13 @@ func TestGetHostDeviceNodesIoutilReadDirFailure(t *testing.T) { return nil, testError } - _, err := GetHostDeviceNodes() + _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } -func TestGetHostDeviceNodesIoutilReadDirDeepFailure(t *testing.T) { +func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) { testError := errors.New("test error") called := false @@ -54,7 +54,7 @@ func TestGetHostDeviceNodesIoutilReadDirDeepFailure(t *testing.T) { return []os.FileInfo{fi}, nil } - _, err := GetHostDeviceNodes() + _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } diff --git a/devices/number.go b/devices/number.go index 3aae380b..9e8feb83 100644 --- a/devices/number.go +++ b/devices/number.go @@ -20,7 +20,3 @@ func Major(devNumber int) int64 { func Minor(devNumber int) int64 { return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)) } - -func Mkdev(majorNumber int64, minorNumber int64) int { - return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12)) -} diff --git a/integration/exec_test.go b/integration/exec_test.go index 9ec617d1..745da157 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "os" "strings" - "syscall" "testing" "github.com/docker/libcontainer" @@ -192,6 +191,20 @@ func newTestRoot() (string, error) { return dir, nil } +func waitProcess(pid int, t *testing.T) { + p, err := os.FindProcess(pid) + if err != nil { + t.Fatal(err) + } + status, err := p.Wait() + if err != nil { + t.Fatal(err) + } + if !status.Success() { + t.Fatal(status) + } +} + func TestEnter(t *testing.T) { if testing.Short() { return @@ -229,12 +242,12 @@ func TestEnter(t *testing.T) { var stdout, stdout2 bytes.Buffer - pconfig := libcontainer.ProcessConfig{ + pconfig := libcontainer.Process{ Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, Stdin: 
stdinR, Stdout: &stdout, } - pid, err := container.StartProcess(&pconfig) + pid, err := container.Start(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { @@ -250,7 +263,7 @@ func TestEnter(t *testing.T) { pconfig.Stdin = stdinR2 pconfig.Stdout = &stdout2 - pid2, err := container.StartProcess(&pconfig) + pid2, err := container.Start(&pconfig) stdinR2.Close() defer stdinW2.Close() if err != nil { @@ -273,27 +286,11 @@ func TestEnter(t *testing.T) { } // Wait processes - var status syscall.WaitStatus - stdinW2.Close() - exitCode, err := container.WaitProcess(pid2) - if err != nil { - t.Fatal(err) - } - status = syscall.WaitStatus(exitCode) - if status.ExitStatus() != 0 { - t.Fatal(exitCode) - } + waitProcess(pid2, t) stdinW.Close() - exitCode, err = container.WaitProcess(pid) - if err != nil { - t.Fatal(err) - } - status = syscall.WaitStatus(exitCode) - if status.ExitStatus() != 0 { - t.Fatal(exitCode) - } + waitProcess(pid, t) // Check that both processes live in the same pidns pidns := string(stdout.Bytes()) @@ -345,11 +342,11 @@ func TestFreeze(t *testing.T) { t.Fatal(err) } - pconfig := libcontainer.ProcessConfig{ + pconfig := libcontainer.Process{ Args: []string{"cat"}, Stdin: stdinR, } - pid, err := container.StartProcess(&pconfig) + pid, err := container.Start(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { @@ -364,7 +361,7 @@ func TestFreeze(t *testing.T) { if err := container.Pause(); err != nil { t.Fatal(err) } - state, err := container.RunState() + state, err := container.Status() if err != nil { t.Fatal(err) } diff --git a/integration/init_test.go b/integration/init_test.go index f9c1e3cf..6b4bc32d 100644 --- a/integration/init_test.go +++ b/integration/init_test.go @@ -6,7 +6,7 @@ import ( "runtime" "github.com/docker/libcontainer" - _ "github.com/docker/libcontainer/namespaces/nsenter" + _ "github.com/docker/libcontainer/nsenter" ) // init runs the libcontainer initialization code because of the busybox style needs diff --git 
a/integration/template_test.go b/integration/template_test.go index 372cc695..28c80196 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -3,9 +3,7 @@ package integration import ( "syscall" - "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/devices" ) // newTemplateConfig returns a base template for running a container @@ -15,7 +13,6 @@ import ( func newTemplateConfig(rootfs string) *configs.Config { return &configs.Config{ RootFs: rootfs, - Tty: false, Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", @@ -39,17 +36,15 @@ func newTemplateConfig(rootfs string) *configs.Config { {Type: configs.NEWPID}, {Type: configs.NEWNET}, }), - Cgroups: &cgroups.Cgroup{ + Cgroups: &configs.Cgroup{ Name: "test", Parent: "integration", AllowAllDevices: false, - AllowedDevices: devices.DefaultAllowedDevices, + AllowedDevices: configs.DefaultAllowedDevices, }, - MountConfig: &configs.MountConfig{ - DeviceNodes: devices.DefaultAutoCreatedDevices, - }, - Hostname: "integration", + DeviceNodes: configs.DefaultAutoCreatedDevices, + Hostname: "integration", Env: []string{ "HOME=/root", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/integration/utils_test.go b/integration/utils_test.go index 93fe3b4b..39c9a126 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -91,9 +91,8 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe buffers = newStdBuffers() - process := &libcontainer.ProcessConfig{ + process := &libcontainer.Process{ Args: args, - Env: make([]string, 0), Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, @@ -110,7 +109,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe } defer container.Destroy() - pid, err := container.StartProcess(process) + pid, err := container.Start(process) if err != nil { return nil, -1, err } diff --git 
a/linux_container.go b/linux_container.go index e5c6826a..bf501c86 100644 --- a/linux_container.go +++ b/linux_container.go @@ -5,18 +5,35 @@ package libcontainer import ( "encoding/json" "fmt" + "io" + "io/ioutil" "os" "os/exec" "path/filepath" "syscall" + "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/namespaces" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/network" + "github.com/docker/libcontainer/system" "github.com/golang/glog" ) +const ( + EXIT_SIGNAL_OFFSET = 128 +) + +type initError struct { + Message string `json:"message,omitempty"` +} + +func (i initError) Error() string { + return i.Message +} + type linuxContainer struct { id string root string @@ -26,19 +43,20 @@ type linuxContainer struct { initArgs []string } +// ID returns the container's unique ID func (c *linuxContainer) ID() string { return c.id } -func (c *linuxContainer) Config() *configs.Config { - return c.config +// Config returns the container's configuration +func (c *linuxContainer) Config() configs.Config { + return *c.config } -func (c *linuxContainer) RunState() (configs.RunState, error) { +func (c *linuxContainer) Status() (configs.Status, error) { if c.state.InitPid <= 0 { return configs.Destroyed, nil } - // return Running if the init process is alive err := syscall.Kill(c.state.InitPid, 0) if err != nil { @@ -47,14 +65,10 @@ func (c *linuxContainer) RunState() (configs.RunState, error) { } return 0, err } - if c.config.Cgroups != nil && - c.config.Cgroups.Freezer == cgroups.Frozen { + c.config.Cgroups.Freezer == configs.Frozen { return configs.Paused, nil } - - //FIXME get a cgroup state to check other states - return configs.Running, nil } @@ -67,13 +81,12 @@ func (c *linuxContainer) Processes() ([]int, error) { return pids, nil } -func (c *linuxContainer) Stats() (*ContainerStats, error) { +func (c 
*linuxContainer) Stats() (*Stats, error) { glog.Info("fetch container stats") var ( err error - stats = &ContainerStats{} + stats = &Stats{} ) - if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newGenericError(err, SystemError) } @@ -83,114 +96,484 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { return stats, nil } -func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { - state, err := c.RunState() +func (c *linuxContainer) Start(process *Process) (int, error) { + status, err := c.Status() if err != nil { return -1, err } - cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) - cmd.Stdin = config.Stdin - cmd.Stdout = config.Stdout - cmd.Stderr = config.Stderr - - cmd.Env = config.Env + cmd.Stdin = process.Stdin + cmd.Stdout = process.Stdout + cmd.Stderr = process.Stderr + cmd.Env = c.config.Env cmd.Dir = c.config.RootFs - if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } - + // TODO: add pdeath to config for a container cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - - if state != configs.Destroyed { + if status != configs.Destroyed { glog.Info("start new container process") - return namespaces.ExecIn(config.Args, config.Env, config.Console, cmd, c.config, c.state) + // TODO: (crosbymichael) check out console use for execin + //return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) + return c.startNewProcess(cmd, process.Args) } - - if err := c.startInitProcess(cmd, config); err != nil { + if err := c.startInitProcess(cmd, process.Args); err != nil { return -1, err } - return c.state.InitPid, nil } +func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) { + var err error + parent, child, err := newInitPipe() + if err != nil { + return -1, err + } + defer parent.Close() + cmd.ExtraFiles = []*os.File{child} + cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid)) + if err := cmd.Start(); err != 
nil { + child.Close() + return -1, err + } + child.Close() + s, err := cmd.Process.Wait() + if err != nil { + return -1, err + } + if !s.Success() { + return -1, &exec.ExitError{s} + } + decoder := json.NewDecoder(parent) + var pid *pid + if err := decoder.Decode(&pid); err != nil { + return -1, err + } + p, err := os.FindProcess(pid.Pid) + if err != nil { + return -1, err + } + terminate := func(terr error) (int, error) { + // TODO: log the errors for kill and wait + p.Kill() + p.Wait() + return -1, terr + } + // Enter cgroups. + if err := enterCgroups(c.state, pid.Pid); err != nil { + return terminate(err) + } + encoder := json.NewEncoder(parent) + if err := encoder.Encode(c.config); err != nil { + return terminate(err) + } + process := processArgs{ + Config: c.config, + Args: args, + } + if err := encoder.Encode(process); err != nil { + return terminate(err) + } + return pid.Pid, nil +} + +func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { + // create a pipe so that we can syncronize with the namespaced process and + // pass the state and configuration to the child process + parent, child, err := newInitPipe() + if err != nil { + return err + } + defer parent.Close() + cmd.ExtraFiles = []*os.File{child} + cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags() + if c.config.Namespaces.Contains(configs.NEWUSER) { + addUidGidMappings(cmd.SysProcAttr, c.config) + // Default to root user when user namespaces are enabled. 
+ if cmd.SysProcAttr.Credential == nil { + cmd.SysProcAttr.Credential = &syscall.Credential{} + } + } + glog.Info("starting container init process") + err = cmd.Start() + child.Close() + if err != nil { + return newGenericError(err, SystemError) + } + wait := func() (*os.ProcessState, error) { + ps, err := cmd.Process.Wait() + // we should kill all processes in cgroup when init is died if we use + // host PID namespace + if !c.config.Namespaces.Contains(configs.NEWPID) { + c.killAllPids() + } + return ps, newGenericError(err, SystemError) + } + terminate := func(terr error) error { + // TODO: log the errors for kill and wait + cmd.Process.Kill() + wait() + return terr + } + started, err := system.GetProcessStartTime(cmd.Process.Pid) + if err != nil { + return terminate(err) + } + // Do this before syncing with child so that no children + // can escape the cgroup + if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil { + return terminate(err) + } + defer func() { + if err != nil { + c.cgroupManager.Destroy() + } + }() + var networkState configs.NetworkState + if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil { + return terminate(err) + } + process := processArgs{ + Args: args, + Config: c.config, + NetworkState: &networkState, + } + // Start the setup process to setup the init process + if c.config.Namespaces.Contains(configs.NEWUSER) { + if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil { + return terminate(err) + } + } + // send the state to the container's init process then shutdown writes for the parent + if err := json.NewEncoder(parent).Encode(process); err != nil { + return terminate(err) + } + // shutdown writes for the parent side of the pipe + if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { + return terminate(err) + } + // wait for the child process to fully complete and receive an error message + // if one was encoutered + var ierr *initError + 
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF { + return terminate(err) + } + if ierr != nil { + return terminate(ierr) + } + + c.state.InitPid = cmd.Process.Pid + c.state.InitStartTime = started + c.state.NetworkState = networkState + c.state.CgroupPaths = c.cgroupManager.GetPaths() + + return nil +} + +func (c *linuxContainer) Destroy() error { + status, err := c.Status() + if err != nil { + return err + } + if status != configs.Destroyed { + return newGenericError(nil, ContainerNotStopped) + } + return os.RemoveAll(c.root) +} + +func (c *linuxContainer) Pause() error { + return c.cgroupManager.Freeze(configs.Frozen) +} + +func (c *linuxContainer) Resume() error { + return c.cgroupManager.Freeze(configs.Thawed) +} + +func (c *linuxContainer) Signal(signal os.Signal) error { + glog.Infof("sending signal %d to pid %d", signal, c.state.InitPid) + panic("not implemented") +} + +func (c *linuxContainer) OOM() (<-chan struct{}, error) { + return NotifyOnOOM(c.state) +} + func (c *linuxContainer) updateStateFile() error { fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename)) f, err := os.Create(fnew) if err != nil { return newGenericError(err, SystemError) } + defer f.Close() - err = json.NewEncoder(f).Encode(c.state) - if err != nil { + if err := json.NewEncoder(f).Encode(c.state); err != nil { f.Close() os.Remove(fnew) return newGenericError(err, SystemError) } - f.Close() - fname := filepath.Join(c.root, stateFilename) if err := os.Rename(fnew, fname); err != nil { return newGenericError(err, SystemError) } - return nil } -func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { - err := namespaces.Exec(config.Args, config.Env, config.Console, cmd, c.config, c.cgroupManager, c.state) +// New returns a newly initialized Pipe for communication between processes +func newInitPipe() (parent *os.File, child *os.File, err error) { + fds, err := syscall.Socketpair(syscall.AF_LOCAL, 
syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. +func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { + if container.UidMappings != nil { + sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings)) + for i, um := range container.UidMappings { + sys.UidMappings[i].ContainerID = um.ContainerID + sys.UidMappings[i].HostID = um.HostID + sys.UidMappings[i].Size = um.Size + } + } + + if container.GidMappings != nil { + sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings)) + for i, gm := range container.GidMappings { + sys.GidMappings[i].ContainerID = gm.ContainerID + sys.GidMappings[i].HostID = gm.HostID + sys.GidMappings[i].Size = gm.Size + } + } +} + +// killAllPids iterates over all of the container's processes +// sending a SIGKILL to each process. 
+func (c *linuxContainer) killAllPids() error { + glog.Info("killing all processes in container") + var procs []*os.Process + c.cgroupManager.Freeze(configs.Frozen) + pids, err := c.cgroupManager.GetPids() if err != nil { return err } + for _, pid := range pids { + // TODO: log err without aborting if we are unable to find + // a single PID + if p, err := os.FindProcess(pid); err == nil { + procs = append(procs, p) + p.Kill() + } + } + c.cgroupManager.Freeze(configs.Thawed) + for _, p := range procs { + p.Wait() + } + return err +} - err = c.updateStateFile() +// initializeNetworking creates the container's network stack outside of the namespace and moves +// interfaces into the container's net namespaces if necessary +func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error { + glog.Info("initailzing container's network stack") + for _, config := range c.config.Networks { + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err + } + if err := strategy.Create(config, nspid, networkState); err != nil { + return err + } + } + return nil +} + +func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error { + command := exec.Command(args[0], args[1:]...) 
+ parent, child, err := newInitPipe() if err != nil { - // FIXME c.Kill() return err } + defer parent.Close() + command.ExtraFiles = []*os.File{child} + command.Dir = container.RootFs + command.Env = append(command.Env, + fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), + fmt.Sprintf("_LIBCONTAINER_USERNS=1")) + err = command.Start() + child.Close() + if err != nil { + return err + } + s, err := command.Process.Wait() + if err != nil { + return err + } + if !s.Success() { + return &exec.ExitError{s} + } + decoder := json.NewDecoder(parent) + var pid *pid + if err := decoder.Decode(&pid); err != nil { + return err + } + p, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + terminate := func(terr error) error { + // TODO: log the errors for kill and wait + p.Kill() + p.Wait() + return terr + } + // send the state to the container's init process then shutdown writes for the parent + if err := json.NewEncoder(parent).Encode(process); err != nil { + return terminate(err) + } + // shutdown writes for the parent side of the pipe + if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { + return terminate(err) + } + // wait for the child process to fully complete and receive an error message + // if one was encoutered + var ierr *initError + if err := decoder.Decode(&ierr); err != nil && err != io.EOF { + return terminate(err) + } + if ierr != nil { + return ierr + } + s, err = p.Wait() + if err != nil { + return err + } + if !s.Success() { + return &exec.ExitError{s} + } + return nil +} + +type pid struct { + Pid int `json:"Pid"` +} + +// Finalize entering into a container and execute a specified command +func InitIn(pipe *os.File) (err error) { + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. 
+ if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. + ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } + } + // ensure that this pipe is always closed + pipe.Close() + }() + decoder := json.NewDecoder(pipe) + var config *configs.Config + if err := decoder.Decode(&config); err != nil { + return err + } + var process *processArgs + if err := decoder.Decode(&process); err != nil { + return err + } + if err := finalizeSetns(config); err != nil { + return err + } + if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil { + return err + } + panic("unreachable") +} + +// finalize expects that the setns calls have been setup and that is has joined an +// existing namespace +func finalizeSetns(container *configs.Config) error { + // clear the current processes env and replace it with the environment defined on the container + if err := loadContainerEnvironment(container); err != nil { + return err + } + + if err := setupRlimits(container); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + + if err := finalizeNamespace(container); err != nil { + return err + } + + if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) + } + + if container.ProcessLabel != "" { + if err := label.SetProcessLabel(container.ProcessLabel); err != nil { + return err + } + } return nil } -func (c *linuxContainer) Destroy() error { - state, err := c.RunState() - if err != nil { +// SetupContainer is run to setup mounts and networking related operations +// for a user namespace enabled process as a user namespace root doesn't +// have permissions to perform these operations. 
+// The setup process joins all the namespaces of user namespace enabled init +// except the user namespace, so it run as root in the root user namespace +// to perform these operations. +func SetupContainer(process *processArgs) error { + container := process.Config + networkState := process.NetworkState + + // TODO : move to validation + /* + rootfs, err := utils.ResolveRootfs(container.RootFs) + if err != nil { + return err + } + */ + + // clear the current processes env and replace it with the environment + // defined on the container + if err := loadContainerEnvironment(container); err != nil { return err } - if state != configs.Destroyed { - return newGenericError(nil, ContainerNotStopped) + cloneFlags := container.Namespaces.CloneFlags() + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(container.Networks) != 0 || len(container.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(container, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(container); err != nil { + return fmt.Errorf("setup route %s", err) + } } - os.RemoveAll(c.root) + label.Init() + + // InitializeMountNamespace() can be executed only for a new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(container); err != nil { + return fmt.Errorf("setup mount namespace %s", err) + } + } return nil } -func (c *linuxContainer) Pause() error { - return c.cgroupManager.Freeze(cgroups.Frozen) -} - -func (c *linuxContainer) Resume() error { - return c.cgroupManager.Freeze(cgroups.Thawed) -} - -func (c *linuxContainer) Signal(pid, signal int) error { - glog.Infof("sending signal %d to pid %d", signal, pid) - panic("not implemented") -} - -func (c *linuxContainer) Wait() (int, error) { - return c.WaitProcess(c.state.InitPid) -} - -func (c *linuxContainer) WaitProcess(pid int) (int, error) { - var status 
syscall.WaitStatus - - _, err := syscall.Wait4(pid, &status, 0, nil) - if err != nil { - return -1, newGenericError(err, SystemError) - } - - return int(status), err +func enterCgroups(state *configs.State, pid int) error { + return cgroups.EnterPid(state.CgroupPaths, pid) } diff --git a/linux_container_test.go b/linux_container_test.go index 17bd8b9a..7d4eae92 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -34,7 +34,7 @@ func (m *mockCgroupManager) GetPaths() map[string]string { return nil } -func (m *mockCgroupManager) Freeze(state cgroups.FreezerState) error { +func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } diff --git a/linux_factory.go b/linux_factory.go index b88a66de..d7bf8a50 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -5,15 +5,28 @@ package libcontainer import ( "encoding/json" "fmt" + "io/ioutil" "os" "path/filepath" "regexp" + "strings" + "syscall" "github.com/golang/glog" + "github.com/docker/libcontainer/apparmor" cgroups "github.com/docker/libcontainer/cgroups/manager" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/namespaces" + "github.com/docker/libcontainer/console" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" + "github.com/docker/libcontainer/netlink" + "github.com/docker/libcontainer/network" + "github.com/docker/libcontainer/security/capabilities" + "github.com/docker/libcontainer/security/restrict" + "github.com/docker/libcontainer/system" + "github.com/docker/libcontainer/user" + "github.com/docker/libcontainer/utils" ) const ( @@ -26,6 +39,13 @@ var ( maxIdLen = 1024 ) +// Process is used for transferring parameters from Exec() to Init() +type processArgs struct { + Args []string `json:"args,omitempty"` + Config *configs.Config `json:"config,omitempty"` + NetworkState *configs.NetworkState `json:"network_state,omitempty"` +} + // New returns a linux based container factory based in the root directory. 
func New(root string, initArgs []string) (Factory, error) { if root != "" { @@ -33,7 +53,6 @@ func New(root string, initArgs []string) (Factory, error) { return nil, newGenericError(err, SystemError) } } - return &linuxFactory{ root: root, initArgs: initArgs, @@ -51,44 +70,32 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err if l.root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } - if !idRegex.MatchString(id) { - return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + if err := l.validateID(id); err != nil { + return nil, err } - - if len(id) > maxIdLen { - return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) - } - containerRoot := filepath.Join(l.root, id) - _, err := os.Stat(containerRoot) - if err == nil { + if _, err := os.Stat(containerRoot); err == nil { return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) } else if !os.IsNotExist(err) { return nil, newGenericError(err, SystemError) } - data, err := json.MarshalIndent(config, "", "\t") if err != nil { return nil, newGenericError(err, SystemError) } - if err := os.MkdirAll(containerRoot, 0700); err != nil { return nil, newGenericError(err, SystemError) } - f, err := os.Create(filepath.Join(containerRoot, configFilename)) if err != nil { os.RemoveAll(containerRoot) return nil, newGenericError(err, SystemError) } defer f.Close() - - _, err = f.Write(data) - if err != nil { + if _, err := f.Write(data); err != nil { os.RemoveAll(containerRoot) return nil, newGenericError(err, SystemError) } - cgroupManager := cgroups.NewCgroupManager(config.Cgroups) return &linuxContainer{ id: id, @@ -110,13 +117,11 @@ func (l *linuxFactory) Load(id string) (Container, error) { if err != nil { return nil, err } - glog.Infof("loading container state from %s", containerRoot) state, err := l.loadContainerState(containerRoot) if err != nil { return nil, err } - 
cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths) glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ @@ -129,6 +134,54 @@ func (l *linuxFactory) Load(id string) (Container, error) { }, nil } +// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state +// This is a low level implementation detail of the reexec and should not be consumed externally +func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { + pipe := os.NewFile(uintptr(pipefd), "pipe") + setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != "" + pid := os.Getenv("_LIBCONTAINER_INITPID") + if pid != "" && !setupUserns { + return InitIn(pipe) + } + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. + if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. 
+ ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } + } + // ensure that this pipe is always closed + pipe.Close() + }() + uncleanRootfs, err := os.Getwd() + if err != nil { + return err + } + var process *processArgs + // We always read this as it is a way to sync with the parent as well + if err := json.NewDecoder(pipe).Decode(&process); err != nil { + return err + } + if setupUserns { + err = SetupContainer(process) + if err == nil { + os.Exit(0) + } else { + os.Exit(1) + } + } + if process.Config.Namespaces.Contains(configs.NEWUSER) { + return l.initUserNs(uncleanRootfs, process) + } + return l.initDefault(uncleanRootfs, process) +} + func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { f, err := os.Open(filepath.Join(root, configFilename)) if err != nil { @@ -138,7 +191,6 @@ func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) return nil, newGenericError(err, SystemError) } defer f.Close() - var config *configs.Config if err := json.NewDecoder(f).Decode(&config); err != nil { return nil, newGenericError(err, ConfigInvalid) @@ -155,7 +207,6 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { return nil, newGenericError(err, SystemError) } defer f.Close() - var state *configs.State if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, newGenericError(err, SystemError) @@ -163,16 +214,346 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { return state, nil } -// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state -// This is a low level implementation detail of the reexec and should not be consumed externally -func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { - pipe := os.NewFile(uintptr(pipefd), "pipe") +func (l *linuxFactory) validateID(id string) error { + if 
!idRegex.MatchString(id) { + return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + } + if len(id) > maxIdLen { + return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + } + return nil +} - setupUserns := os.Getenv("_LIBCONTAINER_USERNS") - pid := os.Getenv("_LIBCONTAINER_INITPID") - if pid != "" && setupUserns == "" { - return namespaces.InitIn(pipe) +func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) { + config := process.Config + networkState := process.NetworkState + + // TODO: move to validation + /* + rootfs, err := utils.ResolveRootfs(uncleanRootfs) + if err != nil { + return err + } + */ + + // clear the current processes env and replace it with the environment + // defined on the container + if err := loadContainerEnvironment(config); err != nil { + return err + } + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(config.Namespaces); err != nil { + return err + } + if config.Console != "" { + if err := console.OpenAndDup(config.Console); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return fmt.Errorf("setsid %s", err) + } + if config.Console != "" { + if err := system.Setctty(); err != nil { + return fmt.Errorf("setctty %s", err) + } } - return namespaces.Init(pipe, setupUserns != "") + cloneFlags := config.Namespaces.CloneFlags() + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(config.Networks) != 0 || len(config.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(config, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(config); err != nil { + return fmt.Errorf("setup route %s", err) + } + } + if err := setupRlimits(config); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + label.Init() + // 
InitializeMountNamespace() can be executed only for a new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(config); err != nil { + return err + } + } + if config.Hostname != "" { + // TODO: (crosbymichael) move this to pre spawn validation + if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { + return fmt.Errorf("unable to set the hostname without UTS namespace") + } + if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { + return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) + } + } + if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) + } + if err := label.SetProcessLabel(config.ProcessLabel); err != nil { + return fmt.Errorf("set process label %s", err) + } + // TODO: (crosbymichael) make this configurable at the Config level + if config.RestrictSys { + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + return fmt.Errorf("unable to restrict access to kernel files without mount namespace") + } + if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + pdeathSignal, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + if err := finalizeNamespace(config); err != nil { + return fmt.Errorf("finalize namespace %s", err) + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. 
+ if err := restoreParentDeathSignal(pdeathSignal); err != nil { + return fmt.Errorf("restore parent death signal %s", err) + } + return system.Execv(process.Args[0], process.Args[0:], config.Env) +} + +func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) { + config := process.Config + // clear the current processes env and replace it with the environment + // defined on the config + if err := loadContainerEnvironment(config); err != nil { + return err + } + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(config.Namespaces); err != nil { + return err + } + if config.Console != "" { + if err := console.OpenAndDup("/dev/console"); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return fmt.Errorf("setsid %s", err) + } + if config.Console != "" { + if err := system.Setctty(); err != nil { + return fmt.Errorf("setctty %s", err) + } + } + if config.WorkingDir == "" { + config.WorkingDir = "/" + } + + if err := setupRlimits(config); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + cloneFlags := config.Namespaces.CloneFlags() + if config.Hostname != "" { + // TODO: move validation + if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { + return fmt.Errorf("unable to set the hostname without UTS namespace") + } + if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { + return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) + } + } + if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) + } + if err := label.SetProcessLabel(config.ProcessLabel); err != nil { + return fmt.Errorf("set process label %s", err) + } + if config.RestrictSys { + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + return fmt.Errorf("unable to restrict access to kernel files without mount namespace") + } + if err := restrict.Restrict("proc/sys", 
"proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + pdeathSignal, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + if err := finalizeNamespace(config); err != nil { + return fmt.Errorf("finalize namespace %s", err) + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := restoreParentDeathSignal(pdeathSignal); err != nil { + return fmt.Errorf("restore parent death signal %s", err) + } + return system.Execv(process.Args[0], process.Args[0:], config.Env) +} + +// restoreParentDeathSignal sets the parent death signal to old. +func restoreParentDeathSignal(old int) error { + if old == 0 { + return nil + } + current, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + if old == current { + return nil + } + if err := system.ParentDeathSignal(uintptr(old)); err != nil { + return fmt.Errorf("set parent death signal %s", err) + } + // Signal self if parent is already dead. Does nothing if running in a new + // PID namespace, as Getppid will always return 0. + if syscall.Getppid() == 1 { + return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) + } + return nil +} + +// setupUser changes the groups, gid, and uid for the user inside the container +func setupUser(config *configs.Config) error { + // Set up defaults. + defaultExecUser := user.ExecUser{ + Uid: syscall.Getuid(), + Gid: syscall.Getgid(), + Home: "/", + } + passwdPath, err := user.GetPasswdPath() + if err != nil { + return err + } + groupPath, err := user.GetGroupPath() + if err != nil { + return err + } + execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) + if err != nil { + return fmt.Errorf("get supplementary groups %s", err) + } + suppGroups := append(execUser.Sgids, config.AdditionalGroups...) 
+ if err := syscall.Setgroups(suppGroups); err != nil { + return fmt.Errorf("setgroups %s", err) + } + if err := system.Setgid(execUser.Gid); err != nil { + return fmt.Errorf("setgid %s", err) + } + if err := system.Setuid(execUser.Uid); err != nil { + return fmt.Errorf("setuid %s", err) + } + // if we didn't get HOME already, set it based on the user's HOME + if envHome := os.Getenv("HOME"); envHome == "" { + if err := os.Setenv("HOME", execUser.Home); err != nil { + return fmt.Errorf("set HOME %s", err) + } + } + return nil +} + +// setupVethNetwork uses the Network config if it is not nil to initialize +// the new veth interface inside the container for use by changing the name to eth0 +// setting the MTU and IP address along with the default gateway +func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { + for _, config := range config.Networks { + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err + } + err1 := strategy.Initialize(config, networkState) + if err1 != nil { + return err1 + } + } + return nil +} + +func setupRoute(config *configs.Config) error { + for _, config := range config.Routes { + if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { + return err + } + } + return nil +} + +func setupRlimits(config *configs.Config) error { + for _, rlimit := range config.Rlimits { + l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} + if err := syscall.Setrlimit(rlimit.Type, l); err != nil { + return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) + } + } + return nil +} + +// finalizeNamespace drops the caps, sets the correct user +// and working dir, and closes any leaky file descriptors +// before execing the command inside the namespace +func finalizeNamespace(config *configs.Config) error { + // Ensure that all non-standard fds we may have accidentally + // inherited are marked close-on-exec so they stay out of 
the + // container + if err := utils.CloseExecFrom(3); err != nil { + return fmt.Errorf("close open file descriptors %s", err) + } + // drop capabilities in bounding set before changing user + if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { + return fmt.Errorf("drop bounding set %s", err) + } + // preserve existing capabilities while we change users + if err := system.SetKeepCaps(); err != nil { + return fmt.Errorf("set keep caps %s", err) + } + if err := setupUser(config); err != nil { + return fmt.Errorf("setup user %s", err) + } + if err := system.ClearKeepCaps(); err != nil { + return fmt.Errorf("clear keep caps %s", err) + } + // drop all other capabilities + if err := capabilities.DropCapabilities(config.Capabilities); err != nil { + return fmt.Errorf("drop capabilities %s", err) + } + if config.WorkingDir != "" { + if err := syscall.Chdir(config.WorkingDir); err != nil { + return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) + } + } + return nil +} + +func loadContainerEnvironment(config *configs.Config) error { + os.Clearenv() + for _, pair := range config.Env { + p := strings.SplitN(pair, "=", 2) + if len(p) < 2 { + return fmt.Errorf("invalid environment '%v'", pair) + } + if err := os.Setenv(p[0], p[1]); err != nil { + return err + } + } + return nil +} + +// joinExistingNamespaces gets all the namespace paths specified for the container and +// does a setns on the namespace fd so that the current process joins the namespace. 
+func joinExistingNamespaces(namespaces []configs.Namespace) error { + for _, ns := range namespaces { + if ns.Path != "" { + f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) + if err != nil { + return err + } + err = system.Setns(f.Fd(), uintptr(ns.Syscall())) + f.Close() + if err != nil { + return err + } + } + } + return nil } diff --git a/linux_factory_test.go b/linux_factory_test.go index 3c1e275c..fd1e830a 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -118,9 +118,6 @@ func TestFactoryLoadContainer(t *testing.T) { } config := container.Config() - if config == nil { - t.Fatal("expected non nil container config") - } if config.RootFs != expectedConfig.RootFs { t.Fatalf("expected rootfs %q but received %q", expectedConfig.RootFs, config.RootFs) diff --git a/mount/init.go b/mount/init.go index 91a27294..4f994333 100644 --- a/mount/init.go +++ b/mount/init.go @@ -8,11 +8,10 @@ import ( "path/filepath" "syscall" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount/nodes" ) -// default mount point flags const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV type mount struct { @@ -25,85 +24,60 @@ type mount struct { // InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a // new mount namespace. 
-func InitializeMountNamespace(rootfs, console string, sysReadonly bool, hostRootUid, hostRootGid int, mountConfig *MountConfig) error { - var ( - err error - flag = syscall.MS_PRIVATE - ) - - if mountConfig.NoPivotRoot { - flag = syscall.MS_SLAVE +func InitializeMountNamespace(config *configs.Config) (err error) { + if err := prepareRoot(config); err != nil { + return err } - - if err := syscall.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil { - return fmt.Errorf("mounting / with flags %X %s", (flag | syscall.MS_REC), err) + if err := mountSystem(config); err != nil { + return err } - - if err := syscall.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { - return fmt.Errorf("mouting %s as bind %s", rootfs, err) - } - - if err := mountSystem(rootfs, sysReadonly, mountConfig); err != nil { - return fmt.Errorf("mount system %s", err) - } - // apply any user specified mounts within the new mount namespace - for _, m := range mountConfig.Mounts { - if err := m.Mount(rootfs, mountConfig.MountLabel); err != nil { + for _, m := range config.Mounts { + if err := m.Mount(config.RootFs, config.MountLabel); err != nil { return err } } - - if err := nodes.CreateDeviceNodes(rootfs, mountConfig.DeviceNodes); err != nil { - return fmt.Errorf("create device nodes %s", err) - } - - if err := SetupPtmx(rootfs, console, mountConfig.MountLabel, hostRootUid, hostRootGid); err != nil { + if err := createDeviceNodes(config); err != nil { + return err + } + if err := setupPtmx(config); err != nil { return err } - // stdin, stdout and stderr could be pointing to /dev/null from parent namespace. // Re-open them inside this namespace. // FIXME: Need to fix this for user namespaces. 
- if hostRootUid == 0 { - if err := reOpenDevNull(rootfs); err != nil { - return fmt.Errorf("Failed to reopen /dev/null %s", err) + if 0 == 0 { + if err := reOpenDevNull(config.RootFs); err != nil { + return err } } - - if err := setupDevSymlinks(rootfs); err != nil { - return fmt.Errorf("dev symlinks %s", err) + if err := setupDevSymlinks(config.RootFs); err != nil { + return err } - - if err := syscall.Chdir(rootfs); err != nil { - return fmt.Errorf("chdir into %s %s", rootfs, err) + if err := syscall.Chdir(config.RootFs); err != nil { + return err } - - if mountConfig.NoPivotRoot { - err = MsMoveRoot(rootfs) + if config.NoPivotRoot { + err = msMoveRoot(config.RootFs) } else { - err = PivotRoot(rootfs, mountConfig.PivotDir) + err = pivotRoot(config.RootFs, config.PivotDir) } - if err != nil { return err } - - if mountConfig.ReadonlyFs { - if err := SetReadonly(); err != nil { + if config.ReadonlyFs { + if err := setReadonly(); err != nil { return fmt.Errorf("set readonly %s", err) } } - syscall.Umask(0022) - return nil } // mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts // inside the mount namespace -func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error { - for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) { +func mountSystem(config *configs.Config) error { + for _, m := range newSystemMounts(config.RootFs, config.MountLabel, config.RestrictSys) { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } @@ -114,28 +88,6 @@ func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) erro return nil } -func createIfNotExists(path string, isDir bool) error { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - if isDir { - if err := os.MkdirAll(path, 0755); err != nil { - return err - } - } else { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return err 
- } - f, err := os.OpenFile(path, os.O_CREATE, 0755) - if err != nil { - return err - } - f.Close() - } - } - } - return nil -} - func setupDevSymlinks(rootfs string) error { var links = [][2]string{ {"/proc/self/fd", "/dev/fd"}, @@ -210,3 +162,54 @@ func reOpenDevNull(rootfs string) error { } return nil } + +// Create the device nodes in the container. +func createDeviceNodes(config *configs.Config) error { + oldMask := syscall.Umask(0000) + for _, node := range config.DeviceNodes { + if err := createDeviceNode(config.RootFs, node); err != nil { + syscall.Umask(oldMask) + return err + } + } + syscall.Umask(oldMask) + return nil +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *configs.Device) error { + var ( + dest = filepath.Join(rootfs, node.Path) + parent = filepath.Dir(dest) + ) + if err := os.MkdirAll(parent, 0755); err != nil { + return err + } + fileMode := node.FileMode + switch node.Type { + case 'c': + fileMode |= syscall.S_IFCHR + case 'b': + fileMode |= syscall.S_IFBLK + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) { + return fmt.Errorf("mknod %s %s", node.Path, err) + } + if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { + return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) + } + return nil +} + +func prepareRoot(config *configs.Config) error { + flag := syscall.MS_PRIVATE | syscall.MS_REC + if config.NoPivotRoot { + flag = syscall.MS_SLAVE | syscall.MS_REC + } + if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { + return err + } + return syscall.Mount(config.RootFs, config.RootFs, "bind", syscall.MS_BIND|syscall.MS_REC, "") +} diff --git a/mount/mount_config.go b/mount/mount_config.go deleted file mode 100644 index f19465e6..00000000 --- a/mount/mount_config.go +++ /dev/null @@ 
-1,33 +0,0 @@ -package mount - -import ( - "errors" - - "github.com/docker/libcontainer/devices" -) - -var ErrUnsupported = errors.New("Unsupported method") - -type MountConfig struct { - // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs - // This is a common option when the container is running in ramdisk - NoPivotRoot bool `json:"no_pivot_root,omitempty"` - - // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. - // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. - // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. - PivotDir string `json:"pivot_dir,omitempty"` - - // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted - // bind mounts are writtable - ReadonlyFs bool `json:"readonly_fs,omitempty"` - - // Mounts specify additional source and destination paths that will be mounted inside the container's - // rootfs and mount namespace if specified - Mounts []*Mount `json:"mounts,omitempty"` - - // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
- DeviceNodes []*devices.Device `json:"device_nodes,omitempty"` - - MountLabel string `json:"mount_label,omitempty"` -} diff --git a/mount/msmoveroot.go b/mount/msmoveroot.go index 94afd3a9..17b73293 100644 --- a/mount/msmoveroot.go +++ b/mount/msmoveroot.go @@ -2,19 +2,14 @@ package mount -import ( - "fmt" - "syscall" -) +import "syscall" -func MsMoveRoot(rootfs string) error { +func msMoveRoot(rootfs string) error { if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { - return fmt.Errorf("mount move %s into / %s", rootfs, err) + return err } - if err := syscall.Chroot("."); err != nil { - return fmt.Errorf("chroot . %s", err) + return err } - return syscall.Chdir("/") } diff --git a/mount/nodes/nodes.go b/mount/nodes/nodes.go deleted file mode 100644 index 322c0c0e..00000000 --- a/mount/nodes/nodes.go +++ /dev/null @@ -1,57 +0,0 @@ -// +build linux - -package nodes - -import ( - "fmt" - "os" - "path/filepath" - "syscall" - - "github.com/docker/libcontainer/devices" -) - -// Create the device nodes in the container. -func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { - oldMask := syscall.Umask(0000) - defer syscall.Umask(oldMask) - - for _, node := range nodesToCreate { - if err := CreateDeviceNode(rootfs, node); err != nil { - return err - } - } - return nil -} - -// Creates the device node in the rootfs of the container. 
-func CreateDeviceNode(rootfs string, node *devices.Device) error { - var ( - dest = filepath.Join(rootfs, node.Path) - parent = filepath.Dir(dest) - ) - - if err := os.MkdirAll(parent, 0755); err != nil { - return err - } - - fileMode := node.FileMode - switch node.Type { - case 'c': - fileMode |= syscall.S_IFCHR - case 'b': - fileMode |= syscall.S_IFBLK - default: - return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) - } - - if err := syscall.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) { - return fmt.Errorf("mknod %s %s", node.Path, err) - } - - if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { - return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) - } - - return nil -} diff --git a/mount/nodes/nodes_unsupported.go b/mount/nodes/nodes_unsupported.go deleted file mode 100644 index 83660715..00000000 --- a/mount/nodes/nodes_unsupported.go +++ /dev/null @@ -1,13 +0,0 @@ -// +build !linux - -package nodes - -import ( - "errors" - - "github.com/docker/libcontainer/devices" -) - -func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { - return errors.New("Unsupported method") -} diff --git a/mount/pivotroot.go b/mount/pivotroot.go index acc3be24..3d422774 100644 --- a/mount/pivotroot.go +++ b/mount/pivotroot.go @@ -10,7 +10,7 @@ import ( "syscall" ) -func PivotRoot(rootfs, pivotBaseDir string) error { +func pivotRoot(rootfs, pivotBaseDir string) error { if pivotBaseDir == "" { pivotBaseDir = "/" } @@ -22,20 +22,16 @@ func PivotRoot(rootfs, pivotBaseDir string) error { if err != nil { return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) } - if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { return fmt.Errorf("pivot_root %s", err) } - if err := syscall.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } - // path to pivot dir now changed, update pivotDir = 
filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } - return os.Remove(pivotDir) } diff --git a/mount/ptmx.go b/mount/ptmx.go index 5b558775..657318dc 100644 --- a/mount/ptmx.go +++ b/mount/ptmx.go @@ -7,24 +7,28 @@ import ( "os" "path/filepath" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/console" ) -func SetupPtmx(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error { - ptmx := filepath.Join(rootfs, "dev/ptmx") +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.RootFs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } - - if consolePath != "" { - if err := console.Setup(rootfs, consolePath, mountLabel, hostRootUid, hostRootGid); err != nil { + if config.Console != "" { + uid, err := config.HostUID() + if err != nil { return err } + gid, err := config.HostGID() + if err != nil { + return err + } + return console.Setup(config.RootFs, config.Console, config.MountLabel, uid, gid) } - return nil } diff --git a/mount/readonly.go b/mount/readonly.go index 9b4a6f70..855c9f99 100644 --- a/mount/readonly.go +++ b/mount/readonly.go @@ -6,6 +6,6 @@ import ( "syscall" ) -func SetReadonly() error { +func setReadonly() error { return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") } diff --git a/mount/remount.go b/mount/remount.go deleted file mode 100644 index 99a01209..00000000 --- a/mount/remount.go +++ /dev/null @@ -1,31 +0,0 @@ -// +build linux - -package mount - -import "syscall" - -func RemountProc() error { - if err := syscall.Unmount("/proc", syscall.MNT_DETACH); err != nil { - return err - } - - if err := syscall.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), 
""); err != nil { - return err - } - - return nil -} - -func RemountSys() error { - if err := syscall.Unmount("/sys", syscall.MNT_DETACH); err != nil { - if err != syscall.EINVAL { - return err - } - } else { - if err := syscall.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil { - return err - } - } - - return nil -} diff --git a/namespaces/exec.go b/namespaces/exec.go deleted file mode 100644 index 1c157e3a..00000000 --- a/namespaces/exec.go +++ /dev/null @@ -1,323 +0,0 @@ -// +build linux - -package namespaces - -import ( - "encoding/json" - "fmt" - "io" - "os" - "os/exec" - "syscall" - - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/system" -) - -const ( - EXIT_SIGNAL_OFFSET = 128 -) - -func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *network.NetworkState) error { - command := exec.Command(args[0], args[1:]...) 
- - parent, child, err := newInitPipe() - if err != nil { - return err - } - defer parent.Close() - command.ExtraFiles = []*os.File{child} - - command.Dir = container.RootFs - command.Env = append(command.Env, - fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), - fmt.Sprintf("_LIBCONTAINER_USERNS=1")) - - err = command.Start() - child.Close() - if err != nil { - return err - } - - s, err := command.Process.Wait() - if err != nil { - return err - } - if !s.Success() { - return &exec.ExitError{s} - } - - decoder := json.NewDecoder(parent) - var pid *pid - - if err := decoder.Decode(&pid); err != nil { - return err - } - - p, err := os.FindProcess(pid.Pid) - if err != nil { - return err - } - - terminate := func(terr error) error { - // TODO: log the errors for kill and wait - p.Kill() - p.Wait() - return terr - } - - // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(process); err != nil { - return terminate(err) - } - - // shutdown writes for the parent side of the pipe - if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { - return terminate(err) - } - - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *initError - if err := decoder.Decode(&ierr); err != nil && err != io.EOF { - return terminate(err) - } - if ierr != nil { - return ierr - } - - s, err = p.Wait() - if err != nil { - return err - } - if !s.Success() { - return &exec.ExitError{s} - } - - return nil -} - -// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. -// Move this to libcontainer package. -// Exec performs setup outside of a namespace so that a container can be -// executed. Exec is a high level function for working with container namespaces. 
-func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) (err error) { - // create a pipe so that we can syncronize with the namespaced process and - // pass the state and configuration to the child process - parent, child, err := newInitPipe() - if err != nil { - return err - } - defer parent.Close() - command.ExtraFiles = []*os.File{child} - - command.Dir = container.RootFs - command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces)) - - if container.Namespaces.Contains(configs.NEWUSER) { - AddUidGidMappings(command.SysProcAttr, container) - - // Default to root user when user namespaces are enabled. - if command.SysProcAttr.Credential == nil { - command.SysProcAttr.Credential = &syscall.Credential{} - } - } - - if err := command.Start(); err != nil { - child.Close() - return err - } - child.Close() - - wait := func() (*os.ProcessState, error) { - ps, err := command.Process.Wait() - // we should kill all processes in cgroup when init is died if we use - // host PID namespace - if !container.Namespaces.Contains(configs.NEWPID) { - killAllPids(cgroupManager) - } - return ps, err - } - - terminate := func(terr error) error { - // TODO: log the errors for kill and wait - command.Process.Kill() - wait() - return terr - } - - started, err := system.GetProcessStartTime(command.Process.Pid) - if err != nil { - return terminate(err) - } - - // Do this before syncing with child so that no children - // can escape the cgroup - err = cgroupManager.Apply(command.Process.Pid) - if err != nil { - return terminate(err) - } - defer func() { - if err != nil { - cgroupManager.Destroy() - } - }() - - var networkState network.NetworkState - if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { - return terminate(err) - } - - process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, - ConsolePath: console, 
- Config: container, - NetworkState: &networkState, - } - - // Start the setup process to setup the init process - if container.Namespaces.Contains(configs.NEWUSER) { - if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil { - return terminate(err) - } - } - - // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(process); err != nil { - return terminate(err) - } - // shutdown writes for the parent side of the pipe - if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { - return terminate(err) - } - - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *initError - if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF { - return terminate(err) - } - if ierr != nil { - return terminate(ierr) - } - - state.InitPid = command.Process.Pid - state.InitStartTime = started - state.NetworkState = networkState - state.CgroupPaths = cgroupManager.GetPaths() - - return nil -} - -// killAllPids iterates over all of the container's processes -// sending a SIGKILL to each process. -func killAllPids(m cgroups.Manager) error { - var ( - procs []*os.Process - ) - m.Freeze(cgroups.Frozen) - pids, err := m.GetPids() - if err != nil { - return err - } - for _, pid := range pids { - // TODO: log err without aborting if we are unable to find - // a single PID - if p, err := os.FindProcess(pid); err == nil { - procs = append(procs, p) - p.Kill() - } - } - m.Freeze(cgroups.Thawed) - for _, p := range procs { - p.Wait() - } - return err -} - -// Utility function that gets a host ID for a container ID from user namespace map -// if that ID is present in the map. 
-func hostIDFromMapping(containerID int, uMap []configs.IDMap) (int, bool) { - for _, m := range uMap { - if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { - hostID := m.HostID + (containerID - m.ContainerID) - return hostID, true - } - } - return -1, false -} - -// Gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func GetHostRootGid(container *configs.Config) (int, error) { - if container.Namespaces.Contains(configs.NEWUSER) { - if container.GidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") - } - hostRootGid, found := hostIDFromMapping(0, container.GidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") - } - return hostRootGid, nil - } - - // Return default root uid 0 - return 0, nil -} - -// Gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func GetHostRootUid(container *configs.Config) (int, error) { - if container.Namespaces.Contains(configs.NEWUSER) { - if container.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") - } - hostRootUid, found := hostIDFromMapping(0, container.UidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") - } - return hostRootUid, nil - } - - // Return default root uid 0 - return 0, nil -} - -// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. 
-func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { - if container.UidMappings != nil { - sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings)) - for i, um := range container.UidMappings { - sys.UidMappings[i].ContainerID = um.ContainerID - sys.UidMappings[i].HostID = um.HostID - sys.UidMappings[i].Size = um.Size - } - } - - if container.GidMappings != nil { - sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings)) - for i, gm := range container.GidMappings { - sys.GidMappings[i].ContainerID = gm.ContainerID - sys.GidMappings[i].HostID = gm.HostID - sys.GidMappings[i].Size = gm.Size - } - } -} - -// InitializeNetworking creates the container's network stack outside of the namespace and moves -// interfaces into the container's net namespaces if necessary -func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { - for _, config := range container.Networks { - strategy, err := network.GetStrategy(config.Type) - if err != nil { - return err - } - if err := strategy.Create((*network.Network)(config), nspid, networkState); err != nil { - return err - } - } - return nil -} diff --git a/namespaces/execin.go b/namespaces/execin.go deleted file mode 100644 index 3b51a587..00000000 --- a/namespaces/execin.go +++ /dev/null @@ -1,235 +0,0 @@ -// +build linux - -package namespaces - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "os/exec" - "syscall" - - "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/system" - "github.com/docker/libcontainer/utils" -) - -type pid struct { - Pid int `json:"Pid"` -} - -// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the -// setns code in a single threaded environment joining the existing 
containers' namespaces. -func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { - var err error - - parent, child, err := newInitPipe() - if err != nil { - return -1, err - } - defer parent.Close() - - cmd.ExtraFiles = []*os.File{child} - cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid)) - - if err := cmd.Start(); err != nil { - child.Close() - return -1, err - } - child.Close() - - s, err := cmd.Process.Wait() - if err != nil { - return -1, err - } - if !s.Success() { - return -1, &exec.ExitError{s} - } - - decoder := json.NewDecoder(parent) - var pid *pid - - if err := decoder.Decode(&pid); err != nil { - return -1, err - } - - p, err := os.FindProcess(pid.Pid) - if err != nil { - return -1, err - } - - terminate := func(terr error) (int, error) { - // TODO: log the errors for kill and wait - p.Kill() - p.Wait() - return -1, terr - } - - // Enter cgroups. - if err := EnterCgroups(state, pid.Pid); err != nil { - return terminate(err) - } - - encoder := json.NewEncoder(parent) - - if err := encoder.Encode(container); err != nil { - return terminate(err) - } - - process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, - ConsolePath: console, - } - if err := encoder.Encode(process); err != nil { - return terminate(err) - } - - return pid.Pid, nil -} - -// Finalize entering into a container and execute a specified command -func InitIn(pipe *os.File) (err error) { - defer func() { - // if we have an error during the initialization of the container's init then send it back to the - // parent process in the form of an initError. - if err != nil { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. 
- ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { - panic(err) - } - } - // ensure that this pipe is always closed - pipe.Close() - }() - - decoder := json.NewDecoder(pipe) - - var container *configs.Config - if err := decoder.Decode(&container); err != nil { - return err - } - - var process *processArgs - if err := decoder.Decode(&process); err != nil { - return err - } - - if err := FinalizeSetns(container); err != nil { - return err - } - - if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil { - return err - } - - panic("unreachable") -} - -// Finalize expects that the setns calls have been setup and that is has joined an -// existing namespace -func FinalizeSetns(container *configs.Config) error { - // clear the current processes env and replace it with the environment defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - if err := FinalizeNamespace(container); err != nil { - return err - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if container.ProcessLabel != "" { - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return err - } - } - - return nil -} - -// SetupContainer is run to setup mounts and networking related operations -// for a user namespace enabled process as a user namespace root doesn't -// have permissions to perform these operations. -// The setup process joins all the namespaces of user namespace enabled init -// except the user namespace, so it run as root in the root user namespace -// to perform these operations. 
-func SetupContainer(process *processArgs) error { - container := process.Config - networkState := process.NetworkState - consolePath := process.ConsolePath - - rootfs, err := utils.ResolveRootfs(container.RootFs) - if err != nil { - return err - } - - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - cloneFlags := GetNamespaceFlags(container.Namespaces) - - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(container.Networks) != 0 || len(container.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := setupNetwork(container, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - - label.Init() - - hostRootUid, err := GetHostRootUid(container) - if err != nil { - return fmt.Errorf("failed to get hostRootUid %s", err) - } - - hostRootGid, err := GetHostRootGid(container) - if err != nil { - return fmt.Errorf("failed to get hostRootGid %s", err) - } - - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - if container.MountConfig != nil { - return fmt.Errorf("mount config is set without mount namespace") - } - } else if err := mount.InitializeMountNamespace(rootfs, - consolePath, - container.RestrictSys, - hostRootUid, - hostRootGid, - (*mount.MountConfig)(container.MountConfig)); err != nil { - return fmt.Errorf("setup mount namespace %s", err) - } - - return nil -} - -func EnterCgroups(state *configs.State, pid int) error { - return cgroups.EnterPid(state.CgroupPaths, pid) -} diff --git a/namespaces/init.go b/namespaces/init.go deleted file mode 100644 index 58254514..00000000 --- a/namespaces/init.go +++ /dev/null @@ -1,468 +0,0 @@ -// +build linux - -package 
namespaces - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "strings" - "syscall" - - "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/console" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/netlink" - "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/security/capabilities" - "github.com/docker/libcontainer/security/restrict" - "github.com/docker/libcontainer/system" - "github.com/docker/libcontainer/user" - "github.com/docker/libcontainer/utils" -) - -// Process is used for transferring parameters from Exec() to Init() -type processArgs struct { - Args []string `json:"args,omitempty"` - Env []string `json:"environment,omitempty"` - ConsolePath string `json:"console_path,omitempty"` - Config *configs.Config `json:"config,omitempty"` - NetworkState *network.NetworkState `json:"network_state,omitempty"` -} - -// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. -// Move this to libcontainer package. -// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, -// and other options required for the new container. -// The caller of Init function has to ensure that the go runtime is locked to an OS thread -// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. -func Init(pipe *os.File, setupUserns bool) (err error) { - defer func() { - // if we have an error during the initialization of the container's init then send it back to the - // parent process in the form of an initError. - if err != nil { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. 
- ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { - panic(err) - } - } - // ensure that this pipe is always closed - pipe.Close() - }() - - uncleanRootfs, err := os.Getwd() - if err != nil { - return err - } - - var process *processArgs - // We always read this as it is a way to sync with the parent as well - if err := json.NewDecoder(pipe).Decode(&process); err != nil { - return err - } - - if setupUserns { - err = SetupContainer(process) - if err == nil { - os.Exit(0) - } else { - os.Exit(1) - } - } - - if process.Config.Namespaces.Contains(configs.NEWUSER) { - return initUserNs(uncleanRootfs, process) - } else { - return initDefault(uncleanRootfs, process) - } -} - -func initDefault(uncleanRootfs string, process *processArgs) (err error) { - container := process.Config - networkState := process.NetworkState - - rootfs, err := utils.ResolveRootfs(uncleanRootfs) - if err != nil { - return err - } - - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(container.Namespaces); err != nil { - return err - } - if process.ConsolePath != "" { - if err := console.OpenAndDup(process.ConsolePath); err != nil { - return err - } - } - if _, err := syscall.Setsid(); err != nil { - return fmt.Errorf("setsid %s", err) - } - if process.ConsolePath != "" { - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) - } - } - - cloneFlags := GetNamespaceFlags(container.Namespaces) - - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(container.Networks) != 0 || len(container.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := setupNetwork(container, networkState); err != nil { - return 
fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - label.Init() - - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - if container.MountConfig != nil { - return fmt.Errorf("mount config is set without mount namespace") - } - } else if err := mount.InitializeMountNamespace(rootfs, - process.ConsolePath, - container.RestrictSys, - 0, // Default Root Uid - 0, // Default Root Gid - (*mount.MountConfig)(container.MountConfig)); err != nil { - return fmt.Errorf("setup mount namespace %s", err) - } - - if container.Hostname != "" { - if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { - return fmt.Errorf("unable to set the hostname without UTS namespace") - } - if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { - return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) - } - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return fmt.Errorf("set process label %s", err) - } - - // TODO: (crosbymichael) make this configurable at the Config level - if container.RestrictSys { - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - return fmt.Errorf("unable to restrict access to kernel files without mount namespace") - } - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { - return err - } - } - - pdeathSignal, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - - if err := FinalizeNamespace(container); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } - - // 
FinalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := RestoreParentDeathSignal(pdeathSignal); err != nil { - return fmt.Errorf("restore parent death signal %s", err) - } - - return system.Execv(process.Args[0], process.Args[0:], process.Env) -} - -func initUserNs(uncleanRootfs string, process *processArgs) (err error) { - container := process.Config - - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(container.Namespaces); err != nil { - return err - } - if process.ConsolePath != "" { - if err := console.OpenAndDup("/dev/console"); err != nil { - return err - } - } - if _, err := syscall.Setsid(); err != nil { - return fmt.Errorf("setsid %s", err) - } - if process.ConsolePath != "" { - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) - } - } - - if container.WorkingDir == "" { - container.WorkingDir = "/" - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - cloneFlags := GetNamespaceFlags(container.Namespaces) - - if container.Hostname != "" { - if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { - return fmt.Errorf("unable to set the hostname without UTS namespace") - } - if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { - return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) - } - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return fmt.Errorf("set process label %s", err) - } - - if container.RestrictSys { - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - 
return fmt.Errorf("unable to restrict access to kernel files without mount namespace") - } - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { - return err - } - } - - pdeathSignal, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - - if err := FinalizeNamespace(container); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } - - // FinalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := RestoreParentDeathSignal(pdeathSignal); err != nil { - return fmt.Errorf("restore parent death signal %s", err) - } - - return system.Execv(process.Args[0], process.Args[0:], process.Env) -} - -// RestoreParentDeathSignal sets the parent death signal to old. -func RestoreParentDeathSignal(old int) error { - if old == 0 { - return nil - } - - current, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - - if old == current { - return nil - } - - if err := system.ParentDeathSignal(uintptr(old)); err != nil { - return fmt.Errorf("set parent death signal %s", err) - } - - // Signal self if parent is already dead. Does nothing if running in a new - // PID namespace, as Getppid will always return 0. - if syscall.Getppid() == 1 { - return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) - } - - return nil -} - -// SetupUser changes the groups, gid, and uid for the user inside the container -func SetupUser(container *configs.Config) error { - // Set up defaults. 
- defaultExecUser := user.ExecUser{ - Uid: syscall.Getuid(), - Gid: syscall.Getgid(), - Home: "/", - } - - passwdPath, err := user.GetPasswdPath() - if err != nil { - return err - } - - groupPath, err := user.GetGroupPath() - if err != nil { - return err - } - - execUser, err := user.GetExecUserPath(container.User, &defaultExecUser, passwdPath, groupPath) - if err != nil { - return fmt.Errorf("get supplementary groups %s", err) - } - - suppGroups := append(execUser.Sgids, container.AdditionalGroups...) - - if err := syscall.Setgroups(suppGroups); err != nil { - return fmt.Errorf("setgroups %s", err) - } - - if err := system.Setgid(execUser.Gid); err != nil { - return fmt.Errorf("setgid %s", err) - } - - if err := system.Setuid(execUser.Uid); err != nil { - return fmt.Errorf("setuid %s", err) - } - - // if we didn't get HOME already, set it based on the user's HOME - if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", execUser.Home); err != nil { - return fmt.Errorf("set HOME %s", err) - } - } - - return nil -} - -// setupVethNetwork uses the Network config if it is not nil to initialize -// the new veth interface inside the container for use by changing the name to eth0 -// setting the MTU and IP address along with the default gateway -func setupNetwork(container *configs.Config, networkState *network.NetworkState) error { - for _, config := range container.Networks { - strategy, err := network.GetStrategy(config.Type) - if err != nil { - return err - } - - err1 := strategy.Initialize((*network.Network)(config), networkState) - if err1 != nil { - return err1 - } - } - return nil -} - -func setupRoute(container *configs.Config) error { - for _, config := range container.Routes { - if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { - return err - } - } - return nil -} - -func setupRlimits(container *configs.Config) error { - for _, rlimit := range container.Rlimits { - l := 
&syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} - if err := syscall.Setrlimit(rlimit.Type, l); err != nil { - return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) - } - } - return nil -} - -// FinalizeNamespace drops the caps, sets the correct user -// and working dir, and closes any leaky file descriptors -// before execing the command inside the namespace -func FinalizeNamespace(container *configs.Config) error { - // Ensure that all non-standard fds we may have accidentally - // inherited are marked close-on-exec so they stay out of the - // container - if err := utils.CloseExecFrom(3); err != nil { - return fmt.Errorf("close open file descriptors %s", err) - } - - // drop capabilities in bounding set before changing user - if err := capabilities.DropBoundingSet(container.Capabilities); err != nil { - return fmt.Errorf("drop bounding set %s", err) - } - - // preserve existing capabilities while we change users - if err := system.SetKeepCaps(); err != nil { - return fmt.Errorf("set keep caps %s", err) - } - - if err := SetupUser(container); err != nil { - return fmt.Errorf("setup user %s", err) - } - - if err := system.ClearKeepCaps(); err != nil { - return fmt.Errorf("clear keep caps %s", err) - } - - // drop all other capabilities - if err := capabilities.DropCapabilities(container.Capabilities); err != nil { - return fmt.Errorf("drop capabilities %s", err) - } - - if container.WorkingDir != "" { - if err := syscall.Chdir(container.WorkingDir); err != nil { - return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) - } - } - - return nil -} - -func LoadContainerEnvironment(container *configs.Config) error { - os.Clearenv() - for _, pair := range container.Env { - p := strings.SplitN(pair, "=", 2) - if len(p) < 2 { - return fmt.Errorf("invalid environment '%v'", pair) - } - if err := os.Setenv(p[0], p[1]); err != nil { - return err - } - } - return nil -} - -// joinExistingNamespaces gets all the namespace paths specified for the 
container and -// does a setns on the namespace fd so that the current process joins the namespace. -func joinExistingNamespaces(namespaces []configs.Namespace) error { - for _, ns := range namespaces { - if ns.Path != "" { - f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) - if err != nil { - return err - } - err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type])) - f.Close() - if err != nil { - return err - } - } - } - return nil -} diff --git a/namespaces/utils.go b/namespaces/utils.go deleted file mode 100644 index 978a02d8..00000000 --- a/namespaces/utils.go +++ /dev/null @@ -1,48 +0,0 @@ -// +build linux - -package namespaces - -import ( - "os" - "syscall" - - "github.com/docker/libcontainer/configs" -) - -type initError struct { - Message string `json:"message,omitempty"` -} - -func (i initError) Error() string { - return i.Message -} - -var namespaceInfo = map[configs.NamespaceType]int{ - configs.NEWNET: syscall.CLONE_NEWNET, - configs.NEWNS: syscall.CLONE_NEWNS, - configs.NEWUSER: syscall.CLONE_NEWUSER, - configs.NEWIPC: syscall.CLONE_NEWIPC, - configs.NEWUTS: syscall.CLONE_NEWUTS, - configs.NEWPID: syscall.CLONE_NEWPID, -} - -// New returns a newly initialized Pipe for communication between processes -func newInitPipe() (parent *os.File, child *os.File, err error) { - fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil -} - -// GetNamespaceFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare. This functions returns flags only for new namespaces. 
-func GetNamespaceFlags(namespaces configs.Namespaces) (flag int) { - for _, v := range namespaces { - if v.Path != "" { - continue - } - flag |= namespaceInfo[v.Type] - } - return flag -} diff --git a/network/loopback.go b/network/loopback.go index 1667b4d8..b208dfc6 100644 --- a/network/loopback.go +++ b/network/loopback.go @@ -4,17 +4,19 @@ package network import ( "fmt" + + "github.com/docker/libcontainer/configs" ) // Loopback is a network strategy that provides a basic loopback device type Loopback struct { } -func (l *Loopback) Create(n *Network, nspid int, networkState *NetworkState) error { +func (l *Loopback) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { return nil } -func (l *Loopback) Initialize(config *Network, networkState *NetworkState) error { +func (l *Loopback) Initialize(config *configs.Network, networkState *configs.NetworkState) error { // Do not set the MTU on the loopback interface - use the default. if err := InterfaceUp("lo"); err != nil { return fmt.Errorf("lo up %s", err) diff --git a/network/stats.go b/network/stats.go index e2156c74..329a16bf 100644 --- a/network/stats.go +++ b/network/stats.go @@ -5,6 +5,8 @@ import ( "path/filepath" "strconv" "strings" + + "github.com/docker/libcontainer/configs" ) type NetworkStats struct { @@ -19,7 +21,7 @@ type NetworkStats struct { } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. -func GetStats(networkState *NetworkState) (*NetworkStats, error) { +func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) { // This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer. 
if networkState.VethHost == "" { return &NetworkStats{}, nil diff --git a/network/strategy.go b/network/strategy.go index 019fe62f..bc4a023b 100644 --- a/network/strategy.go +++ b/network/strategy.go @@ -4,6 +4,8 @@ package network import ( "errors" + + "github.com/docker/libcontainer/configs" ) var ( @@ -18,8 +20,8 @@ var strategies = map[string]NetworkStrategy{ // NetworkStrategy represents a specific network configuration for // a container's networking stack type NetworkStrategy interface { - Create(*Network, int, *NetworkState) error - Initialize(*Network, *NetworkState) error + Create(*configs.Network, int, *configs.NetworkState) error + Initialize(*configs.Network, *configs.NetworkState) error } // GetStrategy returns the specific network strategy for the diff --git a/network/types.go b/network/types.go index dcf00420..1ae2e9d5 100644 --- a/network/types.go +++ b/network/types.go @@ -1,50 +1 @@ package network - -// Network defines configuration for a container's networking stack -// -// The network configuration can be omited from a container causing the -// container to be setup with the host's networking stack -type Network struct { - // Type sets the networks type, commonly veth and loopback - Type string `json:"type,omitempty"` - - // The bridge to use. - Bridge string `json:"bridge,omitempty"` - - // Prefix for the veth interfaces. 
- VethPrefix string `json:"veth_prefix,omitempty"` - - // MacAddress contains the MAC address to set on the network interface - MacAddress string `json:"mac_address,omitempty"` - - // Address contains the IPv4 and mask to set on the network interface - Address string `json:"address,omitempty"` - - // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address,omitempty"` - - // Gateway sets the gateway address that is used as the default for the interface - Gateway string `json:"gateway,omitempty"` - - // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface - IPv6Gateway string `json:"ipv6_gateway,omitempty"` - - // Mtu sets the mtu value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - Mtu int `json:"mtu,omitempty"` - - // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - TxQueueLen int `json:"txqueuelen,omitempty"` -} - -// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers -// Do not depend on it outside of libcontainer. -type NetworkState struct { - // The name of the veth interface on the Host. - VethHost string `json:"veth_host,omitempty"` - // The name of the veth interface created inside the container for the child. 
- VethChild string `json:"veth_child,omitempty"` -} diff --git a/network/veth.go b/network/veth.go index 3d7dc872..7bcc3910 100644 --- a/network/veth.go +++ b/network/veth.go @@ -5,6 +5,7 @@ package network import ( "fmt" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" "github.com/docker/libcontainer/utils" ) @@ -17,7 +18,7 @@ type Veth struct { const defaultDevice = "eth0" -func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error { +func (v *Veth) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { var ( bridge = n.Bridge prefix = n.VethPrefix @@ -51,7 +52,7 @@ func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error { return nil } -func (v *Veth) Initialize(config *Network, networkState *NetworkState) error { +func (v *Veth) Initialize(config *configs.Network, networkState *configs.NetworkState) error { var vethChild = networkState.VethChild if vethChild == "" { return fmt.Errorf("vethChild is not specified") diff --git a/namespaces/nsenter/README.md b/nsenter/README.md similarity index 100% rename from namespaces/nsenter/README.md rename to nsenter/README.md diff --git a/namespaces/nsenter/nsenter.go b/nsenter/nsenter.go similarity index 100% rename from namespaces/nsenter/nsenter.go rename to nsenter/nsenter.go diff --git a/namespaces/nsenter/nsenter_test.go b/nsenter/nsenter_test.go similarity index 100% rename from namespaces/nsenter/nsenter_test.go rename to nsenter/nsenter_test.go diff --git a/namespaces/nsenter/nsenter_unsupported.go b/nsenter/nsenter_unsupported.go similarity index 100% rename from namespaces/nsenter/nsenter_unsupported.go rename to nsenter/nsenter_unsupported.go diff --git a/namespaces/nsenter/nsexec.c b/nsenter/nsexec.c similarity index 100% rename from namespaces/nsenter/nsexec.c rename to nsenter/nsexec.c diff --git a/nsinit/config.go b/nsinit/config.go deleted file mode 100644 index 74c7b3c0..00000000 --- a/nsinit/config.go 
+++ /dev/null @@ -1,29 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - - "github.com/codegangsta/cli" -) - -var configCommand = cli.Command{ - Name: "config", - Usage: "display the container configuration", - Action: configAction, -} - -func configAction(context *cli.Context) { - container, err := loadConfig() - if err != nil { - log.Fatal(err) - } - - data, err := json.MarshalIndent(container, "", "\t") - if err != nil { - log.Fatal(err) - } - - fmt.Printf("%s", data) -} diff --git a/nsinit/exec.go b/nsinit/exec.go index 525991d3..d12f3638 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -1,11 +1,9 @@ package main import ( - "crypto/md5" - "fmt" "io" - "log" "os" + "os/signal" "syscall" "github.com/codegangsta/cli" @@ -15,145 +13,148 @@ import ( consolepkg "github.com/docker/libcontainer/console" ) -var ( - dataPath = os.Getenv("data_path") - console = os.Getenv("console") - rawPipeFd = os.Getenv("pipe") -) +type tty struct { + master *os.File + console string + state *term.State +} + +func (t *tty) Close() error { + if t.master != nil { + t.master.Close() + } + if t.state != nil { + term.RestoreTerminal(os.Stdin.Fd(), t.state) + } + return nil +} + +func (t *tty) set(config *configs.Config) { + config.Console = t.console +} + +func (t *tty) attach(process *libcontainer.Process) { + if t.master != nil { + process.Stderr = nil + process.Stdout = nil + process.Stdin = nil + } +} + +func (t *tty) resize() error { + if t.master == nil { + return nil + } + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return err + } + return term.SetWinsize(t.master.Fd(), ws) +} var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", Action: execAction, Flags: []cli.Flag{ - cli.BoolFlag{Name: "list", Usage: "list all registered exec functions"}, - cli.StringFlag{Name: "func", Value: "exec", Usage: "function name to exec inside a container"}, + cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the 
container"}, + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"}, }, } -func getContainer(context *cli.Context) (libcontainer.Container, error) { - factory, err := libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) +func execAction(context *cli.Context) { + factory, err := loadFactory(context) if err != nil { - log.Fatal(err) + fatal(err) } - - id := fmt.Sprintf("%x", md5.Sum([]byte(dataPath))) - container, err := factory.Load(id) - if err != nil && !os.IsNotExist(err) { - var config *configs.Config - - config, err = loadConfig() - if err != nil { - log.Fatal(err) + tty, err := newTty(context) + if err != nil { + fatal(err) + } + container, err := factory.Load(context.String("id")) + if err != nil { + if lerr, ok := err.(libcontainer.Error); !ok || lerr.Code() != libcontainer.ContainerNotExists { + fatal(err) + } + config, err := loadConfig(context) + if err != nil { + fatal(err) + } + tty.set(config) + if container, err = factory.Create(context.String("id"), config); err != nil { + fatal(err) } - container, err = factory.Create(id, config) } - - return container, err + go handleSignals(container, tty) + process := &libcontainer.Process{ + Args: context.Args(), + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + } + tty.attach(process) + pid, err := container.Start(process) + if err != nil { + fatal(err) + } + proc, err := os.FindProcess(pid) + if err != nil { + fatal(err) + } + status, err := proc.Wait() + if err != nil { + fatal(err) + } + if err := container.Destroy(); err != nil { + fatal(err) + } + exit(status.Sys().(syscall.WaitStatus)) } -func execAction(context *cli.Context) { - var ( - master *os.File - console string - err error - - sigc = make(chan os.Signal, 10) - - stdin = os.Stdin - stdout = os.Stdout - stderr = os.Stderr - - exitCode int - ) - - container, err := 
getContainer(context) - if err != nil { - log.Fatal(err) - } - - if container.Config().Tty { - stdin = nil - stdout = nil - stderr = nil - - master, console, err = consolepkg.CreateMasterAndConsole() - if err != nil { - log.Fatal(err) - } - - go io.Copy(master, os.Stdin) - go io.Copy(os.Stdout, master) - - state, err := term.SetRawTerminal(os.Stdin.Fd()) - if err != nil { - log.Fatal(err) - } - - defer term.RestoreTerminal(os.Stdin.Fd(), state) - } - - process := &libcontainer.ProcessConfig{ - Args: context.Args(), - Env: context.StringSlice("env"), - Stdin: stdin, - Stdout: stdout, - Stderr: stderr, - Console: console, - } - - pid, err := container.StartProcess(process) - if err != nil { - log.Fatalf("failed to exec: %s", err) - } - - p, err := os.FindProcess(pid) - if err != nil { - log.Fatalf("Unable to find the %d process: %s", pid, err) - } - - go func() { - resizeTty(master) - - for sig := range sigc { - switch sig { - case syscall.SIGWINCH: - resizeTty(master) - default: - p.Signal(sig) - } - } - }() - - ps, err := p.Wait() - if err != nil { - log.Fatalf("Unable to wait the %d process: %s", pid, err) - } - container.Destroy() - - status := ps.Sys().(syscall.WaitStatus) +func exit(status syscall.WaitStatus) { + var exitCode int if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { exitCode = -int(status.Signal()) } else { - log.Fatalf("Unexpected status") + fatalf("Unexpected status") } - os.Exit(exitCode) } -func resizeTty(master *os.File) { - if master == nil { - return - } - - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return - } - - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return +func handleSignals(container libcontainer.Container, tty *tty) { + sigc := make(chan os.Signal, 10) + signal.Notify(sigc) + tty.resize() + for sig := range sigc { + switch sig { + case syscall.SIGWINCH: + tty.resize() + default: + container.Signal(sig) + } } } + +func newTty(context *cli.Context) (*tty, error) { + 
if context.Bool("tty") { + master, console, err := consolepkg.CreateMasterAndConsole() + if err != nil { + return nil, err + } + go io.Copy(master, os.Stdin) + go io.Copy(os.Stdout, master) + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + return nil, err + } + return &tty{ + master: master, + console: console, + state: state, + }, nil + } + return &tty{}, nil +} diff --git a/nsinit/init.go b/nsinit/init.go index bf59345a..d45d1287 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -5,35 +5,27 @@ import ( "github.com/codegangsta/cli" "github.com/docker/libcontainer" - _ "github.com/docker/libcontainer/namespaces/nsenter" + _ "github.com/docker/libcontainer/nsenter" ) -var ( - initCommand = cli.Command{ - Name: "init", - Usage: "runs the init process inside the namespace", - Action: initAction, - Flags: []cli.Flag{ - cli.IntFlag{"fd", 0, "internal pipe fd"}, - }, - } -) - -func initAction(context *cli.Context) { - factory, err := libcontainer.New("", []string{}) - if err != nil { - log.Fatal(err) - } - - if context.Int("fd") == 0 { - log.Fatal("--fd must be specified for init process") - } - - fd := uintptr(context.Int("fd")) - - if err := factory.StartInitialization(fd); err != nil { - log.Fatal(err) - } - - panic("This line should never been executed") +var initCommand = cli.Command{ + Name: "init", + Usage: "runs the init process inside the namespace", + Flags: []cli.Flag{ + cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"}, + }, + Action: func(context *cli.Context) { + factory, err := libcontainer.New("", nil) + if err != nil { + log.Fatal(err) + } + if context.Int("fd") == 0 { + log.Fatal("--fd must be specified for init process") + } + fd := uintptr(context.Int("fd")) + if err := factory.StartInitialization(fd); err != nil { + log.Fatal(err) + } + panic("This line should never been executed") + }, } diff --git a/nsinit/main.go b/nsinit/main.go index 2de7bc3e..e0dcf460 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -7,26 
+7,17 @@ import ( "github.com/codegangsta/cli" ) -var ( - logPath = os.Getenv("log") -) - func main() { app := cli.NewApp() - app.Name = "nsinit" - app.Version = "0.1" + app.Version = "1" app.Author = "libcontainer maintainers" app.Flags = []cli.Flag{ cli.StringFlag{Name: "nspid"}, cli.StringFlag{Name: "console"}, cli.StringFlag{Name: "root", Value: ".", Usage: "root directory for containers"}, } - - app.Before = preload - app.Commands = []cli.Command{ - configCommand, execCommand, initCommand, oomCommand, @@ -34,7 +25,6 @@ func main() { statsCommand, unpauseCommand, } - if err := app.Run(os.Args); err != nil { log.Fatal(err) } diff --git a/nsinit/oom.go b/nsinit/oom.go index f7a333d4..eabe0b2b 100644 --- a/nsinit/oom.go +++ b/nsinit/oom.go @@ -4,26 +4,25 @@ import ( "log" "github.com/codegangsta/cli" - "github.com/docker/libcontainer" - "github.com/docker/libcontainer/configs" ) var oomCommand = cli.Command{ - Name: "oom", - Usage: "display oom notifications for a container", - Action: oomAction, -} - -func oomAction(context *cli.Context) { - state, err := configs.GetState(dataPath) - if err != nil { - log.Fatal(err) - } - n, err := libcontainer.NotifyOnOOM(state) - if err != nil { - log.Fatal(err) - } - for range n { - log.Printf("OOM notification received") - } + Name: "oom", + Usage: "display oom notifications for a container", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + n, err := container.OOM() + if err != nil { + log.Fatal(err) + } + for range n { + log.Printf("OOM notification received") + } + }, } diff --git a/nsinit/pause.go b/nsinit/pause.go index 6ba95cd1..89af0b6f 100644 --- a/nsinit/pause.go +++ b/nsinit/pause.go @@ -7,35 +7,35 @@ import ( ) var pauseCommand = cli.Command{ - Name: "pause", - Usage: "pause the container's processes", - Action: pauseAction, + Name: 
"pause", + Usage: "pause the container's processes", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + if err = container.Pause(); err != nil { + log.Fatal(err) + } + }, } var unpauseCommand = cli.Command{ - Name: "unpause", - Usage: "unpause the container's processes", - Action: unpauseAction, -} - -func pauseAction(context *cli.Context) { - container, err := getContainer(context) - if err != nil { - log.Fatal(err) - } - - if err = container.Pause(); err != nil { - log.Fatal(err) - } -} - -func unpauseAction(context *cli.Context) { - container, err := getContainer(context) - if err != nil { - log.Fatal(err) - } - - if err = container.Resume(); err != nil { - log.Fatal(err) - } + Name: "unpause", + Usage: "unpause the container's processes", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + if err = container.Resume(); err != nil { + log.Fatal(err) + } + }, } diff --git a/nsinit/stats.go b/nsinit/stats.go index 6d8f7585..8320fed4 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go @@ -6,34 +6,27 @@ import ( "log" "github.com/codegangsta/cli" - "github.com/docker/libcontainer" ) var statsCommand = cli.Command{ - Name: "stats", - Usage: "display statistics for the container", - Action: statsAction, -} - -func statsAction(context *cli.Context) { - factory, err := libcontainer.New(context.GlobalString("root"), nil) - if err != nil { - log.Fatal(err) - } - - container, err := factory.Load(context.Args().First()) - if err != nil { - log.Fatal(err) - } - - stats, err := container.Stats() - if err != nil { - log.Fatal(err) - } - data, jerr := json.MarshalIndent(stats, "", "\t") - if err != nil { - 
log.Fatal(jerr) - } - - fmt.Printf("%s", data) + Name: "stats", + Usage: "display statistics for the container", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + stats, err := container.Stats() + if err != nil { + log.Fatal(err) + } + data, jerr := json.MarshalIndent(stats, "", "\t") + if err != nil { + log.Fatal(jerr) + } + fmt.Printf("%s", data) + }, } diff --git a/nsinit/utils.go b/nsinit/utils.go index e02a1b3a..901972e8 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -2,89 +2,52 @@ package main import ( "encoding/json" - "log" + "fmt" "os" - "path/filepath" "github.com/codegangsta/cli" + "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) -// rFunc is a function registration for calling after an execin -type rFunc struct { - Usage string - Action func(*configs.Config, []string) -} - -func loadConfig() (*configs.Config, error) { - f, err := os.Open(filepath.Join(dataPath, "container.json")) +func loadConfig(context *cli.Context) (*configs.Config, error) { + f, err := os.Open(context.String("config")) if err != nil { return nil, err } defer f.Close() - - var container *configs.Config - if err := json.NewDecoder(f).Decode(&container); err != nil { - return nil, err - } - - return container, nil -} - -func openLog(name string) error { - f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755) - if err != nil { - return err - } - - log.SetOutput(f) - - return nil -} - -func findUserArgs() []string { - i := 0 - for _, a := range os.Args { - i++ - - if a == "--" { - break - } - } - - return os.Args[i:] -} - -// loadConfigFromFd loads a container's config from the sync pipe that is provided by -// fd 3 when running a process -func loadConfigFromFd() (*configs.Config, error) { - pipe := os.NewFile(3, "pipe") - defer pipe.Close() - var config 
*configs.Config - if err := json.NewDecoder(pipe).Decode(&config); err != nil { + if err := json.NewDecoder(f).Decode(&config); err != nil { return nil, err } return config, nil } -func preload(context *cli.Context) error { - if logPath != "" { - if err := openLog(logPath); err != nil { - return err - } - } - - return nil +func loadFactory(context *cli.Context) (libcontainer.Factory, error) { + return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) } -func runFunc(f *rFunc) { - userArgs := findUserArgs() - - config, err := loadConfigFromFd() +func getContainer(context *cli.Context) (libcontainer.Container, error) { + factory, err := loadFactory(context) if err != nil { - log.Fatalf("unable to receive config from sync pipe: %s", err) + return nil, err } - - f.Action(config, userArgs) + container, err := factory.Load(context.String("id")) + if err != nil { + return nil, err + } + return container, nil +} + +func fatal(err error) { + if lerr, ok := err.(libcontainer.Error); ok { + lerr.Detail(os.Stderr) + os.Exit(1) + } + fmt.Fprintln(os.Stderr, err) +} + +func fatalf(t string, v ...interface{}) { + fmt.Fprintf(os.Stderr, t, v...) + os.Exit(1) } diff --git a/process.go b/process.go index cd72b129..ed228f79 100644 --- a/process.go +++ b/process.go @@ -3,13 +3,9 @@ package libcontainer import "io" // Configuration for a process to be run inside a container. -type ProcessConfig struct { +type Process struct { // The command to be run followed by any arguments. Args []string - - // Map of environment variables to their values. - Env []string - // Stdin is a pointer to a reader which provides the standard input stream. // Stdout is a pointer to a writer which receives the standard output stream. // Stderr is a pointer to a writer which receives the standard error stream. 
@@ -21,7 +17,4 @@ type ProcessConfig struct { Stdin io.Reader Stdout io.Writer Stderr io.Writer - - // Console is the path to the pty slave for use by the master - Console string } diff --git a/sample_configs/apparmor.json b/sample_configs/apparmor.json index 96f73cb7..d044524f 100644 --- a/sample_configs/apparmor.json +++ b/sample_configs/apparmor.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", 
"file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -118,57 +118,55 @@ }, "restrict_sys": true, "apparmor_profile": "docker-default", - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/attach_to_bridge.json 
b/sample_configs/attach_to_bridge.json index e5c03a7e..248a8a79 100644 --- a/sample_configs/attach_to_bridge.json +++ b/sample_configs/attach_to_bridge.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - 
"major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,57 +117,55 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { - "device_nodes": [ - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 3, - "path": "/dev/null", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 5, - "path": "/dev/zero", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 7, - "path": "/dev/full", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 5, - "path": "/dev/tty", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 9, - "path": "/dev/urandom", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 8, - "path": "/dev/random", - "type": 99 - } - ] - }, + "device_nodes": [ + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 3, + "path": "/dev/null", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 7, + "path": "/dev/full", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + 
"major": 1, + "minor": 8, + "path": "/dev/random", + "type": 99 + } + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/host-pid.json b/sample_configs/host-pid.json index f47af930..61c3cf48 100644 --- a/sample_configs/host-pid.json +++ b/sample_configs/host-pid.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 
438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,52 +117,51 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -172,8 +171,7 @@ "type": "tmpfs", "destination": "/tmp" } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/minimal.json 
b/sample_configs/minimal.json index 01de4674..28b22cce 100644 --- a/sample_configs/minimal.json +++ b/sample_configs/minimal.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + 
"major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,52 +117,51 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -172,8 +171,7 @@ "type": "tmpfs", "destination": "/tmp" } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/route_source_address_selection.json b/sample_configs/route_source_address_selection.json index 9c62045a..b9c9ef7b 100644 --- a/sample_configs/route_source_address_selection.json +++ b/sample_configs/route_source_address_selection.json @@ 
-16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 
1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,57 +117,55 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/selinux.json b/sample_configs/selinux.json index 15556488..a415c257 100644 --- a/sample_configs/selinux.json +++ b/sample_configs/selinux.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + 
"major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -118,58 +118,56 
@@ }, "restrict_sys": true, "process_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", - "mount_config": { "mount_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/userns.json b/sample_configs/userns.json index 8c9c841f..1ebbad61 100644 --- a/sample_configs/userns.json +++ b/sample_configs/userns.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": 
"/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,52 +117,51 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 
1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -172,8 +171,7 @@ "type": "tmpfs", "destination": "/tmp" } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/types.go b/types.go deleted file mode 100644 index c341137e..00000000 --- a/types.go +++ /dev/null @@ -1,11 +0,0 @@ -package libcontainer - -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/network" -) - -type ContainerStats struct { - NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` - CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` -}