Merge pull request #304 from mrunalp/feature/user_namespaces
Adds user namespace support to libcontainer
This commit is contained in:
commit
b89cd0cf5c
12
config.go
12
config.go
|
@ -124,6 +124,11 @@ type Config struct {
|
||||||
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
||||||
// in addition to those that the user belongs to.
|
// in addition to those that the user belongs to.
|
||||||
AdditionalGroups []int `json:"additional_groups,omitempty"`
|
AdditionalGroups []int `json:"additional_groups,omitempty"`
|
||||||
|
// UidMappings is an array of User ID mappings for User Namespaces
|
||||||
|
UidMappings []IDMap `json:"uid_mappings,omitempty"`
|
||||||
|
|
||||||
|
// GidMappings is an array of Group ID mappings for User Namespaces
|
||||||
|
GidMappings []IDMap `json:"gid_mappings,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Routes can be specified to create entries in the route table as the container is started
|
// Routes can be specified to create entries in the route table as the container is started
|
||||||
|
@ -152,3 +157,10 @@ type Rlimit struct {
|
||||||
Hard uint64 `json:"hard,omitempty"`
|
Hard uint64 `json:"hard,omitempty"`
|
||||||
Soft uint64 `json:"soft,omitempty"`
|
Soft uint64 `json:"soft,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IDMap describes one contiguous range of user or group IDs that is mapped
// between a container's user namespace and the host.
type IDMap struct {
	// ContainerID is the first ID of the range as seen inside the container.
	ContainerID int `json:"container_id,omitempty"`
	// HostID is the first ID of the range as seen on the host.
	HostID int `json:"host_id,omitempty"`
	// Size is the number of consecutive IDs covered by this mapping.
	Size int `json:"size,omitempty"`
}
|
||||||
|
|
|
@ -13,7 +13,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// Setup initializes the proper /dev/console inside the rootfs path
|
// Setup initializes the proper /dev/console inside the rootfs path
|
||||||
func Setup(rootfs, consolePath, mountLabel string) error {
|
func Setup(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error {
|
||||||
oldMask := syscall.Umask(0000)
|
oldMask := syscall.Umask(0000)
|
||||||
defer syscall.Umask(oldMask)
|
defer syscall.Umask(oldMask)
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ func Setup(rootfs, consolePath, mountLabel string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.Chown(consolePath, 0, 0); err != nil {
|
if err := os.Chown(consolePath, hostRootUid, hostRootGid); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -115,6 +115,7 @@ func TestExecInRlimit(t *testing.T) {
|
||||||
func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string, chan error) {
|
func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string, chan error) {
|
||||||
containerErr := make(chan error, 1)
|
containerErr := make(chan error, 1)
|
||||||
containerCmd := &exec.Cmd{}
|
containerCmd := &exec.Cmd{}
|
||||||
|
setupContainerCmd := &exec.Cmd{}
|
||||||
var statePath string
|
var statePath string
|
||||||
|
|
||||||
createCmd := func(container *libcontainer.Config, console, dataPath, init string,
|
createCmd := func(container *libcontainer.Config, console, dataPath, init string,
|
||||||
|
@ -124,6 +125,12 @@ func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string,
|
||||||
return containerCmd
|
return containerCmd
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setupCmd := func(container *libcontainer.Config, console, dataPath, init string) *exec.Cmd {
|
||||||
|
setupContainerCmd = namespaces.DefaultSetupCommand(container, console, dataPath, init)
|
||||||
|
statePath = dataPath
|
||||||
|
return setupContainerCmd
|
||||||
|
}
|
||||||
|
|
||||||
var containerStart sync.WaitGroup
|
var containerStart sync.WaitGroup
|
||||||
containerStart.Add(1)
|
containerStart.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
|
@ -131,7 +138,7 @@ func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string,
|
||||||
_, err := namespaces.Exec(config,
|
_, err := namespaces.Exec(config,
|
||||||
buffers.Stdin, buffers.Stdout, buffers.Stderr,
|
buffers.Stdin, buffers.Stdout, buffers.Stderr,
|
||||||
"", config.RootFs, []string{"sleep", "10"},
|
"", config.RootFs, []string{"sleep", "10"},
|
||||||
createCmd, containerStart.Done)
|
createCmd, setupCmd, containerStart.Done)
|
||||||
containerErr <- err
|
containerErr <- err
|
||||||
}()
|
}()
|
||||||
containerStart.Wait()
|
containerStart.Wait()
|
||||||
|
|
|
@ -90,6 +90,6 @@ func runContainer(config *libcontainer.Config, console string, args ...string) (
|
||||||
|
|
||||||
buffers = newStdBuffers()
|
buffers = newStdBuffers()
|
||||||
exitCode, err = namespaces.Exec(config, buffers.Stdin, buffers.Stdout, buffers.Stderr,
|
exitCode, err = namespaces.Exec(config, buffers.Stdin, buffers.Stdout, buffers.Stderr,
|
||||||
console, config.RootFs, args, namespaces.DefaultCreateCommand, nil)
|
console, config.RootFs, args, namespaces.DefaultCreateCommand, namespaces.DefaultSetupCommand, nil)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@ type mount struct {
|
||||||
|
|
||||||
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
|
// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a
|
||||||
// new mount namespace.
|
// new mount namespace.
|
||||||
func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountConfig *MountConfig) error {
|
func InitializeMountNamespace(rootfs, console string, sysReadonly bool, hostRootUid, hostRootGid int, mountConfig *MountConfig) error {
|
||||||
var (
|
var (
|
||||||
err error
|
err error
|
||||||
flag = syscall.MS_PRIVATE
|
flag = syscall.MS_PRIVATE
|
||||||
|
@ -58,14 +58,17 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon
|
||||||
return fmt.Errorf("create device nodes %s", err)
|
return fmt.Errorf("create device nodes %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := SetupPtmx(rootfs, console, mountConfig.MountLabel); err != nil {
|
if err := SetupPtmx(rootfs, console, mountConfig.MountLabel, hostRootUid, hostRootGid); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
|
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
|
||||||
// Re-open them inside this namespace.
|
// Re-open them inside this namespace.
|
||||||
if err := reOpenDevNull(rootfs); err != nil {
|
// FIXME: Need to fix this for user namespaces.
|
||||||
return fmt.Errorf("Failed to reopen /dev/null %s", err)
|
if hostRootUid == 0 {
|
||||||
|
if err := reOpenDevNull(rootfs); err != nil {
|
||||||
|
return fmt.Errorf("Failed to reopen /dev/null %s", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := setupDevSymlinks(rootfs); err != nil {
|
if err := setupDevSymlinks(rootfs); err != nil {
|
||||||
|
|
|
@ -10,7 +10,7 @@ import (
|
||||||
"github.com/docker/libcontainer/console"
|
"github.com/docker/libcontainer/console"
|
||||||
)
|
)
|
||||||
|
|
||||||
func SetupPtmx(rootfs, consolePath, mountLabel string) error {
|
func SetupPtmx(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error {
|
||||||
ptmx := filepath.Join(rootfs, "dev/ptmx")
|
ptmx := filepath.Join(rootfs, "dev/ptmx")
|
||||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||||
return err
|
return err
|
||||||
|
@ -21,7 +21,7 @@ func SetupPtmx(rootfs, consolePath, mountLabel string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if consolePath != "" {
|
if consolePath != "" {
|
||||||
if err := console.Setup(rootfs, consolePath, mountLabel); err != nil {
|
if err := console.Setup(rootfs, consolePath, mountLabel, hostRootUid, hostRootGid); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,3 +8,4 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type CreateCommand func(container *libcontainer.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd
|
type CreateCommand func(container *libcontainer.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd
|
||||||
|
type SetupCommand func(container *libcontainer.Config, console, dataPath, init string) *exec.Cmd
|
||||||
|
|
|
@ -4,6 +4,7 @@ package namespaces
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
@ -25,7 +26,7 @@ const (
|
||||||
// Move this to libcontainer package.
|
// Move this to libcontainer package.
|
||||||
// Exec performs setup outside of a namespace so that a container can be
|
// Exec performs setup outside of a namespace so that a container can be
|
||||||
// executed. Exec is a high level function for working with container namespaces.
|
// executed. Exec is a high level function for working with container namespaces.
|
||||||
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
|
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, setupCommand SetupCommand, startCallback func()) (int, error) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
// create a pipe so that we can synchronize with the namespaced process and
|
// create a pipe so that we can synchronize with the namespaced process and
|
||||||
|
@ -74,14 +75,6 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
|
||||||
if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
|
if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
|
||||||
return terminate(err)
|
return terminate(err)
|
||||||
}
|
}
|
||||||
// send the state to the container's init process then shutdown writes for the parent
|
|
||||||
if err := json.NewEncoder(parent).Encode(networkState); err != nil {
|
|
||||||
return terminate(err)
|
|
||||||
}
|
|
||||||
// shutdown writes for the parent side of the pipe
|
|
||||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
|
||||||
return terminate(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
state := &libcontainer.State{
|
state := &libcontainer.State{
|
||||||
InitPid: command.Process.Pid,
|
InitPid: command.Process.Pid,
|
||||||
|
@ -95,6 +88,26 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
|
||||||
}
|
}
|
||||||
defer libcontainer.DeleteState(dataPath)
|
defer libcontainer.DeleteState(dataPath)
|
||||||
|
|
||||||
|
// Start the setup process to setup the init process
|
||||||
|
if container.Namespaces.Contains(libcontainer.NEWUSER) {
|
||||||
|
setupCmd := setupCommand(container, console, dataPath, os.Args[0])
|
||||||
|
output, err := setupCmd.CombinedOutput()
|
||||||
|
if err != nil || setupCmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() != 0 {
|
||||||
|
command.Process.Kill()
|
||||||
|
command.Wait()
|
||||||
|
return -1, fmt.Errorf("setup failed: %s %s", err, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// send the state to the container's init process then shutdown writes for the parent
|
||||||
|
if err := json.NewEncoder(parent).Encode(networkState); err != nil {
|
||||||
|
return terminate(err)
|
||||||
|
}
|
||||||
|
// shutdown writes for the parent side of the pipe
|
||||||
|
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||||
|
return terminate(err)
|
||||||
|
}
|
||||||
|
|
||||||
// wait for the child process to fully complete and receive an error message
|
// wait for the child process to fully complete and receive an error message
|
||||||
// if one was encountered
|
// if one was encountered
|
||||||
var ierr *initError
|
var ierr *initError
|
||||||
|
@ -157,6 +170,75 @@ func killAllPids(container *libcontainer.Config) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Utility function that gets a host ID for a container ID from user namespace map
|
||||||
|
// if that ID is present in the map.
|
||||||
|
func hostIDFromMapping(containerID int, uMap []libcontainer.IDMap) (int, bool) {
|
||||||
|
for _, m := range uMap {
|
||||||
|
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||||
|
hostID := m.HostID + (containerID - m.ContainerID)
|
||||||
|
return hostID, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gets the root uid for the process on host which could be non-zero
|
||||||
|
// when user namespaces are enabled.
|
||||||
|
func GetHostRootGid(container *libcontainer.Config) (int, error) {
|
||||||
|
if container.Namespaces.Contains(libcontainer.NEWUSER) {
|
||||||
|
if container.GidMappings == nil {
|
||||||
|
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
|
||||||
|
}
|
||||||
|
hostRootGid, found := hostIDFromMapping(0, container.GidMappings)
|
||||||
|
if !found {
|
||||||
|
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
|
||||||
|
}
|
||||||
|
return hostRootGid, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return default root uid 0
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gets the root uid for the process on host which could be non-zero
|
||||||
|
// when user namespaces are enabled.
|
||||||
|
func GetHostRootUid(container *libcontainer.Config) (int, error) {
|
||||||
|
if container.Namespaces.Contains(libcontainer.NEWUSER) {
|
||||||
|
if container.UidMappings == nil {
|
||||||
|
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
|
||||||
|
}
|
||||||
|
hostRootUid, found := hostIDFromMapping(0, container.UidMappings)
|
||||||
|
if !found {
|
||||||
|
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
|
||||||
|
}
|
||||||
|
return hostRootUid, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return default root uid 0
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
|
||||||
|
func AddUidGidMappings(sys *syscall.SysProcAttr, container *libcontainer.Config) {
|
||||||
|
if container.UidMappings != nil {
|
||||||
|
sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
|
||||||
|
for i, um := range container.UidMappings {
|
||||||
|
sys.UidMappings[i].ContainerID = um.ContainerID
|
||||||
|
sys.UidMappings[i].HostID = um.HostID
|
||||||
|
sys.UidMappings[i].Size = um.Size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if container.GidMappings != nil {
|
||||||
|
sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
|
||||||
|
for i, gm := range container.GidMappings {
|
||||||
|
sys.GidMappings[i].ContainerID = gm.ContainerID
|
||||||
|
sys.GidMappings[i].HostID = gm.HostID
|
||||||
|
sys.GidMappings[i].Size = gm.Size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// DefaultCreateCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
|
// DefaultCreateCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
|
||||||
// defined on the container's configuration and use the current binary as the init with the
|
// defined on the container's configuration and use the current binary as the init with the
|
||||||
// args provided
|
// args provided
|
||||||
|
@ -187,6 +269,46 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini
|
||||||
command.SysProcAttr.Pdeathsig = syscall.SIGKILL
|
command.SysProcAttr.Pdeathsig = syscall.SIGKILL
|
||||||
command.ExtraFiles = []*os.File{pipe}
|
command.ExtraFiles = []*os.File{pipe}
|
||||||
|
|
||||||
|
if container.Namespaces.Contains(libcontainer.NEWUSER) {
|
||||||
|
AddUidGidMappings(command.SysProcAttr, container)
|
||||||
|
|
||||||
|
// Default to root user when user namespaces are enabled.
|
||||||
|
if command.SysProcAttr.Credential == nil {
|
||||||
|
command.SysProcAttr.Credential = &syscall.Credential{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return command
|
||||||
|
}
|
||||||
|
|
||||||
|
// DefaultSetupCommand will return an exec.Cmd that joins the init process to set it up.
|
||||||
|
//
|
||||||
|
// console: the /dev/console to setup inside the container
|
||||||
|
// init: the program executed inside the namespaces
|
||||||
|
// root: the path to the container json file and information
|
||||||
|
// args: the arguments to pass to the container to run as the user's program
|
||||||
|
func DefaultSetupCommand(container *libcontainer.Config, console, dataPath, init string) *exec.Cmd {
|
||||||
|
// get our binary name from arg0 so we can always reexec ourself
|
||||||
|
env := []string{
|
||||||
|
"console=" + console,
|
||||||
|
"data_path=" + dataPath,
|
||||||
|
}
|
||||||
|
|
||||||
|
if dataPath == "" {
|
||||||
|
dataPath, _ = os.Getwd()
|
||||||
|
}
|
||||||
|
|
||||||
|
if container.RootFs == "" {
|
||||||
|
container.RootFs, _ = os.Getwd()
|
||||||
|
}
|
||||||
|
args := []string{dataPath, container.RootFs, console}
|
||||||
|
|
||||||
|
command := exec.Command(init, append([]string{"exec", "--func", "setup", "--"}, args...)...)
|
||||||
|
|
||||||
|
// make sure the process is executed inside the context of the rootfs
|
||||||
|
command.Dir = container.RootFs
|
||||||
|
command.Env = append(os.Environ(), env...)
|
||||||
|
|
||||||
return command
|
return command
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,9 @@ import (
|
||||||
"github.com/docker/libcontainer/apparmor"
|
"github.com/docker/libcontainer/apparmor"
|
||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer/cgroups"
|
||||||
"github.com/docker/libcontainer/label"
|
"github.com/docker/libcontainer/label"
|
||||||
|
"github.com/docker/libcontainer/mount"
|
||||||
"github.com/docker/libcontainer/system"
|
"github.com/docker/libcontainer/system"
|
||||||
|
"github.com/docker/libcontainer/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the
|
// ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the
|
||||||
|
@ -127,6 +129,61 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error {
|
||||||
panic("unreachable")
|
panic("unreachable")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetupContainer is run to setup mounts and networking related operations
|
||||||
|
// for a user namespace enabled process as a user namespace root doesn't
|
||||||
|
// have permissions to perform these operations.
|
||||||
|
// The setup process joins all the namespaces of user namespace enabled init
|
||||||
|
// except the user namespace, so it run as root in the root user namespace
|
||||||
|
// to perform these operations.
|
||||||
|
func SetupContainer(container *libcontainer.Config, dataPath, uncleanRootfs, consolePath string) error {
|
||||||
|
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// clear the current processes env and replace it with the environment
|
||||||
|
// defined on the container
|
||||||
|
if err := LoadContainerEnvironment(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
state, err := libcontainer.GetState(dataPath)
|
||||||
|
if err != nil && !os.IsNotExist(err) {
|
||||||
|
return fmt.Errorf("unable to read state: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := setupNetwork(container, &state.NetworkState); err != nil {
|
||||||
|
return fmt.Errorf("setup networking %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := setupRoute(container); err != nil {
|
||||||
|
return fmt.Errorf("setup route %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
label.Init()
|
||||||
|
|
||||||
|
hostRootUid, err := GetHostRootUid(container)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get hostRootUid %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
hostRootGid, err := GetHostRootGid(container)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get hostRootGid %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := mount.InitializeMountNamespace(rootfs,
|
||||||
|
consolePath,
|
||||||
|
container.RestrictSys,
|
||||||
|
hostRootUid,
|
||||||
|
hostRootGid,
|
||||||
|
(*mount.MountConfig)(container.MountConfig)); err != nil {
|
||||||
|
return fmt.Errorf("setup mount namespace %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func EnterCgroups(state *libcontainer.State, pid int) error {
|
func EnterCgroups(state *libcontainer.State, pid int) error {
|
||||||
return cgroups.EnterPid(state.CgroupPaths, pid)
|
return cgroups.EnterPid(state.CgroupPaths, pid)
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,14 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
|
||||||
pipe.Close()
|
pipe.Close()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if container.Namespaces.Contains(libcontainer.NEWUSER) {
|
||||||
|
return initUserNs(container, uncleanRootfs, consolePath, pipe, args)
|
||||||
|
} else {
|
||||||
|
return initDefault(container, uncleanRootfs, consolePath, pipe, args)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func initDefault(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) {
|
||||||
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -98,6 +106,8 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
|
||||||
if err := mount.InitializeMountNamespace(rootfs,
|
if err := mount.InitializeMountNamespace(rootfs,
|
||||||
consolePath,
|
consolePath,
|
||||||
container.RestrictSys,
|
container.RestrictSys,
|
||||||
|
0, // Default Root Uid
|
||||||
|
0, // Default Root Gid
|
||||||
(*mount.MountConfig)(container.MountConfig)); err != nil {
|
(*mount.MountConfig)(container.MountConfig)); err != nil {
|
||||||
return fmt.Errorf("setup mount namespace %s", err)
|
return fmt.Errorf("setup mount namespace %s", err)
|
||||||
}
|
}
|
||||||
|
@ -141,6 +151,82 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
|
||||||
return system.Execv(args[0], args[0:], os.Environ())
|
return system.Execv(args[0], args[0:], os.Environ())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func initUserNs(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) {
|
||||||
|
// clear the current processes env and replace it with the environment
|
||||||
|
// defined on the container
|
||||||
|
if err := LoadContainerEnvironment(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// We always read this as it is a way to sync with the parent as well
|
||||||
|
var networkState *network.NetworkState
|
||||||
|
if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// join any namespaces via a path to the namespace fd if provided
|
||||||
|
if err := joinExistingNamespaces(container.Namespaces); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if consolePath != "" {
|
||||||
|
if err := console.OpenAndDup("/dev/console"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, err := syscall.Setsid(); err != nil {
|
||||||
|
return fmt.Errorf("setsid %s", err)
|
||||||
|
}
|
||||||
|
if consolePath != "" {
|
||||||
|
if err := system.Setctty(); err != nil {
|
||||||
|
return fmt.Errorf("setctty %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if container.WorkingDir == "" {
|
||||||
|
container.WorkingDir = "/"
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := setupRlimits(container); err != nil {
|
||||||
|
return fmt.Errorf("setup rlimits %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if container.Hostname != "" {
|
||||||
|
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
|
||||||
|
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||||
|
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||||
|
return fmt.Errorf("set process label %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if container.RestrictSys {
|
||||||
|
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pdeathSignal, err := system.GetParentDeathSignal()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("get parent death signal %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := FinalizeNamespace(container); err != nil {
|
||||||
|
return fmt.Errorf("finalize namespace %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FinalizeNamespace can change user/group which clears the parent death
|
||||||
|
// signal, so we restore it here.
|
||||||
|
if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
|
||||||
|
return fmt.Errorf("restore parent death signal %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return system.Execv(args[0], args[0:], os.Environ())
|
||||||
|
}
|
||||||
|
|
||||||
// RestoreParentDeathSignal sets the parent death signal to old.
|
// RestoreParentDeathSignal sets the parent death signal to old.
|
||||||
func RestoreParentDeathSignal(old int) error {
|
func RestoreParentDeathSignal(old int) error {
|
||||||
if old == 0 {
|
if old == 0 {
|
||||||
|
|
|
@ -84,7 +84,7 @@ func startInExistingContainer(config *libcontainer.Config, state *libcontainer.S
|
||||||
)
|
)
|
||||||
signal.Notify(sigc)
|
signal.Notify(sigc)
|
||||||
|
|
||||||
if config.Tty {
|
if config.Tty && action != "setup" {
|
||||||
stdin = nil
|
stdin = nil
|
||||||
stdout = nil
|
stdout = nil
|
||||||
stderr = nil
|
stderr = nil
|
||||||
|
@ -143,6 +143,10 @@ func startContainer(container *libcontainer.Config, dataPath string, args []stri
|
||||||
return cmd
|
return cmd
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setupCommand := func(container *libcontainer.Config, console, dataPath, init string) *exec.Cmd {
|
||||||
|
return namespaces.DefaultSetupCommand(container, console, dataPath, init)
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
master *os.File
|
master *os.File
|
||||||
console string
|
console string
|
||||||
|
@ -189,7 +193,7 @@ func startContainer(container *libcontainer.Config, dataPath string, args []stri
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
return namespaces.Exec(container, stdin, stdout, stderr, console, dataPath, args, createCommand, startCallback)
|
return namespaces.Exec(container, stdin, stdout, stderr, console, dataPath, args, createCommand, setupCommand, startCallback)
|
||||||
}
|
}
|
||||||
|
|
||||||
func resizeTty(master *os.File) {
|
func resizeTty(master *os.File) {
|
||||||
|
|
|
@ -28,6 +28,11 @@ func init() {
|
||||||
Usage: "display the container's network interfaces",
|
Usage: "display the container's network interfaces",
|
||||||
Action: nsenterIp,
|
Action: nsenterIp,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
argvs["setup"] = &rFunc{
|
||||||
|
Usage: "finish setting up init before it is ready to exec",
|
||||||
|
Action: nsenterSetup,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
|
@ -82,3 +82,21 @@ func nsenterIp(config *libcontainer.Config, args []string) {
|
||||||
|
|
||||||
w.Flush()
|
w.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func nsenterSetup(config *libcontainer.Config, args []string) {
|
||||||
|
if len(args) < 2 || len(args) > 3 {
|
||||||
|
log.Fatalf("expected setup to have 2 or 3 arguments not %d", len(args))
|
||||||
|
}
|
||||||
|
|
||||||
|
dataPath := args[0]
|
||||||
|
uncleanRootfs := args[1]
|
||||||
|
|
||||||
|
consolePath := ""
|
||||||
|
if len(args) == 3 {
|
||||||
|
consolePath = args[2]
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := namespaces.SetupContainer(config, dataPath, uncleanRootfs, consolePath); err != nil {
|
||||||
|
log.Fatalf("failed to nsenter setup: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,251 @@
|
||||||
|
{
|
||||||
|
"capabilities": [
|
||||||
|
"CHOWN",
|
||||||
|
"DAC_OVERRIDE",
|
||||||
|
"FOWNER",
|
||||||
|
"MKNOD",
|
||||||
|
"NET_RAW",
|
||||||
|
"SETGID",
|
||||||
|
"SETUID",
|
||||||
|
"SETFCAP",
|
||||||
|
"SETPCAP",
|
||||||
|
"NET_BIND_SERVICE",
|
||||||
|
"SYS_CHROOT",
|
||||||
|
"KILL"
|
||||||
|
],
|
||||||
|
"cgroups": {
|
||||||
|
"allowed_devices": [
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "m",
|
||||||
|
"major_number": -1,
|
||||||
|
"minor_number": -1,
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "m",
|
||||||
|
"major_number": -1,
|
||||||
|
"minor_number": -1,
|
||||||
|
"type": 98
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"major_number": 5,
|
||||||
|
"minor_number": 1,
|
||||||
|
"path": "/dev/console",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"major_number": 4,
|
||||||
|
"path": "/dev/tty0",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"major_number": 4,
|
||||||
|
"minor_number": 1,
|
||||||
|
"path": "/dev/tty1",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"major_number": 136,
|
||||||
|
"minor_number": -1,
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"major_number": 5,
|
||||||
|
"minor_number": 2,
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"major_number": 10,
|
||||||
|
"minor_number": 200,
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 3,
|
||||||
|
"path": "/dev/null",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 5,
|
||||||
|
"path": "/dev/zero",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 7,
|
||||||
|
"path": "/dev/full",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 5,
|
||||||
|
"path": "/dev/tty",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 9,
|
||||||
|
"path": "/dev/urandom",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 8,
|
||||||
|
"path": "/dev/random",
|
||||||
|
"type": 99
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"name": "docker-koye",
|
||||||
|
"parent": "docker"
|
||||||
|
},
|
||||||
|
"restrict_sys": true,
|
||||||
|
"mount_config": {
|
||||||
|
"device_nodes": [
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 3,
|
||||||
|
"path": "/dev/null",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 5,
|
||||||
|
"path": "/dev/zero",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 7,
|
||||||
|
"path": "/dev/full",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 5,
|
||||||
|
"path": "/dev/tty",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 9,
|
||||||
|
"path": "/dev/urandom",
|
||||||
|
"type": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cgroup_permissions": "rwm",
|
||||||
|
"file_mode": 438,
|
||||||
|
"major_number": 1,
|
||||||
|
"minor_number": 8,
|
||||||
|
"path": "/dev/random",
|
||||||
|
"type": 99
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"mounts": [
|
||||||
|
{
|
||||||
|
"type": "tmpfs",
|
||||||
|
"destination": "/tmp"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"environment": [
|
||||||
|
"HOME=/",
|
||||||
|
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||||
|
"HOSTNAME=koye",
|
||||||
|
"TERM=xterm"
|
||||||
|
],
|
||||||
|
"hostname": "koye",
|
||||||
|
"namespaces": [
|
||||||
|
{"type": "NEWIPC"},
|
||||||
|
{"type": "NEWNET"},
|
||||||
|
{"type": "NEWNS"},
|
||||||
|
{"type": "NEWPID"},
|
||||||
|
{"type": "NEWUTS"},
|
||||||
|
{"type": "NEWUSER"}
|
||||||
|
],
|
||||||
|
"networks": [
|
||||||
|
{
|
||||||
|
"address": "127.0.0.1/0",
|
||||||
|
"gateway": "localhost",
|
||||||
|
"mtu": 1500,
|
||||||
|
"type": "loopback"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"address": "172.17.0.9/16",
|
||||||
|
"gateway": "172.17.42.1",
|
||||||
|
"bridge": "docker0",
|
||||||
|
"veth_prefix": "veth",
|
||||||
|
"mtu": 1500,
|
||||||
|
"type": "veth"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tty": true,
|
||||||
|
"user": "root",
|
||||||
|
"uid_mappings": [
|
||||||
|
{
|
||||||
|
"container_id": 0,
|
||||||
|
"host_id": 1000,
|
||||||
|
"size": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"container_id": 1,
|
||||||
|
"host_id": 1,
|
||||||
|
"size": 999
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"container_id": 1001,
|
||||||
|
"host_id": 1001,
|
||||||
|
"size": 9000
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"gid_mappings": [
|
||||||
|
{
|
||||||
|
"container_id": 0,
|
||||||
|
"host_id": 1000,
|
||||||
|
"size": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"container_id": 1,
|
||||||
|
"host_id": 1,
|
||||||
|
"size": 999
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"container_id": 1001,
|
||||||
|
"host_id": 1001,
|
||||||
|
"size": 9000
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rlimits": [
|
||||||
|
{
|
||||||
|
"type": 7,
|
||||||
|
"hard": 999,
|
||||||
|
"soft": 999
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Loading…
Reference in New Issue