Remove namespaces package

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
2015-01-31 21:21:06 -08:00 · 2015-01-31 21:21:06 -08:00 · bbeae7445a
parent 8191d4d60f
commit bbeae7445a
15 changed files with 952 additions and 1085 deletions
--- a/configs/namespaces.go
+++ b/configs/namespaces.go
@ -1,5 +1,9 @@
 package configs

+import (
+	"syscall"
+)
+
 type NamespaceType string

 const (
@ -18,6 +22,10 @@ type Namespace struct {
 	Path string        `json:"path,omitempty"`
 }

+func (n *Namespace) Syscall() int {
+	return namespaceInfo[n.Type]
+}
+
 type Namespaces []Namespace

 func (n *Namespaces) Remove(t NamespaceType) bool {
@ -50,3 +58,25 @@ func (n *Namespaces) index(t NamespaceType) int {
 func (n *Namespaces) Contains(t NamespaceType) bool {
 	return n.index(t) != -1
 }
+
+var namespaceInfo = map[NamespaceType]int{
+	NEWNET:  syscall.CLONE_NEWNET,
+	NEWNS:   syscall.CLONE_NEWNS,
+	NEWUSER: syscall.CLONE_NEWUSER,
+	NEWIPC:  syscall.CLONE_NEWIPC,
+	NEWUTS:  syscall.CLONE_NEWUTS,
+	NEWPID:  syscall.CLONE_NEWPID,
+}
+
+// CloneFlags parses the container's Namespaces options to set the correct
+// flags on clone, unshare. This function returns flags only for new namespaces.
+func (n *Namespaces) CloneFlags() uintptr {
+	var flag int
+	for _, v := range *n {
+		if v.Path != "" {
+			continue
+		}
+		flag |= namespaceInfo[v.Type]
+	}
+	return uintptr(flag)
+}
--- a/container.go
+++ b/container.go
@ -33,7 +33,7 @@ type Container interface {
 	Status() (configs.Status, error)

 	// Returns the current config of the container.
-	Config() *configs.Config
+	Config() configs.Config

 	// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
 	//
--- a/linux_container.go
+++ b/linux_container.go
@ -5,18 +5,35 @@ package libcontainer
 import (
 	"encoding/json"
 	"fmt"
+	"io"
+	"io/ioutil"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"syscall"

+	"github.com/docker/libcontainer/apparmor"
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/configs"
-	"github.com/docker/libcontainer/namespaces"
+	"github.com/docker/libcontainer/label"
+	"github.com/docker/libcontainer/mount"
 	"github.com/docker/libcontainer/network"
+	"github.com/docker/libcontainer/system"
 	"github.com/golang/glog"
 )

+const (
+	EXIT_SIGNAL_OFFSET = 128
+)
+
+type initError struct {
+	Message string `json:"message,omitempty"`
+}
+
+func (i initError) Error() string {
+	return i.Message
+}
+
 type linuxContainer struct {
 	id            string
 	root          string
@ -26,12 +43,14 @@ type linuxContainer struct {
 	initArgs      []string
 }

+// ID returns the container's unique ID
 func (c *linuxContainer) ID() string {
 	return c.id
 }

-func (c *linuxContainer) Config() *configs.Config {
-	return c.config
+// Config returns the container's configuration
+func (c *linuxContainer) Config() configs.Config {
+	return *c.config
 }

 func (c *linuxContainer) Status() (configs.Status, error) {
@ -96,48 +115,158 @@ func (c *linuxContainer) Start(process *Process) (int, error) {
 	if status != configs.Destroyed {
 		glog.Info("start new container process")
 		// TODO: (crosbymichael) check out console use for execin
-		return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
+		//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
+		return c.startNewProcess(cmd, process.Args)
 	}
-	if err := c.startInitProcess(cmd, process); err != nil {
+	if err := c.startInitProcess(cmd, process.Args); err != nil {
 		return -1, err
 	}
 	return c.state.InitPid, nil
 }

-func (c *linuxContainer) updateStateFile() error {
-	fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename))
-	f, err := os.Create(fnew)
+func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) {
+	var err error
+	parent, child, err := newInitPipe()
 	if err != nil {
-		return newGenericError(err, SystemError)
+		return -1, err
 	}
-
-	err = json.NewEncoder(f).Encode(c.state)
+	defer parent.Close()
+	cmd.ExtraFiles = []*os.File{child}
+	cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid))
+	if err := cmd.Start(); err != nil {
+		child.Close()
+		return -1, err
+	}
+	child.Close()
+	s, err := cmd.Process.Wait()
 	if err != nil {
-		f.Close()
-		os.Remove(fnew)
-		return newGenericError(err, SystemError)
+		return -1, err
 	}
-	f.Close()
-
-	fname := filepath.Join(c.root, stateFilename)
-	if err := os.Rename(fnew, fname); err != nil {
-		return newGenericError(err, SystemError)
+	if !s.Success() {
+		return -1, &exec.ExitError{s}
 	}
-
-	return nil
+	decoder := json.NewDecoder(parent)
+	var pid *pid
+	if err := decoder.Decode(&pid); err != nil {
+		return -1, err
+	}
+	p, err := os.FindProcess(pid.Pid)
+	if err != nil {
+		return -1, err
+	}
+	terminate := func(terr error) (int, error) {
+		// TODO: log the errors for kill and wait
+		p.Kill()
+		p.Wait()
+		return -1, terr
+	}
+	// Enter cgroups.
+	if err := enterCgroups(c.state, pid.Pid); err != nil {
+		return terminate(err)
+	}
+	encoder := json.NewEncoder(parent)
+	if err := encoder.Encode(c.config); err != nil {
+		return terminate(err)
+	}
+	process := processArgs{
+		Config: c.config,
+		Args:   args,
+	}
+	if err := encoder.Encode(process); err != nil {
+		return terminate(err)
+	}
+	return pid.Pid, nil
 }

-func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *Process) error {
-	err := namespaces.Exec(config.Args, c.config.Env, c.config.Console, cmd, c.config, c.cgroupManager, c.state)
+func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
+	// create a pipe so that we can synchronize with the namespaced process and
+	// pass the state and configuration to the child process
+	parent, child, err := newInitPipe()
 	if err != nil {
 		return err
 	}
+	defer parent.Close()
+	cmd.ExtraFiles = []*os.File{child}
+	cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags()
+	if c.config.Namespaces.Contains(configs.NEWUSER) {
+		addUidGidMappings(cmd.SysProcAttr, c.config)
+		// Default to root user when user namespaces are enabled.
+		if cmd.SysProcAttr.Credential == nil {
+			cmd.SysProcAttr.Credential = &syscall.Credential{}
+		}
+	}
+	glog.Info("starting container init process")
+	err = cmd.Start()
+	child.Close()
+	if err != nil {
+		return newGenericError(err, SystemError)
+	}
+	wait := func() (*os.ProcessState, error) {
+		ps, err := cmd.Process.Wait()
+		// we should kill all processes in the cgroup when init has died if we use
+		// the host PID namespace
+		if !c.config.Namespaces.Contains(configs.NEWPID) {
+			c.killAllPids()
+		}
+		return ps, newGenericError(err, SystemError)
+	}
+	terminate := func(terr error) error {
+		// TODO: log the errors for kill and wait
+		cmd.Process.Kill()
+		wait()
+		return terr
+	}
+	started, err := system.GetProcessStartTime(cmd.Process.Pid)
+	if err != nil {
+		return terminate(err)
+	}
+	// Do this before syncing with child so that no children
+	// can escape the cgroup
+	if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil {
+		return terminate(err)
+	}
+	defer func() {
+		if err != nil {
+			c.cgroupManager.Destroy()
+		}
+	}()
+	var networkState configs.NetworkState
+	if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil {
+		return terminate(err)
+	}
+	process := processArgs{
+		Args:         args,
+		Config:       c.config,
+		NetworkState: &networkState,
+	}
+	// Start the setup process to setup the init process
+	if c.config.Namespaces.Contains(configs.NEWUSER) {
+		if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil {
+			return terminate(err)
+		}
+	}
+	// send the state to the container's init process then shutdown writes for the parent
+	if err := json.NewEncoder(parent).Encode(process); err != nil {
+		return terminate(err)
+	}
+	// shutdown writes for the parent side of the pipe
+	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
+		return terminate(err)
+	}
+	// wait for the child process to fully complete and receive an error message
+	// if one was encountered
+	var ierr *initError
+	if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
+		return terminate(err)
+	}
+	if ierr != nil {
+		return terminate(ierr)
+	}

-	err = c.updateStateFile()
-	if err != nil {
-		// FIXME c.Kill()
-		return err
-	}
+	c.state.InitPid = cmd.Process.Pid
+	c.state.InitStartTime = started
+	c.state.NetworkState = networkState
+	c.state.CgroupPaths = c.cgroupManager.GetPaths()

 	return nil
 }
@ -179,3 +308,282 @@ func (c *linuxContainer) Wait() (syscall.WaitStatus, error) {
 func (c *linuxContainer) OOM() (<-chan struct{}, error) {
 	return NotifyOnOOM(c.state)
 }
+
+func (c *linuxContainer) updateStateFile() error {
+	fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename))
+	f, err := os.Create(fnew)
+	if err != nil {
+		return newGenericError(err, SystemError)
+	}
+	defer f.Close()
+
+	if err := json.NewEncoder(f).Encode(c.state); err != nil {
+		f.Close()
+		os.Remove(fnew)
+		return newGenericError(err, SystemError)
+	}
+	fname := filepath.Join(c.root, stateFilename)
+	if err := os.Rename(fnew, fname); err != nil {
+		return newGenericError(err, SystemError)
+	}
+	return nil
+}
+
+// newInitPipe returns a newly initialized pipe for communication between processes
+func newInitPipe() (parent *os.File, child *os.File, err error) {
+	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+	return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
+}
+
+// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
+func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
+	if container.UidMappings != nil {
+		sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
+		for i, um := range container.UidMappings {
+			sys.UidMappings[i].ContainerID = um.ContainerID
+			sys.UidMappings[i].HostID = um.HostID
+			sys.UidMappings[i].Size = um.Size
+		}
+	}
+
+	if container.GidMappings != nil {
+		sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
+		for i, gm := range container.GidMappings {
+			sys.GidMappings[i].ContainerID = gm.ContainerID
+			sys.GidMappings[i].HostID = gm.HostID
+			sys.GidMappings[i].Size = gm.Size
+		}
+	}
+}
+
+// killAllPids iterates over all of the container's processes
+// sending a SIGKILL to each process.
+func (c *linuxContainer) killAllPids() error {
+	glog.Info("killing all processes in container")
+	var procs []*os.Process
+	c.cgroupManager.Freeze(configs.Frozen)
+	pids, err := c.cgroupManager.GetPids()
+	if err != nil {
+		return err
+	}
+	for _, pid := range pids {
+		// TODO: log err without aborting if we are unable to find
+		// a single PID
+		if p, err := os.FindProcess(pid); err == nil {
+			procs = append(procs, p)
+			p.Kill()
+		}
+	}
+	c.cgroupManager.Freeze(configs.Thawed)
+	for _, p := range procs {
+		p.Wait()
+	}
+	return err
+}
+
+// initializeNetworking creates the container's network stack outside of the namespace and moves
+// interfaces into the container's net namespaces if necessary
+func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error {
+	glog.Info("initailzing container's network stack")
+	for _, config := range c.config.Networks {
+		strategy, err := network.GetStrategy(config.Type)
+		if err != nil {
+			return err
+		}
+		if err := strategy.Create(config, nspid, networkState); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error {
+	command := exec.Command(args[0], args[1:]...)
+	parent, child, err := newInitPipe()
+	if err != nil {
+		return err
+	}
+	defer parent.Close()
+	command.ExtraFiles = []*os.File{child}
+	command.Dir = container.RootFs
+	command.Env = append(command.Env,
+		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
+		fmt.Sprintf("_LIBCONTAINER_USERNS=1"))
+	err = command.Start()
+	child.Close()
+	if err != nil {
+		return err
+	}
+	s, err := command.Process.Wait()
+	if err != nil {
+		return err
+	}
+	if !s.Success() {
+		return &exec.ExitError{s}
+	}
+	decoder := json.NewDecoder(parent)
+	var pid *pid
+	if err := decoder.Decode(&pid); err != nil {
+		return err
+	}
+	p, err := os.FindProcess(pid.Pid)
+	if err != nil {
+		return err
+	}
+	terminate := func(terr error) error {
+		// TODO: log the errors for kill and wait
+		p.Kill()
+		p.Wait()
+		return terr
+	}
+	// send the state to the container's init process then shutdown writes for the parent
+	if err := json.NewEncoder(parent).Encode(process); err != nil {
+		return terminate(err)
+	}
+	// shutdown writes for the parent side of the pipe
+	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
+		return terminate(err)
+	}
+	// wait for the child process to fully complete and receive an error message
+	// if one was encountered
+	var ierr *initError
+	if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
+		return terminate(err)
+	}
+	if ierr != nil {
+		return ierr
+	}
+	s, err = p.Wait()
+	if err != nil {
+		return err
+	}
+	if !s.Success() {
+		return &exec.ExitError{s}
+	}
+	return nil
+}
+
+type pid struct {
+	Pid int `json:"Pid"`
+}
+
+// InitIn finalizes entering into a container and executes the specified command
+func InitIn(pipe *os.File) (err error) {
+	defer func() {
+		// if we have an error during the initialization of the container's init then send it back to the
+		// parent process in the form of an initError.
+		if err != nil {
+			// ensure that any data sent from the parent is consumed so it doesn't
+			// receive ECONNRESET when the child writes to the pipe.
+			ioutil.ReadAll(pipe)
+			if err := json.NewEncoder(pipe).Encode(initError{
+				Message: err.Error(),
+			}); err != nil {
+				panic(err)
+			}
+		}
+		// ensure that this pipe is always closed
+		pipe.Close()
+	}()
+	decoder := json.NewDecoder(pipe)
+	var config *configs.Config
+	if err := decoder.Decode(&config); err != nil {
+		return err
+	}
+	var process *processArgs
+	if err := decoder.Decode(&process); err != nil {
+		return err
+	}
+	if err := finalizeSetns(config); err != nil {
+		return err
+	}
+	if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil {
+		return err
+	}
+	panic("unreachable")
+}
+
+// finalizeSetns expects that the setns calls have been set up and that it has joined an
+// existing namespace
+func finalizeSetns(container *configs.Config) error {
+	// clear the current processes env and replace it with the environment defined on the container
+	if err := loadContainerEnvironment(container); err != nil {
+		return err
+	}
+
+	if err := setupRlimits(container); err != nil {
+		return fmt.Errorf("setup rlimits %s", err)
+	}
+
+	if err := finalizeNamespace(container); err != nil {
+		return err
+	}
+
+	if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
+		return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
+	}
+
+	if container.ProcessLabel != "" {
+		if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// SetupContainer is run to setup mounts and networking related operations
+// for a user namespace enabled process as a user namespace root doesn't
+// have permissions to perform these operations.
+// The setup process joins all the namespaces of user namespace enabled init
+// except the user namespace, so it runs as root in the root user namespace
+// to perform these operations.
+func SetupContainer(process *processArgs) error {
+	container := process.Config
+	networkState := process.NetworkState
+
+	// TODO : move to validation
+	/*
+		rootfs, err := utils.ResolveRootfs(container.RootFs)
+		if err != nil {
+			return err
+		}
+	*/
+
+	// clear the current processes env and replace it with the environment
+	// defined on the container
+	if err := loadContainerEnvironment(container); err != nil {
+		return err
+	}
+
+	cloneFlags := container.Namespaces.CloneFlags()
+	if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
+		if len(container.Networks) != 0 || len(container.Routes) != 0 {
+			return fmt.Errorf("unable to apply network parameters without network namespace")
+		}
+	} else {
+		if err := setupNetwork(container, networkState); err != nil {
+			return fmt.Errorf("setup networking %s", err)
+		}
+		if err := setupRoute(container); err != nil {
+			return fmt.Errorf("setup route %s", err)
+		}
+	}
+
+	label.Init()
+
+	// InitializeMountNamespace() can be executed only for a new mount namespace
+	if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
+		if err := mount.InitializeMountNamespace(container); err != nil {
+			return fmt.Errorf("setup mount namespace %s", err)
+		}
+	}
+	return nil
+}
+
+func enterCgroups(state *configs.State, pid int) error {
+	return cgroups.EnterPid(state.CgroupPaths, pid)
+}
--- a/linux_factory.go
+++ b/linux_factory.go
@ -5,15 +5,28 @@ package libcontainer
 import (
 	"encoding/json"
 	"fmt"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"regexp"
+	"strings"
+	"syscall"

 	"github.com/golang/glog"

+	"github.com/docker/libcontainer/apparmor"
 	cgroups "github.com/docker/libcontainer/cgroups/manager"
 	"github.com/docker/libcontainer/configs"
-	"github.com/docker/libcontainer/namespaces"
+	"github.com/docker/libcontainer/console"
+	"github.com/docker/libcontainer/label"
+	"github.com/docker/libcontainer/mount"
+	"github.com/docker/libcontainer/netlink"
+	"github.com/docker/libcontainer/network"
+	"github.com/docker/libcontainer/security/capabilities"
+	"github.com/docker/libcontainer/security/restrict"
+	"github.com/docker/libcontainer/system"
+	"github.com/docker/libcontainer/user"
+	"github.com/docker/libcontainer/utils"
 )

 const (
@ -26,6 +39,13 @@ var (
 	maxIdLen = 1024
 )

+// processArgs is used for transferring parameters from the parent process to the container's init process
+type processArgs struct {
+	Args         []string              `json:"args,omitempty"`
+	Config       *configs.Config       `json:"config,omitempty"`
+	NetworkState *configs.NetworkState `json:"network_state,omitempty"`
+}
+
 // New returns a linux based container factory based in the root directory.
 func New(root string, initArgs []string) (Factory, error) {
 	if root != "" {
@ -116,16 +136,50 @@ func (l *linuxFactory) Load(id string) (Container, error) {

 // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
 // This is a low level implementation detail of the reexec and should not be consumed externally
-func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
+func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
 	pipe := os.NewFile(uintptr(pipefd), "pipe")
-
-	setupUserns := os.Getenv("_LIBCONTAINER_USERNS")
+	setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != ""
 	pid := os.Getenv("_LIBCONTAINER_INITPID")
-	if pid != "" && setupUserns == "" {
-		return namespaces.InitIn(pipe)
+	if pid != "" && !setupUserns {
+		return InitIn(pipe)
 	}
-
-	return namespaces.Init(pipe, setupUserns != "")
+	defer func() {
+		// if we have an error during the initialization of the container's init then send it back to the
+		// parent process in the form of an initError.
+		if err != nil {
+			// ensure that any data sent from the parent is consumed so it doesn't
+			// receive ECONNRESET when the child writes to the pipe.
+			ioutil.ReadAll(pipe)
+			if err := json.NewEncoder(pipe).Encode(initError{
+				Message: err.Error(),
+			}); err != nil {
+				panic(err)
+			}
+		}
+		// ensure that this pipe is always closed
+		pipe.Close()
+	}()
+	uncleanRootfs, err := os.Getwd()
+	if err != nil {
+		return err
+	}
+	var process *processArgs
+	// We always read this as it is a way to sync with the parent as well
+	if err := json.NewDecoder(pipe).Decode(&process); err != nil {
+		return err
+	}
+	if setupUserns {
+		err = SetupContainer(process)
+		if err == nil {
+			os.Exit(0)
+		} else {
+			os.Exit(1)
+		}
+	}
+	if process.Config.Namespaces.Contains(configs.NEWUSER) {
+		return l.initUserNs(uncleanRootfs, process)
+	}
+	return l.initDefault(uncleanRootfs, process)
 }

 func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) {
@ -137,7 +191,6 @@ func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error)
 		return nil, newGenericError(err, SystemError)
 	}
 	defer f.Close()
-
 	var config *configs.Config
 	if err := json.NewDecoder(f).Decode(&config); err != nil {
 		return nil, newGenericError(err, ConfigInvalid)
@ -154,7 +207,6 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) {
 		return nil, newGenericError(err, SystemError)
 	}
 	defer f.Close()
-
 	var state *configs.State
 	if err := json.NewDecoder(f).Decode(&state); err != nil {
 		return nil, newGenericError(err, SystemError)
@ -171,3 +223,337 @@ func (l *linuxFactory) validateID(id string) error {
 	}
 	return nil
 }
+
+func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) {
+	config := process.Config
+	networkState := process.NetworkState
+
+	// TODO: move to validation
+	/*
+		rootfs, err := utils.ResolveRootfs(uncleanRootfs)
+		if err != nil {
+			return err
+		}
+	*/
+
+	// clear the current processes env and replace it with the environment
+	// defined on the container
+	if err := loadContainerEnvironment(config); err != nil {
+		return err
+	}
+	// join any namespaces via a path to the namespace fd if provided
+	if err := joinExistingNamespaces(config.Namespaces); err != nil {
+		return err
+	}
+	if config.Console != "" {
+		if err := console.OpenAndDup(config.Console); err != nil {
+			return err
+		}
+	}
+	if _, err := syscall.Setsid(); err != nil {
+		return fmt.Errorf("setsid %s", err)
+	}
+	if config.Console != "" {
+		if err := system.Setctty(); err != nil {
+			return fmt.Errorf("setctty %s", err)
+		}
+	}
+
+	cloneFlags := config.Namespaces.CloneFlags()
+	if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
+		if len(config.Networks) != 0 || len(config.Routes) != 0 {
+			return fmt.Errorf("unable to apply network parameters without network namespace")
+		}
+	} else {
+		if err := setupNetwork(config, networkState); err != nil {
+			return fmt.Errorf("setup networking %s", err)
+		}
+		if err := setupRoute(config); err != nil {
+			return fmt.Errorf("setup route %s", err)
+		}
+	}
+	if err := setupRlimits(config); err != nil {
+		return fmt.Errorf("setup rlimits %s", err)
+	}
+	label.Init()
+	// InitializeMountNamespace() can be executed only for a new mount namespace
+	if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
+		if err := mount.InitializeMountNamespace(config); err != nil {
+			return err
+		}
+	}
+	if config.Hostname != "" {
+		// TODO: (crosbymichael) move this to pre spawn validation
+		if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
+			return fmt.Errorf("unable to set the hostname without UTS namespace")
+		}
+		if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
+			return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
+		}
+	}
+	if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
+		return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
+	}
+	if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
+		return fmt.Errorf("set process label %s", err)
+	}
+	// TODO: (crosbymichael) make this configurable at the Config level
+	if config.RestrictSys {
+		if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
+			return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
+		}
+		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
+			return err
+		}
+	}
+	pdeathSignal, err := system.GetParentDeathSignal()
+	if err != nil {
+		return fmt.Errorf("get parent death signal %s", err)
+	}
+	if err := finalizeNamespace(config); err != nil {
+		return fmt.Errorf("finalize namespace %s", err)
+	}
+	// finalizeNamespace can change user/group which clears the parent death
+	// signal, so we restore it here.
+	if err := restoreParentDeathSignal(pdeathSignal); err != nil {
+		return fmt.Errorf("restore parent death signal %s", err)
+	}
+	return system.Execv(process.Args[0], process.Args[0:], config.Env)
+}
+
+func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) {
+	config := process.Config
+	// clear the current processes env and replace it with the environment
+	// defined on the config
+	if err := loadContainerEnvironment(config); err != nil {
+		return err
+	}
+	// join any namespaces via a path to the namespace fd if provided
+	if err := joinExistingNamespaces(config.Namespaces); err != nil {
+		return err
+	}
+	if config.Console != "" {
+		if err := console.OpenAndDup("/dev/console"); err != nil {
+			return err
+		}
+	}
+	if _, err := syscall.Setsid(); err != nil {
+		return fmt.Errorf("setsid %s", err)
+	}
+	if config.Console != "" {
+		if err := system.Setctty(); err != nil {
+			return fmt.Errorf("setctty %s", err)
+		}
+	}
+	if config.WorkingDir == "" {
+		config.WorkingDir = "/"
+	}
+
+	if err := setupRlimits(config); err != nil {
+		return fmt.Errorf("setup rlimits %s", err)
+	}
+	cloneFlags := config.Namespaces.CloneFlags()
+	if config.Hostname != "" {
+		// TODO: move validation
+		if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
+			return fmt.Errorf("unable to set the hostname without UTS namespace")
+		}
+		if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
+			return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
+		}
+	}
+	if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
+		return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
+	}
+	if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
+		return fmt.Errorf("set process label %s", err)
+	}
+	if config.RestrictSys {
+		if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
+			return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
+		}
+		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
+			return err
+		}
+	}
+	pdeathSignal, err := system.GetParentDeathSignal()
+	if err != nil {
+		return fmt.Errorf("get parent death signal %s", err)
+	}
+	if err := finalizeNamespace(config); err != nil {
+		return fmt.Errorf("finalize namespace %s", err)
+	}
+	// finalizeNamespace can change user/group which clears the parent death
+	// signal, so we restore it here.
+	if err := restoreParentDeathSignal(pdeathSignal); err != nil {
+		return fmt.Errorf("restore parent death signal %s", err)
+	}
+	return system.Execv(process.Args[0], process.Args[0:], config.Env)
+}
+
+// restoreParentDeathSignal sets the parent death signal to old.
+func restoreParentDeathSignal(old int) error {
+	if old == 0 {
+		return nil
+	}
+	current, err := system.GetParentDeathSignal()
+	if err != nil {
+		return fmt.Errorf("get parent death signal %s", err)
+	}
+	if old == current {
+		return nil
+	}
+	if err := system.ParentDeathSignal(uintptr(old)); err != nil {
+		return fmt.Errorf("set parent death signal %s", err)
+	}
+	// Signal self if parent is already dead. Does nothing if running in a new
+	// PID namespace, as Getppid will always return 0.
+	if syscall.Getppid() == 1 {
+		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
+	}
+	return nil
+}
+
+// setupUser changes the groups, gid, and uid for the user inside the container
+func setupUser(config *configs.Config) error {
+	// Set up defaults.
+	defaultExecUser := user.ExecUser{
+		Uid:  syscall.Getuid(),
+		Gid:  syscall.Getgid(),
+		Home: "/",
+	}
+	passwdPath, err := user.GetPasswdPath()
+	if err != nil {
+		return err
+	}
+	groupPath, err := user.GetGroupPath()
+	if err != nil {
+		return err
+	}
+	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
+	if err != nil {
+		return fmt.Errorf("get supplementary groups %s", err)
+	}
+	suppGroups := append(execUser.Sgids, config.AdditionalGroups...)
+	if err := syscall.Setgroups(suppGroups); err != nil {
+		return fmt.Errorf("setgroups %s", err)
+	}
+	if err := system.Setgid(execUser.Gid); err != nil {
+		return fmt.Errorf("setgid %s", err)
+	}
+	if err := system.Setuid(execUser.Uid); err != nil {
+		return fmt.Errorf("setuid %s", err)
+	}
+	// if we didn't get HOME already, set it based on the user's HOME
+	if envHome := os.Getenv("HOME"); envHome == "" {
+		if err := os.Setenv("HOME", execUser.Home); err != nil {
+			return fmt.Errorf("set HOME %s", err)
+		}
+	}
+	return nil
+}
+
+// setupNetwork uses the Network config if it is not nil to initialize
+// the new veth interface inside the container for use by changing the name to eth0
+// setting the MTU and IP address along with the default gateway
+func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error {
+	for _, config := range config.Networks {
+		strategy, err := network.GetStrategy(config.Type)
+		if err != nil {
+			return err
+		}
+		err1 := strategy.Initialize(config, networkState)
+		if err1 != nil {
+			return err1
+		}
+	}
+	return nil
+}
+
+func setupRoute(config *configs.Config) error {
+	for _, config := range config.Routes {
+		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func setupRlimits(config *configs.Config) error {
+	for _, rlimit := range config.Rlimits {
+		l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
+		if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
+			return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
+		}
+	}
+	return nil
+}
+
+// finalizeNamespace drops the caps, sets the correct user
+// and working dir, and closes any leaky file descriptors
+// before execing the command inside the namespace
+func finalizeNamespace(config *configs.Config) error {
+	// Ensure that all non-standard fds we may have accidentally
+	// inherited are marked close-on-exec so they stay out of the
+	// container
+	if err := utils.CloseExecFrom(3); err != nil {
+		return fmt.Errorf("close open file descriptors %s", err)
+	}
+	// drop capabilities in bounding set before changing user
+	if err := capabilities.DropBoundingSet(config.Capabilities); err != nil {
+		return fmt.Errorf("drop bounding set %s", err)
+	}
+	// preserve existing capabilities while we change users
+	if err := system.SetKeepCaps(); err != nil {
+		return fmt.Errorf("set keep caps %s", err)
+	}
+	if err := setupUser(config); err != nil {
+		return fmt.Errorf("setup user %s", err)
+	}
+	if err := system.ClearKeepCaps(); err != nil {
+		return fmt.Errorf("clear keep caps %s", err)
+	}
+	// drop all other capabilities
+	if err := capabilities.DropCapabilities(config.Capabilities); err != nil {
+		return fmt.Errorf("drop capabilities %s", err)
+	}
+	if config.WorkingDir != "" {
+		if err := syscall.Chdir(config.WorkingDir); err != nil {
+			return fmt.Errorf("chdir to %s %s", config.WorkingDir, err)
+		}
+	}
+	return nil
+}
+
+func loadContainerEnvironment(config *configs.Config) error {
+	os.Clearenv()
+	for _, pair := range config.Env {
+		p := strings.SplitN(pair, "=", 2)
+		if len(p) < 2 {
+			return fmt.Errorf("invalid environment '%v'", pair)
+		}
+		if err := os.Setenv(p[0], p[1]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// joinExistingNamespaces gets all the namespace paths specified for the container and
+// does a setns on the namespace fd so that the current process joins the namespace.
+func joinExistingNamespaces(namespaces []configs.Namespace) error {
+	for _, ns := range namespaces {
+		if ns.Path != "" {
+			f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
+			if err != nil {
+				return err
+			}
+			err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
+			f.Close()
+			if err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
--- a/namespaces/exec.go
+++ b/namespaces/exec.go
@ -1,262 +0,0 @@
-// +build linux
-
-package namespaces
-
-import (
-	"encoding/json"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-	"syscall"
-
-	"github.com/docker/libcontainer/cgroups"
-	"github.com/docker/libcontainer/configs"
-	"github.com/docker/libcontainer/network"
-	"github.com/docker/libcontainer/system"
-)
-
-const (
-	EXIT_SIGNAL_OFFSET = 128
-)
-
-func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error {
-	command := exec.Command(args[0], args[1:]...)
-	parent, child, err := newInitPipe()
-	if err != nil {
-		return err
-	}
-	defer parent.Close()
-	command.ExtraFiles = []*os.File{child}
-	command.Dir = container.RootFs
-	command.Env = append(command.Env,
-		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
-		fmt.Sprintf("_LIBCONTAINER_USERNS=1"))
-	err = command.Start()
-	child.Close()
-	if err != nil {
-		return err
-	}
-	s, err := command.Process.Wait()
-	if err != nil {
-		return err
-	}
-	if !s.Success() {
-		return &exec.ExitError{s}
-	}
-	decoder := json.NewDecoder(parent)
-	var pid *pid
-	if err := decoder.Decode(&pid); err != nil {
-		return err
-	}
-	p, err := os.FindProcess(pid.Pid)
-	if err != nil {
-		return err
-	}
-	terminate := func(terr error) error {
-		// TODO: log the errors for kill and wait
-		p.Kill()
-		p.Wait()
-		return terr
-	}
-	// send the state to the container's init process then shutdown writes for the parent
-	if err := json.NewEncoder(parent).Encode(process); err != nil {
-		return terminate(err)
-	}
-	// shutdown writes for the parent side of the pipe
-	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
-		return terminate(err)
-	}
-	// wait for the child process to fully complete and receive an error message
-	// if one was encoutered
-	var ierr *initError
-	if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
-		return terminate(err)
-	}
-	if ierr != nil {
-		return ierr
-	}
-	s, err = p.Wait()
-	if err != nil {
-		return err
-	}
-	if !s.Success() {
-		return &exec.ExitError{s}
-	}
-	return nil
-}
-
-// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
-// Move this to libcontainer package.
-// Exec performs setup outside of a namespace so that a container can be
-// executed.  Exec is a high level function for working with container namespaces.
-func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) (err error) {
-	// create a pipe so that we can syncronize with the namespaced process and
-	// pass the state and configuration to the child process
-	parent, child, err := newInitPipe()
-	if err != nil {
-		return err
-	}
-	defer parent.Close()
-	command.ExtraFiles = []*os.File{child}
-
-	command.Dir = container.RootFs
-	command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces))
-
-	if container.Namespaces.Contains(configs.NEWUSER) {
-		AddUidGidMappings(command.SysProcAttr, container)
-
-		// Default to root user when user namespaces are enabled.
-		if command.SysProcAttr.Credential == nil {
-			command.SysProcAttr.Credential = &syscall.Credential{}
-		}
-	}
-
-	if err := command.Start(); err != nil {
-		child.Close()
-		return err
-	}
-	child.Close()
-
-	wait := func() (*os.ProcessState, error) {
-		ps, err := command.Process.Wait()
-		// we should kill all processes in cgroup when init is died if we use
-		// host PID namespace
-		if !container.Namespaces.Contains(configs.NEWPID) {
-			killAllPids(cgroupManager)
-		}
-		return ps, err
-	}
-
-	terminate := func(terr error) error {
-		// TODO: log the errors for kill and wait
-		command.Process.Kill()
-		wait()
-		return terr
-	}
-
-	started, err := system.GetProcessStartTime(command.Process.Pid)
-	if err != nil {
-		return terminate(err)
-	}
-
-	// Do this before syncing with child so that no children
-	// can escape the cgroup
-	err = cgroupManager.Apply(command.Process.Pid)
-	if err != nil {
-		return terminate(err)
-	}
-	defer func() {
-		if err != nil {
-			cgroupManager.Destroy()
-		}
-	}()
-
-	var networkState configs.NetworkState
-	if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
-		return terminate(err)
-	}
-
-	process := processArgs{
-		Env:          append(env[0:], container.Env...),
-		Args:         args,
-		ConsolePath:  console,
-		Config:       container,
-		NetworkState: &networkState,
-	}
-
-	// Start the setup process to setup the init process
-	if container.Namespaces.Contains(configs.NEWUSER) {
-		if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil {
-			return terminate(err)
-		}
-	}
-
-	// send the state to the container's init process then shutdown writes for the parent
-	if err := json.NewEncoder(parent).Encode(process); err != nil {
-		return terminate(err)
-	}
-	// shutdown writes for the parent side of the pipe
-	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
-		return terminate(err)
-	}
-
-	// wait for the child process to fully complete and receive an error message
-	// if one was encoutered
-	var ierr *initError
-	if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
-		return terminate(err)
-	}
-	if ierr != nil {
-		return terminate(ierr)
-	}
-
-	state.InitPid = command.Process.Pid
-	state.InitStartTime = started
-	state.NetworkState = networkState
-	state.CgroupPaths = cgroupManager.GetPaths()
-
-	return nil
-}
-
-// killAllPids iterates over all of the container's processes
-// sending a SIGKILL to each process.
-func killAllPids(m cgroups.Manager) error {
-	var (
-		procs []*os.Process
-	)
-	m.Freeze(configs.Frozen)
-	pids, err := m.GetPids()
-	if err != nil {
-		return err
-	}
-	for _, pid := range pids {
-		// TODO: log err without aborting if we are unable to find
-		// a single PID
-		if p, err := os.FindProcess(pid); err == nil {
-			procs = append(procs, p)
-			p.Kill()
-		}
-	}
-	m.Freeze(configs.Thawed)
-	for _, p := range procs {
-		p.Wait()
-	}
-	return err
-}
-
-// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
-func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
-	if container.UidMappings != nil {
-		sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
-		for i, um := range container.UidMappings {
-			sys.UidMappings[i].ContainerID = um.ContainerID
-			sys.UidMappings[i].HostID = um.HostID
-			sys.UidMappings[i].Size = um.Size
-		}
-	}
-
-	if container.GidMappings != nil {
-		sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
-		for i, gm := range container.GidMappings {
-			sys.GidMappings[i].ContainerID = gm.ContainerID
-			sys.GidMappings[i].HostID = gm.HostID
-			sys.GidMappings[i].Size = gm.Size
-		}
-	}
-}
-
-// InitializeNetworking creates the container's network stack outside of the namespace and moves
-// interfaces into the container's net namespaces if necessary
-func InitializeNetworking(container *configs.Config, nspid int, networkState *configs.NetworkState) error {
-	for _, config := range container.Networks {
-		strategy, err := network.GetStrategy(config.Type)
-		if err != nil {
-			return err
-		}
-		if err := strategy.Create(config, nspid, networkState); err != nil {
-			return err
-		}
-	}
-	return nil
-}
--- a/namespaces/execin.go
+++ b/namespaces/execin.go
@ -1,218 +0,0 @@
-// +build linux
-
-package namespaces
-
-import (
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"os/exec"
-	"syscall"
-
-	"github.com/docker/libcontainer/apparmor"
-	"github.com/docker/libcontainer/cgroups"
-	"github.com/docker/libcontainer/configs"
-	"github.com/docker/libcontainer/label"
-	"github.com/docker/libcontainer/mount"
-	"github.com/docker/libcontainer/system"
-)
-
-type pid struct {
-	Pid int `json:"Pid"`
-}
-
-// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the
-// setns code in a single threaded environment joining the existing containers' namespaces.
-func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) {
-	var err error
-
-	parent, child, err := newInitPipe()
-	if err != nil {
-		return -1, err
-	}
-	defer parent.Close()
-
-	cmd.ExtraFiles = []*os.File{child}
-	cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid))
-
-	if err := cmd.Start(); err != nil {
-		child.Close()
-		return -1, err
-	}
-	child.Close()
-
-	s, err := cmd.Process.Wait()
-	if err != nil {
-		return -1, err
-	}
-	if !s.Success() {
-		return -1, &exec.ExitError{s}
-	}
-
-	decoder := json.NewDecoder(parent)
-	var pid *pid
-
-	if err := decoder.Decode(&pid); err != nil {
-		return -1, err
-	}
-
-	p, err := os.FindProcess(pid.Pid)
-	if err != nil {
-		return -1, err
-	}
-
-	terminate := func(terr error) (int, error) {
-		// TODO: log the errors for kill and wait
-		p.Kill()
-		p.Wait()
-		return -1, terr
-	}
-
-	// Enter cgroups.
-	if err := EnterCgroups(state, pid.Pid); err != nil {
-		return terminate(err)
-	}
-
-	encoder := json.NewEncoder(parent)
-
-	if err := encoder.Encode(container); err != nil {
-		return terminate(err)
-	}
-
-	process := processArgs{
-		Env:         append(env[0:], container.Env...),
-		Args:        args,
-		ConsolePath: console,
-	}
-	if err := encoder.Encode(process); err != nil {
-		return terminate(err)
-	}
-
-	return pid.Pid, nil
-}
-
-// Finalize entering into a container and execute a specified command
-func InitIn(pipe *os.File) (err error) {
-	defer func() {
-		// if we have an error during the initialization of the container's init then send it back to the
-		// parent process in the form of an initError.
-		if err != nil {
-			// ensure that any data sent from the parent is consumed so it doesn't
-			// receive ECONNRESET when the child writes to the pipe.
-			ioutil.ReadAll(pipe)
-			if err := json.NewEncoder(pipe).Encode(initError{
-				Message: err.Error(),
-			}); err != nil {
-				panic(err)
-			}
-		}
-		// ensure that this pipe is always closed
-		pipe.Close()
-	}()
-
-	decoder := json.NewDecoder(pipe)
-
-	var container *configs.Config
-	if err := decoder.Decode(&container); err != nil {
-		return err
-	}
-
-	var process *processArgs
-	if err := decoder.Decode(&process); err != nil {
-		return err
-	}
-
-	if err := FinalizeSetns(container); err != nil {
-		return err
-	}
-
-	if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil {
-		return err
-	}
-
-	panic("unreachable")
-}
-
-// Finalize expects that the setns calls have been setup and that is has joined an
-// existing namespace
-func FinalizeSetns(container *configs.Config) error {
-	// clear the current processes env and replace it with the environment defined on the container
-	if err := LoadContainerEnvironment(container); err != nil {
-		return err
-	}
-
-	if err := setupRlimits(container); err != nil {
-		return fmt.Errorf("setup rlimits %s", err)
-	}
-
-	if err := FinalizeNamespace(container); err != nil {
-		return err
-	}
-
-	if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
-		return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
-	}
-
-	if container.ProcessLabel != "" {
-		if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-// SetupContainer is run to setup mounts and networking related operations
-// for a user namespace enabled process as a user namespace root doesn't
-// have permissions to perform these operations.
-// The setup process joins all the namespaces of user namespace enabled init
-// except the user namespace, so it run as root in the root user namespace
-// to perform these operations.
-func SetupContainer(process *processArgs) error {
-	container := process.Config
-	networkState := process.NetworkState
-
-	// TODO : move to validation
-	/*
-		rootfs, err := utils.ResolveRootfs(container.RootFs)
-		if err != nil {
-			return err
-		}
-	*/
-
-	// clear the current processes env and replace it with the environment
-	// defined on the container
-	if err := LoadContainerEnvironment(container); err != nil {
-		return err
-	}
-
-	cloneFlags := GetNamespaceFlags(container.Namespaces)
-
-	if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
-		if len(container.Networks) != 0 || len(container.Routes) != 0 {
-			return fmt.Errorf("unable to apply network parameters without network namespace")
-		}
-	} else {
-		if err := setupNetwork(container, networkState); err != nil {
-			return fmt.Errorf("setup networking %s", err)
-		}
-		if err := setupRoute(container); err != nil {
-			return fmt.Errorf("setup route %s", err)
-		}
-	}
-
-	label.Init()
-
-	// InitializeMountNamespace() can be executed only for a new mount namespace
-	if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
-		if err := mount.InitializeMountNamespace(container); err != nil {
-			return fmt.Errorf("setup mount namespace %s", err)
-		}
-	}
-	return nil
-}
-
-func EnterCgroups(state *configs.State, pid int) error {
-	return cgroups.EnterPid(state.CgroupPaths, pid)
-}
--- a/namespaces/init.go
+++ b/namespaces/init.go
@ -1,465 +0,0 @@
-// +build linux
-
-package namespaces
-
-import (
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"strings"
-	"syscall"
-
-	"github.com/docker/libcontainer/apparmor"
-	"github.com/docker/libcontainer/configs"
-	"github.com/docker/libcontainer/console"
-	"github.com/docker/libcontainer/label"
-	"github.com/docker/libcontainer/mount"
-	"github.com/docker/libcontainer/netlink"
-	"github.com/docker/libcontainer/network"
-	"github.com/docker/libcontainer/security/capabilities"
-	"github.com/docker/libcontainer/security/restrict"
-	"github.com/docker/libcontainer/system"
-	"github.com/docker/libcontainer/user"
-	"github.com/docker/libcontainer/utils"
-)
-
-// Process is used for transferring parameters from Exec() to Init()
-type processArgs struct {
-	Args         []string              `json:"args,omitempty"`
-	Env          []string              `json:"environment,omitempty"`
-	ConsolePath  string                `json:"console_path,omitempty"`
-	Config       *configs.Config       `json:"config,omitempty"`
-	NetworkState *configs.NetworkState `json:"network_state,omitempty"`
-}
-
-// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
-// Move this to libcontainer package.
-// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
-// and other options required for the new container.
-// The caller of Init function has to ensure that the go runtime is locked to an OS thread
-// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
-func Init(pipe *os.File, setupUserns bool) (err error) {
-	defer func() {
-		// if we have an error during the initialization of the container's init then send it back to the
-		// parent process in the form of an initError.
-		if err != nil {
-			// ensure that any data sent from the parent is consumed so it doesn't
-			// receive ECONNRESET when the child writes to the pipe.
-			ioutil.ReadAll(pipe)
-			if err := json.NewEncoder(pipe).Encode(initError{
-				Message: err.Error(),
-			}); err != nil {
-				panic(err)
-			}
-		}
-		// ensure that this pipe is always closed
-		pipe.Close()
-	}()
-
-	uncleanRootfs, err := os.Getwd()
-	if err != nil {
-		return err
-	}
-
-	var process *processArgs
-	// We always read this as it is a way to sync with the parent as well
-	if err := json.NewDecoder(pipe).Decode(&process); err != nil {
-		return err
-	}
-
-	if setupUserns {
-		err = SetupContainer(process)
-		if err == nil {
-			os.Exit(0)
-		} else {
-			os.Exit(1)
-		}
-	}
-
-	if process.Config.Namespaces.Contains(configs.NEWUSER) {
-		return initUserNs(uncleanRootfs, process)
-	} else {
-		return initDefault(uncleanRootfs, process)
-	}
-}
-
-func initDefault(uncleanRootfs string, process *processArgs) (err error) {
-	container := process.Config
-	networkState := process.NetworkState
-
-	// TODO: move to validation
-	/*
-		rootfs, err := utils.ResolveRootfs(uncleanRootfs)
-		if err != nil {
-			return err
-		}
-	*/
-
-	// clear the current processes env and replace it with the environment
-	// defined on the container
-	if err := LoadContainerEnvironment(container); err != nil {
-		return err
-	}
-
-	// join any namespaces via a path to the namespace fd if provided
-	if err := joinExistingNamespaces(container.Namespaces); err != nil {
-		return err
-	}
-	if process.ConsolePath != "" {
-		if err := console.OpenAndDup(process.ConsolePath); err != nil {
-			return err
-		}
-	}
-	if _, err := syscall.Setsid(); err != nil {
-		return fmt.Errorf("setsid %s", err)
-	}
-	if process.ConsolePath != "" {
-		if err := system.Setctty(); err != nil {
-			return fmt.Errorf("setctty %s", err)
-		}
-	}
-
-	cloneFlags := GetNamespaceFlags(container.Namespaces)
-
-	if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
-		if len(container.Networks) != 0 || len(container.Routes) != 0 {
-			return fmt.Errorf("unable to apply network parameters without network namespace")
-		}
-	} else {
-		if err := setupNetwork(container, networkState); err != nil {
-			return fmt.Errorf("setup networking %s", err)
-		}
-		if err := setupRoute(container); err != nil {
-			return fmt.Errorf("setup route %s", err)
-		}
-	}
-
-	if err := setupRlimits(container); err != nil {
-		return fmt.Errorf("setup rlimits %s", err)
-	}
-
-	label.Init()
-
-	// InitializeMountNamespace() can be executed only for a new mount namespace
-	if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
-		if err := mount.InitializeMountNamespace(container); err != nil {
-			return err
-		}
-	}
-
-	if container.Hostname != "" {
-		// TODO: (crosbymichael) move this to pre spawn validation
-		if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
-			return fmt.Errorf("unable to set the hostname without UTS namespace")
-		}
-		if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
-			return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
-		}
-	}
-
-	if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
-		return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
-	}
-
-	if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
-		return fmt.Errorf("set process label %s", err)
-	}
-
-	// TODO: (crosbymichael) make this configurable at the Config level
-	if container.RestrictSys {
-		if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
-			return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
-		}
-		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
-			return err
-		}
-	}
-
-	pdeathSignal, err := system.GetParentDeathSignal()
-	if err != nil {
-		return fmt.Errorf("get parent death signal %s", err)
-	}
-
-	if err := FinalizeNamespace(container); err != nil {
-		return fmt.Errorf("finalize namespace %s", err)
-	}
-
-	// FinalizeNamespace can change user/group which clears the parent death
-	// signal, so we restore it here.
-	if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
-		return fmt.Errorf("restore parent death signal %s", err)
-	}
-
-	return system.Execv(process.Args[0], process.Args[0:], process.Env)
-}
-
-func initUserNs(uncleanRootfs string, process *processArgs) (err error) {
-	container := process.Config
-
-	// clear the current processes env and replace it with the environment
-	// defined on the container
-	if err := LoadContainerEnvironment(container); err != nil {
-		return err
-	}
-
-	// join any namespaces via a path to the namespace fd if provided
-	if err := joinExistingNamespaces(container.Namespaces); err != nil {
-		return err
-	}
-	if process.ConsolePath != "" {
-		if err := console.OpenAndDup("/dev/console"); err != nil {
-			return err
-		}
-	}
-	if _, err := syscall.Setsid(); err != nil {
-		return fmt.Errorf("setsid %s", err)
-	}
-	if process.ConsolePath != "" {
-		if err := system.Setctty(); err != nil {
-			return fmt.Errorf("setctty %s", err)
-		}
-	}
-
-	if container.WorkingDir == "" {
-		container.WorkingDir = "/"
-	}
-
-	if err := setupRlimits(container); err != nil {
-		return fmt.Errorf("setup rlimits %s", err)
-	}
-
-	cloneFlags := GetNamespaceFlags(container.Namespaces)
-
-	if container.Hostname != "" {
-		if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
-			return fmt.Errorf("unable to set the hostname without UTS namespace")
-		}
-		if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
-			return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
-		}
-	}
-
-	if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
-		return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
-	}
-
-	if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
-		return fmt.Errorf("set process label %s", err)
-	}
-
-	if container.RestrictSys {
-		if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
-			return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
-		}
-		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
-			return err
-		}
-	}
-
-	pdeathSignal, err := system.GetParentDeathSignal()
-	if err != nil {
-		return fmt.Errorf("get parent death signal %s", err)
-	}
-
-	if err := FinalizeNamespace(container); err != nil {
-		return fmt.Errorf("finalize namespace %s", err)
-	}
-
-	// FinalizeNamespace can change user/group which clears the parent death
-	// signal, so we restore it here.
-	if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
-		return fmt.Errorf("restore parent death signal %s", err)
-	}
-
-	return system.Execv(process.Args[0], process.Args[0:], process.Env)
-}
-
-// RestoreParentDeathSignal sets the parent death signal to old.
-func RestoreParentDeathSignal(old int) error {
-	if old == 0 {
-		return nil
-	}
-
-	current, err := system.GetParentDeathSignal()
-	if err != nil {
-		return fmt.Errorf("get parent death signal %s", err)
-	}
-
-	if old == current {
-		return nil
-	}
-
-	if err := system.ParentDeathSignal(uintptr(old)); err != nil {
-		return fmt.Errorf("set parent death signal %s", err)
-	}
-
-	// Signal self if parent is already dead. Does nothing if running in a new
-	// PID namespace, as Getppid will always return 0.
-	if syscall.Getppid() == 1 {
-		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
-	}
-
-	return nil
-}
-
-// SetupUser changes the groups, gid, and uid for the user inside the container
-func SetupUser(container *configs.Config) error {
-	// Set up defaults.
-	defaultExecUser := user.ExecUser{
-		Uid:  syscall.Getuid(),
-		Gid:  syscall.Getgid(),
-		Home: "/",
-	}
-
-	passwdPath, err := user.GetPasswdPath()
-	if err != nil {
-		return err
-	}
-
-	groupPath, err := user.GetGroupPath()
-	if err != nil {
-		return err
-	}
-
-	execUser, err := user.GetExecUserPath(container.User, &defaultExecUser, passwdPath, groupPath)
-	if err != nil {
-		return fmt.Errorf("get supplementary groups %s", err)
-	}
-
-	suppGroups := append(execUser.Sgids, container.AdditionalGroups...)
-
-	if err := syscall.Setgroups(suppGroups); err != nil {
-		return fmt.Errorf("setgroups %s", err)
-	}
-
-	if err := system.Setgid(execUser.Gid); err != nil {
-		return fmt.Errorf("setgid %s", err)
-	}
-
-	if err := system.Setuid(execUser.Uid); err != nil {
-		return fmt.Errorf("setuid %s", err)
-	}
-
-	// if we didn't get HOME already, set it based on the user's HOME
-	if envHome := os.Getenv("HOME"); envHome == "" {
-		if err := os.Setenv("HOME", execUser.Home); err != nil {
-			return fmt.Errorf("set HOME %s", err)
-		}
-	}
-
-	return nil
-}
-
-// setupVethNetwork uses the Network config if it is not nil to initialize
-// the new veth interface inside the container for use by changing the name to eth0
-// setting the MTU and IP address along with the default gateway
-func setupNetwork(container *configs.Config, networkState *configs.NetworkState) error {
-	for _, config := range container.Networks {
-		strategy, err := network.GetStrategy(config.Type)
-		if err != nil {
-			return err
-		}
-
-		err1 := strategy.Initialize(config, networkState)
-		if err1 != nil {
-			return err1
-		}
-	}
-	return nil
-}
-
-func setupRoute(container *configs.Config) error {
-	for _, config := range container.Routes {
-		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func setupRlimits(container *configs.Config) error {
-	for _, rlimit := range container.Rlimits {
-		l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
-		if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
-			return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
-		}
-	}
-	return nil
-}
-
-// FinalizeNamespace drops the caps, sets the correct user
-// and working dir, and closes any leaky file descriptors
-// before execing the command inside the namespace
-func FinalizeNamespace(container *configs.Config) error {
-	// Ensure that all non-standard fds we may have accidentally
-	// inherited are marked close-on-exec so they stay out of the
-	// container
-	if err := utils.CloseExecFrom(3); err != nil {
-		return fmt.Errorf("close open file descriptors %s", err)
-	}
-
-	// drop capabilities in bounding set before changing user
-	if err := capabilities.DropBoundingSet(container.Capabilities); err != nil {
-		return fmt.Errorf("drop bounding set %s", err)
-	}
-
-	// preserve existing capabilities while we change users
-	if err := system.SetKeepCaps(); err != nil {
-		return fmt.Errorf("set keep caps %s", err)
-	}
-
-	if err := SetupUser(container); err != nil {
-		return fmt.Errorf("setup user %s", err)
-	}
-
-	if err := system.ClearKeepCaps(); err != nil {
-		return fmt.Errorf("clear keep caps %s", err)
-	}
-
-	// drop all other capabilities
-	if err := capabilities.DropCapabilities(container.Capabilities); err != nil {
-		return fmt.Errorf("drop capabilities %s", err)
-	}
-
-	if container.WorkingDir != "" {
-		if err := syscall.Chdir(container.WorkingDir); err != nil {
-			return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
-		}
-	}
-
-	return nil
-}
-
-func LoadContainerEnvironment(container *configs.Config) error {
-	os.Clearenv()
-	for _, pair := range container.Env {
-		p := strings.SplitN(pair, "=", 2)
-		if len(p) < 2 {
-			return fmt.Errorf("invalid environment '%v'", pair)
-		}
-		if err := os.Setenv(p[0], p[1]); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// joinExistingNamespaces gets all the namespace paths specified for the container and
-// does a setns on the namespace fd so that the current process joins the namespace.
-func joinExistingNamespaces(namespaces []configs.Namespace) error {
-	for _, ns := range namespaces {
-		if ns.Path != "" {
-			f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
-			if err != nil {
-				return err
-			}
-			err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type]))
-			f.Close()
-			if err != nil {
-				return err
-			}
-		}
-	}
-	return nil
-}
--- a/namespaces/utils.go
+++ b/namespaces/utils.go
@ -1,48 +0,0 @@
-// +build linux
-
-package namespaces
-
-import (
-	"os"
-	"syscall"
-
-	"github.com/docker/libcontainer/configs"
-)
-
-type initError struct {
-	Message string `json:"message,omitempty"`
-}
-
-func (i initError) Error() string {
-	return i.Message
-}
-
-var namespaceInfo = map[configs.NamespaceType]int{
-	configs.NEWNET:  syscall.CLONE_NEWNET,
-	configs.NEWNS:   syscall.CLONE_NEWNS,
-	configs.NEWUSER: syscall.CLONE_NEWUSER,
-	configs.NEWIPC:  syscall.CLONE_NEWIPC,
-	configs.NEWUTS:  syscall.CLONE_NEWUTS,
-	configs.NEWPID:  syscall.CLONE_NEWPID,
-}
-
-// New returns a newly initialized Pipe for communication between processes
-func newInitPipe() (parent *os.File, child *os.File, err error) {
-	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
-	if err != nil {
-		return nil, nil, err
-	}
-	return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
-}
-
-// GetNamespaceFlags parses the container's Namespaces options to set the correct
-// flags on clone, unshare. This functions returns flags only for new namespaces.
-func GetNamespaceFlags(namespaces configs.Namespaces) (flag int) {
-	for _, v := range namespaces {
-		if v.Path != "" {
-			continue
-		}
-		flag |= namespaceInfo[v.Type]
-	}
-	return flag
-}
--- a/namespaces/nsenter/README.md
+++ b/namespaces/nsenter/README.md
--- a/namespaces/nsenter/nsenter.go
+++ b/namespaces/nsenter/nsenter.go
--- a/namespaces/nsenter/nsenter_test.go
+++ b/namespaces/nsenter/nsenter_test.go
--- a/namespaces/nsenter/nsenter_unsupported.go
+++ b/namespaces/nsenter/nsenter_unsupported.go
--- a/namespaces/nsenter/nsexec.c
+++ b/namespaces/nsenter/nsexec.c
--- a/nsinit/exec.go
+++ b/nsinit/exec.go
@ -3,14 +3,55 @@ package main
 import (
 	"io"
 	"os"
+	"os/signal"
 	"syscall"

 	"github.com/codegangsta/cli"
 	"github.com/docker/docker/pkg/term"
 	"github.com/docker/libcontainer"
+	"github.com/docker/libcontainer/configs"
 	consolepkg "github.com/docker/libcontainer/console"
 )

+// tty bundles the terminal-related state used by the exec command.
+type tty struct {
+	master  *os.File // master file; nil when no tty is in use
+	console string // console path that set copies into the container config
+	state   *term.State // saved terminal state for os.Stdin, restored by Close
+}
+
+func (t *tty) Close() error {
+	if t.master != nil {
+		t.master.Close()
+	}
+	if t.state != nil {
+		term.RestoreTerminal(os.Stdin.Fd(), t.state)
+	}
+	return nil
+}
+
+// set copies the tty's console path into config.Console.
+func (t *tty) set(config *configs.Config) {
+	config.Console = t.console
+}
+
+// attach clears the process's Stdin, Stdout and Stderr when the tty has a
+// master file; without a master the process is left untouched.
+func (t *tty) attach(process *libcontainer.Process) {
+	if t.master == nil {
+		return
+	}
+	process.Stdin, process.Stdout, process.Stderr = nil, nil, nil
+}
+
+// resize reads the window size of os.Stdin and applies it to the master
+// file. It does nothing and returns nil when the tty has no master; a failure
+// to read or set the window size is returned to the caller.
+func (t *tty) resize() error {
+	if t.master == nil {
+		return nil
+	}
+	ws, err := term.GetWinsize(os.Stdin.Fd())
+	if err != nil {
+		return err
+	}
+	return term.SetWinsize(t.master.Fd(), ws)
+}
+
 var execCommand = cli.Command{
 	Name:   "exec",
 	Usage:  "execute a new command inside a container",
@ -23,24 +64,14 @@ var execCommand = cli.Command{
 }

 func execAction(context *cli.Context) {
-	var (
-		master  *os.File
-		console string
-		err     error
-
-		sigc = make(chan os.Signal, 10)
-
-		stdin  = os.Stdin
-		stdout = os.Stdout
-		stderr = os.Stderr
-
-		exitCode int
-	)
-
 	factory, err := loadFactory(context)
 	if err != nil {
 		fatal(err)
 	}
+	tty, err := newTty(context)
+	if err != nil {
+		fatal(err)
+	}
 	container, err := factory.Load(context.String("id"))
 	if err != nil {
 		if lerr, ok := err.(libcontainer.Error); !ok || lerr.Code() != libcontainer.ContainerNotExists {
@ -50,46 +81,22 @@ func execAction(context *cli.Context) {
 		if err != nil {
 			fatal(err)
 		}
-		if context.Bool("tty") {
-			stdin = nil
-			stdout = nil
-			stderr = nil
-			if master, console, err = consolepkg.CreateMasterAndConsole(); err != nil {
-				fatal(err)
-			}
-			go io.Copy(master, os.Stdin)
-			go io.Copy(os.Stdout, master)
-			state, err := term.SetRawTerminal(os.Stdin.Fd())
-			if err != nil {
-				fatal(err)
-			}
-			defer term.RestoreTerminal(os.Stdin.Fd(), state)
-			config.Console = console
-		}
+		tty.set(config)
 		if container, err = factory.Create(context.String("id"), config); err != nil {
 			fatal(err)
 		}
 	}
+	go handleSignals(container, tty)
 	process := &libcontainer.Process{
 		Args:   context.Args(),
-		Stdin:  stdin,
-		Stdout: stdout,
-		Stderr: stderr,
+		Stdin:  os.Stdin,
+		Stdout: os.Stdout,
+		Stderr: os.Stderr,
 	}
+	tty.attach(process)
 	if _, err := container.Start(process); err != nil {
 		fatal(err)
 	}
-	go func() {
-		resizeTty(master)
-		for sig := range sigc {
-			switch sig {
-			case syscall.SIGWINCH:
-				resizeTty(master)
-			default:
-				container.Signal(sig)
-			}
-		}
-	}()
 	status, err := container.Wait()
 	if err != nil {
 		fatal(err)
@ -97,6 +104,11 @@ func execAction(context *cli.Context) {
 	if err := container.Destroy(); err != nil {
 		fatal(err)
 	}
+	exit(status)
+}
+
+func exit(status syscall.WaitStatus) {
+	var exitCode int
 	if status.Exited() {
 		exitCode = status.ExitStatus()
 	} else if status.Signaled() {
@ -107,13 +119,37 @@ func execAction(context *cli.Context) {
 	os.Exit(exitCode)
 }

-func resizeTty(master *os.File) {
-	if master == nil {
-		return
+// handleSignals subscribes to incoming signals and loops over them: SIGWINCH
+// triggers a tty resize, every other signal is passed to container.Signal.
+// The tty is also resized once before the loop starts. Errors returned by
+// resize and Signal are ignored. The loop only ends if sigc is closed, which
+// this function never does.
+func handleSignals(container libcontainer.Container, tty *tty) {
+	sigc := make(chan os.Signal, 10)
+	signal.Notify(sigc) // no signals listed: all incoming signals are relayed
+	tty.resize()
+	for sig := range sigc {
+		switch sig {
+		case syscall.SIGWINCH:
+			tty.resize()
+		default:
+			container.Signal(sig)
+		}
 	}
-	ws, err := term.GetWinsize(os.Stdin.Fd())
-	if err != nil {
-		return
-	}
-	term.SetWinsize(master.Fd(), ws)
+}
+
+// newTty builds the tty for the exec command. When the "tty" flag is set it
+// creates a master/console pair, starts copying os.Stdin to the master and
+// the master to os.Stdout in background goroutines, and switches os.Stdin to
+// raw mode, remembering the previous state. If raw mode cannot be enabled the
+// master is closed again so its descriptor is not leaked. Without the flag an
+// empty tty is returned.
+func newTty(context *cli.Context) (*tty, error) {
+	if context.Bool("tty") {
+		master, console, err := consolepkg.CreateMasterAndConsole()
+		if err != nil {
+			return nil, err
+		}
+		go io.Copy(master, os.Stdin)
+		go io.Copy(os.Stdout, master)
+		state, err := term.SetRawTerminal(os.Stdin.Fd())
+		if err != nil {
+			// release the master on failure; this also ends the copy goroutines
+			master.Close()
+			return nil, err
+		}
+		return &tty{
+			master:  master,
+			console: console,
+			state:   state,
+		}, nil
+	}
+	return &tty{}, nil
 }
--- a/nsinit/init.go
+++ b/nsinit/init.go
@ -5,7 +5,7 @@ import (

 	"github.com/codegangsta/cli"
 	"github.com/docker/libcontainer"
-	_ "github.com/docker/libcontainer/namespaces/nsenter"
+	_ "github.com/docker/libcontainer/nsenter"
 )

 var initCommand = cli.Command{