Remove namespaces package
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
8191d4d60f
commit
bbeae7445a
|
@ -1,5 +1,9 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
type NamespaceType string
|
||||
|
||||
const (
|
||||
|
@ -18,6 +22,10 @@ type Namespace struct {
|
|||
Path string `json:"path,omitempty"`
|
||||
}
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
return namespaceInfo[n.Type]
|
||||
}
|
||||
|
||||
type Namespaces []Namespace
|
||||
|
||||
func (n *Namespaces) Remove(t NamespaceType) bool {
|
||||
|
@ -50,3 +58,25 @@ func (n *Namespaces) index(t NamespaceType) int {
|
|||
func (n *Namespaces) Contains(t NamespaceType) bool {
|
||||
return n.index(t) != -1
|
||||
}
|
||||
|
||||
// namespaceInfo maps each namespace type to the corresponding CLONE_NEW*
// flag from the syscall package. It is read by Namespace.Syscall and
// Namespaces.CloneFlags.
var namespaceInfo = map[NamespaceType]int{
	NEWNET:  syscall.CLONE_NEWNET,
	NEWNS:   syscall.CLONE_NEWNS,
	NEWUSER: syscall.CLONE_NEWUSER,
	NEWIPC:  syscall.CLONE_NEWIPC,
	NEWUTS:  syscall.CLONE_NEWUTS,
	NEWPID:  syscall.CLONE_NEWPID,
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This functions returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
var flag int
|
||||
for _, v := range *n {
|
||||
if v.Path != "" {
|
||||
continue
|
||||
}
|
||||
flag |= namespaceInfo[v.Type]
|
||||
}
|
||||
return uintptr(flag)
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ type Container interface {
|
|||
Status() (configs.Status, error)
|
||||
|
||||
// Returns the current config of the container.
|
||||
Config() *configs.Config
|
||||
Config() configs.Config
|
||||
|
||||
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
|
||||
//
|
||||
|
|
|
@ -5,18 +5,35 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/namespaces"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
const (
|
||||
EXIT_SIGNAL_OFFSET = 128
|
||||
)
|
||||
|
||||
// initError is the JSON-encodable error value exchanged over the init pipe;
// it carries nothing but a message.
type initError struct {
	Message string `json:"message,omitempty"`
}

// Error implements the error interface by returning the stored message.
func (e initError) Error() string {
	return e.Message
}
|
||||
|
||||
type linuxContainer struct {
|
||||
id string
|
||||
root string
|
||||
|
@ -26,12 +43,14 @@ type linuxContainer struct {
|
|||
initArgs []string
|
||||
}
|
||||
|
||||
// ID returns the container's unique ID, as stored in the id field.
func (c *linuxContainer) ID() string {
	return c.id
}
|
||||
|
||||
func (c *linuxContainer) Config() *configs.Config {
|
||||
return c.config
|
||||
// Config returns the container's configuration
|
||||
func (c *linuxContainer) Config() configs.Config {
|
||||
return *c.config
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Status() (configs.Status, error) {
|
||||
|
@ -96,48 +115,158 @@ func (c *linuxContainer) Start(process *Process) (int, error) {
|
|||
if status != configs.Destroyed {
|
||||
glog.Info("start new container process")
|
||||
// TODO: (crosbymichael) check out console use for execin
|
||||
return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
|
||||
//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
|
||||
return c.startNewProcess(cmd, process.Args)
|
||||
}
|
||||
if err := c.startInitProcess(cmd, process); err != nil {
|
||||
if err := c.startInitProcess(cmd, process.Args); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return c.state.InitPid, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) updateStateFile() error {
|
||||
fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename))
|
||||
f, err := os.Create(fnew)
|
||||
func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) {
|
||||
var err error
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
return -1, err
|
||||
}
|
||||
|
||||
err = json.NewEncoder(f).Encode(c.state)
|
||||
defer parent.Close()
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid))
|
||||
if err := cmd.Start(); err != nil {
|
||||
child.Close()
|
||||
return -1, err
|
||||
}
|
||||
child.Close()
|
||||
s, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
f.Close()
|
||||
os.Remove(fnew)
|
||||
return newGenericError(err, SystemError)
|
||||
return -1, err
|
||||
}
|
||||
f.Close()
|
||||
|
||||
fname := filepath.Join(c.root, stateFilename)
|
||||
if err := os.Rename(fnew, fname); err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
if !s.Success() {
|
||||
return -1, &exec.ExitError{s}
|
||||
}
|
||||
|
||||
return nil
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
if err := decoder.Decode(&pid); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
p, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
terminate := func(terr error) (int, error) {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
return -1, terr
|
||||
}
|
||||
// Enter cgroups.
|
||||
if err := enterCgroups(c.state, pid.Pid); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
encoder := json.NewEncoder(parent)
|
||||
if err := encoder.Encode(c.config); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
process := processArgs{
|
||||
Config: c.config,
|
||||
Args: args,
|
||||
}
|
||||
if err := encoder.Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
return pid.Pid, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *Process) error {
|
||||
err := namespaces.Exec(config.Args, c.config.Env, c.config.Console, cmd, c.config, c.cgroupManager, c.state)
|
||||
func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
||||
// create a pipe so that we can syncronize with the namespaced process and
|
||||
// pass the state and configuration to the child process
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer parent.Close()
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags()
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
addUidGidMappings(cmd.SysProcAttr, c.config)
|
||||
// Default to root user when user namespaces are enabled.
|
||||
if cmd.SysProcAttr.Credential == nil {
|
||||
cmd.SysProcAttr.Credential = &syscall.Credential{}
|
||||
}
|
||||
}
|
||||
glog.Info("starting container init process")
|
||||
err = cmd.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
wait := func() (*os.ProcessState, error) {
|
||||
ps, err := cmd.Process.Wait()
|
||||
// we should kill all processes in cgroup when init is died if we use
|
||||
// host PID namespace
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
c.killAllPids()
|
||||
}
|
||||
return ps, newGenericError(err, SystemError)
|
||||
}
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
cmd.Process.Kill()
|
||||
wait()
|
||||
return terr
|
||||
}
|
||||
started, err := system.GetProcessStartTime(cmd.Process.Pid)
|
||||
if err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
c.cgroupManager.Destroy()
|
||||
}
|
||||
}()
|
||||
var networkState configs.NetworkState
|
||||
if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
process := processArgs{
|
||||
Args: args,
|
||||
Config: c.config,
|
||||
NetworkState: &networkState,
|
||||
}
|
||||
// Start the setup process to setup the init process
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
}
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return terminate(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
return terminate(ierr)
|
||||
}
|
||||
|
||||
err = c.updateStateFile()
|
||||
if err != nil {
|
||||
// FIXME c.Kill()
|
||||
return err
|
||||
}
|
||||
c.state.InitPid = cmd.Process.Pid
|
||||
c.state.InitStartTime = started
|
||||
c.state.NetworkState = networkState
|
||||
c.state.CgroupPaths = c.cgroupManager.GetPaths()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -179,3 +308,282 @@ func (c *linuxContainer) Wait() (syscall.WaitStatus, error) {
|
|||
// OOM passes the container's state to NotifyOnOOM and returns its channel
// and error unchanged.
func (c *linuxContainer) OOM() (<-chan struct{}, error) {
	return NotifyOnOOM(c.state)
}
|
||||
|
||||
func (c *linuxContainer) updateStateFile() error {
|
||||
fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename))
|
||||
f, err := os.Create(fnew)
|
||||
if err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := json.NewEncoder(f).Encode(c.state); err != nil {
|
||||
f.Close()
|
||||
os.Remove(fnew)
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
fname := filepath.Join(c.root, stateFilename)
|
||||
if err := os.Rename(fnew, fname); err != nil {
|
||||
return newGenericError(err, SystemError)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newInitPipe creates a connected pair of local stream sockets, opened with
// close-on-exec set, for communication between processes. The second
// descriptor of the pair is returned as parent and the first as child, each
// wrapped in an *os.File.
func newInitPipe() (parent *os.File, child *os.File, err error) {
	var pair [2]int
	pair, err = syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
	if err != nil {
		return nil, nil, err
	}
	parent = os.NewFile(uintptr(pair[1]), "parent")
	child = os.NewFile(uintptr(pair[0]), "child")
	return parent, child, nil
}
|
||||
|
||||
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
|
||||
func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
|
||||
if container.UidMappings != nil {
|
||||
sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
|
||||
for i, um := range container.UidMappings {
|
||||
sys.UidMappings[i].ContainerID = um.ContainerID
|
||||
sys.UidMappings[i].HostID = um.HostID
|
||||
sys.UidMappings[i].Size = um.Size
|
||||
}
|
||||
}
|
||||
|
||||
if container.GidMappings != nil {
|
||||
sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
|
||||
for i, gm := range container.GidMappings {
|
||||
sys.GidMappings[i].ContainerID = gm.ContainerID
|
||||
sys.GidMappings[i].HostID = gm.HostID
|
||||
sys.GidMappings[i].Size = gm.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// killAllPids iterates over all of the container's processes
|
||||
// sending a SIGKILL to each process.
|
||||
func (c *linuxContainer) killAllPids() error {
|
||||
glog.Info("killing all processes in container")
|
||||
var procs []*os.Process
|
||||
c.cgroupManager.Freeze(configs.Frozen)
|
||||
pids, err := c.cgroupManager.GetPids()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
// TODO: log err without aborting if we are unable to find
|
||||
// a single PID
|
||||
if p, err := os.FindProcess(pid); err == nil {
|
||||
procs = append(procs, p)
|
||||
p.Kill()
|
||||
}
|
||||
}
|
||||
c.cgroupManager.Freeze(configs.Thawed)
|
||||
for _, p := range procs {
|
||||
p.Wait()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// initializeNetworking creates the container's network stack outside of the namespace and moves
|
||||
// interfaces into the container's net namespaces if necessary
|
||||
func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error {
|
||||
glog.Info("initailzing container's network stack")
|
||||
for _, config := range c.config.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.Create(config, nspid, networkState); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error {
|
||||
command := exec.Command(args[0], args[1:]...)
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer parent.Close()
|
||||
command.ExtraFiles = []*os.File{child}
|
||||
command.Dir = container.RootFs
|
||||
command.Env = append(command.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
|
||||
fmt.Sprintf("_LIBCONTAINER_USERNS=1"))
|
||||
err = command.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s, err := command.Process.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !s.Success() {
|
||||
return &exec.ExitError{s}
|
||||
}
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
if err := decoder.Decode(&pid); err != nil {
|
||||
return err
|
||||
}
|
||||
p, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
return terr
|
||||
}
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
|
||||
return terminate(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
return ierr
|
||||
}
|
||||
s, err = p.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !s.Success() {
|
||||
return &exec.ExitError{s}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pid is the JSON message carrying a process id; it is decoded from the init
// pipe to learn which process to talk to next.
type pid struct {
	Pid int `json:"Pid"`
}
|
||||
|
||||
// Finalize entering into a container and execute a specified command
|
||||
func InitIn(pipe *os.File) (err error) {
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if err := json.NewEncoder(pipe).Encode(initError{
|
||||
Message: err.Error(),
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
decoder := json.NewDecoder(pipe)
|
||||
var config *configs.Config
|
||||
if err := decoder.Decode(&config); err != nil {
|
||||
return err
|
||||
}
|
||||
var process *processArgs
|
||||
if err := decoder.Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeSetns(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil {
|
||||
return err
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// finalize expects that the setns calls have been setup and that is has joined an
|
||||
// existing namespace
|
||||
func finalizeSetns(container *configs.Config) error {
|
||||
// clear the current processes env and replace it with the environment defined on the container
|
||||
if err := loadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupRlimits(container); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
|
||||
if err := finalizeNamespace(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||
}
|
||||
|
||||
if container.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupContainer is run to setup mounts and networking related operations
|
||||
// for a user namespace enabled process as a user namespace root doesn't
|
||||
// have permissions to perform these operations.
|
||||
// The setup process joins all the namespaces of user namespace enabled init
|
||||
// except the user namespace, so it run as root in the root user namespace
|
||||
// to perform these operations.
|
||||
func SetupContainer(process *processArgs) error {
|
||||
container := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO : move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(container.RootFs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := loadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cloneFlags := container.Namespaces.CloneFlags()
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(container.Networks) != 0 || len(container.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(container, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(container); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
label.Init()
|
||||
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(container); err != nil {
|
||||
return fmt.Errorf("setup mount namespace %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// enterCgroups hands pid and the cgroup paths recorded in state to
// cgroups.EnterPid and returns its result.
func enterCgroups(state *configs.State, pid int) error {
	return cgroups.EnterPid(state.CgroupPaths, pid)
}
|
||||
|
|
406
linux_factory.go
406
linux_factory.go
|
@ -5,15 +5,28 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
cgroups "github.com/docker/libcontainer/cgroups/manager"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/namespaces"
|
||||
"github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/security/capabilities"
|
||||
"github.com/docker/libcontainer/security/restrict"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -26,6 +39,13 @@ var (
|
|||
maxIdLen = 1024
|
||||
)
|
||||
|
||||
// processArgs is the JSON payload the parent sends over the init pipe to the
// container's init process: the command line to execute, the container
// configuration, and the network state.
type processArgs struct {
	Args         []string              `json:"args,omitempty"`
	Config       *configs.Config       `json:"config,omitempty"`
	NetworkState *configs.NetworkState `json:"network_state,omitempty"`
}
|
||||
|
||||
// New returns a linux based container factory based in the root directory.
|
||||
func New(root string, initArgs []string) (Factory, error) {
|
||||
if root != "" {
|
||||
|
@ -116,16 +136,50 @@ func (l *linuxFactory) Load(id string) (Container, error) {
|
|||
|
||||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
||||
func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
||||
pipe := os.NewFile(uintptr(pipefd), "pipe")
|
||||
|
||||
setupUserns := os.Getenv("_LIBCONTAINER_USERNS")
|
||||
setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != ""
|
||||
pid := os.Getenv("_LIBCONTAINER_INITPID")
|
||||
if pid != "" && setupUserns == "" {
|
||||
return namespaces.InitIn(pipe)
|
||||
if pid != "" && !setupUserns {
|
||||
return InitIn(pipe)
|
||||
}
|
||||
|
||||
return namespaces.Init(pipe, setupUserns != "")
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if err := json.NewEncoder(pipe).Encode(initError{
|
||||
Message: err.Error(),
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
uncleanRootfs, err := os.Getwd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var process *processArgs
|
||||
// We always read this as it is a way to sync with the parent as well
|
||||
if err := json.NewDecoder(pipe).Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
if setupUserns {
|
||||
err = SetupContainer(process)
|
||||
if err == nil {
|
||||
os.Exit(0)
|
||||
} else {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if process.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return l.initUserNs(uncleanRootfs, process)
|
||||
}
|
||||
return l.initDefault(uncleanRootfs, process)
|
||||
}
|
||||
|
||||
func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) {
|
||||
|
@ -137,7 +191,6 @@ func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error)
|
|||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var config *configs.Config
|
||||
if err := json.NewDecoder(f).Decode(&config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
|
@ -154,7 +207,6 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) {
|
|||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var state *configs.State
|
||||
if err := json.NewDecoder(f).Decode(&state); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
|
@ -171,3 +223,337 @@ func (l *linuxFactory) validateID(id string) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) {
|
||||
config := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO: move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := loadContainerEnvironment(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := console.OpenAndDup(config.Console); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
cloneFlags := config.Namespaces.CloneFlags()
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(config.Networks) != 0 || len(config.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(config, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(config); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
if err := setupRlimits(config); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.Hostname != "" {
|
||||
// TODO: (crosbymichael) move this to pre spawn validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
|
||||
}
|
||||
if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
// TODO: (crosbymichael) make this configurable at the Config level
|
||||
if config.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if err := finalizeNamespace(config); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := restoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
return system.Execv(process.Args[0], process.Args[0:], config.Env)
|
||||
}
|
||||
|
||||
func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) {
|
||||
config := process.Config
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the config
|
||||
if err := loadContainerEnvironment(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := console.OpenAndDup("/dev/console"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
if config.WorkingDir == "" {
|
||||
config.WorkingDir = "/"
|
||||
}
|
||||
|
||||
if err := setupRlimits(config); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
cloneFlags := config.Namespaces.CloneFlags()
|
||||
if config.Hostname != "" {
|
||||
// TODO: move validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
|
||||
}
|
||||
if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
if config.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if err := finalizeNamespace(config); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := restoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
return system.Execv(process.Args[0], process.Args[0:], config.Env)
|
||||
}
|
||||
|
||||
// restoreParentDeathSignal sets the parent death signal to old.
|
||||
func restoreParentDeathSignal(old int) error {
|
||||
if old == 0 {
|
||||
return nil
|
||||
}
|
||||
current, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if old == current {
|
||||
return nil
|
||||
}
|
||||
if err := system.ParentDeathSignal(uintptr(old)); err != nil {
|
||||
return fmt.Errorf("set parent death signal %s", err)
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *configs.Config) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get supplementary groups %s", err)
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, config.AdditionalGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return fmt.Errorf("setgroups %s", err)
|
||||
}
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("setgid %s", err)
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("setuid %s", err)
|
||||
}
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return fmt.Errorf("set HOME %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupVethNetwork uses the Network config if it is not nil to initialize
|
||||
// the new veth interface inside the container for use by changing the name to eth0
|
||||
// setting the MTU and IP address along with the default gateway
|
||||
func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err1 := strategy.Initialize(config, networkState)
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(config *configs.Config) error {
|
||||
for _, rlimit := range config.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaky file descriptors
|
||||
// before execing the command inside the namespace
|
||||
func finalizeNamespace(config *configs.Config) error {
|
||||
// Ensure that all non-standard fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(3); err != nil {
|
||||
return fmt.Errorf("close open file descriptors %s", err)
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := capabilities.DropBoundingSet(config.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop bounding set %s", err)
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return fmt.Errorf("set keep caps %s", err)
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return fmt.Errorf("setup user %s", err)
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return fmt.Errorf("clear keep caps %s", err)
|
||||
}
|
||||
// drop all other capabilities
|
||||
if err := capabilities.DropCapabilities(config.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop capabilities %s", err)
|
||||
}
|
||||
if config.WorkingDir != "" {
|
||||
if err := syscall.Chdir(config.WorkingDir); err != nil {
|
||||
return fmt.Errorf("chdir to %s %s", config.WorkingDir, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadContainerEnvironment(config *configs.Config) error {
|
||||
os.Clearenv()
|
||||
for _, pair := range config.Env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
for _, ns := range namespaces {
|
||||
if ns.Path != "" {
|
||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,262 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package namespaces
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
const (
|
||||
EXIT_SIGNAL_OFFSET = 128
|
||||
)
|
||||
|
||||
func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error {
|
||||
command := exec.Command(args[0], args[1:]...)
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer parent.Close()
|
||||
command.ExtraFiles = []*os.File{child}
|
||||
command.Dir = container.RootFs
|
||||
command.Env = append(command.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
|
||||
fmt.Sprintf("_LIBCONTAINER_USERNS=1"))
|
||||
err = command.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s, err := command.Process.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !s.Success() {
|
||||
return &exec.ExitError{s}
|
||||
}
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
if err := decoder.Decode(&pid); err != nil {
|
||||
return err
|
||||
}
|
||||
p, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
return terr
|
||||
}
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
|
||||
return terminate(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
return ierr
|
||||
}
|
||||
s, err = p.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !s.Success() {
|
||||
return &exec.ExitError{s}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
|
||||
// Move this to libcontainer package.
|
||||
// Exec performs setup outside of a namespace so that a container can be
|
||||
// executed. Exec is a high level function for working with container namespaces.
|
||||
func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) (err error) {
|
||||
// create a pipe so that we can syncronize with the namespaced process and
|
||||
// pass the state and configuration to the child process
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer parent.Close()
|
||||
command.ExtraFiles = []*os.File{child}
|
||||
|
||||
command.Dir = container.RootFs
|
||||
command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces))
|
||||
|
||||
if container.Namespaces.Contains(configs.NEWUSER) {
|
||||
AddUidGidMappings(command.SysProcAttr, container)
|
||||
|
||||
// Default to root user when user namespaces are enabled.
|
||||
if command.SysProcAttr.Credential == nil {
|
||||
command.SysProcAttr.Credential = &syscall.Credential{}
|
||||
}
|
||||
}
|
||||
|
||||
if err := command.Start(); err != nil {
|
||||
child.Close()
|
||||
return err
|
||||
}
|
||||
child.Close()
|
||||
|
||||
wait := func() (*os.ProcessState, error) {
|
||||
ps, err := command.Process.Wait()
|
||||
// we should kill all processes in cgroup when init is died if we use
|
||||
// host PID namespace
|
||||
if !container.Namespaces.Contains(configs.NEWPID) {
|
||||
killAllPids(cgroupManager)
|
||||
}
|
||||
return ps, err
|
||||
}
|
||||
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
command.Process.Kill()
|
||||
wait()
|
||||
return terr
|
||||
}
|
||||
|
||||
started, err := system.GetProcessStartTime(command.Process.Pid)
|
||||
if err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
err = cgroupManager.Apply(command.Process.Pid)
|
||||
if err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
cgroupManager.Destroy()
|
||||
}
|
||||
}()
|
||||
|
||||
var networkState configs.NetworkState
|
||||
if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
|
||||
process := processArgs{
|
||||
Env: append(env[0:], container.Env...),
|
||||
Args: args,
|
||||
ConsolePath: console,
|
||||
Config: container,
|
||||
NetworkState: &networkState,
|
||||
}
|
||||
|
||||
// Start the setup process to setup the init process
|
||||
if container.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
}
|
||||
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *initError
|
||||
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return terminate(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
return terminate(ierr)
|
||||
}
|
||||
|
||||
state.InitPid = command.Process.Pid
|
||||
state.InitStartTime = started
|
||||
state.NetworkState = networkState
|
||||
state.CgroupPaths = cgroupManager.GetPaths()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// killAllPids iterates over all of the container's processes
|
||||
// sending a SIGKILL to each process.
|
||||
func killAllPids(m cgroups.Manager) error {
|
||||
var (
|
||||
procs []*os.Process
|
||||
)
|
||||
m.Freeze(configs.Frozen)
|
||||
pids, err := m.GetPids()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
// TODO: log err without aborting if we are unable to find
|
||||
// a single PID
|
||||
if p, err := os.FindProcess(pid); err == nil {
|
||||
procs = append(procs, p)
|
||||
p.Kill()
|
||||
}
|
||||
}
|
||||
m.Freeze(configs.Thawed)
|
||||
for _, p := range procs {
|
||||
p.Wait()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
|
||||
func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
|
||||
if container.UidMappings != nil {
|
||||
sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
|
||||
for i, um := range container.UidMappings {
|
||||
sys.UidMappings[i].ContainerID = um.ContainerID
|
||||
sys.UidMappings[i].HostID = um.HostID
|
||||
sys.UidMappings[i].Size = um.Size
|
||||
}
|
||||
}
|
||||
|
||||
if container.GidMappings != nil {
|
||||
sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
|
||||
for i, gm := range container.GidMappings {
|
||||
sys.GidMappings[i].ContainerID = gm.ContainerID
|
||||
sys.GidMappings[i].HostID = gm.HostID
|
||||
sys.GidMappings[i].Size = gm.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// InitializeNetworking creates the container's network stack outside of the namespace and moves
|
||||
// interfaces into the container's net namespaces if necessary
|
||||
func InitializeNetworking(container *configs.Config, nspid int, networkState *configs.NetworkState) error {
|
||||
for _, config := range container.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.Create(config, nspid, networkState); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,218 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package namespaces
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
// pid is the JSON message used to report a process id over the init pipe.
type pid struct {
	// Pid is encoded under the JSON key "Pid".
	Pid int `json:"Pid"`
}
|
||||
|
||||
// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the
|
||||
// setns code in a single threaded environment joining the existing containers' namespaces.
|
||||
func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) {
|
||||
var err error
|
||||
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
defer parent.Close()
|
||||
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid))
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
child.Close()
|
||||
return -1, err
|
||||
}
|
||||
child.Close()
|
||||
|
||||
s, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if !s.Success() {
|
||||
return -1, &exec.ExitError{s}
|
||||
}
|
||||
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
|
||||
if err := decoder.Decode(&pid); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
p, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
terminate := func(terr error) (int, error) {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
return -1, terr
|
||||
}
|
||||
|
||||
// Enter cgroups.
|
||||
if err := EnterCgroups(state, pid.Pid); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
|
||||
encoder := json.NewEncoder(parent)
|
||||
|
||||
if err := encoder.Encode(container); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
|
||||
process := processArgs{
|
||||
Env: append(env[0:], container.Env...),
|
||||
Args: args,
|
||||
ConsolePath: console,
|
||||
}
|
||||
if err := encoder.Encode(process); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
|
||||
return pid.Pid, nil
|
||||
}
|
||||
|
||||
// Finalize entering into a container and execute a specified command
|
||||
func InitIn(pipe *os.File) (err error) {
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if err := json.NewEncoder(pipe).Encode(initError{
|
||||
Message: err.Error(),
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
|
||||
decoder := json.NewDecoder(pipe)
|
||||
|
||||
var container *configs.Config
|
||||
if err := decoder.Decode(&container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var process *processArgs
|
||||
if err := decoder.Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := FinalizeSetns(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// Finalize expects that the setns calls have been setup and that is has joined an
|
||||
// existing namespace
|
||||
func FinalizeSetns(container *configs.Config) error {
|
||||
// clear the current processes env and replace it with the environment defined on the container
|
||||
if err := LoadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupRlimits(container); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||
}
|
||||
|
||||
if container.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupContainer is run to setup mounts and networking related operations
|
||||
// for a user namespace enabled process as a user namespace root doesn't
|
||||
// have permissions to perform these operations.
|
||||
// The setup process joins all the namespaces of user namespace enabled init
|
||||
// except the user namespace, so it run as root in the root user namespace
|
||||
// to perform these operations.
|
||||
func SetupContainer(process *processArgs) error {
|
||||
container := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO : move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(container.RootFs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := LoadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cloneFlags := GetNamespaceFlags(container.Namespaces)
|
||||
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(container.Networks) != 0 || len(container.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(container, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(container); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
label.Init()
|
||||
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(container); err != nil {
|
||||
return fmt.Errorf("setup mount namespace %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func EnterCgroups(state *configs.State, pid int) error {
|
||||
return cgroups.EnterPid(state.CgroupPaths, pid)
|
||||
}
|
|
@ -1,465 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package namespaces
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/security/capabilities"
|
||||
"github.com/docker/libcontainer/security/restrict"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
// Process is used for transferring parameters from Exec() to Init()
|
||||
type processArgs struct {
|
||||
Args []string `json:"args,omitempty"`
|
||||
Env []string `json:"environment,omitempty"`
|
||||
ConsolePath string `json:"console_path,omitempty"`
|
||||
Config *configs.Config `json:"config,omitempty"`
|
||||
NetworkState *configs.NetworkState `json:"network_state,omitempty"`
|
||||
}
|
||||
|
||||
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
|
||||
// Move this to libcontainer package.
|
||||
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking,
|
||||
// and other options required for the new container.
|
||||
// The caller of Init function has to ensure that the go runtime is locked to an OS thread
|
||||
// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
|
||||
func Init(pipe *os.File, setupUserns bool) (err error) {
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if err := json.NewEncoder(pipe).Encode(initError{
|
||||
Message: err.Error(),
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
|
||||
uncleanRootfs, err := os.Getwd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var process *processArgs
|
||||
// We always read this as it is a way to sync with the parent as well
|
||||
if err := json.NewDecoder(pipe).Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if setupUserns {
|
||||
err = SetupContainer(process)
|
||||
if err == nil {
|
||||
os.Exit(0)
|
||||
} else {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
if process.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return initUserNs(uncleanRootfs, process)
|
||||
} else {
|
||||
return initDefault(uncleanRootfs, process)
|
||||
}
|
||||
}
|
||||
|
||||
func initDefault(uncleanRootfs string, process *processArgs) (err error) {
|
||||
container := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO: move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := LoadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(container.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if process.ConsolePath != "" {
|
||||
if err := console.OpenAndDup(process.ConsolePath); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if process.ConsolePath != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
cloneFlags := GetNamespaceFlags(container.Namespaces)
|
||||
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(container.Networks) != 0 || len(container.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(container, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(container); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := setupRlimits(container); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
|
||||
label.Init()
|
||||
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(container); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if container.Hostname != "" {
|
||||
// TODO: (crosbymichael) move this to pre spawn validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||
}
|
||||
|
||||
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
|
||||
// TODO: (crosbymichael) make this configurable at the Config level
|
||||
if container.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
|
||||
// FinalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
|
||||
return system.Execv(process.Args[0], process.Args[0:], process.Env)
|
||||
}
|
||||
|
||||
func initUserNs(uncleanRootfs string, process *processArgs) (err error) {
|
||||
container := process.Config
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := LoadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(container.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if process.ConsolePath != "" {
|
||||
if err := console.OpenAndDup("/dev/console"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if process.ConsolePath != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
if container.WorkingDir == "" {
|
||||
container.WorkingDir = "/"
|
||||
}
|
||||
|
||||
if err := setupRlimits(container); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
|
||||
cloneFlags := GetNamespaceFlags(container.Namespaces)
|
||||
|
||||
if container.Hostname != "" {
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||
}
|
||||
|
||||
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
|
||||
if container.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
|
||||
if err := FinalizeNamespace(container); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
|
||||
// FinalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := RestoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
|
||||
return system.Execv(process.Args[0], process.Args[0:], process.Env)
|
||||
}
|
||||
|
||||
// RestoreParentDeathSignal sets the parent death signal to old.
|
||||
func RestoreParentDeathSignal(old int) error {
|
||||
if old == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
current, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
|
||||
if old == current {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := system.ParentDeathSignal(uintptr(old)); err != nil {
|
||||
return fmt.Errorf("set parent death signal %s", err)
|
||||
}
|
||||
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupUser changes the groups, gid, and uid for the user inside the container
|
||||
func SetupUser(container *configs.Config) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
execUser, err := user.GetExecUserPath(container.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get supplementary groups %s", err)
|
||||
}
|
||||
|
||||
suppGroups := append(execUser.Sgids, container.AdditionalGroups...)
|
||||
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return fmt.Errorf("setgroups %s", err)
|
||||
}
|
||||
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("setgid %s", err)
|
||||
}
|
||||
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("setuid %s", err)
|
||||
}
|
||||
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return fmt.Errorf("set HOME %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupVethNetwork uses the Network config if it is not nil to initialize
|
||||
// the new veth interface inside the container for use by changing the name to eth0
|
||||
// setting the MTU and IP address along with the default gateway
|
||||
func setupNetwork(container *configs.Config, networkState *configs.NetworkState) error {
|
||||
for _, config := range container.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err1 := strategy.Initialize(config, networkState)
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(container *configs.Config) error {
|
||||
for _, config := range container.Routes {
|
||||
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(container *configs.Config) error {
|
||||
for _, rlimit := range container.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FinalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaky file descriptors
|
||||
// before execing the command inside the namespace
|
||||
func FinalizeNamespace(container *configs.Config) error {
|
||||
// Ensure that all non-standard fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(3); err != nil {
|
||||
return fmt.Errorf("close open file descriptors %s", err)
|
||||
}
|
||||
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := capabilities.DropBoundingSet(container.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop bounding set %s", err)
|
||||
}
|
||||
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return fmt.Errorf("set keep caps %s", err)
|
||||
}
|
||||
|
||||
if err := SetupUser(container); err != nil {
|
||||
return fmt.Errorf("setup user %s", err)
|
||||
}
|
||||
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return fmt.Errorf("clear keep caps %s", err)
|
||||
}
|
||||
|
||||
// drop all other capabilities
|
||||
if err := capabilities.DropCapabilities(container.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop capabilities %s", err)
|
||||
}
|
||||
|
||||
if container.WorkingDir != "" {
|
||||
if err := syscall.Chdir(container.WorkingDir); err != nil {
|
||||
return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func LoadContainerEnvironment(container *configs.Config) error {
|
||||
os.Clearenv()
|
||||
for _, pair := range container.Env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
for _, ns := range namespaces {
|
||||
if ns.Path != "" {
|
||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type]))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package namespaces
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
// initError is an error value carrying only a message. The Message field is
// tagged for JSON as "message" and is omitted when empty.
type initError struct {
	Message string `json:"message,omitempty"`
}

// Error implements the error interface by returning the stored message.
func (e initError) Error() string {
	msg := e.Message
	return msg
}
|
||||
|
||||
var namespaceInfo = map[configs.NamespaceType]int{
|
||||
configs.NEWNET: syscall.CLONE_NEWNET,
|
||||
configs.NEWNS: syscall.CLONE_NEWNS,
|
||||
configs.NEWUSER: syscall.CLONE_NEWUSER,
|
||||
configs.NEWIPC: syscall.CLONE_NEWIPC,
|
||||
configs.NEWUTS: syscall.CLONE_NEWUTS,
|
||||
configs.NEWPID: syscall.CLONE_NEWPID,
|
||||
}
|
||||
|
||||
// newInitPipe creates a connected pair of AF_LOCAL stream sockets, both
// opened with SOCK_CLOEXEC, and wraps them as *os.File values. The file named
// "parent" wraps the second descriptor of the pair and the file named "child"
// wraps the first. On failure both files are nil and the Socketpair error is
// returned.
func newInitPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, err := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if err != nil {
		return nil, nil, err
	}

	childFd, parentFd := pair[0], pair[1]
	parent = os.NewFile(uintptr(parentFd), "parent")
	child = os.NewFile(uintptr(childFd), "child")
	return parent, child, nil
}
|
||||
|
||||
// GetNamespaceFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This functions returns flags only for new namespaces.
|
||||
func GetNamespaceFlags(namespaces configs.Namespaces) (flag int) {
|
||||
for _, v := range namespaces {
|
||||
if v.Path != "" {
|
||||
continue
|
||||
}
|
||||
flag |= namespaceInfo[v.Type]
|
||||
}
|
||||
return flag
|
||||
}
|
140
nsinit/exec.go
140
nsinit/exec.go
|
@ -3,14 +3,55 @@ package main
|
|||
import (
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/docker/docker/pkg/term"
|
||||
"github.com/docker/libcontainer"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
consolepkg "github.com/docker/libcontainer/console"
|
||||
)
|
||||
|
||||
type tty struct {
|
||||
master *os.File
|
||||
console string
|
||||
state *term.State
|
||||
}
|
||||
|
||||
func (t *tty) Close() error {
|
||||
if t.master != nil {
|
||||
t.master.Close()
|
||||
}
|
||||
if t.state != nil {
|
||||
term.RestoreTerminal(os.Stdin.Fd(), t.state)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *tty) set(config *configs.Config) {
|
||||
config.Console = t.console
|
||||
}
|
||||
|
||||
func (t *tty) attach(process *libcontainer.Process) {
|
||||
if t.master != nil {
|
||||
process.Stderr = nil
|
||||
process.Stdout = nil
|
||||
process.Stdin = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *tty) resize() error {
|
||||
if t.master == nil {
|
||||
return nil
|
||||
}
|
||||
ws, err := term.GetWinsize(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return term.SetWinsize(t.master.Fd(), ws)
|
||||
}
|
||||
|
||||
var execCommand = cli.Command{
|
||||
Name: "exec",
|
||||
Usage: "execute a new command inside a container",
|
||||
|
@ -23,24 +64,14 @@ var execCommand = cli.Command{
|
|||
}
|
||||
|
||||
func execAction(context *cli.Context) {
|
||||
var (
|
||||
master *os.File
|
||||
console string
|
||||
err error
|
||||
|
||||
sigc = make(chan os.Signal, 10)
|
||||
|
||||
stdin = os.Stdin
|
||||
stdout = os.Stdout
|
||||
stderr = os.Stderr
|
||||
|
||||
exitCode int
|
||||
)
|
||||
|
||||
factory, err := loadFactory(context)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
tty, err := newTty(context)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
container, err := factory.Load(context.String("id"))
|
||||
if err != nil {
|
||||
if lerr, ok := err.(libcontainer.Error); !ok || lerr.Code() != libcontainer.ContainerNotExists {
|
||||
|
@ -50,46 +81,22 @@ func execAction(context *cli.Context) {
|
|||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
if context.Bool("tty") {
|
||||
stdin = nil
|
||||
stdout = nil
|
||||
stderr = nil
|
||||
if master, console, err = consolepkg.CreateMasterAndConsole(); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
go io.Copy(master, os.Stdin)
|
||||
go io.Copy(os.Stdout, master)
|
||||
state, err := term.SetRawTerminal(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
defer term.RestoreTerminal(os.Stdin.Fd(), state)
|
||||
config.Console = console
|
||||
}
|
||||
tty.set(config)
|
||||
if container, err = factory.Create(context.String("id"), config); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
}
|
||||
go handleSignals(container, tty)
|
||||
process := &libcontainer.Process{
|
||||
Args: context.Args(),
|
||||
Stdin: stdin,
|
||||
Stdout: stdout,
|
||||
Stderr: stderr,
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
}
|
||||
tty.attach(process)
|
||||
if _, err := container.Start(process); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
go func() {
|
||||
resizeTty(master)
|
||||
for sig := range sigc {
|
||||
switch sig {
|
||||
case syscall.SIGWINCH:
|
||||
resizeTty(master)
|
||||
default:
|
||||
container.Signal(sig)
|
||||
}
|
||||
}
|
||||
}()
|
||||
status, err := container.Wait()
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
|
@ -97,6 +104,11 @@ func execAction(context *cli.Context) {
|
|||
if err := container.Destroy(); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
exit(status)
|
||||
}
|
||||
|
||||
func exit(status syscall.WaitStatus) {
|
||||
var exitCode int
|
||||
if status.Exited() {
|
||||
exitCode = status.ExitStatus()
|
||||
} else if status.Signaled() {
|
||||
|
@ -107,13 +119,37 @@ func execAction(context *cli.Context) {
|
|||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func resizeTty(master *os.File) {
|
||||
if master == nil {
|
||||
return
|
||||
func handleSignals(container libcontainer.Container, tty *tty) {
|
||||
sigc := make(chan os.Signal, 10)
|
||||
signal.Notify(sigc)
|
||||
tty.resize()
|
||||
for sig := range sigc {
|
||||
switch sig {
|
||||
case syscall.SIGWINCH:
|
||||
tty.resize()
|
||||
default:
|
||||
container.Signal(sig)
|
||||
}
|
||||
}
|
||||
ws, err := term.GetWinsize(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
term.SetWinsize(master.Fd(), ws)
|
||||
}
|
||||
|
||||
func newTty(context *cli.Context) (*tty, error) {
|
||||
if context.Bool("tty") {
|
||||
master, console, err := consolepkg.CreateMasterAndConsole()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go io.Copy(master, os.Stdin)
|
||||
go io.Copy(os.Stdout, master)
|
||||
state, err := term.SetRawTerminal(os.Stdin.Fd())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &tty{
|
||||
master: master,
|
||||
console: console,
|
||||
state: state,
|
||||
}, nil
|
||||
}
|
||||
return &tty{}, nil
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ import (
|
|||
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/docker/libcontainer"
|
||||
_ "github.com/docker/libcontainer/namespaces/nsenter"
|
||||
_ "github.com/docker/libcontainer/nsenter"
|
||||
)
|
||||
|
||||
var initCommand = cli.Command{
|
||||
|
|
Loading…
Reference in New Issue