Refactor init actions into separate types
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
5fc19e8db5
commit
8850636eb3
|
@ -0,0 +1,75 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Validator interface {
|
||||
Validate(*configs.Config) error
|
||||
}
|
||||
|
||||
func New() Validator {
|
||||
return &ConfigValidator{}
|
||||
}
|
||||
|
||||
// ConfigValidator is a field-less type whose methods implement the
// configuration checks.
type ConfigValidator struct{}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates the the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootfs != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if config.RestrictSys && !config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
8
error.go
8
error.go
|
@ -54,3 +54,11 @@ type Error interface {
|
|||
// Returns the error code for this error.
|
||||
Code() ErrorCode
|
||||
}
|
||||
|
||||
// initError is a JSON-serializable error value that carries only a message.
type initError struct {
	Message string `json:"message,omitempty"`
}

// Error implements the error interface by returning the stored message.
func (e initError) Error() string {
	return e.Message
}
|
||||
|
|
|
@ -22,12 +22,8 @@ const (
|
|||
EXIT_SIGNAL_OFFSET = 128
|
||||
)
|
||||
|
||||
type initError struct {
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func (i initError) Error() string {
|
||||
return i.Message
|
||||
type pid struct {
|
||||
Pid int `json:"Pid"`
|
||||
}
|
||||
|
||||
type linuxContainer struct {
|
||||
|
@ -97,6 +93,21 @@ func (c *linuxContainer) Start(process *Process) (int, error) {
|
|||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
cmd := c.commandTemplate(process)
|
||||
if status != configs.Destroyed {
|
||||
// TODO: (crosbymichael) check out console use for execin
|
||||
return c.startNewProcess(cmd, process.Args)
|
||||
//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
|
||||
}
|
||||
if err := c.startInitialProcess(cmd, process.Args); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return c.state.InitPid, nil
|
||||
}
|
||||
|
||||
// commandTemplate creates a template *exec.Cmd. It uses the init arguments provided
|
||||
// to the factory and attaches IO to the process.
|
||||
func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd {
|
||||
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
|
||||
cmd.Stdin = process.Stdin
|
||||
cmd.Stdout = process.Stdout
|
||||
|
@ -108,32 +119,26 @@ func (c *linuxContainer) Start(process *Process) (int, error) {
|
|||
}
|
||||
// TODO: add pdeath to config for a container
|
||||
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
|
||||
if status != configs.Destroyed {
|
||||
glog.Info("start new container process")
|
||||
// TODO: (crosbymichael) check out console use for execin
|
||||
//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state)
|
||||
return c.startNewProcess(cmd, process.Args)
|
||||
}
|
||||
if err := c.startInitProcess(cmd, process.Args); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return c.state.InitPid, nil
|
||||
return cmd
|
||||
}
|
||||
|
||||
// startNewProcess adds another process to an already running container
|
||||
func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) {
|
||||
var err error
|
||||
glog.Info("start new container process")
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
defer parent.Close()
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid))
|
||||
if err := cmd.Start(); err != nil {
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid), "_LIBCONTAINER_INITTYPE=setns")
|
||||
|
||||
// start the command
|
||||
err = cmd.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
child.Close()
|
||||
s, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
|
@ -152,29 +157,28 @@ func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, err
|
|||
}
|
||||
terminate := func(terr error) (int, error) {
|
||||
// TODO: log the errors for kill and wait
|
||||
p.Kill()
|
||||
p.Wait()
|
||||
if err := p.Kill(); err != nil {
|
||||
glog.Warning(err)
|
||||
}
|
||||
if _, err := p.Wait(); err != nil {
|
||||
glog.Warning(err)
|
||||
}
|
||||
return -1, terr
|
||||
}
|
||||
// Enter cgroups.
|
||||
if err := c.enterCgroups(pid.Pid); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
encoder := json.NewEncoder(parent)
|
||||
if err := encoder.Encode(c.config); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
process := processArgs{
|
||||
if err := json.NewEncoder(parent).Encode(&initConfig{
|
||||
Config: c.config,
|
||||
Args: args,
|
||||
}
|
||||
if err := encoder.Encode(process); err != nil {
|
||||
}); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
return pid.Pid, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
||||
func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, args []string) error {
|
||||
glog.Info("starting container initial process")
|
||||
// create a pipe so that we can synchronize with the namespaced process and
|
||||
// pass the state and configuration to the child process
|
||||
parent, child, err := newInitPipe()
|
||||
|
@ -184,6 +188,9 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
|||
defer parent.Close()
|
||||
cmd.ExtraFiles = []*os.File{child}
|
||||
cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags()
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard")
|
||||
// if the container is configured to use user namespaces we have to setup the
|
||||
// uid:gid mapping on the command.
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
addUidGidMappings(cmd.SysProcAttr, c.config)
|
||||
// Default to root user when user namespaces are enabled.
|
||||
|
@ -191,7 +198,6 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
|||
cmd.SysProcAttr.Credential = &syscall.Credential{}
|
||||
}
|
||||
}
|
||||
glog.Info("starting container init process")
|
||||
err = cmd.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
|
@ -199,12 +205,15 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
|||
}
|
||||
wait := func() (*os.ProcessState, error) {
|
||||
ps, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
// we should kill all processes in the cgroup when init has died if we use
|
||||
// host PID namespace
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
c.killAllPids()
|
||||
}
|
||||
return ps, newGenericError(err, SystemError)
|
||||
return ps, nil
|
||||
}
|
||||
terminate := func(terr error) error {
|
||||
// TODO: log the errors for kill and wait
|
||||
|
@ -230,19 +239,19 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
|||
if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
process := processArgs{
|
||||
iconfig := &initConfig{
|
||||
Args: args,
|
||||
Config: c.config,
|
||||
NetworkState: &networkState,
|
||||
}
|
||||
// Start the setup process to setup the init process
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil {
|
||||
if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
}
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := json.NewEncoder(parent).Encode(process); err != nil {
|
||||
if err := json.NewEncoder(parent).Encode(iconfig); err != nil {
|
||||
return terminate(err)
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
|
@ -258,12 +267,10 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error {
|
|||
if ierr != nil {
|
||||
return terminate(ierr)
|
||||
}
|
||||
|
||||
c.state.InitPid = cmd.Process.Pid
|
||||
c.state.InitStartTime = started
|
||||
c.state.NetworkState = networkState
|
||||
c.state.CgroupPaths = c.cgroupManager.GetPaths()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -386,7 +393,7 @@ func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.N
|
|||
return nil
|
||||
}
|
||||
|
||||
func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error {
|
||||
func executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error {
|
||||
command := exec.Command(args[0], args[1:]...)
|
||||
parent, child, err := newInitPipe()
|
||||
if err != nil {
|
||||
|
@ -397,7 +404,7 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process
|
|||
command.Dir = container.Rootfs
|
||||
command.Env = append(command.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
|
||||
fmt.Sprintf("_LIBCONTAINER_USERNS=1"))
|
||||
fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_sidecar"))
|
||||
err = command.Start()
|
||||
child.Close()
|
||||
if err != nil {
|
||||
|
@ -452,10 +459,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process
|
|||
return nil
|
||||
}
|
||||
|
||||
type pid struct {
|
||||
Pid int `json:"Pid"`
|
||||
}
|
||||
|
||||
func (c *linuxContainer) enterCgroups(pid int) error {
|
||||
return cgroups.EnterPid(c.state.CgroupPaths, pid)
|
||||
}
|
||||
|
|
509
linux_factory.go
509
linux_factory.go
|
@ -9,24 +9,12 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
cgroups "github.com/docker/libcontainer/cgroups/manager"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/security/capabilities"
|
||||
"github.com/docker/libcontainer/security/restrict"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
"github.com/docker/libcontainer/configs/validate"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -39,13 +27,6 @@ var (
|
|||
maxIdLen = 1024
|
||||
)
|
||||
|
||||
// Process is used for transferring parameters from Exec() to Init()
|
||||
type processArgs struct {
|
||||
Args []string `json:"args,omitempty"`
|
||||
Config *configs.Config `json:"config,omitempty"`
|
||||
NetworkState *configs.NetworkState `json:"network_state,omitempty"`
|
||||
}
|
||||
|
||||
// New returns a linux based container factory based in the root directory.
|
||||
func New(root string, initArgs []string) (Factory, error) {
|
||||
if root != "" {
|
||||
|
@ -56,6 +37,7 @@ func New(root string, initArgs []string) (Factory, error) {
|
|||
return &linuxFactory{
|
||||
root: root,
|
||||
initArgs: initArgs,
|
||||
validator: validate.New(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -64,6 +46,7 @@ type linuxFactory struct {
|
|||
// root is the root directory
|
||||
root string
|
||||
initArgs []string
|
||||
validator validate.Validator
|
||||
}
|
||||
|
||||
func (l *linuxFactory) Create(id string, config *configs.Config) (Container, error) {
|
||||
|
@ -73,6 +56,9 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := l.validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.root, id)
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse)
|
||||
|
@ -96,14 +82,13 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
os.RemoveAll(containerRoot)
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
cgroupManager := cgroups.NewCgroupManager(config.Cgroups)
|
||||
return &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
initArgs: l.initArgs,
|
||||
state: &configs.State{},
|
||||
cgroupManager: cgroupManager,
|
||||
cgroupManager: cgroups.NewCgroupManager(config.Cgroups),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -137,12 +122,13 @@ func (l *linuxFactory) Load(id string) (Container, error) {
|
|||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
||||
pipe := os.NewFile(uintptr(pipefd), "pipe")
|
||||
setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != ""
|
||||
pid := os.Getenv("_LIBCONTAINER_INITPID")
|
||||
if pid != "" && !setupUserns {
|
||||
return initIn(pipe)
|
||||
}
|
||||
var (
|
||||
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||||
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
|
||||
)
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
|
@ -159,27 +145,11 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
|||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
uncleanRootfs, err := os.Getwd()
|
||||
i, err := newContainerInit(it, pipe)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var process *processArgs
|
||||
// We always read this as it is a way to sync with the parent as well
|
||||
if err := json.NewDecoder(pipe).Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
if setupUserns {
|
||||
err = setupContainer(process)
|
||||
if err == nil {
|
||||
os.Exit(0)
|
||||
} else {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if process.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return l.initUserNs(uncleanRootfs, process)
|
||||
}
|
||||
return l.initDefault(uncleanRootfs, process)
|
||||
return i.Init()
|
||||
}
|
||||
|
||||
func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) {
|
||||
|
@ -223,450 +193,3 @@ func (l *linuxFactory) validateID(id string) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) {
|
||||
config := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO: move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := loadContainerEnvironment(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := console.OpenAndDup(config.Console); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
cloneFlags := config.Namespaces.CloneFlags()
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(config.Networks) != 0 || len(config.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(config, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(config); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
if err := setupRlimits(config); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.Hostname != "" {
|
||||
// TODO: (crosbymichael) move this to pre spawn validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
|
||||
}
|
||||
if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
// TODO: (crosbymichael) make this configurable at the Config level
|
||||
if config.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if err := finalizeNamespace(config); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := restoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
return system.Execv(process.Args[0], process.Args[0:], config.Env)
|
||||
}
|
||||
|
||||
func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) {
|
||||
config := process.Config
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the config
|
||||
if err := loadContainerEnvironment(config); err != nil {
|
||||
return err
|
||||
}
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := console.OpenAndDup("/dev/console"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return fmt.Errorf("setsid %s", err)
|
||||
}
|
||||
if config.Console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return fmt.Errorf("setctty %s", err)
|
||||
}
|
||||
}
|
||||
if config.WorkingDir == "" {
|
||||
config.WorkingDir = "/"
|
||||
}
|
||||
if err := setupRlimits(config); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
cloneFlags := config.Namespaces.CloneFlags()
|
||||
if config.Hostname != "" {
|
||||
// TODO: move validation
|
||||
if (cloneFlags & syscall.CLONE_NEWUTS) == 0 {
|
||||
return fmt.Errorf("unable to set the hostname without UTS namespace")
|
||||
}
|
||||
if err := syscall.Sethostname([]byte(config.Hostname)); err != nil {
|
||||
return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err)
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err)
|
||||
}
|
||||
if err := label.SetProcessLabel(config.ProcessLabel); err != nil {
|
||||
return fmt.Errorf("set process label %s", err)
|
||||
}
|
||||
if config.RestrictSys {
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) == 0 {
|
||||
return fmt.Errorf("unable to restrict access to kernel files without mount namespace")
|
||||
}
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeathSignal, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if err := finalizeNamespace(config); err != nil {
|
||||
return fmt.Errorf("finalize namespace %s", err)
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := restoreParentDeathSignal(pdeathSignal); err != nil {
|
||||
return fmt.Errorf("restore parent death signal %s", err)
|
||||
}
|
||||
return system.Execv(process.Args[0], process.Args[0:], config.Env)
|
||||
}
|
||||
|
||||
// restoreParentDeathSignal sets the parent death signal to old.
|
||||
func restoreParentDeathSignal(old int) error {
|
||||
if old == 0 {
|
||||
return nil
|
||||
}
|
||||
current, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("get parent death signal %s", err)
|
||||
}
|
||||
if old == current {
|
||||
return nil
|
||||
}
|
||||
if err := system.ParentDeathSignal(uintptr(old)); err != nil {
|
||||
return fmt.Errorf("set parent death signal %s", err)
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *configs.Config) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get supplementary groups %s", err)
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, config.AdditionalGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return fmt.Errorf("setgroups %s", err)
|
||||
}
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("setgid %s", err)
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("setuid %s", err)
|
||||
}
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return fmt.Errorf("set HOME %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupVethNetwork uses the Network config if it is not nil to initialize
|
||||
// the new veth interface inside the container for use by changing the name to eth0
|
||||
// setting the MTU and IP address along with the default gateway
|
||||
func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err1 := strategy.Initialize(config, networkState)
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(config *configs.Config) error {
|
||||
for _, rlimit := range config.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaky file descriptors
|
||||
// before execing the command inside the namespace
|
||||
func finalizeNamespace(config *configs.Config) error {
|
||||
// Ensure that all non-standard fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(3); err != nil {
|
||||
return fmt.Errorf("close open file descriptors %s", err)
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := capabilities.DropBoundingSet(config.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop bounding set %s", err)
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return fmt.Errorf("set keep caps %s", err)
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return fmt.Errorf("setup user %s", err)
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return fmt.Errorf("clear keep caps %s", err)
|
||||
}
|
||||
// drop all other capabilities
|
||||
if err := capabilities.DropCapabilities(config.Capabilities); err != nil {
|
||||
return fmt.Errorf("drop capabilities %s", err)
|
||||
}
|
||||
if config.WorkingDir != "" {
|
||||
if err := syscall.Chdir(config.WorkingDir); err != nil {
|
||||
return fmt.Errorf("chdir to %s %s", config.WorkingDir, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadContainerEnvironment(config *configs.Config) error {
|
||||
os.Clearenv()
|
||||
for _, pair := range config.Env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
for _, ns := range namespaces {
|
||||
if ns.Path != "" {
|
||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupContainer is run to setup mounts and networking related operations
|
||||
// for a user namespace enabled process as a user namespace root doesn't
|
||||
// have permissions to perform these operations.
|
||||
// The setup process joins all the namespaces of user namespace enabled init
|
||||
// except the user namespace, so it run as root in the root user namespace
|
||||
// to perform these operations.
|
||||
func setupContainer(process *processArgs) error {
|
||||
container := process.Config
|
||||
networkState := process.NetworkState
|
||||
|
||||
// TODO : move to validation
|
||||
/*
|
||||
rootfs, err := utils.ResolveRootfs(container.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
// clear the current processes env and replace it with the environment
|
||||
// defined on the container
|
||||
if err := loadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cloneFlags := container.Namespaces.CloneFlags()
|
||||
if (cloneFlags & syscall.CLONE_NEWNET) == 0 {
|
||||
if len(container.Networks) != 0 || len(container.Routes) != 0 {
|
||||
return fmt.Errorf("unable to apply network parameters without network namespace")
|
||||
}
|
||||
} else {
|
||||
if err := setupNetwork(container, networkState); err != nil {
|
||||
return fmt.Errorf("setup networking %s", err)
|
||||
}
|
||||
if err := setupRoute(container); err != nil {
|
||||
return fmt.Errorf("setup route %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
label.Init()
|
||||
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if (cloneFlags & syscall.CLONE_NEWNS) != 0 {
|
||||
if err := mount.InitializeMountNamespace(container); err != nil {
|
||||
return fmt.Errorf("setup mount namespace %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Finalize entering into a container and execute a specified command
|
||||
func initIn(pipe *os.File) (err error) {
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if err := json.NewEncoder(pipe).Encode(initError{
|
||||
Message: err.Error(),
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
decoder := json.NewDecoder(pipe)
|
||||
var config *configs.Config
|
||||
if err := decoder.Decode(&config); err != nil {
|
||||
return err
|
||||
}
|
||||
var process *processArgs
|
||||
if err := decoder.Decode(&process); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeSetns(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil {
|
||||
return err
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// finalize expects that the setns calls have been setup and that is has joined an
|
||||
// existing namespace
|
||||
func finalizeSetns(container *configs.Config) error {
|
||||
// clear the current processes env and replace it with the environment defined on the container
|
||||
if err := loadContainerEnvironment(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setupRlimits(container); err != nil {
|
||||
return fmt.Errorf("setup rlimits %s", err)
|
||||
}
|
||||
|
||||
if err := finalizeNamespace(container); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil {
|
||||
return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err)
|
||||
}
|
||||
|
||||
if container.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(container.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/network"
|
||||
"github.com/docker/libcontainer/security/capabilities"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
// initType names the kind of init routine that newContainerInit should build.
type initType string

// The init types understood by newContainerInit.
const (
	// initSetns selects linuxSetnsInit.
	initSetns initType = "setns"
	// initStandard selects linuxStandardInit.
	initStandard initType = "standard"
	// initUserns selects linuxUsernsInit.
	initUserns initType = "userns"
	// initUsernsSideCar selects linuxUsernsSideCar.
	initUsernsSideCar initType = "userns_sidecar"
)
|
||||
|
||||
// Process is used for transferring parameters from Exec() to Init()
|
||||
type initConfig struct {
|
||||
Args []string `json:"args,omitempty"`
|
||||
Config *configs.Config `json:"config,omitempty"`
|
||||
NetworkState *configs.NetworkState `json:"network_state,omitempty"`
|
||||
}
|
||||
|
||||
// initer is implemented by every init flavor returned from newContainerInit.
type initer interface {
	// Init runs the initialization and reports any failure.
	Init() error
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := populateProcessEnvironment(config.Config.Env); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch t {
|
||||
case initSetns:
|
||||
return &linuxSetnsInit{
|
||||
args: config.Args,
|
||||
config: config.Config,
|
||||
}, nil
|
||||
case initUserns:
|
||||
return &linuxUsernsInit{
|
||||
args: config.Args,
|
||||
config: config.Config,
|
||||
}, nil
|
||||
case initUsernsSideCar:
|
||||
return &linuxUsernsSideCar{
|
||||
config: config.Config,
|
||||
network: config.NetworkState,
|
||||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
config: config,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
}
|
||||
|
||||
// populateProcessEnvironment sets every KEY=VALUE entry of env in the current
// process's environment. Entries are split at the first "="; an entry with no
// "=" is rejected, and the first failure stops processing.
func populateProcessEnvironment(env []string) error {
	for _, entry := range env {
		sep := strings.Index(entry, "=")
		if sep < 0 {
			return fmt.Errorf("invalid environment '%v'", entry)
		}
		key, value := entry[:sep], entry[sep+1:]
		if err := os.Setenv(key, value); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaky file descriptors
|
||||
// before execing the command inside the namespace
|
||||
func finalizeNamespace(config *configs.Config) error {
|
||||
// Ensure that all non-standard fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(3); err != nil {
|
||||
return err
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := capabilities.DropBoundingSet(config.Capabilities); err != nil {
|
||||
return err
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
// drop all other capabilities
|
||||
if err := capabilities.DropCapabilities(config.Capabilities); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.WorkingDir != "" {
|
||||
if err := syscall.Chdir(config.WorkingDir); err != nil {
|
||||
return fmt.Errorf("chdir to %s %s", config.WorkingDir, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
for _, ns := range namespaces {
|
||||
if ns.Path != "" {
|
||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *configs.Config) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get supplementary groups %s", err)
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, config.AdditionalGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return fmt.Errorf("setgroups %s", err)
|
||||
}
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("setgid %s", err)
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("setuid %s", err)
|
||||
}
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return fmt.Errorf("set HOME %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupVethNetwork uses the Network config if it is not nil to initialize
|
||||
// the new veth interface inside the container for use by changing the name to eth0
|
||||
// setting the MTU and IP address along with the default gateway
|
||||
func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := network.GetStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err1 := strategy.Initialize(config, networkState)
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(config *configs.Config) error {
|
||||
for _, rlimit := range config.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
// linuxSetnsInit performs the container's initialization for running a new process
|
||||
// inside an existing container.
|
||||
type linuxSetnsInit struct {
|
||||
args []string
|
||||
config *configs.Config
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
if err := setupRlimits(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return system.Execv(l.args[0], l.args[0:], l.config.Env)
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
consolepkg "github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
"github.com/docker/libcontainer/security/restrict"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
console := l.config.Config.Console
|
||||
if console != "" {
|
||||
if err := consolepkg.OpenAndDup(console); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return err
|
||||
}
|
||||
if console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := setupNetwork(l.config.Config, l.config.NetworkState); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRoute(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
if err := mount.InitializeMountNamespace(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.RestrictSys {
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeNamespace(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Config.Env)
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/apparmor"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
consolepkg "github.com/docker/libcontainer/console"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/security/restrict"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
type linuxUsernsInit struct {
|
||||
args []string
|
||||
config *configs.Config
|
||||
}
|
||||
|
||||
func (l *linuxUsernsInit) Init() error {
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(l.config.Namespaces); err != nil {
|
||||
return err
|
||||
}
|
||||
console := l.config.Console
|
||||
if console != "" {
|
||||
if err := consolepkg.OpenAndDup("/dev/console"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := syscall.Setsid(); err != nil {
|
||||
return err
|
||||
}
|
||||
if console != "" {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if l.config.WorkingDir == "" {
|
||||
l.config.WorkingDir = "/"
|
||||
}
|
||||
if err := setupRlimits(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if hostname := l.config.Hostname; hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.RestrictSys {
|
||||
if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Signal self if parent is already dead. Does nothing if running in a new
|
||||
// PID namespace, as Getppid will always return 0.
|
||||
if syscall.Getppid() == 1 {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
return system.Execv(l.args[0], l.args[0:], l.config.Env)
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/label"
|
||||
"github.com/docker/libcontainer/mount"
|
||||
)
|
||||
|
||||
// linuxUsernsSideCar is run to setup mounts and networking related operations
|
||||
// for a user namespace enabled process as a user namespace root doesn't
|
||||
// have permissions to perform these operations.
|
||||
// The setup process joins all the namespaces of user namespace enabled init
|
||||
// except the user namespace, so it run as root in the root user namespace
|
||||
// to perform these operations.
|
||||
type linuxUsernsSideCar struct {
|
||||
config *configs.Config
|
||||
network *configs.NetworkState
|
||||
}
|
||||
|
||||
func (l *linuxUsernsSideCar) Init() error {
|
||||
if err := setupNetwork(l.config, l.network); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRoute(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Namespaces.Contains(configs.NEWNET) {
|
||||
if err := mount.InitializeMountNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -22,14 +22,14 @@ struct clone_arg {
|
|||
* Reserve some space for clone() to locate arguments
|
||||
* and retcode in this place
|
||||
*/
|
||||
char stack[4096] __attribute__((aligned (8)));
|
||||
char stack[4096] __attribute__ ((aligned(8)));
|
||||
char stack_ptr[0];
|
||||
jmp_buf *env;
|
||||
};
|
||||
|
||||
static int child_func(void *_arg)
|
||||
{
|
||||
struct clone_arg *arg = (struct clone_arg *) _arg;
|
||||
struct clone_arg *arg = (struct clone_arg *)_arg;
|
||||
longjmp(*arg->env, 1);
|
||||
}
|
||||
|
||||
|
@ -47,8 +47,8 @@ int setns(int fd, int nstype)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
static int clone_parent(jmp_buf *env) __attribute__ ((noinline));
|
||||
static int clone_parent(jmp_buf *env)
|
||||
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
|
||||
static int clone_parent(jmp_buf * env)
|
||||
{
|
||||
struct clone_arg ca;
|
||||
int child;
|
||||
|
@ -100,7 +100,8 @@ void nsexec()
|
|||
|
||||
fd = openat(tfd, namespaces[i], O_RDONLY);
|
||||
if (fd == -1) {
|
||||
pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
|
||||
pr_perror("Failed to open ns file %s for ns %s", buf,
|
||||
namespaces[i]);
|
||||
exit(1);
|
||||
}
|
||||
// Set the namespace.
|
||||
|
|
|
@ -10,7 +10,6 @@ func Capture(userSkip int) Stacktrace {
|
|||
skip = userSkip + 1 // add one for our own function
|
||||
frames []Frame
|
||||
)
|
||||
|
||||
for i := skip; ; i++ {
|
||||
pc, file, line, ok := runtime.Caller(i)
|
||||
if !ok {
|
||||
|
@ -18,7 +17,6 @@ func Capture(userSkip int) Stacktrace {
|
|||
}
|
||||
frames = append(frames, NewFrame(pc, file, line))
|
||||
}
|
||||
|
||||
return Stacktrace{
|
||||
Frames: frames,
|
||||
}
|
||||
|
|
|
@ -8,6 +8,26 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
type ParentDeathSignal int
|
||||
|
||||
func (p ParentDeathSignal) Restore() error {
|
||||
if p == 0 {
|
||||
return nil
|
||||
}
|
||||
current, err := GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p == current {
|
||||
return nil
|
||||
}
|
||||
return p.Set()
|
||||
}
|
||||
|
||||
func (p ParentDeathSignal) Set() error {
|
||||
return SetParentDeathSignal(uintptr(p))
|
||||
}
|
||||
|
||||
func Execv(cmd string, args []string, env []string) error {
|
||||
name, err := exec.LookPath(cmd)
|
||||
if err != nil {
|
||||
|
@ -17,23 +37,20 @@ func Execv(cmd string, args []string, env []string) error {
|
|||
return syscall.Exec(name, args, env)
|
||||
}
|
||||
|
||||
func ParentDeathSignal(sig uintptr) error {
|
||||
func SetParentDeathSignal(sig uintptr) error {
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetParentDeathSignal() (int, error) {
|
||||
func GetParentDeathSignal() (ParentDeathSignal, error) {
|
||||
var sig int
|
||||
|
||||
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
|
||||
|
||||
if err != 0 {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return sig, nil
|
||||
return ParentDeathSignal(sig), nil
|
||||
}
|
||||
|
||||
func SetKeepCaps() error {
|
||||
|
|
Loading…
Reference in New Issue