// +build linux package libcontainer import ( "encoding/json" "errors" "io" "os" "os/exec" "path/filepath" "strconv" "syscall" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" ) type parentProcess interface { // pid returns the pid for the running process. pid() int // start starts the process execution. start() error // send a SIGKILL to the process and wait for the exit. terminate() error // wait waits on the process returning the process state. wait() (*os.ProcessState, error) // startTime return's the process start time. startTime() (string, error) signal(os.Signal) error externalDescriptors() []string setExternalDescriptors(fds []string) } type setnsProcess struct { cmd *exec.Cmd parentPipe *os.File childPipe *os.File cgroupPaths map[string]string config *initConfig fds []string process *Process } func (p *setnsProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } func (p *setnsProcess) signal(sig os.Signal) error { s, ok := sig.(syscall.Signal) if !ok { return errors.New("os: unsupported signal type") } return syscall.Kill(p.pid(), s) } func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() if err = p.execSetns(); err != nil { return newSystemError(err) } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemError(err) } } if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { return newSystemError(err) } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } if ierr != nil { p.wait() return newSystemError(ierr) } return nil } // execSetns runs the process that executes C code to perform the setns calls // because setns support requires the C process to fork off a child and perform the setns // before the go runtime boots, we wait on the process to die and receive the child's pid // over the provided pipe. func (p *setnsProcess) execSetns() error { err := p.cmd.Start() p.childPipe.Close() if err != nil { return newSystemError(err) } status, err := p.cmd.Process.Wait() if err != nil { p.cmd.Wait() return newSystemError(err) } if !status.Success() { p.cmd.Wait() return newSystemError(&exec.ExitError{ProcessState: status}) } var pid *pid if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { p.cmd.Wait() return newSystemError(err) } process, err := os.FindProcess(pid.Pid) if err != nil { return err } p.cmd.Process = process p.process.ops = p return nil } // terminate sends a SIGKILL to the forked process for the setns routine then waits to // avoid the process becomming a zombie. func (p *setnsProcess) terminate() error { if p.cmd.Process == nil { return nil } err := p.cmd.Process.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *setnsProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() // Return actual ProcessState even on Wait error return p.cmd.ProcessState, err } func (p *setnsProcess) pid() int { return p.cmd.Process.Pid } func (p *setnsProcess) externalDescriptors() []string { return p.fds } func (p *setnsProcess) setExternalDescriptors(newFds []string) { p.fds = newFds } type initProcess struct { cmd *exec.Cmd parentPipe *os.File childPipe *os.File config *initConfig manager cgroups.Manager container *linuxContainer fds []string process *Process } func (p *initProcess) pid() int { return p.cmd.Process.Pid } func (p *initProcess) externalDescriptors() []string { return p.fds } func (p *initProcess) start() (err error) { defer p.parentPipe.Close() err = p.cmd.Start() p.process.ops = p p.childPipe.Close() if err != nil { p.process.ops = nil return newSystemError(err) } // Save the standard descriptor names before the container process // can potentially move them (e.g., via dup2()). If we don't do this now, // we won't know at checkpoint time which file descriptor to look up. fds, err := getPipeFds(p.pid()) if err != nil { return newSystemError(err) } p.setExternalDescriptors(fds) // Do this before syncing with child so that no children // can escape the cgroup if err := p.manager.Apply(p.pid()); err != nil { return newSystemError(err) } defer func() { if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() } }() if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, ID: p.container.id, Pid: p.pid(), Root: p.config.Config.Rootfs, } for _, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemError(err) } } } if err := p.createNetworkInterfaces(); err != nil { return newSystemError(err) } if err := p.sendConfig(); err != nil { return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } if ierr != nil { return newSystemError(ierr) } return nil } func (p *initProcess) wait() (*os.ProcessState, error) { err := p.cmd.Wait() if err != nil { return p.cmd.ProcessState, err } // we should kill all processes in cgroup when init is died if we use host PID namespace if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 { killCgroupProcesses(p.manager) } return p.cmd.ProcessState, nil } func (p *initProcess) terminate() error { if p.cmd.Process == nil { return nil } err := p.cmd.Process.Kill() if _, werr := p.wait(); err == nil { err = werr } return err } func (p *initProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } func (p *initProcess) sendConfig() error { // send the state to the container's init process then shutdown writes for the parent if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { return err } // shutdown writes for the parent side of the pipe return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR) } func (p *initProcess) createNetworkInterfaces() error { for _, config := range p.config.Config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } n := &network{ Network: *config, } if err := strategy.create(n, p.pid()); err != nil { return err } p.config.Networks = append(p.config.Networks, n) } return nil } func (p *initProcess) signal(sig os.Signal) error { s, ok := sig.(syscall.Signal) if !ok { return errors.New("os: unsupported signal type") } return syscall.Kill(p.pid(), s) } func (p *initProcess) setExternalDescriptors(newFds []string) { p.fds = newFds } func getPipeFds(pid int) ([]string, error) { fds := make([]string, 3) dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") for i := 0; i < 3; i++ { f := filepath.Join(dirPath, strconv.Itoa(i)) target, err := os.Readlink(f) if err != nil { return fds, err } fds[i] = target } return fds, nil }