273 lines
6.4 KiB
Go
273 lines
6.4 KiB
Go
// +build linux
|
|
|
|
package libcontainer
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"syscall"
|
|
|
|
"github.com/docker/libcontainer/cgroups"
|
|
"github.com/docker/libcontainer/configs"
|
|
"github.com/docker/libcontainer/network"
|
|
"github.com/docker/libcontainer/system"
|
|
"github.com/golang/glog"
|
|
)
|
|
|
|
// parentProcess is the set of lifecycle operations shared by the process
// wrappers in this file (setnsProcess and initProcess).
type parentProcess interface {
	// start begins execution of the process.
	start() error

	// pid reports the pid of the running process.
	pid() int

	// startTime reports the start time of the process.
	startTime() (string, error)

	// signal delivers the given signal to the process.
	signal(os.Signal) error

	// wait blocks until the process exits and returns its final state.
	wait() (*os.ProcessState, error)

	// terminate sends a SIGKILL to the process and waits for it to exit.
	terminate() error
}
|
|
|
|
type setnsProcess struct {
|
|
cmd *exec.Cmd
|
|
parentPipe *os.File
|
|
childPipe *os.File
|
|
forkedProcess *os.Process
|
|
cgroupPaths map[string]string
|
|
config *initConfig
|
|
}
|
|
|
|
func (p *setnsProcess) startTime() (string, error) {
|
|
return system.GetProcessStartTime(p.pid())
|
|
}
|
|
|
|
func (p *setnsProcess) signal(s os.Signal) error {
|
|
return p.forkedProcess.Signal(s)
|
|
}
|
|
|
|
func (p *setnsProcess) start() (err error) {
|
|
defer p.parentPipe.Close()
|
|
if p.forkedProcess, err = p.execSetns(); err != nil {
|
|
return err
|
|
}
|
|
if len(p.cgroupPaths) > 0 {
|
|
if err := cgroups.EnterPid(p.cgroupPaths, p.forkedProcess.Pid); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// execSetns runs the process that executes C code to perform the setns calls
|
|
// because setns support requires the C process to fork off a child and perform the setns
|
|
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
|
// over the provided pipe.
|
|
func (p *setnsProcess) execSetns() (*os.Process, error) {
|
|
err := p.cmd.Start()
|
|
p.childPipe.Close()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
status, err := p.cmd.Process.Wait()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !status.Success() {
|
|
return nil, &exec.ExitError{status}
|
|
}
|
|
var pid *pid
|
|
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
|
return nil, err
|
|
}
|
|
return os.FindProcess(pid.Pid)
|
|
}
|
|
|
|
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
|
|
// avoid the process becomming a zombie.
|
|
func (p *setnsProcess) terminate() error {
|
|
if p.forkedProcess == nil {
|
|
return nil
|
|
}
|
|
err := p.forkedProcess.Kill()
|
|
if _, werr := p.wait(); err == nil {
|
|
err = werr
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
|
return p.forkedProcess.Wait()
|
|
}
|
|
|
|
func (p *setnsProcess) pid() int {
|
|
return p.forkedProcess.Pid
|
|
}
|
|
|
|
type initProcess struct {
|
|
cmd *exec.Cmd
|
|
parentPipe *os.File
|
|
childPipe *os.File
|
|
config *initConfig
|
|
manager cgroups.Manager
|
|
}
|
|
|
|
func (p *initProcess) pid() int {
|
|
return p.cmd.Process.Pid
|
|
}
|
|
|
|
func (p *initProcess) start() error {
|
|
defer p.parentPipe.Close()
|
|
err := p.cmd.Start()
|
|
p.childPipe.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Do this before syncing with child so that no children
|
|
// can escape the cgroup
|
|
if err := p.manager.Apply(p.pid()); err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
// TODO: should not be the responsibility to call here
|
|
p.manager.Destroy()
|
|
}
|
|
}()
|
|
if err := p.createNetworkInterfaces(); err != nil {
|
|
return err
|
|
}
|
|
// Start the setup process to setup the init process
|
|
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 {
|
|
parent, err := p.newUsernsSetupProcess()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := parent.start(); err != nil {
|
|
if err := parent.terminate(); err != nil {
|
|
glog.Warning(err)
|
|
}
|
|
return err
|
|
}
|
|
if _, err := parent.wait(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := p.sendConfig(); err != nil {
|
|
return err
|
|
}
|
|
// wait for the child process to fully complete and receive an error message
|
|
// if one was encoutered
|
|
var ierr *initError
|
|
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
|
return err
|
|
}
|
|
if ierr != nil {
|
|
return ierr
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *initProcess) wait() (*os.ProcessState, error) {
|
|
state, err := p.cmd.Process.Wait()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
|
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 {
|
|
// TODO: this will not work for the success path because libcontainer
|
|
// does not wait on the process. This needs to be moved to destroy or add a Wait()
|
|
// method back onto the container.
|
|
var procs []*os.Process
|
|
p.manager.Freeze(configs.Frozen)
|
|
pids, err := p.manager.GetPids()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, pid := range pids {
|
|
// TODO: log err without aborting if we are unable to find
|
|
// a single PID
|
|
if p, err := os.FindProcess(pid); err == nil {
|
|
procs = append(procs, p)
|
|
p.Kill()
|
|
}
|
|
}
|
|
p.manager.Freeze(configs.Thawed)
|
|
for _, p := range procs {
|
|
p.Wait()
|
|
}
|
|
}
|
|
return state, nil
|
|
}
|
|
|
|
func (p *initProcess) terminate() error {
|
|
if p.cmd.Process == nil {
|
|
return nil
|
|
}
|
|
err := p.cmd.Process.Kill()
|
|
if _, werr := p.wait(); err == nil {
|
|
err = werr
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (p *initProcess) startTime() (string, error) {
|
|
return system.GetProcessStartTime(p.pid())
|
|
}
|
|
|
|
func (p *initProcess) sendConfig() error {
|
|
// send the state to the container's init process then shutdown writes for the parent
|
|
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
|
|
return err
|
|
}
|
|
// shutdown writes for the parent side of the pipe
|
|
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
|
|
}
|
|
|
|
func (p *initProcess) createNetworkInterfaces() error {
|
|
for _, config := range p.config.Config.Networks {
|
|
strategy, err := network.GetStrategy(config.Type)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := strategy.Create(config, p.pid()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) {
|
|
parentPipe, childPipe, err := newPipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...)
|
|
cmd.ExtraFiles = []*os.File{childPipe}
|
|
cmd.Dir = p.cmd.Dir
|
|
cmd.Env = append(cmd.Env,
|
|
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", p.pid()),
|
|
fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_setup"),
|
|
)
|
|
return &setnsProcess{
|
|
cmd: cmd,
|
|
childPipe: childPipe,
|
|
parentPipe: parentPipe,
|
|
config: p.config,
|
|
}, nil
|
|
}
|
|
|
|
func (p *initProcess) signal(s os.Signal) error {
|
|
return p.cmd.Process.Signal(s)
|
|
}
|