From f15aba685bc41539c37cb6f8b7cda22ee8068f09 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 12 Mar 2015 21:45:43 -0700 Subject: [PATCH] Update criu support with restored processes Also use pipes for non tty so that the parent's tty of the nsinit process does not leak into the container. Signed-off-by: Michael Crosby --- container.go | 2 +- container_linux.go | 49 ++++++++++++++---- factory_linux.go | 34 +------------ nsinit/restore.go | 27 +++++++++- nsinit/tty.go | 33 +++++++++++- nsinit/utils.go | 8 ++- restored_process.go | 119 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 222 insertions(+), 50 deletions(-) create mode 100644 restored_process.go diff --git a/container.go b/container.go index 511e18f8..ee45bd9b 100644 --- a/container.go +++ b/container.go @@ -118,7 +118,7 @@ type Container interface { // // errors: // Systemerror - System error. - Restore() error + Restore() (*Process, error) // Destroys the container after killing all running processes. // diff --git a/container_linux.go b/container_linux.go index d55e16ad..b1243712 100644 --- a/container_linux.go +++ b/container_linux.go @@ -219,6 +219,12 @@ func newPipe() (parent *os.File, child *os.File, err error) { func (c *linuxContainer) Destroy() error { c.m.Lock() defer c.m.Unlock() + // Since the state.json and CRIU image files are in the c.root + // directory, we should not remove it after checkpoint. Also, + // when CRIU exits after restore, we should not kill the processes. 
+ if _, err := os.Stat(filepath.Join(c.root, "checkpoint")); err == nil { + return nil + } status, err := c.currentStatus() if err != nil { return err @@ -256,6 +262,8 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { } func (c *linuxContainer) Checkpoint() error { + c.m.Lock() + defer c.m.Unlock() dir := filepath.Join(c.root, "checkpoint") if err := os.Mkdir(dir, 0655); err != nil { return err @@ -274,15 +282,22 @@ func (c *linuxContainer) Checkpoint() error { "--ext-mount-map", fmt.Sprintf("%s:%s", m.Destination, m.Destination)) } } - return c.execCriu(args) + if err := exec.Command(c.criuPath, args...).Run(); err != nil { + return err + } + return nil } -func (c *linuxContainer) Restore() error { +func (c *linuxContainer) Restore() (*Process, error) { + c.m.Lock() + defer c.m.Unlock() + pidfile := filepath.Join(c.root, "restoredpid") args := []string{ - "restore", "-d", "-v4", + "restore", "-v4", "-D", filepath.Join(c.root, "checkpoint"), "-o", "restore.log", "--root", c.config.Rootfs, + "--pidfile", pidfile, "--manage-cgroups", "--evasive-devices", } for _, m := range c.config.Mounts { @@ -291,15 +306,27 @@ func (c *linuxContainer) Restore() error { fmt.Sprintf("%s:%s", m.Destination, m.Source)) } } - return c.execCriu(args) -} - -func (c *linuxContainer) execCriu(args []string) error { - output, err := exec.Command(c.criuPath, args...).CombinedOutput() - if err != nil { - return fmt.Errorf("%s: %s", err, output) + // remount root for restore + if err := syscall.Mount(c.config.Rootfs, c.config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + return nil, err } - return nil + defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH) + cmd := exec.Command(c.criuPath, args...) 
+ if err := cmd.Start(); err != nil { + return nil, err + } + r, err := newRestoredProcess(pidfile, cmd) + if err != nil { + return nil, err + } + // TODO: crosbymichael restore previous process information by saving the init process information in + // the conatiner's state file or separate process state files. + if err := c.updateState(r); err != nil { + return nil, err + } + return &Process{ + ops: r, + }, nil } func (c *linuxContainer) updateState(process parentProcess) error { diff --git a/factory_linux.go b/factory_linux.go index 085a4919..2d3ef32f 100644 --- a/factory_linux.go +++ b/factory_linux.go @@ -179,7 +179,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) { if err != nil { return nil, err } - r := &restoredProcess{ + r := &nonChildProcess{ processPid: state.InitProcessPid, processStartTime: state.InitProcessStartTime, } @@ -259,35 +259,3 @@ func (l *LinuxFactory) validateID(id string) error { } return nil } - -// restoredProcess represents a process where the calling process may or may not be -// the parent process. This process is created when a factory loads a container from -// a persisted state. 
-type restoredProcess struct { - processPid int - processStartTime string -} - -func (p *restoredProcess) start() error { - return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) -} - -func (p *restoredProcess) pid() int { - return p.processPid -} - -func (p *restoredProcess) terminate() error { - return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) -} - -func (p *restoredProcess) wait() (*os.ProcessState, error) { - return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) -} - -func (p *restoredProcess) startTime() (string, error) { - return p.processStartTime, nil -} - -func (p *restoredProcess) signal(s os.Signal) error { - return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError) -} diff --git a/nsinit/restore.go b/nsinit/restore.go index f5a329ea..87bea7ce 100644 --- a/nsinit/restore.go +++ b/nsinit/restore.go @@ -1,6 +1,13 @@ package main -import "github.com/codegangsta/cli" +import ( + "os" + "os/exec" + "syscall" + + "github.com/codegangsta/cli" + "github.com/docker/libcontainer/utils" +) var restoreCommand = cli.Command{ Name: "restore", @@ -13,8 +20,24 @@ var restoreCommand = cli.Command{ if err != nil { fatal(err) } - if err := container.Restore(); err != nil { + process, err := container.Restore() + if err != nil { fatal(err) } + go handleSignals(process, &tty{}) + status, err := process.Wait() + if err != nil { + exitError, ok := err.(*exec.ExitError) + if ok { + status = exitError.ProcessState + } else { + container.Destroy() + fatal(err) + } + } + if err := container.Destroy(); err != nil { + fatal(err) + } + os.Exit(utils.ExitStatus(status.Sys().(syscall.WaitStatus))) }, } diff --git a/nsinit/tty.go b/nsinit/tty.go index 66893974..1cec2192 100644 --- a/nsinit/tty.go +++ b/nsinit/tty.go @@ -17,6 +17,9 @@ func newTty(context *cli.Context, p *libcontainer.Process, rootuid int) (*tty, e } return &tty{ console: console, + 
closers: []io.Closer{ + console, + }, }, nil } return &tty{}, nil @@ -25,11 +28,12 @@ func newTty(context *cli.Context, p *libcontainer.Process, rootuid int) (*tty, e type tty struct { console libcontainer.Console state *term.State + closers []io.Closer } func (t *tty) Close() error { - if t.console != nil { - t.console.Close() + for _, c := range t.closers { + c.Close() } if t.state != nil { term.RestoreTerminal(os.Stdin.Fd(), t.state) @@ -49,10 +53,35 @@ func (t *tty) attach(process *libcontainer.Process) error { process.Stderr = nil process.Stdout = nil process.Stdin = nil + } else { + // setup standard pipes so that the TTY of the calling nsinit process + // is not inherited by the container. + r, w, err := os.Pipe() + if err != nil { + return err + } + go io.Copy(w, os.Stdin) + t.closers = append(t.closers, w) + process.Stdin = r + if r, w, err = os.Pipe(); err != nil { + return err + } + go io.Copy(os.Stdout, r) + process.Stdout = w + t.closers = append(t.closers, r) + if r, w, err = os.Pipe(); err != nil { + return err + } + go io.Copy(os.Stderr, r) + process.Stderr = w + t.closers = append(t.closers, r) } return nil } +func (t *tty) setupPipe() { +} + func (t *tty) resize() error { if t.console == nil { return nil diff --git a/nsinit/utils.go b/nsinit/utils.go index 162b189a..87dcfa4a 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "os" + "path/filepath" "github.com/Sirupsen/logrus" @@ -40,7 +41,12 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { logrus.Warn("systemd cgroup flag passed, but systemd support for managing cgroups is not available.") } } - return libcontainer.New(context.GlobalString("root"), cgm, func(l *libcontainer.LinuxFactory) error { + root := context.GlobalString("root") + abs, err := filepath.Abs(root) + if err != nil { + return nil, err + } + return libcontainer.New(abs, libcontainer.Cgroupfs, func(l *libcontainer.LinuxFactory) error { l.CriuPath = 
context.GlobalString("criu") return nil }) diff --git a/restored_process.go b/restored_process.go new file mode 100644 index 00000000..942fb119 --- /dev/null +++ b/restored_process.go @@ -0,0 +1,119 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io/ioutil" + "os" + "os/exec" + "strconv" + "time" + + "github.com/docker/libcontainer/system" +) + +func newRestoredProcess(pidfile string, criuCommand *exec.Cmd) (*restoredProcess, error) { + var ( + data []byte + err error + ) + for i := 0; i < 20; i++ { + data, err = ioutil.ReadFile(pidfile) + if err == nil { + break + } + if !os.IsNotExist(err) { + return nil, err + } + time.Sleep(100 * time.Millisecond) + } + pid, err := strconv.Atoi(string(data)) + if err != nil { + return nil, err + } + proc, err := os.FindProcess(pid) + if err != nil { + return nil, err + } + started, err := system.GetProcessStartTime(pid) + if err != nil { + return nil, err + } + return &restoredProcess{ + criuCommand: criuCommand, + proc: proc, + processStartTime: started, + }, nil +} + +type restoredProcess struct { + criuCommand *exec.Cmd + proc *os.Process + processStartTime string +} + +func (p *restoredProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *restoredProcess) pid() int { + return p.proc.Pid +} + +func (p *restoredProcess) terminate() error { + err := p.proc.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *restoredProcess) wait() (*os.ProcessState, error) { + // TODO: how do we wait on the actual process? 
+ // maybe use --exec-cmd in criu + if err := p.criuCommand.Wait(); err != nil { + return nil, err + } + return p.criuCommand.ProcessState, nil +} + +func (p *restoredProcess) startTime() (string, error) { + return p.processStartTime, nil +} + +func (p *restoredProcess) signal(s os.Signal) error { + return p.proc.Signal(s) +} + +// nonChildProcess represents a process where the calling process is not +// the parent process. This process is created when a factory loads a container from +// a persisted state. +type nonChildProcess struct { + processPid int + processStartTime string +} + +func (p *nonChildProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *nonChildProcess) pid() int { + return p.processPid +} + +func (p *nonChildProcess) terminate() error { + return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) +} + +func (p *nonChildProcess) wait() (*os.ProcessState, error) { + return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) +} + +func (p *nonChildProcess) startTime() (string, error) { + return p.processStartTime, nil +} + +func (p *nonChildProcess) signal(s os.Signal) error { + return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError) +}