Some C/R bug fixes and changes in the new libcontainer and nsinit.

This is work in progress, integrating C/R support from
https://github.com/SaiedKazemi/docker/tree/cr into the
new libcontainer and nsinit.

Signed-off-by: Saied Kazemi <saied@google.com>
This commit is contained in:
Saied Kazemi 2015-03-18 20:22:21 -07:00 committed by Michael Crosby
parent 406f32a774
commit 9212f68293
5 changed files with 104 additions and 12 deletions

View File

@ -118,7 +118,7 @@ type Container interface {
//
// errors:
// Systemerror - System error.
Restore() (*Process, error)
Restore(*Process) error
// Destroys the container after killing all running processes.
//

View File

@ -9,6 +9,7 @@ import (
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
@ -261,11 +262,23 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
return notifyOnOOM(c.cgroupManager.GetPaths())
}
// addArgsFromEnv appends every whitespace-separated field of the environment
// variable named by evar to *args, then prints the resulting argument list to
// stdout. An unset or empty variable leaves *args untouched; the list is
// printed either way.
//
// XXX debug support, remove when debugging done.
func addArgsFromEnv(evar string, args *[]string) {
	// strings.Fields yields no fields for an empty value, so the append
	// below is a no-op when the variable is unset or blank.
	extra := strings.Fields(os.Getenv(evar))
	*args = append(*args, extra...)
	fmt.Printf(">>> criu %v\n", *args)
}
func (c *linuxContainer) Checkpoint() error {
c.m.Lock()
defer c.m.Unlock()
dir := filepath.Join(c.root, "checkpoint")
if err := os.Mkdir(dir, 0655); err != nil {
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(dir, 0655); err != nil && !os.IsExist(err) {
return err
}
args := []string{
@ -282,16 +295,27 @@ func (c *linuxContainer) Checkpoint() error {
"--ext-mount-map", fmt.Sprintf("%s:%s", m.Destination, m.Destination))
}
}
addArgsFromEnv("CRIU_C", &args) // XXX debug
if err := exec.Command(c.criuPath, args...).Run(); err != nil {
return err
}
return nil
}
func (c *linuxContainer) Restore() (*Process, error) {
func (c *linuxContainer) Restore(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
pidfile := filepath.Join(c.root, "restoredpid")
// Make sure pidfile doesn't already exist from a
// previous restore. Otherwise, CRIU will fail.
if err := os.Remove(pidfile); err != nil && !os.IsNotExist(err) {
return err
}
// XXX We should do the restore in detached mode (-d).
// To do this, we need an "init" process that executes
// CRIU and waits for it, reaping its children, and
// waiting for the container.
args := []string{
"restore", "-v4",
"-D", filepath.Join(c.root, "checkpoint"),
@ -306,27 +330,43 @@ func (c *linuxContainer) Restore() (*Process, error) {
fmt.Sprintf("%s:%s", m.Destination, m.Source))
}
}
// Pipes that were previously set up for std{in,out,err}
// were removed after checkpoint. Use the new ones.
for i := 0; i < 3; i++ {
if s := c.config.StdFds[i]; strings.Contains(s, "pipe:") {
args = append(args, "--inherit-fd", fmt.Sprintf("fd[%d]:%s", i, s))
}
}
addArgsFromEnv("CRIU_R", &args) // XXX debug
// XXX This doesn't really belong here as our caller should have
// already set up root (including devices) and mounted it.
/*
// remount root for restore
if err := syscall.Mount(c.config.Rootfs, c.config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return nil, err
return err
}
*/
defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH)
cmd := exec.Command(c.criuPath, args...)
cmd.Stdin = process.Stdin
cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr
if err := cmd.Start(); err != nil {
return nil, err
return err
}
r, err := newRestoredProcess(pidfile, cmd)
if err != nil {
return nil, err
return err
}
// TODO: crosbymichael restore previous process information by saving the init process information in
// the container's state file or separate process state files.
if err := c.updateState(r); err != nil {
return nil, err
return err
}
return &Process{
ops: r,
}, nil
process.ops = r
return nil
}
func (c *linuxContainer) updateState(process parentProcess) error {

View File

@ -6,6 +6,7 @@ import (
"syscall"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/utils"
)
@ -20,11 +21,28 @@ var restoreCommand = cli.Command{
if err != nil {
fatal(err)
}
process, err := container.Restore()
process := &libcontainer.Process{
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
//rootuid, err := config.HostUID()
//if err != nil {
//fatal(err)
//}
rootuid := 0 // XXX
tty, err := newTty(context, process, rootuid)
if err != nil {
fatal(err)
}
go handleSignals(process, &tty{})
if err := tty.attach(process); err != nil {
fatal(err)
}
err = container.Restore(process)
if err != nil {
fatal(err)
}
go handleSignals(process, tty)
status, err := process.Wait()
if err != nil {
exitError, ok := err.(*exec.ExitError)

View File

@ -8,6 +8,8 @@ import (
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"github.com/docker/libcontainer/cgroups"
@ -161,6 +163,12 @@ func (p *initProcess) start() error {
if err != nil {
return newSystemError(err)
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
if err = p.saveStdPipes(); err != nil {
return newSystemError(err)
}
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
@ -250,3 +258,19 @@ func (p *initProcess) signal(sig os.Signal) error {
}
return syscall.Kill(p.cmd.Process.Pid, s)
}
// saveStdPipes records what the process's std{in,out,err} descriptors refer
// to by reading the symlinks for fds 0, 1 and 2 under /proc/<pid>/fd and
// storing each link target in p.config.Config.StdFds. These names will be
// removed if/when the container is checkpointed, and they are needed to
// restore the container.
//
// It returns the first os.Readlink error encountered; entries stored before
// the failure are left in place.
func (p *initProcess) saveStdPipes() error {
	fdDir := filepath.Join("/proc", strconv.Itoa(p.pid()), "/fd")
	for fd := 0; fd <= 2; fd++ {
		link, err := os.Readlink(filepath.Join(fdDir, strconv.Itoa(fd)))
		if err != nil {
			return err
		}
		p.config.Config.StdFds[fd] = link
	}
	return nil
}

View File

@ -18,6 +18,9 @@ func newRestoredProcess(pidfile string, criuCommand *exec.Cmd) (*restoredProcess
data []byte
err error
)
// XXX The loop below should be replaced by a wait
// on CRIU to complete. See the comment at the
// beginning of Restore() in "container_linux.go".
for i := 0; i < 20; i++ {
data, err = ioutil.ReadFile(pidfile)
if err == nil {
@ -28,6 +31,13 @@ func newRestoredProcess(pidfile string, criuCommand *exec.Cmd) (*restoredProcess
}
time.Sleep(100 * time.Millisecond)
}
// Did CRIU fail?
if os.IsNotExist(err) {
return nil, err
}
if len(data) == 0 {
return nil, fmt.Errorf("empty pidfile, restore failed")
}
pid, err := strconv.Atoi(string(data))
if err != nil {
return nil, err