Some C/R bug fixes and changes in the new libcontainer and nsinit.
This is work in progress, integrating C/R support from https://github.com/SaiedKazemi/docker/tree/cr into the new libcontainer and nsinit. Signed-off-by: Saied Kazemi <saied@google.com>
This commit is contained in:
parent
406f32a774
commit
9212f68293
|
@ -118,7 +118,7 @@ type Container interface {
|
|||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Restore() (*Process, error)
|
||||
Restore(*Process) error
|
||||
|
||||
// Destroys the container after killing all running processes.
|
||||
//
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
|
@ -261,11 +262,23 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
|||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
}
|
||||
|
||||
// addArgsFromEnv appends the whitespace-separated words found in the
// environment variable evar to *args and then prints the resulting
// argument list as a debug trace. If evar is unset or empty, *args is
// left unchanged and only the trace is printed.
//
// XXX debug support, remove when debugging done.
func addArgsFromEnv(evar string, args *[]string) {
	if extra := os.Getenv(evar); extra != "" {
		*args = append(*args, strings.Fields(extra)...)
	}
	// Emit the trace on stderr so that it does not get mixed into
	// whatever the caller writes to stdout.
	fmt.Fprintf(os.Stderr, ">>> criu %v\n", *args)
}
|
||||
|
||||
func (c *linuxContainer) Checkpoint() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
dir := filepath.Join(c.root, "checkpoint")
|
||||
if err := os.Mkdir(dir, 0655); err != nil {
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the checkpoint directory may already exist.
|
||||
if err := os.Mkdir(dir, 0655); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
args := []string{
|
||||
|
@ -282,16 +295,27 @@ func (c *linuxContainer) Checkpoint() error {
|
|||
"--ext-mount-map", fmt.Sprintf("%s:%s", m.Destination, m.Destination))
|
||||
}
|
||||
}
|
||||
addArgsFromEnv("CRIU_C", &args) // XXX debug
|
||||
if err := exec.Command(c.criuPath, args...).Run(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Restore() (*Process, error) {
|
||||
func (c *linuxContainer) Restore(process *Process) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
||||
pidfile := filepath.Join(c.root, "restoredpid")
|
||||
// Make sure pidfile doesn't already exist from a
|
||||
// previous restore. Otherwise, CRIU will fail.
|
||||
if err := os.Remove(pidfile); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
// XXX We should do the restore in detached mode (-d).
|
||||
// To do this, we need an "init" process that executes
|
||||
// CRIU and waits for it, reaping its children, and
|
||||
// waiting for the container.
|
||||
args := []string{
|
||||
"restore", "-v4",
|
||||
"-D", filepath.Join(c.root, "checkpoint"),
|
||||
|
@ -306,27 +330,43 @@ func (c *linuxContainer) Restore() (*Process, error) {
|
|||
fmt.Sprintf("%s:%s", m.Destination, m.Source))
|
||||
}
|
||||
}
|
||||
// Pipes that were previously set up for std{in,out,err}
|
||||
// were removed after checkpoint. Use the new ones.
|
||||
for i := 0; i < 3; i++ {
|
||||
if s := c.config.StdFds[i]; strings.Contains(s, "pipe:") {
|
||||
args = append(args, "--inherit-fd", fmt.Sprintf("fd[%d]:%s", i, s))
|
||||
}
|
||||
}
|
||||
addArgsFromEnv("CRIU_R", &args) // XXX debug
|
||||
|
||||
// XXX This doesn't really belong here as our caller should have
|
||||
// already set up root (including devices) and mounted it.
|
||||
/*
|
||||
// remount root for restore
|
||||
if err := syscall.Mount(c.config.Rootfs, c.config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
*/
|
||||
|
||||
defer syscall.Unmount(c.config.Rootfs, syscall.MNT_DETACH)
|
||||
cmd := exec.Command(c.criuPath, args...)
|
||||
cmd.Stdin = process.Stdin
|
||||
cmd.Stdout = process.Stdout
|
||||
cmd.Stderr = process.Stderr
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
r, err := newRestoredProcess(pidfile, cmd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
// TODO: crosbymichael restore previous process information by saving the init process information in
|
||||
// the container's state file or separate process state files.
|
||||
if err := c.updateState(r); err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
return &Process{
|
||||
ops: r,
|
||||
}, nil
|
||||
process.ops = r
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) updateState(process parentProcess) error {
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"syscall"
|
||||
|
||||
"github.com/codegangsta/cli"
|
||||
"github.com/docker/libcontainer"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
)
|
||||
|
||||
|
@ -20,11 +21,28 @@ var restoreCommand = cli.Command{
|
|||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
process, err := container.Restore()
|
||||
process := &libcontainer.Process{
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
}
|
||||
//rootuid, err := config.HostUID()
|
||||
//if err != nil {
|
||||
//fatal(err)
|
||||
//}
|
||||
rootuid := 0 // XXX
|
||||
tty, err := newTty(context, process, rootuid)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
go handleSignals(process, &tty{})
|
||||
if err := tty.attach(process); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
err = container.Restore(process)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
go handleSignals(process, tty)
|
||||
status, err := process.Wait()
|
||||
if err != nil {
|
||||
exitError, ok := err.(*exec.ExitError)
|
||||
|
|
|
@ -8,6 +8,8 @@ import (
|
|||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
|
@ -161,6 +163,12 @@ func (p *initProcess) start() error {
|
|||
if err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Save the standard descriptor names before the container process
|
||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||
// we won't know at checkpoint time which file descriptor to look up.
|
||||
if err = p.saveStdPipes(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
|
@ -250,3 +258,19 @@ func (p *initProcess) signal(sig os.Signal) error {
|
|||
}
|
||||
return syscall.Kill(p.cmd.Process.Pid, s)
|
||||
}
|
||||
|
||||
// Save process's std{in,out,err} file names as these will be
|
||||
// removed if/when the container is checkpointed. We will need
|
||||
// this info to restore the container.
|
||||
func (p *initProcess) saveStdPipes() error {
|
||||
dirPath := filepath.Join("/proc", strconv.Itoa(p.pid()), "/fd")
|
||||
for i := 0; i < 3; i++ {
|
||||
f := filepath.Join(dirPath, strconv.Itoa(i))
|
||||
target, err := os.Readlink(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.config.Config.StdFds[i] = target
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -18,6 +18,9 @@ func newRestoredProcess(pidfile string, criuCommand *exec.Cmd) (*restoredProcess
|
|||
data []byte
|
||||
err error
|
||||
)
|
||||
// XXX The loop below should be replaced by a wait
|
||||
// on CRIU to complete. See the comment at the
|
||||
// beginning of Restore() in container_linux.go.
|
||||
for i := 0; i < 20; i++ {
|
||||
data, err = ioutil.ReadFile(pidfile)
|
||||
if err == nil {
|
||||
|
@ -28,6 +31,13 @@ func newRestoredProcess(pidfile string, criuCommand *exec.Cmd) (*restoredProcess
|
|||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
// Did CRIU fail?
|
||||
if os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
if len(data) == 0 {
|
||||
return nil, fmt.Errorf("empty pidfile, restore failed")
|
||||
}
|
||||
pid, err := strconv.Atoi(string(data))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
Loading…
Reference in New Issue