diff --git a/errors.go b/errors.go deleted file mode 100644 index c6964ee8..00000000 --- a/errors.go +++ /dev/null @@ -1,9 +0,0 @@ -package libcontainer - -import ( - "errors" -) - -var ( - ErrInvalidPid = errors.New("no ns pid found") -) diff --git a/network/network.go b/network/network.go index 31c5d324..8c7a4b61 100644 --- a/network/network.go +++ b/network/network.go @@ -1,15 +1,10 @@ package network import ( - "errors" "github.com/dotcloud/docker/pkg/netlink" "net" ) -var ( - ErrNoDefaultRoute = errors.New("no default network route found") -) - func InterfaceUp(name string) error { iface, err := net.InterfaceByName(name) if err != nil { @@ -46,14 +41,6 @@ func SetInterfaceInNamespacePid(name string, nsPid int) error { return netlink.NetworkSetNsPid(iface, nsPid) } -func SetInterfaceInNamespaceFd(name string, fd int) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkSetNsFd(iface, fd) -} - func SetInterfaceMaster(name, master string) error { iface, err := net.InterfaceByName(name) if err != nil { @@ -89,16 +76,3 @@ func SetMtu(name string, mtu int) error { } return netlink.NetworkSetMTU(iface, mtu) } - -func GetDefaultMtu() (int, error) { - routes, err := netlink.NetworkGetRoutes() - if err != nil { - return -1, err - } - for _, r := range routes { - if r.Default { - return r.Iface.MTU, nil - } - } - return -1, ErrNoDefaultRoute -} diff --git a/nsinit/exec.go b/nsinit/exec.go index e0324074..4ac070db 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -8,65 +8,54 @@ import ( "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/term" "io" - "log" + "io/ioutil" "os" "os/exec" "syscall" ) -func execCommand(container *libcontainer.Container) (pid int, err error) { +func execCommand(container *libcontainer.Container) (int, error) { master, console, err := createMasterAndConsole() if err != nil { return -1, err } - // we need CLONE_VFORK so we can wait on the child - flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK) - - command := exec.Command("nsinit", console) + command := exec.Command("nsinit", "init", console) command.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: flag, + Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child } inPipe, err := command.StdinPipe() if err != nil { return -1, err } - if err := command.Start(); err != nil { return -1, err } - pid = command.Process.Pid + if err := writePidFile(command); err != nil { + return -1, err + } if container.Network != nil { name1, name2, err := createVethPair() if err != nil { - log.Fatal(err) + return -1, err } if err := network.SetInterfaceMaster(name1, container.Network.Bridge); err != nil { - log.Fatal(err) + return -1, err } if err := network.InterfaceUp(name1); err != nil { - log.Fatal(err) + return -1, err } - if err := network.SetInterfaceInNamespacePid(name2, pid); err != nil { - log.Fatal(err) + if err := network.SetInterfaceInNamespacePid(name2, command.Process.Pid); err != nil { + return -1, err } fmt.Fprint(inPipe, name2) inPipe.Close() } - go func() { - if _, err := io.Copy(os.Stdout, master); err != nil { - log.Println(err) - } - }() - - go func() { - if _, err := io.Copy(master, os.Stdin); err != nil { - log.Println(err) - } - }() + go io.Copy(os.Stdout, master) + go io.Copy(master, os.Stdin) ws, err := term.GetWinsize(os.Stdin.Fd()) if err != nil { @@ -83,9 +72,11 @@ func execCommand(container *libcontainer.Container) (pid int, err error) { defer term.RestoreTerminal(os.Stdin.Fd(), state) if err := command.Wait(); err != nil { - return pid, err + if _, ok := err.(*exec.ExitError); !ok { + return -1, err + } } - return pid, nil + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } func createMasterAndConsole() (*os.File, string, error) { @@ -93,12 +84,10 @@ func createMasterAndConsole() (*os.File, string, error) { if err != nil { return nil, "", err } - console, err := system.Ptsname(master) if err != nil { return nil, "", err } - if err := system.Unlockpt(master); err != nil { return nil, "", err } @@ -119,3 +108,7 @@ func createVethPair() (name1 string, name2 string, err error) { } return } + +func writePidFile(command *exec.Cmd) error { + return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0655) +} diff --git a/nsinit/init.go b/nsinit/init.go index fe8fd4b4..16a30812 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -1,7 +1,6 @@ package main import ( - "encoding/json" "fmt" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/capabilities" @@ -14,49 +13,21 @@ import ( "syscall" ) -func loadContainer() (*libcontainer.Container, error) { - f, err := os.Open("container.json") - if err != nil { - return nil, err - } - defer f.Close() - - var container *libcontainer.Container - if err := json.NewDecoder(f).Decode(&container); err != nil { - return nil, err - } - return container, nil -} - -func main() { - container, err := loadContainer() - if err != nil { - log.Fatal(err) - } - - if os.Args[1] == "exec" { - _, err := execCommand(container) - if err != nil { - log.Fatal(err) - } - os.Exit(0) - } - console := os.Args[1] - +func initCommand(container *libcontainer.Container, console string) error { if err := setLogFile(container); err != nil { - log.Fatal(err) + return err } rootfs, err := resolveRootfs() if err != nil { - log.Fatal(err) + return err } var tempVethName string if container.Network != nil { data, err := ioutil.ReadAll(os.Stdin) if err != nil { - log.Fatalf("error reading from stdin %s", err) + return fmt.Errorf("error reading from stdin %s", err) } tempVethName = string(data) } @@ -68,48 +39,48 @@ func main() { slave, err := openTerminal(console, syscall.O_RDWR) if err != nil { - log.Fatalf("open terminal %s", err) + return fmt.Errorf("open terminal %s", err) } if slave.Fd() != 0 { - log.Fatalf("slave fd should be 0") + return fmt.Errorf("slave fd should be 0") } if err := dupSlave(slave); err != nil { - log.Fatalf("dup2 slave %s", err) + return fmt.Errorf("dup2 slave %s", err) } if _, err := system.Setsid(); err != nil { - log.Fatalf("setsid %s", err) + return fmt.Errorf("setsid %s", err) } if err := system.Setctty(); err != nil { - log.Fatalf("setctty %s", err) + return fmt.Errorf("setctty %s", err) } if err := system.ParentDeathSignal(); err != nil { - log.Fatalf("parent deth signal %s", err) + return fmt.Errorf("parent deth signal %s", err) } if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { - log.Fatalf("setup mount namespace %s", err) + return fmt.Errorf("setup mount namespace %s", err) } if container.Network != nil { if err := setupNetworking(container.Network, tempVethName); err != nil { - log.Fatalf("setup networking %s", err) + return fmt.Errorf("setup networking %s", err) } } if err := system.Sethostname(container.ID); err != nil { - log.Fatalf("sethostname %s", err) + return fmt.Errorf("sethostname %s", err) } if err := capabilities.DropCapabilities(container); err != nil { - log.Fatalf("drop capabilities %s", err) + return fmt.Errorf("drop capabilities %s", err) } if err := setupUser(container); err != nil { - log.Fatalf("setup user %s", err) + return fmt.Errorf("setup user %s", err) } if container.WorkingDir != "" { if err := system.Chdir(container.WorkingDir); err != nil { - log.Fatalf("chdir to %s %s", container.WorkingDir, err) + return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) } } if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { - log.Fatalf("exec %s", err) + return fmt.Errorf("exec %s", err) } panic("unreachable") } diff --git a/nsinit/main.go b/nsinit/main.go new file mode 100644 index 00000000..47abcce0 --- /dev/null +++ b/nsinit/main.go @@ -0,0 +1,42 @@ +package main + +import ( + "encoding/json" + "github.com/dotcloud/docker/pkg/libcontainer" + "log" + "os" +) + +func main() { + container, err := loadContainer() + if err != nil { + log.Fatal(err) + } + + switch os.Args[1] { + case "exec": + exitCode, err := execCommand(container) + if err != nil { + log.Fatal(err) + } + os.Exit(exitCode) + case "init": + if err := initCommand(container, os.Args[2]); err != nil { + log.Fatal(err) + } + } +} + +func loadContainer() (*libcontainer.Container, error) { + f, err := os.Open("container.json") + if err != nil { + return nil, err + } + defer f.Close() + + var container *libcontainer.Container + if err := json.NewDecoder(f).Decode(&container); err != nil { + return nil, err + } + return container, nil +} diff --git a/nsinit/mount.go b/nsinit/mount.go index f9ee9696..13ee13e0 100644 --- a/nsinit/mount.go +++ b/nsinit/mount.go @@ -3,68 +3,47 @@ package main import ( "fmt" "github.com/dotcloud/docker/pkg/system" - "log" "os" "path/filepath" "syscall" ) -var ( - // default mount point options - defaults = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV -) +// default mount point options +const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV func setupNewMountNamespace(rootfs, console string, readonly bool) error { if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting / as slave %s", err) } - if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mouting %s as bind %s", rootfs, err) } - if readonly { if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { return fmt.Errorf("mounting %s as readonly %s", rootfs, err) } } - if err := mountSystem(rootfs); err != nil { return fmt.Errorf("mount system %s", err) } - if err := copyDevNodes(rootfs); err != nil { return fmt.Errorf("copy dev nodes %s", err) } - - ptmx := filepath.Join(rootfs, "dev/ptmx") - if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { - return err - } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { - return fmt.Errorf("symlink dev ptmx %s", err) - } - if err := setupDev(rootfs); err != nil { return err } - - if err := setupConsole(rootfs, console); err != nil { + if err := setupPtmx(rootfs, console); err != nil { return err } - if err := system.Chdir(rootfs); err != nil { return fmt.Errorf("chdir into %s %s", rootfs, err) } - if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { return fmt.Errorf("mount move %s into / %s", rootfs, err) } - if err := system.Chroot("."); err != nil { return fmt.Errorf("chroot . %s", err) } - if err := system.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } @@ -90,13 +69,10 @@ func copyDevNodes(rootfs string) error { if err != nil { return err } - var ( dest = filepath.Join(rootfs, "dev", node) st = stat.Sys().(*syscall.Stat_t) ) - - log.Printf("copy %s to %s %d\n", node, dest, st.Rdev) if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { return fmt.Errorf("copy %s %s", node, err) } @@ -134,24 +110,22 @@ func setupConsole(rootfs, console string) error { if err != nil { return fmt.Errorf("stat console %s %s", console, err) } - st := stat.Sys().(*syscall.Stat_t) - - dest := filepath.Join(rootfs, "dev/console") + var ( + st = stat.Sys().(*syscall.Stat_t) + dest = filepath.Join(rootfs, "dev/console") + ) if err := os.Remove(dest); err != nil && !os.IsNotExist(err) { return fmt.Errorf("remove %s %s", dest, err) } - if err := os.Chmod(console, 0600); err != nil { return err } if err := os.Chown(console, 0, 0); err != nil { return err } - if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { return fmt.Errorf("mknod %s %s", dest, err) } - if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { return fmt.Errorf("bind %s to %s %s", console, dest, err) } @@ -168,10 +142,10 @@ func mountSystem(rootfs string) error { flags int data string }{ - {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaults}, - {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaults}, + {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, + {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"}, - {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaults, data: "mode=1777"}, + {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: "mode=1777"}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"}, {source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, data: "mode=755"}, } { @@ -189,7 +163,7 @@ func remountProc() error { if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil { return err } - if err := system.Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { + if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil { return err } return nil @@ -201,9 +175,20 @@ func remountSys() error { return err } } else { - if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { + if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil { return err } } return nil } + +func setupPtmx(rootfs, console string) error { + ptmx := filepath.Join(rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + return setupConsole(rootfs, console) +} diff --git a/nsinit/ns_linux.go b/nsinit/ns_linux.go index b54bc2b9..2392ffd7 100644 --- a/nsinit/ns_linux.go +++ b/nsinit/ns_linux.go @@ -2,27 +2,16 @@ package main import ( "github.com/dotcloud/docker/pkg/libcontainer" -) - -const ( - SIGCHLD = 0x14 - CLONE_VFORK = 0x00004000 - CLONE_NEWNS = 0x00020000 - CLONE_NEWUTS = 0x04000000 - CLONE_NEWIPC = 0x08000000 - CLONE_NEWUSER = 0x10000000 - CLONE_NEWPID = 0x20000000 - CLONE_NEWNET = 0x40000000 + "syscall" ) var namespaceMap = map[libcontainer.Namespace]int{ - "": 0, - libcontainer.CLONE_NEWNS: CLONE_NEWNS, - libcontainer.CLONE_NEWUTS: CLONE_NEWUTS, - libcontainer.CLONE_NEWIPC: CLONE_NEWIPC, - libcontainer.CLONE_NEWUSER: CLONE_NEWUSER, - libcontainer.CLONE_NEWPID: CLONE_NEWPID, - libcontainer.CLONE_NEWNET: CLONE_NEWNET, + libcontainer.CLONE_NEWNS: syscall.CLONE_NEWNS, + libcontainer.CLONE_NEWUTS: syscall.CLONE_NEWUTS, + libcontainer.CLONE_NEWIPC: syscall.CLONE_NEWIPC, + libcontainer.CLONE_NEWUSER: syscall.CLONE_NEWUSER, + libcontainer.CLONE_NEWPID: syscall.CLONE_NEWPID, + libcontainer.CLONE_NEWNET: syscall.CLONE_NEWNET, } // getNamespaceFlags parses the container's Namespaces options to set the correct