General cleanup of libcontainer

Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
This commit is contained in:
Michael Crosby 2014-02-19 16:40:36 -08:00
parent 0223967711
commit b992a5342f
7 changed files with 111 additions and 166 deletions

View File

@ -1,9 +0,0 @@
package libcontainer
import (
"errors"
)
// ErrInvalidPid is the sentinel error whose message reports that no ns pid
// was found.
var ErrInvalidPid = errors.New("no ns pid found")

View File

@ -1,15 +1,10 @@
package network package network
import ( import (
"errors"
"github.com/dotcloud/docker/pkg/netlink" "github.com/dotcloud/docker/pkg/netlink"
"net" "net"
) )
// ErrNoDefaultRoute is the sentinel error whose message reports that no
// default network route was found.
var ErrNoDefaultRoute = errors.New("no default network route found")
func InterfaceUp(name string) error { func InterfaceUp(name string) error {
iface, err := net.InterfaceByName(name) iface, err := net.InterfaceByName(name)
if err != nil { if err != nil {
@ -46,14 +41,6 @@ func SetInterfaceInNamespacePid(name string, nsPid int) error {
return netlink.NetworkSetNsPid(iface, nsPid) return netlink.NetworkSetNsPid(iface, nsPid)
} }
// SetInterfaceInNamespaceFd looks up the network interface called name and
// passes it, together with fd, to netlink.NetworkSetNsFd.
// The lookup error is returned unchanged when the interface cannot be found.
func SetInterfaceInNamespaceFd(name string, fd int) error {
	link, lookupErr := net.InterfaceByName(name)
	if lookupErr != nil {
		return lookupErr
	}
	return netlink.NetworkSetNsFd(link, fd)
}
func SetInterfaceMaster(name, master string) error { func SetInterfaceMaster(name, master string) error {
iface, err := net.InterfaceByName(name) iface, err := net.InterfaceByName(name)
if err != nil { if err != nil {
@ -89,16 +76,3 @@ func SetMtu(name string, mtu int) error {
} }
return netlink.NetworkSetMTU(iface, mtu) return netlink.NetworkSetMTU(iface, mtu)
} }
// GetDefaultMtu returns the MTU of the interface attached to the first route
// reported by netlink.NetworkGetRoutes that is flagged as the default route.
// It returns -1 with the netlink error when the routes cannot be listed, and
// -1 with ErrNoDefaultRoute when no route is flagged as default.
func GetDefaultMtu() (int, error) {
	table, err := netlink.NetworkGetRoutes()
	if err != nil {
		return -1, err
	}
	for _, route := range table {
		if !route.Default {
			continue
		}
		return route.Iface.MTU, nil
	}
	return -1, ErrNoDefaultRoute
}

View File

@ -8,65 +8,54 @@ import (
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
"github.com/dotcloud/docker/pkg/term" "github.com/dotcloud/docker/pkg/term"
"io" "io"
"log" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
"syscall" "syscall"
) )
func execCommand(container *libcontainer.Container) (pid int, err error) { func execCommand(container *libcontainer.Container) (int, error) {
master, console, err := createMasterAndConsole() master, console, err := createMasterAndConsole()
if err != nil { if err != nil {
return -1, err return -1, err
} }
// we need CLONE_VFORK so we can wait on the child command := exec.Command("nsinit", "init", console)
flag := uintptr(getNamespaceFlags(container.Namespaces) | CLONE_VFORK)
command := exec.Command("nsinit", console)
command.SysProcAttr = &syscall.SysProcAttr{ command.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: flag, Cloneflags: uintptr(getNamespaceFlags(container.Namespaces) | syscall.CLONE_VFORK), // we need CLONE_VFORK so we can wait on the child
} }
inPipe, err := command.StdinPipe() inPipe, err := command.StdinPipe()
if err != nil { if err != nil {
return -1, err return -1, err
} }
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
return -1, err return -1, err
} }
pid = command.Process.Pid if err := writePidFile(command); err != nil {
return -1, err
}
if container.Network != nil { if container.Network != nil {
name1, name2, err := createVethPair() name1, name2, err := createVethPair()
if err != nil { if err != nil {
log.Fatal(err) return -1, err
} }
if err := network.SetInterfaceMaster(name1, container.Network.Bridge); err != nil { if err := network.SetInterfaceMaster(name1, container.Network.Bridge); err != nil {
log.Fatal(err) return -1, err
} }
if err := network.InterfaceUp(name1); err != nil { if err := network.InterfaceUp(name1); err != nil {
log.Fatal(err) return -1, err
} }
if err := network.SetInterfaceInNamespacePid(name2, pid); err != nil { if err := network.SetInterfaceInNamespacePid(name2, command.Process.Pid); err != nil {
log.Fatal(err) return -1, err
} }
fmt.Fprint(inPipe, name2) fmt.Fprint(inPipe, name2)
inPipe.Close() inPipe.Close()
} }
go func() { go io.Copy(os.Stdout, master)
if _, err := io.Copy(os.Stdout, master); err != nil { go io.Copy(master, os.Stdin)
log.Println(err)
}
}()
go func() {
if _, err := io.Copy(master, os.Stdin); err != nil {
log.Println(err)
}
}()
ws, err := term.GetWinsize(os.Stdin.Fd()) ws, err := term.GetWinsize(os.Stdin.Fd())
if err != nil { if err != nil {
@ -83,9 +72,11 @@ func execCommand(container *libcontainer.Container) (pid int, err error) {
defer term.RestoreTerminal(os.Stdin.Fd(), state) defer term.RestoreTerminal(os.Stdin.Fd(), state)
if err := command.Wait(); err != nil { if err := command.Wait(); err != nil {
return pid, err if _, ok := err.(*exec.ExitError); !ok {
return -1, err
}
} }
return pid, nil return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
} }
func createMasterAndConsole() (*os.File, string, error) { func createMasterAndConsole() (*os.File, string, error) {
@ -93,12 +84,10 @@ func createMasterAndConsole() (*os.File, string, error) {
if err != nil { if err != nil {
return nil, "", err return nil, "", err
} }
console, err := system.Ptsname(master) console, err := system.Ptsname(master)
if err != nil { if err != nil {
return nil, "", err return nil, "", err
} }
if err := system.Unlockpt(master); err != nil { if err := system.Unlockpt(master); err != nil {
return nil, "", err return nil, "", err
} }
@ -119,3 +108,7 @@ func createVethPair() (name1 string, name2 string, err error) {
} }
return return
} }
// writePidFile records the pid of an already started command in the file
// ".nspid", relative to the current working directory.
//
// command.Process must be non-nil, i.e. the command must have been started.
// The file is created as a regular data file (0644 before umask); a pid file
// is plain text and must not carry execute bits.
func writePidFile(command *exec.Cmd) error {
	return ioutil.WriteFile(".nspid", []byte(fmt.Sprint(command.Process.Pid)), 0644)
}

View File

@ -1,7 +1,6 @@
package main package main
import ( import (
"encoding/json"
"fmt" "fmt"
"github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer"
"github.com/dotcloud/docker/pkg/libcontainer/capabilities" "github.com/dotcloud/docker/pkg/libcontainer/capabilities"
@ -14,49 +13,21 @@ import (
"syscall" "syscall"
) )
func loadContainer() (*libcontainer.Container, error) { func initCommand(container *libcontainer.Container, console string) error {
f, err := os.Open("container.json")
if err != nil {
return nil, err
}
defer f.Close()
var container *libcontainer.Container
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
return container, nil
}
func main() {
container, err := loadContainer()
if err != nil {
log.Fatal(err)
}
if os.Args[1] == "exec" {
_, err := execCommand(container)
if err != nil {
log.Fatal(err)
}
os.Exit(0)
}
console := os.Args[1]
if err := setLogFile(container); err != nil { if err := setLogFile(container); err != nil {
log.Fatal(err) return err
} }
rootfs, err := resolveRootfs() rootfs, err := resolveRootfs()
if err != nil { if err != nil {
log.Fatal(err) return err
} }
var tempVethName string var tempVethName string
if container.Network != nil { if container.Network != nil {
data, err := ioutil.ReadAll(os.Stdin) data, err := ioutil.ReadAll(os.Stdin)
if err != nil { if err != nil {
log.Fatalf("error reading from stdin %s", err) return fmt.Errorf("error reading from stdin %s", err)
} }
tempVethName = string(data) tempVethName = string(data)
} }
@ -68,48 +39,48 @@ func main() {
slave, err := openTerminal(console, syscall.O_RDWR) slave, err := openTerminal(console, syscall.O_RDWR)
if err != nil { if err != nil {
log.Fatalf("open terminal %s", err) return fmt.Errorf("open terminal %s", err)
} }
if slave.Fd() != 0 { if slave.Fd() != 0 {
log.Fatalf("slave fd should be 0") return fmt.Errorf("slave fd should be 0")
} }
if err := dupSlave(slave); err != nil { if err := dupSlave(slave); err != nil {
log.Fatalf("dup2 slave %s", err) return fmt.Errorf("dup2 slave %s", err)
} }
if _, err := system.Setsid(); err != nil { if _, err := system.Setsid(); err != nil {
log.Fatalf("setsid %s", err) return fmt.Errorf("setsid %s", err)
} }
if err := system.Setctty(); err != nil { if err := system.Setctty(); err != nil {
log.Fatalf("setctty %s", err) return fmt.Errorf("setctty %s", err)
} }
if err := system.ParentDeathSignal(); err != nil { if err := system.ParentDeathSignal(); err != nil {
log.Fatalf("parent deth signal %s", err) return fmt.Errorf("parent deth signal %s", err)
} }
if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil {
log.Fatalf("setup mount namespace %s", err) return fmt.Errorf("setup mount namespace %s", err)
} }
if container.Network != nil { if container.Network != nil {
if err := setupNetworking(container.Network, tempVethName); err != nil { if err := setupNetworking(container.Network, tempVethName); err != nil {
log.Fatalf("setup networking %s", err) return fmt.Errorf("setup networking %s", err)
} }
} }
if err := system.Sethostname(container.ID); err != nil { if err := system.Sethostname(container.ID); err != nil {
log.Fatalf("sethostname %s", err) return fmt.Errorf("sethostname %s", err)
} }
if err := capabilities.DropCapabilities(container); err != nil { if err := capabilities.DropCapabilities(container); err != nil {
log.Fatalf("drop capabilities %s", err) return fmt.Errorf("drop capabilities %s", err)
} }
if err := setupUser(container); err != nil { if err := setupUser(container); err != nil {
log.Fatalf("setup user %s", err) return fmt.Errorf("setup user %s", err)
} }
if container.WorkingDir != "" { if container.WorkingDir != "" {
if err := system.Chdir(container.WorkingDir); err != nil { if err := system.Chdir(container.WorkingDir); err != nil {
log.Fatalf("chdir to %s %s", container.WorkingDir, err) return fmt.Errorf("chdir to %s %s", container.WorkingDir, err)
} }
} }
if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil { if err := system.Exec(container.Command.Args[0], container.Command.Args[0:], container.Command.Env); err != nil {
log.Fatalf("exec %s", err) return fmt.Errorf("exec %s", err)
} }
panic("unreachable") panic("unreachable")
} }

42
nsinit/main.go Normal file
View File

@ -0,0 +1,42 @@
package main
import (
"encoding/json"
"github.com/dotcloud/docker/pkg/libcontainer"
"log"
"os"
)
// main is the nsinit entry point. The first command line argument selects the
// sub-command:
//
//	exec            runs execCommand and exits with the code it returns
//	init <console>  runs initCommand with the given console argument
//
// The container description is loaded through loadContainer before either
// sub-command runs. Any error, a missing argument, or an unknown sub-command
// is reported through log.Fatal, which terminates the process with a non-zero
// status.
func main() {
	// Validate the argument count up front; indexing os.Args blindly would
	// panic with an index-out-of-range error instead of a usable message.
	if len(os.Args) < 2 {
		log.Fatal("usage: nsinit <exec|init> [console]")
	}
	container, err := loadContainer()
	if err != nil {
		log.Fatal(err)
	}
	switch os.Args[1] {
	case "exec":
		exitCode, err := execCommand(container)
		if err != nil {
			log.Fatal(err)
		}
		os.Exit(exitCode)
	case "init":
		if len(os.Args) < 3 {
			log.Fatal("init requires a console argument")
		}
		if err := initCommand(container, os.Args[2]); err != nil {
			log.Fatal(err)
		}
	default:
		// Do not exit 0 silently when the sub-command is not recognised.
		log.Fatalf("unknown command %q", os.Args[1])
	}
}
// loadContainer opens "container.json", relative to the current working
// directory, and decodes its JSON content into a libcontainer.Container.
// The open or decode error is returned unchanged when either step fails.
func loadContainer() (*libcontainer.Container, error) {
	file, openErr := os.Open("container.json")
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	var (
		decoded *libcontainer.Container
		decoder = json.NewDecoder(file)
	)
	if decodeErr := decoder.Decode(&decoded); decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}

View File

@ -3,68 +3,47 @@ package main
import ( import (
"fmt" "fmt"
"github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/system"
"log"
"os" "os"
"path/filepath" "path/filepath"
"syscall" "syscall"
) )
var ( // default mount point options
// default mount point options const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
defaults = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
)
func setupNewMountNamespace(rootfs, console string, readonly bool) error { func setupNewMountNamespace(rootfs, console string, readonly bool) error {
if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { if err := system.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mounting / as slave %s", err) return fmt.Errorf("mounting / as slave %s", err)
} }
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mouting %s as bind %s", rootfs, err) return fmt.Errorf("mouting %s as bind %s", rootfs, err)
} }
if readonly { if readonly {
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil { if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
return fmt.Errorf("mounting %s as readonly %s", rootfs, err) return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
} }
} }
if err := mountSystem(rootfs); err != nil { if err := mountSystem(rootfs); err != nil {
return fmt.Errorf("mount system %s", err) return fmt.Errorf("mount system %s", err)
} }
if err := copyDevNodes(rootfs); err != nil { if err := copyDevNodes(rootfs); err != nil {
return fmt.Errorf("copy dev nodes %s", err) return fmt.Errorf("copy dev nodes %s", err)
} }
ptmx := filepath.Join(rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if err := setupDev(rootfs); err != nil { if err := setupDev(rootfs); err != nil {
return err return err
} }
if err := setupPtmx(rootfs, console); err != nil {
if err := setupConsole(rootfs, console); err != nil {
return err return err
} }
if err := system.Chdir(rootfs); err != nil { if err := system.Chdir(rootfs); err != nil {
return fmt.Errorf("chdir into %s %s", rootfs, err) return fmt.Errorf("chdir into %s %s", rootfs, err)
} }
if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { if err := system.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return fmt.Errorf("mount move %s into / %s", rootfs, err) return fmt.Errorf("mount move %s into / %s", rootfs, err)
} }
if err := system.Chroot("."); err != nil { if err := system.Chroot("."); err != nil {
return fmt.Errorf("chroot . %s", err) return fmt.Errorf("chroot . %s", err)
} }
if err := system.Chdir("/"); err != nil { if err := system.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err) return fmt.Errorf("chdir / %s", err)
} }
@ -90,13 +69,10 @@ func copyDevNodes(rootfs string) error {
if err != nil { if err != nil {
return err return err
} }
var ( var (
dest = filepath.Join(rootfs, "dev", node) dest = filepath.Join(rootfs, "dev", node)
st = stat.Sys().(*syscall.Stat_t) st = stat.Sys().(*syscall.Stat_t)
) )
log.Printf("copy %s to %s %d\n", node, dest, st.Rdev)
if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) { if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
return fmt.Errorf("copy %s %s", node, err) return fmt.Errorf("copy %s %s", node, err)
} }
@ -134,24 +110,22 @@ func setupConsole(rootfs, console string) error {
if err != nil { if err != nil {
return fmt.Errorf("stat console %s %s", console, err) return fmt.Errorf("stat console %s %s", console, err)
} }
st := stat.Sys().(*syscall.Stat_t) var (
st = stat.Sys().(*syscall.Stat_t)
dest := filepath.Join(rootfs, "dev/console") dest = filepath.Join(rootfs, "dev/console")
)
if err := os.Remove(dest); err != nil && !os.IsNotExist(err) { if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("remove %s %s", dest, err) return fmt.Errorf("remove %s %s", dest, err)
} }
if err := os.Chmod(console, 0600); err != nil { if err := os.Chmod(console, 0600); err != nil {
return err return err
} }
if err := os.Chown(console, 0, 0); err != nil { if err := os.Chown(console, 0, 0); err != nil {
return err return err
} }
if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil { if err := system.Mknod(dest, (st.Mode&^07777)|0600, int(st.Rdev)); err != nil {
return fmt.Errorf("mknod %s %s", dest, err) return fmt.Errorf("mknod %s %s", dest, err)
} }
if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil { if err := system.Mount(console, dest, "bind", syscall.MS_BIND, ""); err != nil {
return fmt.Errorf("bind %s to %s %s", console, dest, err) return fmt.Errorf("bind %s to %s %s", console, dest, err)
} }
@ -168,10 +142,10 @@ func mountSystem(rootfs string) error {
flags int flags int
data string data string
}{ }{
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaults}, {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaults}, {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"}, {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: "mode=755"},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaults, data: "mode=1777"}, {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: "mode=1777"},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: "newinstance,ptmxmode=0666,mode=620,gid=5"},
{source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, data: "mode=755"}, {source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, data: "mode=755"},
} { } {
@ -189,7 +163,7 @@ func remountProc() error {
if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil { if err := system.Unmount("/proc", syscall.MNT_DETACH); err != nil {
return err return err
} }
if err := system.Mount("proc", "/proc", "proc", uintptr(defaults), ""); err != nil { if err := system.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
return err return err
} }
return nil return nil
@ -201,9 +175,20 @@ func remountSys() error {
return err return err
} }
} else { } else {
if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaults), ""); err != nil { if err := system.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil {
return err return err
} }
} }
return nil return nil
} }
// setupPtmx replaces <rootfs>/dev/ptmx with a symlink pointing at "pts/ptmx"
// and then hands over to setupConsole for the console device.
// An existing dev/ptmx entry is removed first; a missing entry is not an error.
func setupPtmx(rootfs, console string) error {
	link := filepath.Join(rootfs, "dev/ptmx")

	// Clear any previous entry so the symlink can be created.
	removeErr := os.Remove(link)
	if removeErr != nil && !os.IsNotExist(removeErr) {
		return removeErr
	}

	linkErr := os.Symlink("pts/ptmx", link)
	if linkErr != nil {
		return fmt.Errorf("symlink dev ptmx %s", linkErr)
	}
	return setupConsole(rootfs, console)
}

View File

@ -2,27 +2,16 @@ package main
import ( import (
"github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer"
) "syscall"
const (
SIGCHLD = 0x14
CLONE_VFORK = 0x00004000
CLONE_NEWNS = 0x00020000
CLONE_NEWUTS = 0x04000000
CLONE_NEWIPC = 0x08000000
CLONE_NEWUSER = 0x10000000
CLONE_NEWPID = 0x20000000
CLONE_NEWNET = 0x40000000
) )
var namespaceMap = map[libcontainer.Namespace]int{ var namespaceMap = map[libcontainer.Namespace]int{
"": 0, libcontainer.CLONE_NEWNS: syscall.CLONE_NEWNS,
libcontainer.CLONE_NEWNS: CLONE_NEWNS, libcontainer.CLONE_NEWUTS: syscall.CLONE_NEWUTS,
libcontainer.CLONE_NEWUTS: CLONE_NEWUTS, libcontainer.CLONE_NEWIPC: syscall.CLONE_NEWIPC,
libcontainer.CLONE_NEWIPC: CLONE_NEWIPC, libcontainer.CLONE_NEWUSER: syscall.CLONE_NEWUSER,
libcontainer.CLONE_NEWUSER: CLONE_NEWUSER, libcontainer.CLONE_NEWPID: syscall.CLONE_NEWPID,
libcontainer.CLONE_NEWPID: CLONE_NEWPID, libcontainer.CLONE_NEWNET: syscall.CLONE_NEWNET,
libcontainer.CLONE_NEWNET: CLONE_NEWNET,
} }
// getNamespaceFlags parses the container's Namespaces options to set the correct // getNamespaceFlags parses the container's Namespaces options to set the correct