diff --git a/console/console.go b/console/console.go index 519b5644..5355d63d 100644 --- a/console/console.go +++ b/console/console.go @@ -7,22 +7,24 @@ import ( "os" "path/filepath" "syscall" + "unsafe" "github.com/docker/libcontainer/label" - "github.com/dotcloud/docker/pkg/system" ) // Setup initializes the proper /dev/console inside the rootfs path func Setup(rootfs, consolePath, mountLabel string) error { - oldMask := system.Umask(0000) - defer system.Umask(oldMask) + oldMask := syscall.Umask(0000) + defer syscall.Umask(oldMask) if err := os.Chmod(consolePath, 0600); err != nil { return err } + if err := os.Chown(consolePath, 0, 0); err != nil { return err } + if err := label.SetFileLabel(consolePath, mountLabel); err != nil { return fmt.Errorf("set file label %s %s", consolePath, err) } @@ -33,26 +35,91 @@ func Setup(rootfs, consolePath, mountLabel string) error { if err != nil && !os.IsExist(err) { return fmt.Errorf("create %s %s", dest, err) } + if f != nil { f.Close() } - if err := system.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil { + if err := syscall.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil { return fmt.Errorf("bind %s to %s %s", consolePath, dest, err) } + return nil } func OpenAndDup(consolePath string) error { - slave, err := system.OpenTerminal(consolePath, syscall.O_RDWR) + slave, err := OpenTerminal(consolePath, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) } - if err := system.Dup2(slave.Fd(), 0); err != nil { + + if err := syscall.Dup2(int(slave.Fd()), 0); err != nil { return err } - if err := system.Dup2(slave.Fd(), 1); err != nil { + + if err := syscall.Dup2(int(slave.Fd()), 1); err != nil { return err } - return system.Dup2(slave.Fd(), 2) + + return syscall.Dup2(int(slave.Fd()), 2) +} + +// Unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. 
+// Unlockpt should be called before opening the slave side of a pseudoterminal. +func Unlockpt(f *os.File) error { + var u int + + return Ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +// Ptsname retrieves the name of the first available pts for the given master. +func Ptsname(f *os.File) (string, error) { + var n int + + if err := Ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { + return "", err + } + + return fmt.Sprintf("/dev/pts/%d", n), nil +} + +// CreateMasterAndConsole will open /dev/ptmx on the host and retrieve the +// pts name for use as the pty slave inside the container +func CreateMasterAndConsole() (*os.File, string, error) { + master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + console, err := Ptsname(master) + if err != nil { + return nil, "", err + } + if err := Unlockpt(master); err != nil { + return nil, "", err + } + return master, console, nil +} + +// OpenPtmx opens /dev/ptmx, i.e. the PTY master. +func OpenPtmx() (*os.File, error) { + // O_NOCTTY and O_CLOEXEC are not present in os package so we use the syscall's one for all. 
+ return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) +} + +// OpenTerminal is a clone of os.OpenFile without the O_CLOEXEC +// used to open the pty slave inside the container namespace +func OpenTerminal(name string, flag int) (*os.File, error) { + r, e := syscall.Open(name, flag, 0) + if e != nil { + return nil, &os.PathError{"open", name, e} + } + return os.NewFile(uintptr(r), name), nil +} + +func Ioctl(fd uintptr, flag, data uintptr) error { + if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + + return nil } diff --git a/namespaces/exec.go b/namespaces/exec.go index 9053f632..bf297c08 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -11,6 +11,7 @@ import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" + consolePkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/syncpipe" "github.com/dotcloud/docker/pkg/system" @@ -36,7 +37,7 @@ func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string defer syncPipe.Close() if container.Tty { - master, console, err = system.CreateMasterAndConsole() + master, console, err = consolePkg.CreateMasterAndConsole() if err != nil { return -1, err } @@ -110,6 +111,7 @@ func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string return -1, err } } + return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } @@ -145,7 +147,11 @@ func DefaultCreateCommand(container *libcontainer.Config, console, rootfs, dataP command.Dir = rootfs command.Env = append(os.Environ(), env...) 
- system.SetCloneFlags(command, uintptr(GetNamespaceFlags(container.Namespaces))) + if command.SysProcAttr == nil { + command.SysProcAttr = &syscall.SysProcAttr{} + } + command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces)) + command.SysProcAttr.Pdeathsig = syscall.SIGKILL command.ExtraFiles = []*os.File{pipe} @@ -157,11 +163,14 @@ func DefaultCreateCommand(container *libcontainer.Config, console, rootfs, dataP func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgroup, error) { if container.Cgroups != nil { c := container.Cgroups + if systemd.UseSystemd() { return systemd.Apply(c, nspid) } + return fs.Apply(c, nspid) } + return nil, nil } diff --git a/namespaces/execin.go b/namespaces/execin.go index 3e79f4cd..5311adf2 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -9,7 +9,7 @@ import ( "github.com/docker/libcontainer" "github.com/docker/libcontainer/label" - "github.com/dotcloud/docker/pkg/system" + "github.com/docker/libcontainer/system" ) // ExecIn uses an existing pid and joins the pid's namespaces with the new command. 
diff --git a/network/netns.go b/network/netns.go index 64544476..1ff75064 100644 --- a/network/netns.go +++ b/network/netns.go @@ -7,7 +7,7 @@ import ( "os" "syscall" - "github.com/dotcloud/docker/pkg/system" + "github.com/docker/libcontainer/system" ) // crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace @@ -23,12 +23,15 @@ func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error { if networkState.NsPath == "" { return fmt.Errorf("nspath does is not specified in NetworkState") } + f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0) if err != nil { return fmt.Errorf("failed get network namespace fd: %v", err) } + if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil { return fmt.Errorf("failed to setns current network namespace: %v", err) } + return nil } diff --git a/system/linux.go b/system/linux.go new file mode 100644 index 00000000..511b951a --- /dev/null +++ b/system/linux.go @@ -0,0 +1,17 @@ +// +build linux + +package system + +import ( + "os/exec" + "syscall" +) + +func Execv(cmd string, args []string, env []string) error { + name, err := exec.LookPath(cmd) + if err != nil { + return err + } + + return syscall.Exec(name, args, env) +} diff --git a/system/proc.go b/system/proc.go new file mode 100644 index 00000000..37808a29 --- /dev/null +++ b/system/proc.go @@ -0,0 +1,27 @@ +package system + +import ( + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +// look in /proc to find the process start time so that we can verify +// that this pid has started after ourself +func GetProcessStartTime(pid int) (string, error) { + data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return "", err + } + + parts := strings.Split(string(data), " ") + // the starttime is located at pos 22 + // from the man page + // + // starttime %llu (was %lu before Linux 2.6) + // (22) The time the process 
started after system boot. In kernels before Linux 2.6, this + // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks + // (divide by sysconf(_SC_CLK_TCK)). + return parts[22-1], nil // starts at 1 +} diff --git a/system/setns_linux.go b/system/setns_linux.go new file mode 100644 index 00000000..a0a259e1 --- /dev/null +++ b/system/setns_linux.go @@ -0,0 +1,29 @@ +package system + +import ( + "fmt" + "runtime" + "syscall" +) + +// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 +// +// We need different setns values for the different platforms and arch +// We are declaring the macro here because the SETNS syscall does not exist in the stdlib +var setNsMap = map[string]uintptr{ + "linux/amd64": 308, +} + +func Setns(fd uintptr, flags uintptr) error { + ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + if !exists { + return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) + } + + _, _, err := syscall.RawSyscall(ns, fd, flags, 0) + if err != 0 { + return err + } + + return nil +}