402 lines
9.2 KiB
Go
402 lines
9.2 KiB
Go
// +build linux
|
|
|
|
package main
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"syscall"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/codegangsta/cli"
|
|
"github.com/opencontainers/runc/libcontainer"
|
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/specs/specs-go"
|
|
)
|
|
|
|
// wildcard is the Major/Minor value used by the device rules in this file to
// mean "any number".
const wildcard = -1

// errEmptyID is the error getContainer returns when no container id was
// supplied on the command line.
var errEmptyID = errors.New("container id cannot be empty")
|
|
|
|
var allowedDevices = []*configs.Device{
|
|
// allow mknod for any device
|
|
{
|
|
Type: 'c',
|
|
Major: wildcard,
|
|
Minor: wildcard,
|
|
Permissions: "m",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'b',
|
|
Major: wildcard,
|
|
Minor: wildcard,
|
|
Permissions: "m",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'c',
|
|
Path: "/dev/null",
|
|
Major: 1,
|
|
Minor: 3,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'c',
|
|
Path: "/dev/random",
|
|
Major: 1,
|
|
Minor: 8,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'c',
|
|
Path: "/dev/full",
|
|
Major: 1,
|
|
Minor: 7,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'c',
|
|
Path: "/dev/tty",
|
|
Major: 5,
|
|
Minor: 0,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'c',
|
|
Path: "/dev/zero",
|
|
Major: 1,
|
|
Minor: 5,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Type: 'c',
|
|
Path: "/dev/urandom",
|
|
Major: 1,
|
|
Minor: 9,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Path: "/dev/console",
|
|
Type: 'c',
|
|
Major: 5,
|
|
Minor: 1,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
// /dev/pts/ - pts namespaces are "coming soon"
|
|
{
|
|
Path: "",
|
|
Type: 'c',
|
|
Major: 136,
|
|
Minor: wildcard,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
{
|
|
Path: "",
|
|
Type: 'c',
|
|
Major: 5,
|
|
Minor: 2,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
// tuntap
|
|
{
|
|
Path: "",
|
|
Type: 'c',
|
|
Major: 10,
|
|
Minor: 200,
|
|
Permissions: "rwm",
|
|
Allow: true,
|
|
},
|
|
}
|
|
|
|
var (
	// maskedPaths lists /proc entries that, going by the name, are meant to
	// be hidden from the container.
	// NOTE(review): the consumer of this list is not in this part of the
	// file — confirm how the masking is applied.
	maskedPaths = []string{
		"/proc/kcore",
		"/proc/latency_stats",
		"/proc/timer_stats",
		"/proc/sched_debug",
	}

	// readonlyPaths lists /proc entries that, going by the name, are meant
	// to be exposed read-only inside the container.
	// NOTE(review): the consumer of this list is not in this part of the
	// file — confirm how the read-only setting is applied.
	readonlyPaths = []string{
		"/proc/asound",
		"/proc/bus",
		"/proc/fs",
		"/proc/irq",
		"/proc/sys",
		"/proc/sysrq-trigger",
	}
)
|
|
|
|
// container is a package-level libcontainer.Container handle.
// NOTE(review): nothing in this part of the file assigns or reads it —
// confirm whether it is still used elsewhere in the package.
var container libcontainer.Container
|
|
|
|
// loadFactory returns the configured factory instance for execing containers.
|
|
func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
|
root := context.GlobalString("root")
|
|
abs, err := filepath.Abs(root)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cgroupManager := libcontainer.Cgroupfs
|
|
if context.GlobalBool("systemd-cgroup") {
|
|
if systemd.UseSystemd() {
|
|
cgroupManager = libcontainer.SystemdCgroups
|
|
} else {
|
|
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available.")
|
|
}
|
|
}
|
|
return libcontainer.New(abs, cgroupManager, func(l *libcontainer.LinuxFactory) error {
|
|
l.CriuPath = context.GlobalString("criu")
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// getContainer returns the specified container instance by loading it from state
|
|
// with the default factory.
|
|
func getContainer(context *cli.Context) (libcontainer.Container, error) {
|
|
id := context.Args().First()
|
|
if id == "" {
|
|
return nil, errEmptyID
|
|
}
|
|
factory, err := loadFactory(context)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return factory.Load(id)
|
|
}
|
|
|
|
// fatal prints the error's details if it is a libcontainer specific error type
|
|
// then exits the program with an exit status of 1.
|
|
func fatal(err error) {
|
|
// make sure the error is written to the logger
|
|
logrus.Error(err)
|
|
fmt.Fprintln(os.Stderr, err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
func fatalf(t string, v ...interface{}) {
|
|
fatal(fmt.Errorf(t, v...))
|
|
}
|
|
|
|
func getDefaultImagePath(context *cli.Context) string {
|
|
cwd, err := os.Getwd()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return filepath.Join(cwd, "checkpoint")
|
|
}
|
|
|
|
// newProcess returns a new libcontainer Process with the arguments from the
|
|
// spec and stdio from the current process.
|
|
func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
|
lp := &libcontainer.Process{
|
|
Args: p.Args,
|
|
Env: p.Env,
|
|
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
|
|
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
|
|
Cwd: p.Cwd,
|
|
Capabilities: p.Capabilities,
|
|
Label: p.SelinuxLabel,
|
|
NoNewPrivileges: &p.NoNewPrivileges,
|
|
AppArmorProfile: p.ApparmorProfile,
|
|
}
|
|
for _, rlimit := range p.Rlimits {
|
|
rl, err := createLibContainerRlimit(rlimit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lp.Rlimits = append(lp.Rlimits, rl)
|
|
}
|
|
return lp, nil
|
|
}
|
|
|
|
func dupStdio(process *libcontainer.Process, rootuid int) error {
|
|
process.Stdin = os.Stdin
|
|
process.Stdout = os.Stdout
|
|
process.Stderr = os.Stderr
|
|
for _, fd := range []uintptr{
|
|
os.Stdin.Fd(),
|
|
os.Stdout.Fd(),
|
|
os.Stderr.Fd(),
|
|
} {
|
|
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// If systemd is supporting sd_notify protocol, this function will add support
|
|
// for sd_notify protocol from within the container.
|
|
func setupSdNotify(spec *specs.Spec, notifySocket string) {
|
|
spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}})
|
|
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
|
|
}
|
|
|
|
func destroy(container libcontainer.Container) {
|
|
if err := container.Destroy(); err != nil {
|
|
logrus.Error(err)
|
|
}
|
|
}
|
|
|
|
// setupIO sets the proper IO on the process depending on the configuration
|
|
// If there is a nil error then there must be a non nil tty returned
|
|
func setupIO(process *libcontainer.Process, rootuid int, console string, createTTY, detach bool) (*tty, error) {
|
|
// detach and createTty will not work unless a console path is passed
|
|
// so error out here before changing any terminal settings
|
|
if createTTY && detach && console == "" {
|
|
return nil, fmt.Errorf("cannot allocate tty if runc will detach")
|
|
}
|
|
if createTTY {
|
|
return createTty(process, rootuid, console)
|
|
}
|
|
if detach {
|
|
if err := dupStdio(process, rootuid); err != nil {
|
|
return nil, err
|
|
}
|
|
return &tty{}, nil
|
|
}
|
|
return createStdioPipes(process, rootuid)
|
|
}
|
|
|
|
// createPidFile creates a file with the processes pid inside it atomically
|
|
// it creates a temp file with the paths filename + '.' infront of it
|
|
// then renames the file
|
|
func createPidFile(path string, process *libcontainer.Process) error {
|
|
pid, err := process.Pid()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var (
|
|
tmpDir = filepath.Dir(path)
|
|
tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path)))
|
|
)
|
|
f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
_, err = fmt.Fprintf(f, "%d", pid)
|
|
f.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.Rename(tmpName, path)
|
|
}
|
|
|
|
func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
|
|
config, err := createLibcontainerConfig(id, context.GlobalBool("systemd-cgroup"), spec)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if _, err := os.Stat(config.Rootfs); err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, fmt.Errorf("rootfs (%q) does not exist", config.Rootfs)
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
factory, err := loadFactory(context)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return factory.Create(id, config)
|
|
}
|
|
|
|
type runner struct {
|
|
enableSubreaper bool
|
|
shouldDestroy bool
|
|
detach bool
|
|
listenFDs []*os.File
|
|
pidFile string
|
|
console string
|
|
container libcontainer.Container
|
|
}
|
|
|
|
func (r *runner) run(config *specs.Process) (int, error) {
|
|
process, err := newProcess(*config)
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
if len(r.listenFDs) > 0 {
|
|
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
|
|
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
|
|
}
|
|
rootuid, err := r.container.Config().HostUID()
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
tty, err := setupIO(process, rootuid, r.console, config.Terminal, r.detach)
|
|
if err != nil {
|
|
r.destroy()
|
|
return -1, err
|
|
}
|
|
handler := newSignalHandler(tty, r.enableSubreaper)
|
|
if err := r.container.Start(process); err != nil {
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
if err := tty.ClosePostStart(); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
if r.pidFile != "" {
|
|
if err := createPidFile(r.pidFile, process); err != nil {
|
|
r.terminate(process)
|
|
r.destroy()
|
|
tty.Close()
|
|
return -1, err
|
|
}
|
|
}
|
|
if r.detach {
|
|
tty.Close()
|
|
return 0, nil
|
|
}
|
|
status, err := handler.forward(process)
|
|
if err != nil {
|
|
r.terminate(process)
|
|
}
|
|
r.destroy()
|
|
tty.Close()
|
|
return status, err
|
|
}
|
|
|
|
func (r *runner) destroy() {
|
|
if r.shouldDestroy {
|
|
destroy(r.container)
|
|
}
|
|
}
|
|
|
|
func (r *runner) terminate(p *libcontainer.Process) {
|
|
p.Signal(syscall.SIGKILL)
|
|
p.Wait()
|
|
}
|
|
|
|
func validateProcessSpec(spec *specs.Process) error {
|
|
if spec.Cwd == "" {
|
|
return fmt.Errorf("Cwd property must not be empty")
|
|
}
|
|
if !filepath.IsAbs(spec.Cwd) {
|
|
return fmt.Errorf("Cwd must be an absolute path")
|
|
}
|
|
if len(spec.Args) == 0 {
|
|
return fmt.Errorf("args must not be empty")
|
|
}
|
|
return nil
|
|
}
|