2015-02-07 04:48:57 +08:00
|
|
|
// +build linux
|
|
|
|
|
|
|
|
package libcontainer
|
|
|
|
|
|
|
|
import (
|
2016-02-23 04:36:12 +08:00
|
|
|
"fmt"
|
2015-03-06 06:33:13 +08:00
|
|
|
"os"
|
2016-05-14 07:54:16 +08:00
|
|
|
"os/exec"
|
2015-02-07 04:48:57 +08:00
|
|
|
"syscall"
|
|
|
|
|
2015-06-22 10:29:59 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
2016-01-21 07:12:25 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/keys"
|
2015-06-22 10:29:59 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
2015-06-30 02:12:54 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
2015-06-22 10:29:59 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
2015-02-07 04:48:57 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
type linuxStandardInit struct {
|
2016-06-03 23:29:34 +08:00
|
|
|
pipe *os.File
|
2016-06-07 04:15:18 +08:00
|
|
|
parentPid int
|
|
|
|
stateDirFD int
|
|
|
|
config *initConfig
|
2015-02-07 04:48:57 +08:00
|
|
|
}
|
|
|
|
|
2016-02-23 04:36:12 +08:00
|
|
|
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
|
|
|
var newperms uint32
|
|
|
|
|
|
|
|
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
|
|
|
|
// with user ns we need 'other' search permissions
|
|
|
|
newperms = 0x8
|
|
|
|
} else {
|
|
|
|
// without user ns we need 'UID' search permissions
|
|
|
|
newperms = 0x80000
|
|
|
|
}
|
|
|
|
|
|
|
|
// create a unique per session container name that we can
|
|
|
|
// join in setns; however, other containers can also join it
|
|
|
|
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
|
|
|
|
}
|
|
|
|
|
2016-02-16 19:55:26 +08:00
|
|
|
// PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value
|
|
|
|
// from the kernel.
|
|
|
|
const PR_SET_NO_NEW_PRIVS = 0x26 // 38 decimal; passed as the option argument to system.Prctl in Init
|
|
|
|
|
2016-06-07 04:15:18 +08:00
|
|
|
func (l *linuxStandardInit) Init() error {
|
2016-06-04 02:53:07 +08:00
|
|
|
if !l.config.Config.NoNewKeyring {
|
|
|
|
ringname, keepperms, newperms := l.getSessionRingParams()
|
2016-02-23 04:36:12 +08:00
|
|
|
|
2016-06-04 02:53:07 +08:00
|
|
|
// do not inherit the parent's session keyring
|
2016-07-25 06:41:57 +08:00
|
|
|
sessKeyId, err := keys.JoinSessionKeyring(ringname)
|
2016-06-04 02:53:07 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// make session keyring searcheable
|
2016-07-25 06:41:57 +08:00
|
|
|
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
|
2016-06-04 02:53:07 +08:00
|
|
|
return err
|
|
|
|
}
|
2016-01-21 07:12:25 +08:00
|
|
|
}
|
|
|
|
|
2015-02-11 03:51:45 +08:00
|
|
|
if err := setupNetwork(l.config); err != nil {
|
2015-02-07 04:48:57 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := setupRoute(l.config.Config); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-03-02 02:19:07 +08:00
|
|
|
|
2015-02-07 04:48:57 +08:00
|
|
|
label.Init()
|
2016-06-03 23:29:34 +08:00
|
|
|
|
|
|
|
// prepareRootfs() can be executed only for a new mount namespace.
|
2015-02-07 04:48:57 +08:00
|
|
|
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
2016-06-03 23:29:34 +08:00
|
|
|
if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
|
2015-02-07 04:48:57 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2016-06-03 23:29:34 +08:00
|
|
|
|
|
|
|
// Set up the console. This has to be done *before* we finalize the rootfs,
|
|
|
|
// but *after* we've given the user the chance to set up all of the mounts
|
|
|
|
// they wanted.
|
|
|
|
if l.config.CreateConsole {
|
|
|
|
if err := setupConsole(l.pipe, l.config, true); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := system.Setctty(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finish the rootfs setup.
|
|
|
|
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
|
|
|
if err := finalizeRootfs(l.config.Config); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-07 04:48:57 +08:00
|
|
|
if hostname := l.config.Config.Hostname; hostname != "" {
|
|
|
|
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2016-03-04 02:44:33 +08:00
|
|
|
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
2015-02-07 04:48:57 +08:00
|
|
|
return err
|
|
|
|
}
|
2016-03-04 02:44:33 +08:00
|
|
|
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
2015-02-07 04:48:57 +08:00
|
|
|
return err
|
|
|
|
}
|
2015-04-23 10:17:30 +08:00
|
|
|
|
2015-07-07 07:18:08 +08:00
|
|
|
for key, value := range l.config.Config.Sysctl {
|
2015-04-23 10:17:30 +08:00
|
|
|
if err := writeSystemProperty(key, value); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2015-02-13 08:23:05 +08:00
|
|
|
for _, path := range l.config.Config.ReadonlyPaths {
|
|
|
|
if err := remountReadonly(path); err != nil {
|
|
|
|
return err
|
2015-02-10 06:42:21 +08:00
|
|
|
}
|
2015-02-13 08:23:05 +08:00
|
|
|
}
|
|
|
|
for _, path := range l.config.Config.MaskPaths {
|
2016-09-23 15:02:10 +08:00
|
|
|
if err := maskPath(path); err != nil {
|
2015-02-07 04:48:57 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pdeath, err := system.GetParentDeathSignal()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-03-04 02:44:33 +08:00
|
|
|
if l.config.NoNewPrivileges {
|
2016-02-16 19:55:26 +08:00
|
|
|
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2015-12-17 17:16:34 +08:00
|
|
|
// Tell our parent that we're ready to Execv. This must be done before the
|
|
|
|
// Seccomp rules have been applied, because we need to be able to read and
|
|
|
|
// write to a socket.
|
|
|
|
if err := syncParentReady(l.pipe); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-04-27 23:15:58 +08:00
|
|
|
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
|
|
|
// do this before dropping capabilities; otherwise do it as late as possible
|
|
|
|
// just before execve so as few syscalls take place after it as possible.
|
|
|
|
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
|
2015-06-30 02:12:54 +08:00
|
|
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2015-02-10 05:11:57 +08:00
|
|
|
if err := finalizeNamespace(l.config); err != nil {
|
2015-02-07 04:48:57 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
// finalizeNamespace can change user/group which clears the parent death
|
|
|
|
// signal, so we restore it here.
|
|
|
|
if err := pdeath.Restore(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-10-12 07:22:48 +08:00
|
|
|
// compare the parent from the initial start of the init process and make sure that it did not change.
|
2016-07-13 23:32:38 +08:00
|
|
|
// if the parent changes that means it died and we were reparented to something else so we should
|
2015-04-03 04:55:55 +08:00
|
|
|
// just kill ourself and not cause problems for someone else.
|
|
|
|
if syscall.Getppid() != l.parentPid {
|
2015-02-07 04:48:57 +08:00
|
|
|
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
|
|
|
}
|
2016-05-14 07:54:16 +08:00
|
|
|
// check for the arg before waiting to make sure it exists and it is returned
|
2016-06-07 04:15:18 +08:00
|
|
|
// as a create time error.
|
2016-05-14 07:54:16 +08:00
|
|
|
name, err := exec.LookPath(l.config.Args[0])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-06-07 04:15:18 +08:00
|
|
|
// close the pipe to signal that we have completed our init.
|
2016-05-14 07:54:16 +08:00
|
|
|
l.pipe.Close()
|
2016-06-07 04:15:18 +08:00
|
|
|
// wait for the fifo to be opened on the other side before
|
|
|
|
// exec'ing the users process.
|
|
|
|
fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
|
|
|
|
if err != nil {
|
2016-06-14 08:21:28 +08:00
|
|
|
return newSystemErrorWithCause(err, "openat exec fifo")
|
2016-06-07 04:15:18 +08:00
|
|
|
}
|
|
|
|
if _, err := syscall.Write(fd, []byte("0")); err != nil {
|
2016-06-14 08:21:28 +08:00
|
|
|
return newSystemErrorWithCause(err, "write 0 exec fifo")
|
2016-06-07 04:15:18 +08:00
|
|
|
}
|
|
|
|
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
|
|
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
2016-06-14 08:21:28 +08:00
|
|
|
return newSystemErrorWithCause(err, "init seccomp")
|
2016-06-07 04:15:18 +08:00
|
|
|
}
|
|
|
|
}
|
2016-06-14 08:21:28 +08:00
|
|
|
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
|
|
|
return newSystemErrorWithCause(err, "exec user process")
|
|
|
|
}
|
|
|
|
return nil
|
2015-02-07 04:48:57 +08:00
|
|
|
}
|