143 lines
3.9 KiB
Go
143 lines
3.9 KiB
Go
// +build linux
|
|
|
|
package libcontainer
|
|
|
|
import (
|
|
"io"
|
|
"os"
|
|
"syscall"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/apparmor"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/keys"
|
|
"github.com/opencontainers/runc/libcontainer/label"
|
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
|
)
|
|
|
|
type linuxStandardInit struct {
|
|
pipe io.ReadWriter
|
|
parentPid int
|
|
config *initConfig
|
|
}
|
|
|
|
// PR_SET_NO_NEW_PRIVS is the prctl option used to turn on the no-new-privileges
// flag for the calling process. It is not exposed by Go's syscall package, so
// it is defined here with the value copied from the kernel.
const PR_SET_NO_NEW_PRIVS = 38 // 0x26
|
|
|
|
func (l *linuxStandardInit) Init() error {
|
|
// do not inherit the parent's session keyring
|
|
sessKeyId, err := keyctl.JoinSessionKeyring("")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// make session keyring searcheable
|
|
// without user ns we need 'UID' search permissions
|
|
// with user ns we need 'other' search permissions
|
|
if err := keyctl.ModKeyringPerm(sessKeyId, 0xffffffff, 0x080008); err != nil {
|
|
return err
|
|
}
|
|
|
|
// join any namespaces via a path to the namespace fd if provided
|
|
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
|
return err
|
|
}
|
|
var console *linuxConsole
|
|
if l.config.Console != "" {
|
|
console = newConsoleFromPath(l.config.Console)
|
|
if err := console.dupStdio(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if _, err := syscall.Setsid(); err != nil {
|
|
return err
|
|
}
|
|
if console != nil {
|
|
if err := system.Setctty(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := setupNetwork(l.config); err != nil {
|
|
return err
|
|
}
|
|
if err := setupRoute(l.config.Config); err != nil {
|
|
return err
|
|
}
|
|
if err := setupRlimits(l.config.Config); err != nil {
|
|
return err
|
|
}
|
|
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
|
return err
|
|
}
|
|
label.Init()
|
|
// InitializeMountNamespace() can be executed only for a new mount namespace
|
|
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
|
if err := setupRootfs(l.config.Config, console); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if hostname := l.config.Config.Hostname; hostname != "" {
|
|
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
|
|
return err
|
|
}
|
|
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
|
|
return err
|
|
}
|
|
|
|
for key, value := range l.config.Config.Sysctl {
|
|
if err := writeSystemProperty(key, value); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, path := range l.config.Config.ReadonlyPaths {
|
|
if err := remountReadonly(path); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, path := range l.config.Config.MaskPaths {
|
|
if err := maskFile(path); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
pdeath, err := system.GetParentDeathSignal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if l.config.Config.NoNewPrivileges {
|
|
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// Tell our parent that we're ready to Execv. This must be done before the
|
|
// Seccomp rules have been applied, because we need to be able to read and
|
|
// write to a socket.
|
|
if err := syncParentReady(l.pipe); err != nil {
|
|
return err
|
|
}
|
|
if l.config.Config.Seccomp != nil {
|
|
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := finalizeNamespace(l.config); err != nil {
|
|
return err
|
|
}
|
|
// finalizeNamespace can change user/group which clears the parent death
|
|
// signal, so we restore it here.
|
|
if err := pdeath.Restore(); err != nil {
|
|
return err
|
|
}
|
|
// compare the parent from the inital start of the init process and make sure that it did not change.
|
|
// if the parent changes that means it died and we were reparened to something else so we should
|
|
// just kill ourself and not cause problems for someone else.
|
|
if syscall.Getppid() != l.parentPid {
|
|
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
|
}
|
|
|
|
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
|
}
|