Use fifo for create/start
This removes the use of a signal handler and SIGCONT to signal the init process to exec the users process. Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
8fbe19e020
commit
3aacff695d
|
@ -124,8 +124,8 @@ type BaseContainer interface {
|
|||
Start(process *Process) (err error)
|
||||
|
||||
// Run immediatly starts the process inside the conatiner. Returns error if process
|
||||
// fails to start. It does not block waiting for a SIGCONT after start returns but
|
||||
// sends the signal when the process has completed.
|
||||
// fails to start. It does not block waiting for the exec fifo after start returns but
|
||||
// opens the fifo after start returns.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
|
@ -148,4 +148,10 @@ type BaseContainer interface {
|
|||
// errors:
|
||||
// SystemError - System error.
|
||||
Signal(s os.Signal) error
|
||||
|
||||
// Exec signals the container to exec the users process at the end of the init.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Exec() error
|
||||
}
|
||||
|
|
|
@ -29,10 +29,6 @@ import (
|
|||
|
||||
const stdioFdCount = 3
|
||||
|
||||
// InitContinueSignal is used to signal the container init process to
|
||||
// start the users specified process after the container create has finished.
|
||||
const InitContinueSignal = syscall.SIGCONT
|
||||
|
||||
type linuxContainer struct {
|
||||
id string
|
||||
root string
|
||||
|
@ -195,16 +191,39 @@ func (c *linuxContainer) Run(process *Process) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isInit := status == Stopped
|
||||
if err := c.start(process, isInit); err != nil {
|
||||
if err := c.start(process, status == Stopped); err != nil {
|
||||
return err
|
||||
}
|
||||
if isInit {
|
||||
return process.ops.signal(InitContinueSignal)
|
||||
if status == Stopped {
|
||||
return c.exec()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Exec() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
return c.exec()
|
||||
}
|
||||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "open exec fifo for reading")
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := ioutil.ReadAll(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(data) > 0 {
|
||||
os.Remove(path)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
}
|
||||
|
||||
func (c *linuxContainer) start(process *Process, isInit bool) error {
|
||||
parent, err := c.newParentProcess(process, isInit)
|
||||
if err != nil {
|
||||
|
@ -262,17 +281,21 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces
|
|||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe)
|
||||
rootDir, err := os.Open(c.root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe, rootDir)
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
if !doInit {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||||
}
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) {
|
||||
cmd := &exec.Cmd{
|
||||
Path: c.initPath,
|
||||
Args: c.initArgs,
|
||||
|
@ -284,8 +307,10 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2),
|
||||
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
|
@ -295,7 +320,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
return cmd, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||
nsMaps := make(map[configs.NamespaceType]string)
|
||||
for _, ns := range c.config.Namespaces {
|
||||
|
@ -318,10 +343,11 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
|||
process: p,
|
||||
bootstrapData: data,
|
||||
sharePidns: sharePidns,
|
||||
rootDir: rootDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
|
@ -342,6 +368,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
|||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
rootDir: rootDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -360,6 +387,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
|||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
ExecFifoPath: filepath.Join(c.root, execFifoFilename),
|
||||
}
|
||||
if process.NoNewPrivileges != nil {
|
||||
cfg.NoNewPrivileges = *process.NoNewPrivileges
|
||||
|
|
|
@ -7,7 +7,6 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime/debug"
|
||||
|
@ -25,6 +24,7 @@ import (
|
|||
|
||||
const (
|
||||
stateFilename = "state.json"
|
||||
execFifoFilename = "exec.fifo"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -168,6 +168,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := os.MkdirAll(containerRoot, 0700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := syscall.Mkfifo(filepath.Join(containerRoot, execFifoFilename), 0666); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
|
@ -220,13 +223,18 @@ func (l *LinuxFactory) Type() string {
|
|||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
// start the signal handler as soon as we can
|
||||
s := make(chan os.Signal, 1)
|
||||
signal.Notify(s, InitContinueSignal)
|
||||
fdStr := os.Getenv("_LIBCONTAINER_INITPIPE")
|
||||
pipefd, err := strconv.Atoi(fdStr)
|
||||
var pipefd, rootfd int
|
||||
for k, v := range map[string]*int{
|
||||
"_LIBCONTAINER_INITPIPE": &pipefd,
|
||||
"_LIBCONTAINER_STATEDIR": &rootfd,
|
||||
} {
|
||||
s := os.Getenv(k)
|
||||
|
||||
i, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err)
|
||||
return fmt.Errorf("unable to convert %s=%s to int", k, s)
|
||||
}
|
||||
*v = i
|
||||
}
|
||||
var (
|
||||
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||||
|
@ -235,6 +243,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
|||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
|
||||
var i initer
|
||||
defer func() {
|
||||
// We have an error during the initialization of the container's init,
|
||||
|
@ -253,18 +262,16 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
|||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
|
||||
i, err = newContainerInit(it, pipe)
|
||||
i, err = newContainerInit(it, pipe, rootfd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return i.Init(s)
|
||||
return i.Init()
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) loadState(root string) (*State, error) {
|
||||
|
|
|
@ -58,13 +58,14 @@ type initConfig struct {
|
|||
PassedFilesCount int `json:"passed_files_count"`
|
||||
ContainerId string `json:"containerid"`
|
||||
Rlimits []configs.Rlimit `json:"rlimits"`
|
||||
ExecFifoPath string `json:"start_pipe_path"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
Init(s chan os.Signal) error
|
||||
Init() error
|
||||
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
||||
func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
|
@ -82,6 +83,7 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
|||
pipe: pipe,
|
||||
parentPid: syscall.Getppid(),
|
||||
config: config,
|
||||
stateDirFD: stateDirFD,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
|
|
|
@ -42,13 +42,13 @@ func TestMain(m *testing.M) {
|
|||
logrus.SetOutput(os.Stderr)
|
||||
logrus.SetLevel(logrus.InfoLevel)
|
||||
|
||||
factory, err = libcontainer.New(".", libcontainer.Cgroupfs)
|
||||
factory, err = libcontainer.New("/run/libctTests", libcontainer.Cgroupfs)
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if systemd.UseSystemd() {
|
||||
systemdFactory, err = libcontainer.New(".", libcontainer.SystemdCgroups)
|
||||
systemdFactory, err = libcontainer.New("/run/libctTests", libcontainer.SystemdCgroups)
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
os.Exit(1)
|
||||
|
|
|
@ -101,8 +101,8 @@ func TestSeccompPermitWriteConditional(t *testing.T) {
|
|||
Args: []*configs.Arg{
|
||||
{
|
||||
Index: 0,
|
||||
Value: 1,
|
||||
Op: configs.GreaterThan,
|
||||
Value: 2,
|
||||
Op: configs.EqualTo,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -162,8 +162,8 @@ func TestSeccompDenyWriteConditional(t *testing.T) {
|
|||
Args: []*configs.Arg{
|
||||
{
|
||||
Index: 0,
|
||||
Value: 1,
|
||||
Op: configs.GreaterThan,
|
||||
Value: 2,
|
||||
Op: configs.EqualTo,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
|
@ -2,6 +2,8 @@ package integration
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
@ -11,6 +13,7 @@ import (
|
|||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
@ -92,7 +95,9 @@ func copyBusybox(dest string) error {
|
|||
}
|
||||
|
||||
func newContainer(config *configs.Config) (libcontainer.Container, error) {
|
||||
return newContainerWithName("testCT", config)
|
||||
h := md5.New()
|
||||
h.Write([]byte(time.Now().String()))
|
||||
return newContainerWithName(hex.EncodeToString(h.Sum(nil)), config)
|
||||
}
|
||||
|
||||
func newContainerWithName(name string, config *configs.Config) (libcontainer.Container, error) {
|
||||
|
|
|
@ -51,6 +51,7 @@ type setnsProcess struct {
|
|||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
rootDir *os.File
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (string, error) {
|
||||
|
@ -69,6 +70,7 @@ func (p *setnsProcess) start() (err error) {
|
|||
defer p.parentPipe.Close()
|
||||
err = p.cmd.Start()
|
||||
p.childPipe.Close()
|
||||
p.rootDir.Close()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "starting setns process")
|
||||
}
|
||||
|
@ -186,6 +188,7 @@ type initProcess struct {
|
|||
process *Process
|
||||
bootstrapData io.Reader
|
||||
sharePidns bool
|
||||
rootDir *os.File
|
||||
}
|
||||
|
||||
func (p *initProcess) pid() int {
|
||||
|
@ -230,6 +233,7 @@ func (p *initProcess) start() error {
|
|||
err := p.cmd.Start()
|
||||
p.process.ops = p
|
||||
p.childPipe.Close()
|
||||
p.rootDir.Close()
|
||||
if err != nil {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
|
|
|
@ -5,7 +5,6 @@ package libcontainer
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
|
@ -24,7 +23,7 @@ func (l *linuxSetnsInit) getSessionRingName() string {
|
|||
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init(s chan os.Signal) error {
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
// do not inherit the parent's session keyring
|
||||
if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
|
||||
|
@ -50,7 +49,5 @@ func (l *linuxSetnsInit) Init(s chan os.Signal) error {
|
|||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
signal.Stop(s)
|
||||
close(s)
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
||||
|
|
|
@ -7,7 +7,6 @@ import (
|
|||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
|
@ -21,6 +20,7 @@ import (
|
|||
type linuxStandardInit struct {
|
||||
pipe io.ReadWriteCloser
|
||||
parentPid int
|
||||
stateDirFD int
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
|||
// the kernel
|
||||
const PR_SET_NO_NEW_PRIVS = 0x26
|
||||
|
||||
func (l *linuxStandardInit) Init(s chan os.Signal) error {
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
ringname, keepperms, newperms := l.getSessionRingParams()
|
||||
|
||||
|
@ -149,23 +149,27 @@ func (l *linuxStandardInit) Init(s chan os.Signal) error {
|
|||
if syscall.Getppid() != l.parentPid {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
// check for the arg before waiting to make sure it exists and it is returned
|
||||
// as a create time error.
|
||||
name, err := exec.LookPath(l.config.Args[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// close the pipe to signal that we have completed our init.
|
||||
l.pipe.Close()
|
||||
// wait for the fifo to be opened on the other side before
|
||||
// exec'ing the users process.
|
||||
fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := syscall.Write(fd, []byte("0")); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// check for the arg before waiting to make sure it exists and it is returned
|
||||
// as a create time error
|
||||
name, err := exec.LookPath(l.config.Args[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// close the pipe to signal that we have completed our init
|
||||
l.pipe.Close()
|
||||
// wait for the signal to exec the users process
|
||||
<-s
|
||||
// clean up the signal handler
|
||||
signal.Stop(s)
|
||||
close(s)
|
||||
return syscall.Exec(name, l.config.Args[0:], os.Environ())
|
||||
}
|
||||
|
|
2
start.go
2
start.go
|
@ -27,7 +27,7 @@ your host.`,
|
|||
}
|
||||
switch status {
|
||||
case libcontainer.Created:
|
||||
return container.Signal(libcontainer.InitContinueSignal)
|
||||
return container.Exec()
|
||||
case libcontainer.Stopped:
|
||||
return fmt.Errorf("cannot start a container that has run and stopped")
|
||||
case libcontainer.Running:
|
||||
|
|
|
@ -229,7 +229,11 @@ func (r *runner) run(config *specs.Process) (int, error) {
|
|||
return -1, err
|
||||
}
|
||||
handler := newSignalHandler(tty, r.enableSubreaper)
|
||||
if err := r.container.Start(process); err != nil {
|
||||
startFn := r.container.Start
|
||||
if !r.create {
|
||||
startFn = r.container.Run
|
||||
}
|
||||
if err := startFn(process); err != nil {
|
||||
r.destroy()
|
||||
tty.Close()
|
||||
return -1, err
|
||||
|
@ -248,14 +252,6 @@ func (r *runner) run(config *specs.Process) (int, error) {
|
|||
return -1, err
|
||||
}
|
||||
}
|
||||
if !r.create {
|
||||
if err := process.Signal(libcontainer.InitContinueSignal); err != nil {
|
||||
r.terminate(process)
|
||||
r.destroy()
|
||||
tty.Close()
|
||||
return -1, err
|
||||
}
|
||||
}
|
||||
if r.detach || r.create {
|
||||
tty.Close()
|
||||
return 0, nil
|
||||
|
|
Loading…
Reference in New Issue