Avoid race when opening exec fifo

When starting a container with `runc start` or `runc run`, the stub
process (runc[2:INIT]) opens a fifo for writing. Its parent runc process
will open the same fifo for reading. In this way, they synchronize.

If the stub process exits at the wrong time, the parent runc process
will block forever.

This can happen when racing 2 runc operations against each other: `runc
run/start`, and `runc delete`. It could also happen for other reasons,
e.g. the kernel's OOM killer may select the stub process.

This commit resolves this race by racing the opening of the exec fifo
from the runc parent process against the stub process exiting. If the
stub process exits before we open the fifo, we return an error.

Another solution is to wait on the stub process. However, it seems it
would require more refactoring to avoid calling wait multiple times on
the same process, which is an error.

Signed-off-by: Craig Furman <cfurman@pivotal.io>
This commit is contained in:
Will Martin 2018-01-22 17:03:02 +00:00 committed by Craig Furman
parent ab4a819167
commit 8d3e6c9826
1 changed files with 60 additions and 9 deletions

View File

@ -5,6 +5,7 @@ package libcontainer
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
@ -267,20 +268,70 @@ func (c *linuxContainer) Exec() error {
func (c *linuxContainer) exec() error {
path := filepath.Join(c.root, execFifoFilename)
f, err := os.OpenFile(path, os.O_RDONLY, 0)
if err != nil {
return newSystemErrorWithCause(err, "open exec fifo for reading")
fifoOpen := make(chan struct{})
select {
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
return errors.New("container process is already dead")
case result := <-awaitFifoOpen(path):
close(fifoOpen)
if result.err != nil {
return result.err
}
f := result.file
defer f.Close()
if err := readFromExecFifo(f); err != nil {
return err
}
return os.Remove(path)
}
defer f.Close()
data, err := ioutil.ReadAll(f)
}
func readFromExecFifo(execFifo io.Reader) error {
data, err := ioutil.ReadAll(execFifo)
if err != nil {
return err
}
if len(data) > 0 {
os.Remove(path)
return nil
if len(data) <= 0 {
return fmt.Errorf("cannot start an already running container")
}
return fmt.Errorf("cannot start an already running container")
return nil
}
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
isDead := make(chan struct{})
go func() {
for {
select {
case <-exit:
return
case <-time.After(time.Millisecond * 100):
stat, err := system.Stat(pid)
if err != nil || stat.State == system.Zombie {
close(isDead)
return
}
}
}
}()
return isDead
}
func awaitFifoOpen(path string) <-chan openResult {
fifoOpened := make(chan openResult)
go func() {
f, err := os.OpenFile(path, os.O_RDONLY, 0)
if err != nil {
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
}
fifoOpened <- openResult{file: f}
}()
return fifoOpened
}
type openResult struct {
file *os.File
err error
}
func (c *linuxContainer) start(process *Process, isInit bool) error {