Avoid race when opening exec fifo
When starting a container with `runc start` or `runc run`, the stub process (runc[2:INIT]) opens a fifo for writing. Its parent runc process will open the same fifo for reading. In this way, they synchronize. If the stub process exits at the wrong time, the parent runc process will block forever. This can happen when racing 2 runc operations against each other: `runc run/start`, and `runc delete`. It could also happen for other reasons, e.g. the kernel's OOM killer may select the stub process. This commit resolves this race by racing the opening of the exec fifo from the runc parent process against the stub process exiting. If the stub process exits before we open the fifo, we return an error. Another solution is to wait on the stub process. However, it seems it would require more refactoring to avoid calling wait multiple times on the same process, which is an error. Signed-off-by: Craig Furman <cfurman@pivotal.io>
This commit is contained in:
parent
ab4a819167
commit
8d3e6c9826
|
@ -5,6 +5,7 @@ package libcontainer
|
|||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
@ -267,20 +268,70 @@ func (c *linuxContainer) Exec() error {
|
|||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "open exec fifo for reading")
|
||||
|
||||
fifoOpen := make(chan struct{})
|
||||
select {
|
||||
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
|
||||
return errors.New("container process is already dead")
|
||||
case result := <-awaitFifoOpen(path):
|
||||
close(fifoOpen)
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(path)
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := ioutil.ReadAll(f)
|
||||
}
|
||||
|
||||
func readFromExecFifo(execFifo io.Reader) error {
|
||||
data, err := ioutil.ReadAll(execFifo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(data) > 0 {
|
||||
os.Remove(path)
|
||||
return nil
|
||||
if len(data) <= 0 {
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
}
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
return nil
|
||||
}
|
||||
|
||||
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
|
||||
isDead := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-exit:
|
||||
return
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
close(isDead)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
return isDead
|
||||
}
|
||||
|
||||
func awaitFifoOpen(path string) <-chan openResult {
|
||||
fifoOpened := make(chan openResult)
|
||||
go func() {
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
}
|
||||
fifoOpened <- openResult{file: f}
|
||||
}()
|
||||
return fifoOpened
|
||||
}
|
||||
|
||||
type openResult struct {
|
||||
file *os.File
|
||||
err error
|
||||
}
|
||||
|
||||
func (c *linuxContainer) start(process *Process, isInit bool) error {
|
||||
|
|
Loading…
Reference in New Issue