Merge pull request #2185 from liggitt/exec-race
Fix race checking for process exit and waiting for exec fifo
This commit is contained in:
commit
a88592a634
|
@ -265,22 +265,24 @@ func (c *linuxContainer) Exec() error {
|
|||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
pid := c.initProcess.pid()
|
||||
blockingFifoOpenCh := awaitFifoOpen(path)
|
||||
for {
|
||||
select {
|
||||
case result := <-blockingFifoOpenCh:
|
||||
return handleFifoResult(result)
|
||||
|
||||
fifoOpen := make(chan struct{})
|
||||
select {
|
||||
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
|
||||
return errors.New("container process is already dead")
|
||||
case result := <-awaitFifoOpen(path):
|
||||
close(fifoOpen)
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
|
||||
// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
|
||||
if err := handleFifoResult(fifoOpen(path, false)); err != nil {
|
||||
return errors.New("container process is already dead")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(path)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -295,38 +297,39 @@ func readFromExecFifo(execFifo io.Reader) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
|
||||
isDead := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-exit:
|
||||
return
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
close(isDead)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
return isDead
|
||||
}
|
||||
|
||||
func awaitFifoOpen(path string) <-chan openResult {
|
||||
fifoOpened := make(chan openResult)
|
||||
go func() {
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
return
|
||||
}
|
||||
fifoOpened <- openResult{file: f}
|
||||
result := fifoOpen(path, true)
|
||||
fifoOpened <- result
|
||||
}()
|
||||
return fifoOpened
|
||||
}
|
||||
|
||||
func fifoOpen(path string, block bool) openResult {
|
||||
flags := os.O_RDONLY
|
||||
if !block {
|
||||
flags |= syscall.O_NONBLOCK
|
||||
}
|
||||
f, err := os.OpenFile(path, flags, 0)
|
||||
if err != nil {
|
||||
return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
}
|
||||
return openResult{file: f}
|
||||
}
|
||||
|
||||
func handleFifoResult(result openResult) error {
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(f.Name())
|
||||
}
|
||||
|
||||
type openResult struct {
|
||||
file *os.File
|
||||
err error
|
||||
|
|
|
@ -218,12 +218,16 @@ function wait_for_container() {
|
|||
local attempts=$1
|
||||
local delay=$2
|
||||
local cid=$3
|
||||
# optionally wait for a specific status
|
||||
local wait_for_status="${4:-}"
|
||||
local i
|
||||
|
||||
for ((i = 0; i < attempts; i++)); do
|
||||
runc state $cid
|
||||
if [[ "$status" -eq 0 ]]; then
|
||||
return 0
|
||||
if [[ "${output}" == *"${wait_for_status}"* ]]; then
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
sleep $delay
|
||||
done
|
||||
|
@ -237,12 +241,16 @@ function wait_for_container_inroot() {
|
|||
local attempts=$1
|
||||
local delay=$2
|
||||
local cid=$3
|
||||
# optionally wait for a specific status
|
||||
local wait_for_status="${4:-}"
|
||||
local i
|
||||
|
||||
for ((i = 0; i < attempts; i++)); do
|
||||
ROOT=$4 runc state $cid
|
||||
if [[ "$status" -eq 0 ]]; then
|
||||
return 0
|
||||
if [[ "${output}" == *"${wait_for_status}"* ]]; then
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
sleep $delay
|
||||
done
|
||||
|
|
|
@ -205,7 +205,7 @@ EOF
|
|||
__runc run test_busybox
|
||||
) &
|
||||
|
||||
wait_for_container 15 1 test_busybox
|
||||
wait_for_container 15 1 test_busybox running
|
||||
testcontainer test_busybox running
|
||||
|
||||
# Kill the container.
|
||||
|
|
Loading…
Reference in New Issue