Merge pull request #2185 from liggitt/exec-race

Fix race checking for process exit and waiting for exec fifo
This commit is contained in:
Michael Crosby 2019-12-26 10:41:07 -05:00 committed by GitHub
commit a88592a634
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 42 deletions

View File

@ -265,22 +265,24 @@ func (c *linuxContainer) Exec() error {
func (c *linuxContainer) exec() error {
path := filepath.Join(c.root, execFifoFilename)
fifoOpen := make(chan struct{})
pid := c.initProcess.pid()
blockingFifoOpenCh := awaitFifoOpen(path)
for {
select {
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
case result := <-blockingFifoOpenCh:
return handleFifoResult(result)
case <-time.After(time.Millisecond * 100):
stat, err := system.Stat(pid)
if err != nil || stat.State == system.Zombie {
// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
if err := handleFifoResult(fifoOpen(path, false)); err != nil {
return errors.New("container process is already dead")
case result := <-awaitFifoOpen(path):
close(fifoOpen)
if result.err != nil {
return result.err
}
f := result.file
defer f.Close()
if err := readFromExecFifo(f); err != nil {
return err
return nil
}
}
return os.Remove(path)
}
}
@ -295,38 +297,39 @@ func readFromExecFifo(execFifo io.Reader) error {
return nil
}
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
isDead := make(chan struct{})
go func() {
for {
select {
case <-exit:
return
case <-time.After(time.Millisecond * 100):
stat, err := system.Stat(pid)
if err != nil || stat.State == system.Zombie {
close(isDead)
return
}
}
}
}()
return isDead
}
func awaitFifoOpen(path string) <-chan openResult {
fifoOpened := make(chan openResult)
go func() {
f, err := os.OpenFile(path, os.O_RDONLY, 0)
if err != nil {
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
return
}
fifoOpened <- openResult{file: f}
result := fifoOpen(path, true)
fifoOpened <- result
}()
return fifoOpened
}
func fifoOpen(path string, block bool) openResult {
flags := os.O_RDONLY
if !block {
flags |= syscall.O_NONBLOCK
}
f, err := os.OpenFile(path, flags, 0)
if err != nil {
return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
}
return openResult{file: f}
}
func handleFifoResult(result openResult) error {
if result.err != nil {
return result.err
}
f := result.file
defer f.Close()
if err := readFromExecFifo(f); err != nil {
return err
}
return os.Remove(f.Name())
}
type openResult struct {
file *os.File
err error

View File

@ -218,13 +218,17 @@ function wait_for_container() {
local attempts=$1
local delay=$2
local cid=$3
# optionally wait for a specific status
local wait_for_status="${4:-}"
local i
for ((i = 0; i < attempts; i++)); do
runc state $cid
if [[ "$status" -eq 0 ]]; then
if [[ "${output}" == *"${wait_for_status}"* ]]; then
return 0
fi
fi
sleep $delay
done
@ -237,13 +241,17 @@ function wait_for_container_inroot() {
local attempts=$1
local delay=$2
local cid=$3
# optionally wait for a specific status
local wait_for_status="${4:-}"
local i
for ((i = 0; i < attempts; i++)); do
ROOT=$4 runc state $cid
if [[ "$status" -eq 0 ]]; then
if [[ "${output}" == *"${wait_for_status}"* ]]; then
return 0
fi
fi
sleep $delay
done

View File

@ -205,7 +205,7 @@ EOF
__runc run test_busybox
) &
wait_for_container 15 1 test_busybox
wait_for_container 15 1 test_busybox running
testcontainer test_busybox running
# Kill the container.