libcontainer/system/proc: Add Stat_t.State

And Stat_t.PID and Stat_t.Name while we're at it.  Then use the new
.State property in runType to distinguish between running and
zombie/dead processes, since kill(2) does not [1].  With this change
we no longer claim Running status for zombie/dead processes.

I've also removed the kill(2) call from runType.  It was originally
added in 13841ef3 (new-api: return the Running state only if the init
process is alive, 2014-12-23), but we've been accessing
/proc/[pid]/stat since 14e95b2a (Make state detection precise,
2016-07-05, #930), and with the /stat access the kill(2) check is
redundant.

I also don't see much point to the previously-separate
doesInitProcessExist, so I've inlined that logic in runType.

It would be nice to distinguish between "/proc/[pid]/stat doesn't
exist" and errors parsing its contents, but I've skipped that for the
moment.

The Running -> Stopped change in checkpoint_test.go is because the
post-checkpoint process is a zombie, and with this commit zombie
processes are Stopped (and no longer Running).

[1]: https://github.com/opencontainers/runc/pull/1483#issuecomment-307527789

Signed-off-by: W. Trevor King <wking@tremily.us>
This commit is contained in:
W. Trevor King 2017-06-14 16:41:16 -07:00
parent 75d98b26b7
commit 2bea4c897e
4 changed files with 114 additions and 65 deletions

View File

@ -1365,40 +1365,17 @@ func (c *linuxContainer) refreshState() error {
return c.state.transition(&stoppedState{c: c})
}
// doesInitProcessExist checks if the init process is still the same process
// as the initial one, it could happen that the original process has exited
// and a new process has been created with the same pid, in this case, the
// container would already be stopped.
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
stat, err := system.Stat(initPid)
if err != nil {
return false, newSystemErrorWithCausef(err, "getting init process %d status", initPid)
}
if c.initProcessStartTime != stat.StartTime {
return false, nil
}
return true, nil
}
func (c *linuxContainer) runType() (Status, error) {
if c.initProcess == nil {
return Stopped, nil
}
pid := c.initProcess.pid()
// return Running if the init process is alive
if err := unix.Kill(pid, 0); err != nil {
if err == unix.ESRCH {
// It means the process does not exist anymore, could happen when the
// process exited just when we call the function, we should not return
// error in this case.
return Stopped, nil
}
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
stat, err := system.Stat(pid)
if err != nil {
return Stopped, nil
}
// check if the process is still the original init process.
exist, err := c.doesInitProcessExist(pid)
if !exist || err != nil {
return Stopped, err
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
return Stopped, nil
}
// We'll create exec fifo and blocking on it after container is created,
// and delete it after start container.

View File

@ -180,7 +180,7 @@ func testCheckpoint(t *testing.T, userns bool) {
t.Fatal(err)
}
if state != libcontainer.Running {
if state != libcontainer.Stopped {
t.Fatal("Unexpected state checkpoint: ", state)
}

View File

@ -8,9 +8,55 @@ import (
"strings"
)
// State is the status of a process.
type State rune
const ( // Only values for Linux 3.14 and later are listed here
Dead State = 'X'
DiskSleep State = 'D'
Running State = 'R'
Sleeping State = 'S'
Stopped State = 'T'
TracingStop State = 't'
Zombie State = 'Z'
)
// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
switch s {
case Dead:
return "dead"
case DiskSleep:
return "disk sleep"
case Running:
return "running"
case Sleeping:
return "sleeping"
case Stopped:
return "stopped"
case TracingStop:
return "tracing stop"
case Zombie:
return "zombie"
default:
return fmt.Sprintf("unknown (%c)", s)
}
}
// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5).
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// PID is the process ID.
PID uint
// Name is the command run by the process.
Name string
// State is the state of the process.
State State
// StartTime is the number of clock ticks after system boot (since
// Linux 2.6).
StartTime uint64
@ -22,9 +68,7 @@ func Stat(pid int) (stat Stat_t, err error) {
if err != nil {
return stat, err
}
data := string(bytes)
stat.StartTime, err = parseStartTime(data)
return stat, err
return parseStat(string(bytes))
}
// GetProcessStartTime is deprecated. Use Stat(pid) and
@ -37,30 +81,33 @@ func GetProcessStartTime(pid int) (string, error) {
return fmt.Sprintf("%d", stat.StartTime), nil
}
func parseStartTime(stat string) (uint64, error) {
// the starttime is located at pos 22
// from the man page
//
// starttime %llu (was %lu before Linux 2.6)
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
//
// NOTE:
// pos 2 could contain space and is inside `(` and `)`:
// (2) comm %s
// The filename of the executable, in parentheses.
// This is visible whether or not the executable is
// swapped out.
//
// the following is an example:
func parseStat(data string) (stat Stat_t, err error) {
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
i := strings.LastIndex(data, ")")
if i <= 2 || i >= len(data)-1 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
// get parts after last `)`:
s := strings.Split(stat, ")")
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
startTimeString := parts[22-3] // starts at 3 (after the filename pos `2`)
var startTime uint64
fmt.Sscanf(startTimeString, "%d", &startTime)
return startTime, nil
parts := strings.SplitN(data[:i], "(", 2)
if len(parts) != 2 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
stat.Name = parts[1]
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
if err != nil {
return stat, err
}
// parts indexes should be offset by 3 from the field number given
// proc(5), because parts is zero-indexed and we've removed fields
// one (PID) and two (Name) in the paren-split.
parts = strings.Split(data[i+2:], " ")
var state int
fmt.Sscanf(parts[3-3], "%c", &state)
stat.State = State(state)
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
return stat, nil
}

View File

@ -3,18 +3,43 @@ package system
import "testing"
func TestParseStartTime(t *testing.T) {
data := map[string]uint64{
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": 9126532,
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": 9214966,
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": 8722075,
data := map[string]Stat_t{
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": {
PID: 4902,
Name: "gunicorn: maste",
State: 'S',
StartTime: 9126532,
},
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
PID: 9534,
Name: "cat",
State: 'R',
StartTime: 9214966,
},
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": {
PID: 24767,
Name: "irq/44-mei_me",
State: 'S',
StartTime: 8722075,
},
}
for line, startTime := range data {
st, err := parseStartTime(line)
for line, expected := range data {
st, err := parseStat(line)
if err != nil {
t.Fatal(err)
}
if startTime != st {
t.Fatalf("expected start time %q but received %q", startTime, st)
if st.PID != expected.PID {
t.Fatalf("expected PID %q but received %q", expected.PID, st.PID)
}
if st.State != expected.State {
t.Fatalf("expected state %q but received %q", expected.State, st.State)
}
if st.Name != expected.Name {
t.Fatalf("expected name %q but received %q", expected.Name, st.Name)
}
if st.StartTime != expected.StartTime {
t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime)
}
}
}