libcontainer/system/proc: Add Stat_t.State
And Stat_t.PID and Stat_t.Name while we're at it. Then use the new .State property in runType to distinguish between running and zombie/dead processes, since kill(2) does not [1]. With this change we no longer claim Running status for zombie/dead processes. I've also removed the kill(2) call from runType. It was originally added in13841ef3
(new-api: return the Running state only if the init process is alive, 2014-12-23), but we've been accessing /proc/[pid]/stat since14e95b2a
(Make state detection precise, 2016-07-05, #930), and with the /stat access the kill(2) check is redundant. I also don't see much point to the previously-separate doesInitProcessExist, so I've inlined that logic in runType. It would be nice to distinguish between "/proc/[pid]/stat doesn't exist" and errors parsing its contents, but I've skipped that for the moment. The Running -> Stopped change in checkpoint_test.go is because the post-checkpoint process is a zombie, and with this commit zombie processes are Stopped (and no longer Running). [1]: https://github.com/opencontainers/runc/pull/1483#issuecomment-307527789 Signed-off-by: W. Trevor King <wking@tremily.us>
This commit is contained in:
parent
75d98b26b7
commit
2bea4c897e
|
@ -1365,40 +1365,17 @@ func (c *linuxContainer) refreshState() error {
|
|||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
// doesInitProcessExist checks if the init process is still the same process
|
||||
// as the initial one, it could happen that the original process has exited
|
||||
// and a new process has been created with the same pid, in this case, the
|
||||
// container would already be stopped.
|
||||
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
|
||||
stat, err := system.Stat(initPid)
|
||||
if err != nil {
|
||||
return false, newSystemErrorWithCausef(err, "getting init process %d status", initPid)
|
||||
}
|
||||
if c.initProcessStartTime != stat.StartTime {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) runType() (Status, error) {
|
||||
if c.initProcess == nil {
|
||||
return Stopped, nil
|
||||
}
|
||||
pid := c.initProcess.pid()
|
||||
// return Running if the init process is alive
|
||||
if err := unix.Kill(pid, 0); err != nil {
|
||||
if err == unix.ESRCH {
|
||||
// It means the process does not exist anymore, could happen when the
|
||||
// process exited just when we call the function, we should not return
|
||||
// error in this case.
|
||||
return Stopped, nil
|
||||
}
|
||||
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return Stopped, nil
|
||||
}
|
||||
// check if the process is still the original init process.
|
||||
exist, err := c.doesInitProcessExist(pid)
|
||||
if !exist || err != nil {
|
||||
return Stopped, err
|
||||
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
|
||||
return Stopped, nil
|
||||
}
|
||||
// We'll create exec fifo and blocking on it after container is created,
|
||||
// and delete it after start container.
|
||||
|
|
|
@ -180,7 +180,7 @@ func testCheckpoint(t *testing.T, userns bool) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if state != libcontainer.Running {
|
||||
if state != libcontainer.Stopped {
|
||||
t.Fatal("Unexpected state checkpoint: ", state)
|
||||
}
|
||||
|
||||
|
|
|
@ -8,9 +8,55 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
// State is the status of a process.
|
||||
type State rune
|
||||
|
||||
const ( // Only values for Linux 3.14 and later are listed here
|
||||
Dead State = 'X'
|
||||
DiskSleep State = 'D'
|
||||
Running State = 'R'
|
||||
Sleeping State = 'S'
|
||||
Stopped State = 'T'
|
||||
TracingStop State = 't'
|
||||
Zombie State = 'Z'
|
||||
)
|
||||
|
||||
// String forms of the state from proc(5)'s documentation for
|
||||
// /proc/[pid]/status' "State" field.
|
||||
func (s State) String() string {
|
||||
switch s {
|
||||
case Dead:
|
||||
return "dead"
|
||||
case DiskSleep:
|
||||
return "disk sleep"
|
||||
case Running:
|
||||
return "running"
|
||||
case Sleeping:
|
||||
return "sleeping"
|
||||
case Stopped:
|
||||
return "stopped"
|
||||
case TracingStop:
|
||||
return "tracing stop"
|
||||
case Zombie:
|
||||
return "zombie"
|
||||
default:
|
||||
return fmt.Sprintf("unknown (%c)", s)
|
||||
}
|
||||
}
|
||||
|
||||
// Stat_t represents the information from /proc/[pid]/stat, as
|
||||
// described in proc(5).
|
||||
// described in proc(5) with names based on the /proc/[pid]/status
|
||||
// fields.
|
||||
type Stat_t struct {
|
||||
// PID is the process ID.
|
||||
PID uint
|
||||
|
||||
// Name is the command run by the process.
|
||||
Name string
|
||||
|
||||
// State is the state of the process.
|
||||
State State
|
||||
|
||||
// StartTime is the number of clock ticks after system boot (since
|
||||
// Linux 2.6).
|
||||
StartTime uint64
|
||||
|
@ -22,9 +68,7 @@ func Stat(pid int) (stat Stat_t, err error) {
|
|||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
data := string(bytes)
|
||||
stat.StartTime, err = parseStartTime(data)
|
||||
return stat, err
|
||||
return parseStat(string(bytes))
|
||||
}
|
||||
|
||||
// GetProcessStartTime is deprecated. Use Stat(pid) and
|
||||
|
@ -37,30 +81,33 @@ func GetProcessStartTime(pid int) (string, error) {
|
|||
return fmt.Sprintf("%d", stat.StartTime), nil
|
||||
}
|
||||
|
||||
func parseStartTime(stat string) (uint64, error) {
|
||||
// the starttime is located at pos 22
|
||||
// from the man page
|
||||
//
|
||||
// starttime %llu (was %lu before Linux 2.6)
|
||||
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
|
||||
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
|
||||
// (divide by sysconf(_SC_CLK_TCK)).
|
||||
//
|
||||
// NOTE:
|
||||
// pos 2 could contain space and is inside `(` and `)`:
|
||||
// (2) comm %s
|
||||
// The filename of the executable, in parentheses.
|
||||
// This is visible whether or not the executable is
|
||||
// swapped out.
|
||||
//
|
||||
// the following is an example:
|
||||
func parseStat(data string) (stat Stat_t, err error) {
|
||||
// From proc(5), field 2 could contain space and is inside `(` and `)`.
|
||||
// The following is an example:
|
||||
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
i := strings.LastIndex(data, ")")
|
||||
if i <= 2 || i >= len(data)-1 {
|
||||
return stat, fmt.Errorf("invalid stat data: %q", data)
|
||||
}
|
||||
|
||||
// get parts after last `)`:
|
||||
s := strings.Split(stat, ")")
|
||||
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
|
||||
startTimeString := parts[22-3] // starts at 3 (after the filename pos `2`)
|
||||
var startTime uint64
|
||||
fmt.Sscanf(startTimeString, "%d", &startTime)
|
||||
return startTime, nil
|
||||
parts := strings.SplitN(data[:i], "(", 2)
|
||||
if len(parts) != 2 {
|
||||
return stat, fmt.Errorf("invalid stat data: %q", data)
|
||||
}
|
||||
|
||||
stat.Name = parts[1]
|
||||
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
|
||||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
|
||||
// parts indexes should be offset by 3 from the field number given
|
||||
// proc(5), because parts is zero-indexed and we've removed fields
|
||||
// one (PID) and two (Name) in the paren-split.
|
||||
parts = strings.Split(data[i+2:], " ")
|
||||
var state int
|
||||
fmt.Sscanf(parts[3-3], "%c", &state)
|
||||
stat.State = State(state)
|
||||
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
|
||||
return stat, nil
|
||||
}
|
||||
|
|
|
@ -3,18 +3,43 @@ package system
|
|||
import "testing"
|
||||
|
||||
func TestParseStartTime(t *testing.T) {
|
||||
data := map[string]uint64{
|
||||
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": 9126532,
|
||||
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": 9214966,
|
||||
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": 8722075,
|
||||
data := map[string]Stat_t{
|
||||
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": {
|
||||
PID: 4902,
|
||||
Name: "gunicorn: maste",
|
||||
State: 'S',
|
||||
StartTime: 9126532,
|
||||
},
|
||||
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
|
||||
PID: 9534,
|
||||
Name: "cat",
|
||||
State: 'R',
|
||||
StartTime: 9214966,
|
||||
},
|
||||
|
||||
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": {
|
||||
PID: 24767,
|
||||
Name: "irq/44-mei_me",
|
||||
State: 'S',
|
||||
StartTime: 8722075,
|
||||
},
|
||||
}
|
||||
for line, startTime := range data {
|
||||
st, err := parseStartTime(line)
|
||||
for line, expected := range data {
|
||||
st, err := parseStat(line)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if startTime != st {
|
||||
t.Fatalf("expected start time %q but received %q", startTime, st)
|
||||
if st.PID != expected.PID {
|
||||
t.Fatalf("expected PID %q but received %q", expected.PID, st.PID)
|
||||
}
|
||||
if st.State != expected.State {
|
||||
t.Fatalf("expected state %q but received %q", expected.State, st.State)
|
||||
}
|
||||
if st.Name != expected.Name {
|
||||
t.Fatalf("expected name %q but received %q", expected.Name, st.Name)
|
||||
}
|
||||
if st.StartTime != expected.StartTime {
|
||||
t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue