From 2bea4c897e68475c0e698f7ebec4ba989ea0cda0 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 14 Jun 2017 16:41:16 -0700 Subject: [PATCH] libcontainer/system/proc: Add Stat_t.State And Stat_t.PID and Stat_t.Name while we're at it. Then use the new .State property in runType to distinguish between running and zombie/dead processes, since kill(2) does not [1]. With this change we no longer claim Running status for zombie/dead processes. I've also removed the kill(2) call from runType. It was originally added in 13841ef3 (new-api: return the Running state only if the init process is alive, 2014-12-23), but we've been accessing /proc/[pid]/stat since 14e95b2a (Make state detection precise, 2016-07-05, #930), and with the /stat access the kill(2) check is redundant. I also don't see much point to the previously-separate doesInitProcessExist, so I've inlined that logic in runType. It would be nice to distinguish between "/proc/[pid]/stat doesn't exist" and errors parsing its contents, but I've skipped that for the moment. The Running -> Stopped change in checkpoint_test.go is because the post-checkpoint process is a zombie, and with this commit zombie processes are Stopped (and no longer Running). [1]: https://github.com/opencontainers/runc/pull/1483#issuecomment-307527789 Signed-off-by: W. Trevor King --- libcontainer/container_linux.go | 33 +------ libcontainer/integration/checkpoint_test.go | 2 +- libcontainer/system/proc.go | 103 ++++++++++++++------ libcontainer/system/proc_test.go | 41 ++++++-- 4 files changed, 114 insertions(+), 65 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 13039587..74811ee3 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1365,40 +1365,17 @@ func (c *linuxContainer) refreshState() error { return c.state.transition(&stoppedState{c: c}) } -// doesInitProcessExist checks if the init process is still the same process -// as the initial one, it could happen that the original process has exited -// and a new process has been created with the same pid, in this case, the -// container would already be stopped. -func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) { - stat, err := system.Stat(initPid) - if err != nil { - return false, newSystemErrorWithCausef(err, "getting init process %d status", initPid) - } - if c.initProcessStartTime != stat.StartTime { - return false, nil - } - return true, nil -} - func (c *linuxContainer) runType() (Status, error) { if c.initProcess == nil { return Stopped, nil } pid := c.initProcess.pid() - // return Running if the init process is alive - if err := unix.Kill(pid, 0); err != nil { - if err == unix.ESRCH { - // It means the process does not exist anymore, could happen when the - // process exited just when we call the function, we should not return - // error in this case. - return Stopped, nil - } - return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid) + stat, err := system.Stat(pid) + if err != nil { + return Stopped, nil } - // check if the process is still the original init process. - exist, err := c.doesInitProcessExist(pid) - if !exist || err != nil { - return Stopped, err + if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead { + return Stopped, nil } // We'll create exec fifo and blocking on it after container is created, // and delete it after start container. diff --git a/libcontainer/integration/checkpoint_test.go b/libcontainer/integration/checkpoint_test.go index a8c786c1..9645999d 100644 --- a/libcontainer/integration/checkpoint_test.go +++ b/libcontainer/integration/checkpoint_test.go @@ -180,7 +180,7 @@ func testCheckpoint(t *testing.T, userns bool) { t.Fatal(err) } - if state != libcontainer.Running { + if state != libcontainer.Stopped { t.Fatal("Unexpected state checkpoint: ", state) } diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go index 4ffb8331..79232a43 100644 --- a/libcontainer/system/proc.go +++ b/libcontainer/system/proc.go @@ -8,9 +8,55 @@ import ( "strings" ) +// State is the status of a process. +type State rune + +const ( // Only values for Linux 3.14 and later are listed here + Dead State = 'X' + DiskSleep State = 'D' + Running State = 'R' + Sleeping State = 'S' + Stopped State = 'T' + TracingStop State = 't' + Zombie State = 'Z' +) + +// String forms of the state from proc(5)'s documentation for +// /proc/[pid]/status' "State" field. +func (s State) String() string { + switch s { + case Dead: + return "dead" + case DiskSleep: + return "disk sleep" + case Running: + return "running" + case Sleeping: + return "sleeping" + case Stopped: + return "stopped" + case TracingStop: + return "tracing stop" + case Zombie: + return "zombie" + default: + return fmt.Sprintf("unknown (%c)", s) + } +} + // Stat_t represents the information from /proc/[pid]/stat, as -// described in proc(5). +// described in proc(5) with names based on the /proc/[pid]/status +// fields. type Stat_t struct { + // PID is the process ID. + PID uint + + // Name is the command run by the process. + Name string + + // State is the state of the process. + State State + // StartTime is the number of clock ticks after system boot (since // Linux 2.6). StartTime uint64 @@ -22,9 +68,7 @@ func Stat(pid int) (stat Stat_t, err error) { if err != nil { return stat, err } - data := string(bytes) - stat.StartTime, err = parseStartTime(data) - return stat, err + return parseStat(string(bytes)) } // GetProcessStartTime is deprecated. Use Stat(pid) and @@ -37,30 +81,33 @@ func GetProcessStartTime(pid int) (string, error) { return fmt.Sprintf("%d", stat.StartTime), nil } -func parseStartTime(stat string) (uint64, error) { - // the starttime is located at pos 22 - // from the man page - // - // starttime %llu (was %lu before Linux 2.6) - // (22) The time the process started after system boot. In kernels before Linux 2.6, this - // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks - // (divide by sysconf(_SC_CLK_TCK)). - // - // NOTE: - // pos 2 could contain space and is inside `(` and `)`: - // (2) comm %s - // The filename of the executable, in parentheses. - // This is visible whether or not the executable is - // swapped out. - // - // the following is an example: +func parseStat(data string) (stat Stat_t, err error) { + // From proc(5), field 2 could contain space and is inside `(` and `)`. + // The following is an example: // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + i := strings.LastIndex(data, ")") + if i <= 2 || i >= len(data)-1 { + return stat, fmt.Errorf("invalid stat data: %q", data) + } - // get parts after last `)`: - s := strings.Split(stat, ")") - parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ") - startTimeString := parts[22-3] // starts at 3 (after the filename pos `2`) - var startTime uint64 - fmt.Sscanf(startTimeString, "%d", &startTime) - return startTime, nil + parts := strings.SplitN(data[:i], "(", 2) + if len(parts) != 2 { + return stat, fmt.Errorf("invalid stat data: %q", data) + } + + stat.Name = parts[1] + _, err = fmt.Sscanf(parts[0], "%d", &stat.PID) + if err != nil { + return stat, err + } + + // parts indexes should be offset by 3 from the field number given + // proc(5), because parts is zero-indexed and we've removed fields + // one (PID) and two (Name) in the paren-split. + parts = strings.Split(data[i+2:], " ") + var state int + fmt.Sscanf(parts[3-3], "%c", &state) + stat.State = State(state) + fmt.Sscanf(parts[22-3], "%d", &stat.StartTime) + return stat, nil } diff --git a/libcontainer/system/proc_test.go b/libcontainer/system/proc_test.go index c7615ef9..7e1acc5b 100644 --- a/libcontainer/system/proc_test.go +++ b/libcontainer/system/proc_test.go @@ -3,18 +3,43 @@ package system import "testing" func TestParseStartTime(t *testing.T) { - data := map[string]uint64{ - "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": 9126532, - "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": 9214966, - "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": 8722075, + data := map[string]Stat_t{ + "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": { + PID: 4902, + Name: "gunicorn: maste", + State: 'S', + StartTime: 9126532, + }, + "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { + PID: 9534, + Name: "cat", + State: 'R', + StartTime: 9214966, + }, + + "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": { + PID: 24767, + Name: "irq/44-mei_me", + State: 'S', + StartTime: 8722075, + }, } - for line, startTime := range data { - st, err := parseStartTime(line) + for line, expected := range data { + st, err := parseStat(line) if err != nil { t.Fatal(err) } - if startTime != st { - t.Fatalf("expected start time %q but received %q", startTime, st) + if st.PID != expected.PID { + t.Fatalf("expected PID %q but received %q", expected.PID, st.PID) + } + if st.State != expected.State { + t.Fatalf("expected state %q but received %q", expected.State, st.State) + } + if st.Name != expected.Name { + t.Fatalf("expected name %q but received %q", expected.Name, st.Name) + } + if st.StartTime != expected.StartTime { + t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime) } } }