Merge pull request #1489 from wking/process-status

libcontainer/container_linux: Consider process state (running, zombie, etc.) in runType
This commit is contained in:
Michael Crosby 2017-06-21 10:24:04 -07:00 committed by GitHub
commit bd65ef625d
8 changed files with 155 additions and 81 deletions

View File

@ -54,7 +54,7 @@ type BaseState struct {
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time in clock cycles since boot time.
InitProcessStartTime string `json:"init_process_start"`
InitProcessStartTime uint64 `json:"init_process_start"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`

View File

@ -40,7 +40,7 @@ type linuxContainer struct {
cgroupManager cgroups.Manager
initArgs []string
initProcess parentProcess
initProcessStartTime string
initProcessStartTime uint64
criuPath string
m sync.Mutex
criuVersion int
@ -1365,40 +1365,17 @@ func (c *linuxContainer) refreshState() error {
return c.state.transition(&stoppedState{c: c})
}
// doesInitProcessExist checks if the init process is still the same process
// as the initial one, it could happen that the original process has exited
// and a new process has been created with the same pid, in this case, the
// container would already be stopped.
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
startTime, err := system.GetProcessStartTime(initPid)
if err != nil {
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
}
if c.initProcessStartTime != startTime {
return false, nil
}
return true, nil
}
func (c *linuxContainer) runType() (Status, error) {
if c.initProcess == nil {
return Stopped, nil
}
pid := c.initProcess.pid()
// return Running if the init process is alive
if err := unix.Kill(pid, 0); err != nil {
if err == unix.ESRCH {
// It means the process does not exist anymore, could happen when the
// process exited just when we call the function, we should not return
// error in this case.
return Stopped, nil
}
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
stat, err := system.Stat(pid)
if err != nil {
return Stopped, nil
}
// check if the process is still the original init process.
exist, err := c.doesInitProcessExist(pid)
if !exist || err != nil {
return Stopped, err
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
return Stopped, nil
}
// We'll create exec fifo and blocking on it after container is created,
// and delete it after start container.
@ -1427,7 +1404,7 @@ func (c *linuxContainer) isPaused() (bool, error) {
func (c *linuxContainer) currentState() (*State, error) {
var (
startTime string
startTime uint64
externalDescriptors []string
pid = -1
)

View File

@ -52,7 +52,7 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
type mockProcess struct {
_pid int
started string
started uint64
}
func (m *mockProcess) terminate() error {
@ -63,7 +63,7 @@ func (m *mockProcess) pid() int {
return m._pid
}
func (m *mockProcess) startTime() (string, error) {
func (m *mockProcess) startTime() (uint64, error) {
return m.started, nil
}
@ -150,7 +150,7 @@ func TestGetContainerState(t *testing.T) {
},
initProcess: &mockProcess{
_pid: pid,
started: "010",
started: 10,
},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
@ -174,8 +174,8 @@ func TestGetContainerState(t *testing.T) {
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != "010" {
t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime)
if state.InitProcessStartTime != 10 {
t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime)
}
paths := state.CgroupPaths
if paths == nil {

View File

@ -180,7 +180,7 @@ func testCheckpoint(t *testing.T, userns bool) {
t.Fatal(err)
}
if state != libcontainer.Running {
if state != libcontainer.Stopped {
t.Fatal("Unexpected state checkpoint: ", state)
}

View File

@ -35,7 +35,7 @@ type parentProcess interface {
wait() (*os.ProcessState, error)
// startTime returns the process start time.
startTime() (string, error)
startTime() (uint64, error)
signal(os.Signal) error
@ -55,8 +55,9 @@ type setnsProcess struct {
bootstrapData io.Reader
}
func (p *setnsProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
func (p *setnsProcess) startTime() (uint64, error) {
stat, err := system.Stat(p.pid())
return stat.StartTime, err
}
func (p *setnsProcess) signal(sig os.Signal) error {
@ -384,8 +385,9 @@ func (p *initProcess) terminate() error {
return err
}
func (p *initProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
func (p *initProcess) startTime() (uint64, error) {
stat, err := system.Stat(p.pid())
return stat.StartTime, err
}
func (p *initProcess) sendConfig() error {

View File

@ -17,20 +17,20 @@ func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
if err != nil {
return nil, err
}
started, err := system.GetProcessStartTime(pid)
stat, err := system.Stat(pid)
if err != nil {
return nil, err
}
return &restoredProcess{
proc: proc,
processStartTime: started,
processStartTime: stat.StartTime,
fds: fds,
}, nil
}
type restoredProcess struct {
proc *os.Process
processStartTime string
processStartTime uint64
fds []string
}
@ -60,7 +60,7 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) {
return st, nil
}
func (p *restoredProcess) startTime() (string, error) {
func (p *restoredProcess) startTime() (uint64, error) {
return p.processStartTime, nil
}
@ -81,7 +81,7 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) {
// a persisted state.
type nonChildProcess struct {
processPid int
processStartTime string
processStartTime uint64
fds []string
}
@ -101,7 +101,7 @@ func (p *nonChildProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *nonChildProcess) startTime() (string, error) {
func (p *nonChildProcess) startTime() (uint64, error) {
return p.processStartTime, nil
}

View File

@ -1,43 +1,113 @@
package system
import (
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
// look in /proc to find the process start time so that we can verify
// that this pid has started after ourself
// State is the status of a process.
type State rune
const ( // Only values for Linux 3.14 and later are listed here
Dead State = 'X'
DiskSleep State = 'D'
Running State = 'R'
Sleeping State = 'S'
Stopped State = 'T'
TracingStop State = 't'
Zombie State = 'Z'
)
// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
switch s {
case Dead:
return "dead"
case DiskSleep:
return "disk sleep"
case Running:
return "running"
case Sleeping:
return "sleeping"
case Stopped:
return "stopped"
case TracingStop:
return "tracing stop"
case Zombie:
return "zombie"
default:
return fmt.Sprintf("unknown (%c)", s)
}
}
// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// PID is the process ID.
PID uint
// Name is the command run by the process.
Name string
// State is the state of the process.
State State
// StartTime is the number of clock ticks after system boot (since
// Linux 2.6).
StartTime uint64
}
// Stat returns a Stat_t instance for the specified process.
func Stat(pid int) (stat Stat_t, err error) {
bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return stat, err
}
return parseStat(string(bytes))
}
// GetProcessStartTime is deprecated. Use Stat(pid) and
// Stat_t.StartTime instead.
func GetProcessStartTime(pid int) (string, error) {
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
stat, err := Stat(pid)
if err != nil {
return "", err
}
return parseStartTime(string(data))
return fmt.Sprintf("%d", stat.StartTime), nil
}
func parseStartTime(stat string) (string, error) {
// the starttime is located at pos 22
// from the man page
//
// starttime %llu (was %lu before Linux 2.6)
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
//
// NOTE:
// pos 2 could contain space and is inside `(` and `)`:
// (2) comm %s
// The filename of the executable, in parentheses.
// This is visible whether or not the executable is
// swapped out.
//
// the following is an example:
func parseStat(data string) (stat Stat_t, err error) {
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
i := strings.LastIndex(data, ")")
if i <= 2 || i >= len(data)-1 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
// get parts after last `)`:
s := strings.Split(stat, ")")
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
return parts[22-3], nil // starts at 3 (after the filename pos `2`)
parts := strings.SplitN(data[:i], "(", 2)
if len(parts) != 2 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
stat.Name = parts[1]
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
if err != nil {
return stat, err
}
// parts indexes should be offset by 3 from the field number given
// proc(5), because parts is zero-indexed and we've removed fields
// one (PID) and two (Name) in the paren-split.
parts = strings.Split(data[i+2:], " ")
var state int
fmt.Sscanf(parts[3-3], "%c", &state)
stat.State = State(state)
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
return stat, nil
}

View File

@ -3,18 +3,43 @@ package system
import "testing"
func TestParseStartTime(t *testing.T) {
data := map[string]string{
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": "9126532",
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": "9214966",
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": "8722075",
data := map[string]Stat_t{
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": {
PID: 4902,
Name: "gunicorn: maste",
State: 'S',
StartTime: 9126532,
},
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
PID: 9534,
Name: "cat",
State: 'R',
StartTime: 9214966,
},
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": {
PID: 24767,
Name: "irq/44-mei_me",
State: 'S',
StartTime: 8722075,
},
}
for line, startTime := range data {
st, err := parseStartTime(line)
for line, expected := range data {
st, err := parseStat(line)
if err != nil {
t.Fatal(err)
}
if startTime != st {
t.Fatalf("expected start time %q but received %q", startTime, st)
if st.PID != expected.PID {
t.Fatalf("expected PID %q but received %q", expected.PID, st.PID)
}
if st.State != expected.State {
t.Fatalf("expected state %q but received %q", expected.State, st.State)
}
if st.Name != expected.Name {
t.Fatalf("expected name %q but received %q", expected.Name, st.Name)
}
if st.StartTime != expected.StartTime {
t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime)
}
}
}