Merge pull request #1489 from wking/process-status
libcontainer/container_linux: Consider process state (running, zombie, etc.) in runType
This commit is contained in:
commit
bd65ef625d
|
@ -54,7 +54,7 @@ type BaseState struct {
|
|||
InitProcessPid int `json:"init_process_pid"`
|
||||
|
||||
// InitProcessStartTime is the init process start time in clock cycles since boot time.
|
||||
InitProcessStartTime string `json:"init_process_start"`
|
||||
InitProcessStartTime uint64 `json:"init_process_start"`
|
||||
|
||||
// Created is the unix timestamp for the creation time of the container in UTC
|
||||
Created time.Time `json:"created"`
|
||||
|
|
|
@ -40,7 +40,7 @@ type linuxContainer struct {
|
|||
cgroupManager cgroups.Manager
|
||||
initArgs []string
|
||||
initProcess parentProcess
|
||||
initProcessStartTime string
|
||||
initProcessStartTime uint64
|
||||
criuPath string
|
||||
m sync.Mutex
|
||||
criuVersion int
|
||||
|
@ -1365,40 +1365,17 @@ func (c *linuxContainer) refreshState() error {
|
|||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
// doesInitProcessExist checks if the init process is still the same process
|
||||
// as the initial one, it could happen that the original process has exited
|
||||
// and a new process has been created with the same pid, in this case, the
|
||||
// container would already be stopped.
|
||||
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
|
||||
startTime, err := system.GetProcessStartTime(initPid)
|
||||
if err != nil {
|
||||
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
|
||||
}
|
||||
if c.initProcessStartTime != startTime {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) runType() (Status, error) {
|
||||
if c.initProcess == nil {
|
||||
return Stopped, nil
|
||||
}
|
||||
pid := c.initProcess.pid()
|
||||
// return Running if the init process is alive
|
||||
if err := unix.Kill(pid, 0); err != nil {
|
||||
if err == unix.ESRCH {
|
||||
// It means the process does not exist anymore, could happen when the
|
||||
// process exited just when we call the function, we should not return
|
||||
// error in this case.
|
||||
return Stopped, nil
|
||||
}
|
||||
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return Stopped, nil
|
||||
}
|
||||
// check if the process is still the original init process.
|
||||
exist, err := c.doesInitProcessExist(pid)
|
||||
if !exist || err != nil {
|
||||
return Stopped, err
|
||||
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
|
||||
return Stopped, nil
|
||||
}
|
||||
// We'll create exec fifo and blocking on it after container is created,
|
||||
// and delete it after start container.
|
||||
|
@ -1427,7 +1404,7 @@ func (c *linuxContainer) isPaused() (bool, error) {
|
|||
|
||||
func (c *linuxContainer) currentState() (*State, error) {
|
||||
var (
|
||||
startTime string
|
||||
startTime uint64
|
||||
externalDescriptors []string
|
||||
pid = -1
|
||||
)
|
||||
|
|
|
@ -52,7 +52,7 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
|
|||
|
||||
type mockProcess struct {
|
||||
_pid int
|
||||
started string
|
||||
started uint64
|
||||
}
|
||||
|
||||
func (m *mockProcess) terminate() error {
|
||||
|
@ -63,7 +63,7 @@ func (m *mockProcess) pid() int {
|
|||
return m._pid
|
||||
}
|
||||
|
||||
func (m *mockProcess) startTime() (string, error) {
|
||||
func (m *mockProcess) startTime() (uint64, error) {
|
||||
return m.started, nil
|
||||
}
|
||||
|
||||
|
@ -150,7 +150,7 @@ func TestGetContainerState(t *testing.T) {
|
|||
},
|
||||
initProcess: &mockProcess{
|
||||
_pid: pid,
|
||||
started: "010",
|
||||
started: 10,
|
||||
},
|
||||
cgroupManager: &mockCgroupManager{
|
||||
pids: []int{1, 2, 3},
|
||||
|
@ -174,8 +174,8 @@ func TestGetContainerState(t *testing.T) {
|
|||
if state.InitProcessPid != pid {
|
||||
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
|
||||
}
|
||||
if state.InitProcessStartTime != "010" {
|
||||
t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime)
|
||||
if state.InitProcessStartTime != 10 {
|
||||
t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime)
|
||||
}
|
||||
paths := state.CgroupPaths
|
||||
if paths == nil {
|
||||
|
|
|
@ -180,7 +180,7 @@ func testCheckpoint(t *testing.T, userns bool) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if state != libcontainer.Running {
|
||||
if state != libcontainer.Stopped {
|
||||
t.Fatal("Unexpected state checkpoint: ", state)
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ type parentProcess interface {
|
|||
wait() (*os.ProcessState, error)
|
||||
|
||||
// startTime returns the process start time.
|
||||
startTime() (string, error)
|
||||
startTime() (uint64, error)
|
||||
|
||||
signal(os.Signal) error
|
||||
|
||||
|
@ -55,8 +55,9 @@ type setnsProcess struct {
|
|||
bootstrapData io.Reader
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (string, error) {
|
||||
return system.GetProcessStartTime(p.pid())
|
||||
func (p *setnsProcess) startTime() (uint64, error) {
|
||||
stat, err := system.Stat(p.pid())
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
|
@ -384,8 +385,9 @@ func (p *initProcess) terminate() error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (p *initProcess) startTime() (string, error) {
|
||||
return system.GetProcessStartTime(p.pid())
|
||||
func (p *initProcess) startTime() (uint64, error) {
|
||||
stat, err := system.Stat(p.pid())
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
|
|
|
@ -17,20 +17,20 @@ func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
started, err := system.GetProcessStartTime(pid)
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &restoredProcess{
|
||||
proc: proc,
|
||||
processStartTime: started,
|
||||
processStartTime: stat.StartTime,
|
||||
fds: fds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type restoredProcess struct {
|
||||
proc *os.Process
|
||||
processStartTime string
|
||||
processStartTime uint64
|
||||
fds []string
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,7 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
|||
return st, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) startTime() (string, error) {
|
||||
func (p *restoredProcess) startTime() (uint64, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
|
@ -81,7 +81,7 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) {
|
|||
// a persisted state.
|
||||
type nonChildProcess struct {
|
||||
processPid int
|
||||
processStartTime string
|
||||
processStartTime uint64
|
||||
fds []string
|
||||
}
|
||||
|
||||
|
@ -101,7 +101,7 @@ func (p *nonChildProcess) wait() (*os.ProcessState, error) {
|
|||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) startTime() (string, error) {
|
||||
func (p *nonChildProcess) startTime() (uint64, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,43 +1,113 @@
|
|||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// look in /proc to find the process start time so that we can verify
|
||||
// that this pid has started after ourself
|
||||
// State is the status of a process.
|
||||
type State rune
|
||||
|
||||
const ( // Only values for Linux 3.14 and later are listed here
|
||||
Dead State = 'X'
|
||||
DiskSleep State = 'D'
|
||||
Running State = 'R'
|
||||
Sleeping State = 'S'
|
||||
Stopped State = 'T'
|
||||
TracingStop State = 't'
|
||||
Zombie State = 'Z'
|
||||
)
|
||||
|
||||
// String forms of the state from proc(5)'s documentation for
|
||||
// /proc/[pid]/status' "State" field.
|
||||
func (s State) String() string {
|
||||
switch s {
|
||||
case Dead:
|
||||
return "dead"
|
||||
case DiskSleep:
|
||||
return "disk sleep"
|
||||
case Running:
|
||||
return "running"
|
||||
case Sleeping:
|
||||
return "sleeping"
|
||||
case Stopped:
|
||||
return "stopped"
|
||||
case TracingStop:
|
||||
return "tracing stop"
|
||||
case Zombie:
|
||||
return "zombie"
|
||||
default:
|
||||
return fmt.Sprintf("unknown (%c)", s)
|
||||
}
|
||||
}
|
||||
|
||||
// Stat_t represents the information from /proc/[pid]/stat, as
|
||||
// described in proc(5) with names based on the /proc/[pid]/status
|
||||
// fields.
|
||||
type Stat_t struct {
|
||||
// PID is the process ID.
|
||||
PID uint
|
||||
|
||||
// Name is the command run by the process.
|
||||
Name string
|
||||
|
||||
// State is the state of the process.
|
||||
State State
|
||||
|
||||
// StartTime is the number of clock ticks after system boot (since
|
||||
// Linux 2.6).
|
||||
StartTime uint64
|
||||
}
|
||||
|
||||
// Stat returns a Stat_t instance for the specified process.
|
||||
func Stat(pid int) (stat Stat_t, err error) {
|
||||
bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
|
||||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
return parseStat(string(bytes))
|
||||
}
|
||||
|
||||
// GetProcessStartTime is deprecated. Use Stat(pid) and
|
||||
// Stat_t.StartTime instead.
|
||||
func GetProcessStartTime(pid int) (string, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
|
||||
stat, err := Stat(pid)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return parseStartTime(string(data))
|
||||
return fmt.Sprintf("%d", stat.StartTime), nil
|
||||
}
|
||||
|
||||
func parseStartTime(stat string) (string, error) {
|
||||
// the starttime is located at pos 22
|
||||
// from the man page
|
||||
//
|
||||
// starttime %llu (was %lu before Linux 2.6)
|
||||
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
|
||||
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
|
||||
// (divide by sysconf(_SC_CLK_TCK)).
|
||||
//
|
||||
// NOTE:
|
||||
// pos 2 could contain space and is inside `(` and `)`:
|
||||
// (2) comm %s
|
||||
// The filename of the executable, in parentheses.
|
||||
// This is visible whether or not the executable is
|
||||
// swapped out.
|
||||
//
|
||||
// the following is an example:
|
||||
func parseStat(data string) (stat Stat_t, err error) {
|
||||
// From proc(5), field 2 could contain space and is inside `(` and `)`.
|
||||
// The following is an example:
|
||||
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
i := strings.LastIndex(data, ")")
|
||||
if i <= 2 || i >= len(data)-1 {
|
||||
return stat, fmt.Errorf("invalid stat data: %q", data)
|
||||
}
|
||||
|
||||
// get parts after last `)`:
|
||||
s := strings.Split(stat, ")")
|
||||
parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ")
|
||||
return parts[22-3], nil // starts at 3 (after the filename pos `2`)
|
||||
parts := strings.SplitN(data[:i], "(", 2)
|
||||
if len(parts) != 2 {
|
||||
return stat, fmt.Errorf("invalid stat data: %q", data)
|
||||
}
|
||||
|
||||
stat.Name = parts[1]
|
||||
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
|
||||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
|
||||
// parts indexes should be offset by 3 from the field number given
|
||||
// proc(5), because parts is zero-indexed and we've removed fields
|
||||
// one (PID) and two (Name) in the paren-split.
|
||||
parts = strings.Split(data[i+2:], " ")
|
||||
var state int
|
||||
fmt.Sscanf(parts[3-3], "%c", &state)
|
||||
stat.State = State(state)
|
||||
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
|
||||
return stat, nil
|
||||
}
|
||||
|
|
|
@ -3,18 +3,43 @@ package system
|
|||
import "testing"
|
||||
|
||||
func TestParseStartTime(t *testing.T) {
|
||||
data := map[string]string{
|
||||
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": "9126532",
|
||||
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": "9214966",
|
||||
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": "8722075",
|
||||
data := map[string]Stat_t{
|
||||
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": {
|
||||
PID: 4902,
|
||||
Name: "gunicorn: maste",
|
||||
State: 'S',
|
||||
StartTime: 9126532,
|
||||
},
|
||||
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
|
||||
PID: 9534,
|
||||
Name: "cat",
|
||||
State: 'R',
|
||||
StartTime: 9214966,
|
||||
},
|
||||
|
||||
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": {
|
||||
PID: 24767,
|
||||
Name: "irq/44-mei_me",
|
||||
State: 'S',
|
||||
StartTime: 8722075,
|
||||
},
|
||||
}
|
||||
for line, startTime := range data {
|
||||
st, err := parseStartTime(line)
|
||||
for line, expected := range data {
|
||||
st, err := parseStat(line)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if startTime != st {
|
||||
t.Fatalf("expected start time %q but received %q", startTime, st)
|
||||
if st.PID != expected.PID {
|
||||
t.Fatalf("expected PID %q but received %q", expected.PID, st.PID)
|
||||
}
|
||||
if st.State != expected.State {
|
||||
t.Fatalf("expected state %q but received %q", expected.State, st.State)
|
||||
}
|
||||
if st.Name != expected.Name {
|
||||
t.Fatalf("expected name %q but received %q", expected.Name, st.Name)
|
||||
}
|
||||
if st.StartTime != expected.StartTime {
|
||||
t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue