Merge pull request #1489 from wking/process-status
libcontainer/container_linux: Consider process state (running, zombie, etc.) in runType
This commit is contained in:
commit
bd65ef625d
|
@ -54,7 +54,7 @@ type BaseState struct {
|
||||||
InitProcessPid int `json:"init_process_pid"`
|
InitProcessPid int `json:"init_process_pid"`
|
||||||
|
|
||||||
// InitProcessStartTime is the init process start time in clock cycles since boot time.
|
// InitProcessStartTime is the init process start time in clock cycles since boot time.
|
||||||
InitProcessStartTime string `json:"init_process_start"`
|
InitProcessStartTime uint64 `json:"init_process_start"`
|
||||||
|
|
||||||
// Created is the unix timestamp for the creation time of the container in UTC
|
// Created is the unix timestamp for the creation time of the container in UTC
|
||||||
Created time.Time `json:"created"`
|
Created time.Time `json:"created"`
|
||||||
|
|
|
@ -40,7 +40,7 @@ type linuxContainer struct {
|
||||||
cgroupManager cgroups.Manager
|
cgroupManager cgroups.Manager
|
||||||
initArgs []string
|
initArgs []string
|
||||||
initProcess parentProcess
|
initProcess parentProcess
|
||||||
initProcessStartTime string
|
initProcessStartTime uint64
|
||||||
criuPath string
|
criuPath string
|
||||||
m sync.Mutex
|
m sync.Mutex
|
||||||
criuVersion int
|
criuVersion int
|
||||||
|
@ -1365,40 +1365,17 @@ func (c *linuxContainer) refreshState() error {
|
||||||
return c.state.transition(&stoppedState{c: c})
|
return c.state.transition(&stoppedState{c: c})
|
||||||
}
|
}
|
||||||
|
|
||||||
// doesInitProcessExist checks if the init process is still the same process
|
|
||||||
// as the initial one, it could happen that the original process has exited
|
|
||||||
// and a new process has been created with the same pid, in this case, the
|
|
||||||
// container would already be stopped.
|
|
||||||
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
|
|
||||||
startTime, err := system.GetProcessStartTime(initPid)
|
|
||||||
if err != nil {
|
|
||||||
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
|
|
||||||
}
|
|
||||||
if c.initProcessStartTime != startTime {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *linuxContainer) runType() (Status, error) {
|
func (c *linuxContainer) runType() (Status, error) {
|
||||||
if c.initProcess == nil {
|
if c.initProcess == nil {
|
||||||
return Stopped, nil
|
return Stopped, nil
|
||||||
}
|
}
|
||||||
pid := c.initProcess.pid()
|
pid := c.initProcess.pid()
|
||||||
// return Running if the init process is alive
|
stat, err := system.Stat(pid)
|
||||||
if err := unix.Kill(pid, 0); err != nil {
|
if err != nil {
|
||||||
if err == unix.ESRCH {
|
return Stopped, nil
|
||||||
// It means the process does not exist anymore, could happen when the
|
|
||||||
// process exited just when we call the function, we should not return
|
|
||||||
// error in this case.
|
|
||||||
return Stopped, nil
|
|
||||||
}
|
|
||||||
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
|
|
||||||
}
|
}
|
||||||
// check if the process is still the original init process.
|
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
|
||||||
exist, err := c.doesInitProcessExist(pid)
|
return Stopped, nil
|
||||||
if !exist || err != nil {
|
|
||||||
return Stopped, err
|
|
||||||
}
|
}
|
||||||
// We'll create exec fifo and blocking on it after container is created,
|
// We'll create exec fifo and blocking on it after container is created,
|
||||||
// and delete it after start container.
|
// and delete it after start container.
|
||||||
|
@ -1427,7 +1404,7 @@ func (c *linuxContainer) isPaused() (bool, error) {
|
||||||
|
|
||||||
func (c *linuxContainer) currentState() (*State, error) {
|
func (c *linuxContainer) currentState() (*State, error) {
|
||||||
var (
|
var (
|
||||||
startTime string
|
startTime uint64
|
||||||
externalDescriptors []string
|
externalDescriptors []string
|
||||||
pid = -1
|
pid = -1
|
||||||
)
|
)
|
||||||
|
|
|
@ -52,7 +52,7 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
|
||||||
|
|
||||||
type mockProcess struct {
|
type mockProcess struct {
|
||||||
_pid int
|
_pid int
|
||||||
started string
|
started uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockProcess) terminate() error {
|
func (m *mockProcess) terminate() error {
|
||||||
|
@ -63,7 +63,7 @@ func (m *mockProcess) pid() int {
|
||||||
return m._pid
|
return m._pid
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockProcess) startTime() (string, error) {
|
func (m *mockProcess) startTime() (uint64, error) {
|
||||||
return m.started, nil
|
return m.started, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,7 +150,7 @@ func TestGetContainerState(t *testing.T) {
|
||||||
},
|
},
|
||||||
initProcess: &mockProcess{
|
initProcess: &mockProcess{
|
||||||
_pid: pid,
|
_pid: pid,
|
||||||
started: "010",
|
started: 10,
|
||||||
},
|
},
|
||||||
cgroupManager: &mockCgroupManager{
|
cgroupManager: &mockCgroupManager{
|
||||||
pids: []int{1, 2, 3},
|
pids: []int{1, 2, 3},
|
||||||
|
@ -174,8 +174,8 @@ func TestGetContainerState(t *testing.T) {
|
||||||
if state.InitProcessPid != pid {
|
if state.InitProcessPid != pid {
|
||||||
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
|
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
|
||||||
}
|
}
|
||||||
if state.InitProcessStartTime != "010" {
|
if state.InitProcessStartTime != 10 {
|
||||||
t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime)
|
t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime)
|
||||||
}
|
}
|
||||||
paths := state.CgroupPaths
|
paths := state.CgroupPaths
|
||||||
if paths == nil {
|
if paths == nil {
|
||||||
|
|
|
@ -180,7 +180,7 @@ func testCheckpoint(t *testing.T, userns bool) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if state != libcontainer.Running {
|
if state != libcontainer.Stopped {
|
||||||
t.Fatal("Unexpected state checkpoint: ", state)
|
t.Fatal("Unexpected state checkpoint: ", state)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ type parentProcess interface {
|
||||||
wait() (*os.ProcessState, error)
|
wait() (*os.ProcessState, error)
|
||||||
|
|
||||||
// startTime returns the process start time.
|
// startTime returns the process start time.
|
||||||
startTime() (string, error)
|
startTime() (uint64, error)
|
||||||
|
|
||||||
signal(os.Signal) error
|
signal(os.Signal) error
|
||||||
|
|
||||||
|
@ -55,8 +55,9 @@ type setnsProcess struct {
|
||||||
bootstrapData io.Reader
|
bootstrapData io.Reader
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *setnsProcess) startTime() (string, error) {
|
func (p *setnsProcess) startTime() (uint64, error) {
|
||||||
return system.GetProcessStartTime(p.pid())
|
stat, err := system.Stat(p.pid())
|
||||||
|
return stat.StartTime, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *setnsProcess) signal(sig os.Signal) error {
|
func (p *setnsProcess) signal(sig os.Signal) error {
|
||||||
|
@ -384,8 +385,9 @@ func (p *initProcess) terminate() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *initProcess) startTime() (string, error) {
|
func (p *initProcess) startTime() (uint64, error) {
|
||||||
return system.GetProcessStartTime(p.pid())
|
stat, err := system.Stat(p.pid())
|
||||||
|
return stat.StartTime, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *initProcess) sendConfig() error {
|
func (p *initProcess) sendConfig() error {
|
||||||
|
|
|
@ -17,20 +17,20 @@ func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
started, err := system.GetProcessStartTime(pid)
|
stat, err := system.Stat(pid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return &restoredProcess{
|
return &restoredProcess{
|
||||||
proc: proc,
|
proc: proc,
|
||||||
processStartTime: started,
|
processStartTime: stat.StartTime,
|
||||||
fds: fds,
|
fds: fds,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type restoredProcess struct {
|
type restoredProcess struct {
|
||||||
proc *os.Process
|
proc *os.Process
|
||||||
processStartTime string
|
processStartTime uint64
|
||||||
fds []string
|
fds []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||||
return st, nil
|
return st, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *restoredProcess) startTime() (string, error) {
|
func (p *restoredProcess) startTime() (uint64, error) {
|
||||||
return p.processStartTime, nil
|
return p.processStartTime, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) {
|
||||||
// a persisted state.
|
// a persisted state.
|
||||||
type nonChildProcess struct {
|
type nonChildProcess struct {
|
||||||
processPid int
|
processPid int
|
||||||
processStartTime string
|
processStartTime uint64
|
||||||
fds []string
|
fds []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,7 +101,7 @@ func (p *nonChildProcess) wait() (*os.ProcessState, error) {
|
||||||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *nonChildProcess) startTime() (string, error) {
|
func (p *nonChildProcess) startTime() (uint64, error) {
|
||||||
return p.processStartTime, nil
|
return p.processStartTime, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,43 +1,113 @@
|
||||||
package system
|
package system
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// State is the status of a process, stored as the single character that
// appears in the third field of /proc/[pid]/stat.
type State rune

const ( // Only values for Linux 3.14 and later are listed here
	Dead        State = 'X'
	DiskSleep   State = 'D'
	Running     State = 'R'
	Sleeping    State = 'S'
	Stopped     State = 'T'
	TracingStop State = 't'
	Zombie      State = 'Z'
)

// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field. Characters that are not one of the
// constants above are rendered as "unknown (<char>)".
func (s State) String() string {
	switch s {
	case Dead:
		return "dead"
	case DiskSleep:
		return "disk sleep"
	case Running:
		return "running"
	case Sleeping:
		return "sleeping"
	case Stopped:
		return "stopped"
	case TracingStop:
		return "tracing stop"
	case Zombie:
		return "zombie"
	default:
		return fmt.Sprintf("unknown (%c)", s)
	}
}

// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
	// PID is the process ID.
	PID uint

	// Name is the command run by the process.
	Name string

	// State is the state of the process.
	State State

	// StartTime is the number of clock ticks after system boot (since
	// Linux 2.6).
	StartTime uint64
}

// Stat returns a Stat_t instance for the specified process.
//
// It reads /proc/<pid>/stat and hands the content to parseStat; an error is
// returned when the file cannot be read or its content cannot be parsed.
func Stat(pid int) (stat Stat_t, err error) {
	data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return stat, err
	}
	return parseStat(string(data))
}

// GetProcessStartTime returns the start time of the given process as the
// decimal string form of Stat_t.StartTime.
//
// Deprecated: use Stat(pid) and Stat_t.StartTime instead.
func GetProcessStartTime(pid int) (string, error) {
	stat, err := Stat(pid)
	if err != nil {
		return "", err
	}
	return strconv.FormatUint(stat.StartTime, 10), nil
}

// parseStat extracts the PID, command name, state and start time from the
// content of a /proc/[pid]/stat file.
//
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
//
//	89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
//
// An error (and a zero Stat_t) is returned when the data is malformed:
// missing parentheses, an unparsable PID, fewer than 22 fields, an empty
// state field, or a start time that is not an unsigned decimal number.
func parseStat(data string) (stat Stat_t, err error) {
	// Field numbers as given in proc(5).
	const (
		stateField     = 3
		startTimeField = 22
		// The fields after the command name are indexed from zero and no
		// longer include fields one (PID) and two (Name), hence the offset.
		fieldOffset = 3
	)

	// The command name may itself contain parentheses, so the end of field
	// two is the last `)` in the data.
	i := strings.LastIndex(data, ")")
	if i <= 2 || i >= len(data)-1 {
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}

	parts := strings.SplitN(data[:i], "(", 2)
	if len(parts) != 2 {
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}

	var parsed Stat_t
	parsed.Name = parts[1]
	if _, err := fmt.Sscanf(parts[0], "%d", &parsed.PID); err != nil {
		return Stat_t{}, err
	}

	// Skip the `)` and the separator that follows it.
	fields := strings.Split(data[i+2:], " ")
	if len(fields) <= startTimeField-fieldOffset {
		// Without this check truncated data would panic on the index below.
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}

	stateRunes := []rune(fields[stateField-fieldOffset])
	if len(stateRunes) == 0 {
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}
	parsed.State = State(stateRunes[0])

	startTime, err := strconv.ParseUint(fields[startTimeField-fieldOffset], 10, 64)
	if err != nil {
		return Stat_t{}, fmt.Errorf("invalid start time in stat data %q: %v", data, err)
	}
	parsed.StartTime = startTime

	return parsed, nil
}
|
||||||
|
|
|
@ -3,18 +3,43 @@ package system
|
||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestParseStartTime(t *testing.T) {
|
func TestParseStartTime(t *testing.T) {
|
||||||
data := map[string]string{
|
data := map[string]Stat_t{
|
||||||
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": "9126532",
|
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": {
|
||||||
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": "9214966",
|
PID: 4902,
|
||||||
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": "8722075",
|
Name: "gunicorn: maste",
|
||||||
|
State: 'S',
|
||||||
|
StartTime: 9126532,
|
||||||
|
},
|
||||||
|
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
|
||||||
|
PID: 9534,
|
||||||
|
Name: "cat",
|
||||||
|
State: 'R',
|
||||||
|
StartTime: 9214966,
|
||||||
|
},
|
||||||
|
|
||||||
|
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": {
|
||||||
|
PID: 24767,
|
||||||
|
Name: "irq/44-mei_me",
|
||||||
|
State: 'S',
|
||||||
|
StartTime: 8722075,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for line, startTime := range data {
|
for line, expected := range data {
|
||||||
st, err := parseStartTime(line)
|
st, err := parseStat(line)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if startTime != st {
|
if st.PID != expected.PID {
|
||||||
t.Fatalf("expected start time %q but received %q", startTime, st)
|
t.Fatalf("expected PID %q but received %q", expected.PID, st.PID)
|
||||||
|
}
|
||||||
|
if st.State != expected.State {
|
||||||
|
t.Fatalf("expected state %q but received %q", expected.State, st.State)
|
||||||
|
}
|
||||||
|
if st.Name != expected.Name {
|
||||||
|
t.Fatalf("expected name %q but received %q", expected.Name, st.Name)
|
||||||
|
}
|
||||||
|
if st.StartTime != expected.StartTime {
|
||||||
|
t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue