Refactor parent processes into types

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2015-02-06 21:12:27 -08:00
parent 21bb5ccc4f
commit 0c1919c427
22 changed files with 471 additions and 568 deletions

View File

@ -94,11 +94,6 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail()
}
if n.VethPrefix != "veth" {
t.Logf("veth prefix should be veth but received %q", n.VethPrefix)
t.Fail()
}
if n.Gateway != "172.17.42.1" {
t.Logf("veth gateway should be 172.17.42.1 but received %q", n.Gateway)
t.Fail()

View File

@ -11,9 +11,6 @@ type Network struct {
// The bridge to use.
Bridge string `json:"bridge,omitempty"`
// Prefix for the veth interfaces.
VethPrefix string `json:"veth_prefix,omitempty"`
// MacAddress contains the MAC address to set on the network interface
MacAddress string `json:"mac_address,omitempty"`
@ -38,6 +35,12 @@ type Network struct {
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
TxQueueLen int `json:"txqueuelen,omitempty"`
// The name of the veth interface on the Host.
VethHost string `json:"veth_host,omitempty"`
// The name of the veth interface created inside the container for the child.
VethChild string `json:"veth_child,omitempty"`
}
// Routes can be specified to create entries in the route table as the container is started

View File

@ -1,32 +1,5 @@
package configs
// State represents a running container's state
type State struct {
// InitPid is the init process id in the parent namespace
InitPid int `json:"init_pid,omitempty"`
// InitStartTime is the init process start time
InitStartTime string `json:"init_start_time,omitempty"`
// Network runtime state.
NetworkState NetworkState `json:"network_state,omitempty"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name.
CgroupPaths map[string]string `json:"cgroup_paths,omitempty"`
Status Status `json:"status,omitempty"`
}
// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers
// Do not depend on it outside of libcontainer.
// TODO: move veth names to config time
type NetworkState struct {
// The name of the veth interface on the Host.
VethHost string `json:"veth_host,omitempty"`
// The name of the veth interface created inside the container for the child.
VethChild string `json:"veth_child,omitempty"`
}
// The status of a container.
type Status int
@ -43,3 +16,15 @@ const (
// The container does not exist.
Destroyed
)
// State represents a running container's state
type State struct {
// InitPid is the init process id in the parent namespace
InitPid int `json:"init_pid,omitempty"`
// InitStartTime is the init process start time
InitStartTime string `json:"init_start_time,omitempty"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name.
CgroupPaths map[string]string `json:"cgroup_paths,omitempty"`
}

View File

@ -26,13 +26,11 @@ func testExecPS(t *testing.T, userns bool) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
if userns {
config.UidMappings = []configs.IDMap{{0, 0, 1000}}
@ -42,13 +40,11 @@ func testExecPS(t *testing.T, userns bool) {
buffers, exitCode, err := runContainer(config, "", "ps")
if err != nil {
t.Fatal(err)
t.Fatalf("%s: %s", buffers, err)
}
if exitCode != 0 {
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
}
lines := strings.Split(buffers.Stdout.String(), "\n")
if len(lines) < 2 {
t.Fatalf("more than one process running for output %q", buffers.Stdout.String())

View File

@ -2,12 +2,11 @@ package integration
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"github.com/docker/libcontainer"
@ -28,27 +27,15 @@ type stdBuffers struct {
Stderr *bytes.Buffer
}
func writeConfig(config *configs.Config) error {
f, err := os.OpenFile(filepath.Join(config.Rootfs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700)
if err != nil {
return err
func (b *stdBuffers) String() string {
s := []string{}
if b.Stderr != nil {
s = append(s, b.Stderr.String())
}
defer f.Close()
return json.NewEncoder(f).Encode(config)
}
func loadConfig() (*configs.Config, error) {
f, err := os.Open(filepath.Join(os.Getenv("data_path"), "container.json"))
if err != nil {
return nil, err
if b.Stdout != nil {
s = append(s, b.Stdout.String())
}
defer f.Close()
var container *configs.Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
return container, nil
return strings.Join(s, "|")
}
// newRootfs creates a new tmp directory and copies the busybox root filesystem
@ -85,12 +72,7 @@ func copyBusybox(dest string) error {
// buffers are returned containing the STDOUT and STDERR output for the run
// along with the exit code and any go error
func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) {
if err := writeConfig(config); err != nil {
return nil, -1, err
}
buffers = newStdBuffers()
process := &libcontainer.Process{
Args: args,
Env: standardEnvironment,

View File

@ -3,18 +3,14 @@
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"syscall"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/system"
"github.com/golang/glog"
)
@ -26,9 +22,9 @@ type linuxContainer struct {
id string
root string
config *configs.Config
state *configs.State
cgroupManager cgroups.Manager
initArgs []string
initProcess parentProcess
}
// ID returns the container's unique ID
@ -42,11 +38,11 @@ func (c *linuxContainer) Config() configs.Config {
}
func (c *linuxContainer) Status() (configs.Status, error) {
if c.state.InitPid <= 0 {
if c.initProcess == nil {
return configs.Destroyed, nil
}
// return Running if the init process is alive
if err := syscall.Kill(c.state.InitPid, 0); err != nil {
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
if err == syscall.ESRCH {
return configs.Destroyed, nil
}
@ -77,8 +73,14 @@ func (c *linuxContainer) Stats() (*Stats, error) {
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newGenericError(err, SystemError)
}
if stats.NetworkStats, err = network.GetStats(&c.state.NetworkState); err != nil {
return stats, newGenericError(err, SystemError)
// TODO: handle stats for multiple veth interfaces
for _, iface := range c.config.Networks {
if iface.Type == "veth" {
if stats.NetworkStats, err = network.GetStats(iface.VethHost); err != nil {
return stats, newGenericError(err, SystemError)
}
break
}
}
return stats, nil
}
@ -88,186 +90,122 @@ func (c *linuxContainer) Start(process *Process) (int, error) {
if err != nil {
return -1, err
}
cmd := c.commandTemplate(process)
if status != configs.Destroyed {
// TODO: (crosbymichael) check out console use for execin
return c.startNewProcess(cmd, process)
}
if err := c.startInitialProcess(cmd, process); err != nil {
doInit := status == configs.Destroyed
parent, err := c.newParentProcess(process, doInit)
if err != nil {
return -1, err
}
return c.state.InitPid, nil
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil {
glog.Warning(err)
}
return -1, err
}
if doInit {
c.initProcess = parent
}
return parent.pid(), nil
}
// commandTemplate creates a template *exec.Cmd. It uses the init arguments provided
// to the factory and attaches IO to the process.
func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd {
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
parentPipe, childPipe, err := newPipe()
if err != nil {
return nil, err
}
cmd, err := c.commandTemplate(p, childPipe)
if err != nil {
return nil, err
}
if !doInit {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil
}
return c.newInitProcess(p, cmd, parentPipe, childPipe), nil
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
cmd.Stdin = process.Stdin
cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr
cmd.Env = process.Env
cmd.Stdin = p.Stdin
cmd.Stdout = p.Stdout
cmd.Stderr = p.Stderr
cmd.Dir = c.config.Rootfs
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.ExtraFiles = []*os.File{childPipe}
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
return cmd
return cmd, nil
}
// startNewProcess adds another process to an already running container
func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, process *Process) (int, error) {
glog.Info("start new container process")
parent, child, err := c.newInitPipe()
if err != nil {
return -1, err
}
defer parent.Close()
cmd.ExtraFiles = []*os.File{child}
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid), "_LIBCONTAINER_INITTYPE=setns")
// start the command
err = cmd.Start()
child.Close()
if err != nil {
return -1, err
}
s, err := cmd.Process.Wait()
if err != nil {
return -1, err
}
if !s.Success() {
return -1, &exec.ExitError{s}
}
decoder := json.NewDecoder(parent)
var pid *pid
if err := decoder.Decode(&pid); err != nil {
return -1, err
}
p, err := os.FindProcess(pid.Pid)
if err != nil {
return -1, err
}
terminate := func(terr error) (int, error) {
// TODO: log the errors for kill and wait
if err := p.Kill(); err != nil {
glog.Warning(err)
}
if _, err := p.Wait(); err != nil {
glog.Warning(err)
}
return -1, terr
}
if err := c.enterCgroups(pid.Pid); err != nil {
return terminate(err)
}
if err := json.NewEncoder(parent).Encode(&initConfig{
Config: c.config,
Args: process.Args,
Env: process.Env,
}); err != nil {
return terminate(err)
}
return pid.Pid, nil
}
// startInitialProcess starts PID 1 for the container.
func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, process *Process) error {
glog.Info("starting container initial process")
// create a pipe so that we can syncronize with the namespaced process and
// pass the state and configuration to the child process
parent, child, err := c.newInitPipe()
if err != nil {
return err
}
defer parent.Close()
cmd.ExtraFiles = []*os.File{child}
cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags()
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard")
// if the container is configured to use user namespaces we have to setup the
// uid:gid mapping on the command.
if c.config.Namespaces.Contains(configs.NEWUSER) {
addUidGidMappings(cmd.SysProcAttr, c.config)
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *initProcess {
cloneFlags := c.config.Namespaces.CloneFlags()
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
c.addUidGidMappings(cmd.SysProcAttr)
// Default to root user when user namespaces are enabled.
if cmd.SysProcAttr.Credential == nil {
cmd.SysProcAttr.Credential = &syscall.Credential{}
}
}
err = cmd.Start()
child.Close()
cmd.SysProcAttr.Cloneflags = cloneFlags
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard")
return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
}
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess {
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
"_LIBCONTAINER_INITTYPE=setns",
)
// TODO: set on container for process management
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
childPipe: childPipe,
parentPipe: parentPipe,
config: c.newInitConfig(p),
}
}
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
return &initConfig{
Config: c.config,
Args: process.Args,
Env: process.Env,
}
}
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) {
if c.config.UidMappings != nil {
sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings))
for i, um := range c.config.UidMappings {
sys.UidMappings[i].ContainerID = um.ContainerID
sys.UidMappings[i].HostID = um.HostID
sys.UidMappings[i].Size = um.Size
}
}
if c.config.GidMappings != nil {
sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings))
for i, gm := range c.config.GidMappings {
sys.GidMappings[i].ContainerID = gm.ContainerID
sys.GidMappings[i].HostID = gm.HostID
sys.GidMappings[i].Size = gm.Size
}
}
}
func newPipe() (parent *os.File, child *os.File, err error) {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return newGenericError(err, SystemError)
return nil, nil, err
}
wait := func() (*os.ProcessState, error) {
ps, err := cmd.Process.Wait()
if err != nil {
return nil, newGenericError(err, SystemError)
}
// we should kill all processes in cgroup when init is died if we use
// host PID namespace
if !c.config.Namespaces.Contains(configs.NEWPID) {
c.killAllPids()
}
return ps, nil
}
terminate := func(terr error) error {
// TODO: log the errors for kill and wait
cmd.Process.Kill()
wait()
return terr
}
started, err := system.GetProcessStartTime(cmd.Process.Pid)
if err != nil {
return terminate(err)
}
// Do this before syncing with child so that no children
// can escape the cgroup
if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil {
return terminate(err)
}
defer func() {
if err != nil {
c.cgroupManager.Destroy()
}
}()
var networkState configs.NetworkState
if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil {
return terminate(err)
}
iconfig := &initConfig{
Args: process.Args,
Config: c.config,
Env: process.Env,
NetworkState: &networkState,
}
// Start the setup process to setup the init process
if c.config.Namespaces.Contains(configs.NEWUSER) {
if err = c.executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil {
return terminate(err)
}
}
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(parent).Encode(iconfig); err != nil {
return terminate(err)
}
// shutdown writes for the parent side of the pipe
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
return terminate(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *initError
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
return terminate(err)
}
if ierr != nil {
return terminate(ierr)
}
c.state.InitPid = cmd.Process.Pid
c.state.InitStartTime = started
c.state.NetworkState = networkState
c.state.CgroupPaths = c.cgroupManager.GetPaths()
return nil
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}
func (c *linuxContainer) Destroy() error {
@ -291,172 +229,11 @@ func (c *linuxContainer) Resume() error {
}
func (c *linuxContainer) Signal(signal os.Signal) error {
glog.Infof("sending signal %d to pid %d", signal, c.state.InitPid)
glog.Infof("sending signal %d to pid %d", signal, c.initProcess.pid())
panic("not implemented")
}
// TODO: rename to be more descriptive
func (c *linuxContainer) OOM() (<-chan struct{}, error) {
return NotifyOnOOM(c.state)
}
func (c *linuxContainer) updateStateFile() error {
fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename))
f, err := os.Create(fnew)
if err != nil {
return newGenericError(err, SystemError)
}
defer f.Close()
if err := json.NewEncoder(f).Encode(c.state); err != nil {
f.Close()
os.Remove(fnew)
return newGenericError(err, SystemError)
}
fname := filepath.Join(c.root, stateFilename)
if err := os.Rename(fnew, fname); err != nil {
return newGenericError(err, SystemError)
}
return nil
}
// New returns a newly initialized Pipe for communication between processes
func (c *linuxContainer) newInitPipe() (parent *os.File, child *os.File, err error) {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) {
if container.UidMappings != nil {
sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings))
for i, um := range container.UidMappings {
sys.UidMappings[i].ContainerID = um.ContainerID
sys.UidMappings[i].HostID = um.HostID
sys.UidMappings[i].Size = um.Size
}
}
if container.GidMappings != nil {
sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings))
for i, gm := range container.GidMappings {
sys.GidMappings[i].ContainerID = gm.ContainerID
sys.GidMappings[i].HostID = gm.HostID
sys.GidMappings[i].Size = gm.Size
}
}
}
// killAllPids iterates over all of the container's processes
// sending a SIGKILL to each process.
func (c *linuxContainer) killAllPids() error {
glog.Info("killing all processes in container")
var procs []*os.Process
c.cgroupManager.Freeze(configs.Frozen)
pids, err := c.cgroupManager.GetPids()
if err != nil {
return err
}
for _, pid := range pids {
// TODO: log err without aborting if we are unable to find
// a single PID
if p, err := os.FindProcess(pid); err == nil {
procs = append(procs, p)
p.Kill()
}
}
c.cgroupManager.Freeze(configs.Thawed)
for _, p := range procs {
p.Wait()
}
return err
}
// initializeNetworking creates the container's network stack outside of the namespace and moves
// interfaces into the container's net namespaces if necessary
func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error {
glog.Info("initailzing container's network stack")
for _, config := range c.config.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.Create(config, nspid, networkState); err != nil {
return err
}
}
return nil
}
func (c *linuxContainer) executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error {
command := exec.Command(args[0], args[1:]...)
parent, child, err := c.newInitPipe()
if err != nil {
return err
}
defer parent.Close()
command.ExtraFiles = []*os.File{child}
command.Dir = container.Rootfs
command.Env = append(command.Env,
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid),
fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_sidecar"))
err = command.Start()
child.Close()
if err != nil {
return err
}
s, err := command.Process.Wait()
if err != nil {
return err
}
if !s.Success() {
return &exec.ExitError{s}
}
decoder := json.NewDecoder(parent)
var pid *pid
if err := decoder.Decode(&pid); err != nil {
return err
}
p, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
terminate := func(terr error) error {
// TODO: log the errors for kill and wait
p.Kill()
p.Wait()
return terr
}
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(parent).Encode(process); err != nil {
return terminate(err)
}
// shutdown writes for the parent side of the pipe
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
return terminate(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *initError
if err := decoder.Decode(&ierr); err != nil && err != io.EOF {
return terminate(err)
}
if ierr != nil {
return ierr
}
s, err = p.Wait()
if err != nil {
return err
}
if !s.Success() {
return &exec.ExitError{s}
}
return nil
}
func (c *linuxContainer) enterCgroups(pid int) error {
return cgroups.EnterPid(c.state.CgroupPaths, pid)
return NotifyOnOOM(c.cgroupManager.GetPaths())
}

View File

@ -69,7 +69,6 @@ func TestGetContainerStats(t *testing.T) {
},
},
},
state: &configs.State{},
}
stats, err := container.Stats()

View File

@ -87,7 +87,6 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err
root: containerRoot,
config: config,
initArgs: l.initArgs,
state: &configs.State{},
cgroupManager: cgroups.NewCgroupManager(config.Cgroups),
}, nil
}
@ -107,15 +106,19 @@ func (l *linuxFactory) Load(id string) (Container, error) {
if err != nil {
return nil, err
}
r := &restoredProcess{
processPid: state.InitPid,
processStartTime: state.InitStartTime,
}
cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths)
glog.Infof("using %s as cgroup manager", cgroupManager)
return &linuxContainer{
initProcess: r,
id: id,
root: containerRoot,
config: config,
state: state,
cgroupManager: cgroupManager,
initArgs: l.initArgs,
cgroupManager: cgroupManager,
root: containerRoot,
}, nil
}
@ -193,3 +196,31 @@ func (l *linuxFactory) validateID(id string) error {
}
return nil
}
// restoredProcess represents a process where the calling process may or may not be
// the parent process. This process is created when a factory loads a container from
// a persisted state.
type restoredProcess struct {
processPid int
processStartTime string
}
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *restoredProcess) pid() int {
return p.processPid
}
func (p *restoredProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
func (p *restoredProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *restoredProcess) startTime() (string, error) {
return p.processStartTime, nil
}

View File

@ -29,21 +29,17 @@ func TestFactoryNew(t *testing.T) {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, nil)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*linuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.root)
}
@ -55,17 +51,14 @@ func TestFactoryLoadNotExists(t *testing.T) {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, nil)
if err != nil {
t.Fatal(err)
}
_, err = factory.Load("nocontainer")
if err == nil {
t.Fatal("expected nil error loading non-existing container")
}
lerr, ok := err.(Error)
if !ok {
t.Fatal("expected libcontainer error type")
@ -81,7 +74,6 @@ func TestFactoryLoadContainer(t *testing.T) {
t.Fatal(err)
}
defer os.RemoveAll(root)
// setup default container config and state for mocking
var (
id = "1"
@ -92,7 +84,6 @@ func TestFactoryLoadContainer(t *testing.T) {
InitPid: 1024,
}
)
if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil {
t.Fatal(err)
}
@ -102,34 +93,27 @@ func TestFactoryLoadContainer(t *testing.T) {
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
t.Fatal(err)
}
factory, err := New(root, nil)
if err != nil {
t.Fatal(err)
}
container, err := factory.Load(id)
if err != nil {
t.Fatal(err)
}
if container.ID() != id {
t.Fatalf("expected container id %q but received %q", id, container.ID())
}
config := container.Config()
if config.Rootfs != expectedConfig.Rootfs {
t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs)
}
lcontainer, ok := container.(*linuxContainer)
if !ok {
t.Fatal("expected linux container on linux based systems")
}
if lcontainer.state.InitPid != expectedState.InitPid {
t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.state.InitPid)
if lcontainer.initProcess.pid() != expectedState.InitPid {
t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.initProcess.pid())
}
}

View File

@ -21,18 +21,17 @@ import (
type initType string
const (
initSetns initType = "setns"
initStandard initType = "standard"
initUserns initType = "userns"
initUsernsSideCar initType = "userns_sidecar"
initSetns initType = "setns"
initStandard initType = "standard"
initUserns initType = "userns"
initUsernsSetup initType = "userns_setup"
)
// Process is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args,omitempty"`
Env []string `json:"env,omitempty"`
Config *configs.Config `json:"config,omitempty"`
NetworkState *configs.NetworkState `json:"network_state,omitempty"`
Args []string `json:"args,omitempty"`
Env []string `json:"env,omitempty"`
Config *configs.Config `json:"config,omitempty"`
}
type initer interface {
@ -60,10 +59,9 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
env: config.Env,
config: config.Config,
}, nil
case initUsernsSideCar:
case initUsernsSetup:
return &linuxUsernsSideCar{
config: config.Config,
network: config.NetworkState,
config: config.Config,
}, nil
case initStandard:
return &linuxStandardInit{
@ -186,13 +184,13 @@ func setupUser(config *configs.Config) error {
// setupVethNetwork uses the Network config if it is not nil to initialize
// the new veth interface inside the container for use by changing the name to eth0
// setting the MTU and IP address along with the default gateway
func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error {
func setupNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
err1 := strategy.Initialize(config, networkState)
err1 := strategy.Initialize(config)
if err1 != nil {
return err1
}

262
linux_process.go Normal file
View File

@ -0,0 +1,262 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"syscall"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/system"
"github.com/golang/glog"
)
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
// start starts the process execution.
start() error
// send a SIGKILL to the process and wait for the exit.
terminate() error
// wait waits on the process returning the process state.
wait() (*os.ProcessState, error)
// startTime return's the process start time.
startTime() (string, error)
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
forkedProcess *os.Process
cgroupPaths map[string]string
config *initConfig
}
func (p *setnsProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
if p.forkedProcess, err = p.execSetns(); err != nil {
return err
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.forkedProcess.Pid); err != nil {
return err
}
}
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
return err
}
return nil
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
func (p *setnsProcess) execSetns() (*os.Process, error) {
err := p.cmd.Start()
p.childPipe.Close()
if err != nil {
return nil, err
}
status, err := p.cmd.Process.Wait()
if err != nil {
return nil, err
}
if !status.Success() {
return nil, &exec.ExitError{status}
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
return nil, err
}
return os.FindProcess(pid.Pid)
}
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
// avoid the process becomming a zombie.
func (p *setnsProcess) terminate() error {
if p.forkedProcess == nil {
return nil
}
err := p.forkedProcess.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *setnsProcess) wait() (*os.ProcessState, error) {
return p.forkedProcess.Wait()
}
func (p *setnsProcess) pid() int {
return p.forkedProcess.Pid
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
}
func (p *initProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *initProcess) start() error {
defer p.parentPipe.Close()
err := p.cmd.Start()
p.childPipe.Close()
if err != nil {
return err
}
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
return err
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return err
}
// Start the setup process to setup the init process
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 {
parent, err := p.newUsernsSetupProcess()
if err != nil {
return err
}
if err := parent.start(); err != nil {
if err := parent.terminate(); err != nil {
glog.Warning(err)
}
return err
}
if _, err := parent.wait(); err != nil {
return err
}
}
if err := p.sendConfig(); err != nil {
return err
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *initError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return err
}
if ierr != nil {
return ierr
}
return nil
}
func (p *initProcess) wait() (*os.ProcessState, error) {
state, err := p.cmd.Process.Wait()
if err != nil {
return nil, err
}
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 {
// TODO: this will not work for the success path because libcontainer
// does not wait on the process. This needs to be moved to destroy or add a Wait()
// method back onto the container.
var procs []*os.Process
p.manager.Freeze(configs.Frozen)
pids, err := p.manager.GetPids()
if err != nil {
return nil, err
}
for _, pid := range pids {
// TODO: log err without aborting if we are unable to find
// a single PID
if p, err := os.FindProcess(pid); err == nil {
procs = append(procs, p)
p.Kill()
}
}
p.manager.Freeze(configs.Thawed)
for _, p := range procs {
p.Wait()
}
}
return state, nil
}
func (p *initProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *initProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *initProcess) sendConfig() error {
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
return err
}
// shutdown writes for the parent side of the pipe
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
}
func (p *initProcess) createNetworkInterfaces() error {
for _, config := range p.config.Config.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.Create(config, p.pid()); err != nil {
return err
}
}
return nil
}
func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) {
parentPipe, childPipe, err := newPipe()
if err != nil {
return nil, err
}
cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...)
cmd.ExtraFiles = []*os.File{childPipe}
cmd.Dir = p.cmd.Dir
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", p.pid()),
fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_setup"),
)
return &setnsProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
config: p.config,
}, nil
}

View File

@ -38,7 +38,7 @@ func (l *linuxStandardInit) Init() error {
return err
}
}
if err := setupNetwork(l.config.Config, l.config.NetworkState); err != nil {
if err := setupNetwork(l.config.Config); err != nil {
return err
}
if err := setupRoute(l.config.Config); err != nil {

View File

@ -15,12 +15,11 @@ import (
// except the user namespace, so it run as root in the root user namespace
// to perform these operations.
type linuxUsernsSideCar struct {
config *configs.Config
network *configs.NetworkState
config *configs.Config
}
func (l *linuxUsernsSideCar) Init() error {
if err := setupNetwork(l.config, l.network); err != nil {
if err := setupNetwork(l.config); err != nil {
return err
}
if err := setupRoute(l.config); err != nil {

View File

@ -12,11 +12,11 @@ import (
type Loopback struct {
}
func (l *Loopback) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error {
func (l *Loopback) Create(n *configs.Network, nspid int) error {
return nil
}
func (l *Loopback) Initialize(config *configs.Network, networkState *configs.NetworkState) error {
func (l *Loopback) Initialize(config *configs.Network) error {
// Do not set the MTU on the loopback interface - use the default.
if err := InterfaceUp("lo"); err != nil {
return fmt.Errorf("lo up %s", err)

View File

@ -5,8 +5,6 @@ import (
"path/filepath"
"strconv"
"strings"
"github.com/docker/libcontainer/configs"
)
type NetworkStats struct {
@ -21,14 +19,12 @@ type NetworkStats struct {
}
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) {
func GetStats(vethHostInterface string) (*NetworkStats, error) {
// This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer.
if networkState.VethHost == "" {
if vethHostInterface == "" {
return &NetworkStats{}, nil
}
out := &NetworkStats{}
type netStatsPair struct {
// Where to write the output.
Out *uint64
@ -36,7 +32,6 @@ func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) {
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
@ -50,13 +45,12 @@ func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) {
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(networkState.VethHost, netStat.File)
data, err := readSysfsNetworkStats(vethHostInterface, netStat.File)
if err != nil {
return nil, err
}
*(netStat.Out) = data
}
return out, nil
}

View File

@ -20,8 +20,8 @@ var strategies = map[string]NetworkStrategy{
// NetworkStrategy represents a specific network configuration for
// a container's networking stack
type NetworkStrategy interface {
Create(*configs.Network, int, *configs.NetworkState) error
Initialize(*configs.Network, *configs.NetworkState) error
Create(*configs.Network, int) error
Initialize(*configs.Network) error
}
// GetStrategy returns the specific network strategy for the

View File

@ -1 +0,0 @@
package network

View File

@ -6,8 +6,6 @@ import (
"fmt"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/utils"
)
// Veth is a network strategy that uses a bridge and creates
@ -18,42 +16,32 @@ type Veth struct {
const defaultDevice = "eth0"
func (v *Veth) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error {
func (v *Veth) Create(n *configs.Network, nspid int) error {
var (
bridge = n.Bridge
prefix = n.VethPrefix
txQueueLen = n.TxQueueLen
)
if bridge == "" {
return fmt.Errorf("bridge is not specified")
}
if prefix == "" {
return fmt.Errorf("veth prefix is not specified")
}
name1, name2, err := createVethPair(prefix, txQueueLen)
if err != nil {
if err := CreateVethPair(n.VethHost, n.VethChild, txQueueLen); err != nil {
return err
}
if err := SetInterfaceMaster(name1, bridge); err != nil {
if err := SetInterfaceMaster(n.VethHost, bridge); err != nil {
return err
}
if err := SetMtu(name1, n.Mtu); err != nil {
if err := SetMtu(n.VethHost, n.Mtu); err != nil {
return err
}
if err := InterfaceUp(name1); err != nil {
if err := InterfaceUp(n.VethHost); err != nil {
return err
}
if err := SetInterfaceInNamespacePid(name2, nspid); err != nil {
return err
}
networkState.VethHost = name1
networkState.VethChild = name2
return SetInterfaceInNamespacePid(n.VethChild, nspid)
return nil
}
func (v *Veth) Initialize(config *configs.Network, networkState *configs.NetworkState) error {
var vethChild = networkState.VethChild
func (v *Veth) Initialize(config *configs.Network) error {
vethChild := config.VethChild
if vethChild == "" {
return fmt.Errorf("vethChild is not specified")
}
@ -95,29 +83,3 @@ func (v *Veth) Initialize(config *configs.Network, networkState *configs.Network
}
return nil
}
// createVethPair will automatically generage two random names for
// the veth pair and ensure that they have been created
func createVethPair(prefix string, txQueueLen int) (name1 string, name2 string, err error) {
for i := 0; i < 10; i++ {
if name1, err = utils.GenerateRandomName(prefix, 7); err != nil {
return
}
if name2, err = utils.GenerateRandomName(prefix, 7); err != nil {
return
}
if err = CreateVethPair(name1, name2, txQueueLen); err != nil {
if err == netlink.ErrInterfaceExists {
continue
}
return
}
break
}
return
}

View File

@ -1,53 +0,0 @@
// +build linux
package network
import (
"testing"
"github.com/docker/libcontainer/netlink"
)
func TestGenerateVethNames(t *testing.T) {
if testing.Short() {
return
}
prefix := "veth"
name1, name2, err := createVethPair(prefix, 0)
if err != nil {
t.Fatal(err)
}
if name1 == "" {
t.Fatal("name1 should not be empty")
}
if name2 == "" {
t.Fatal("name2 should not be empty")
}
}
func TestCreateDuplicateVethPair(t *testing.T) {
if testing.Short() {
return
}
prefix := "veth"
name1, name2, err := createVethPair(prefix, 0)
if err != nil {
t.Fatal(err)
}
// retry to create the name interfaces and make sure that we get the correct error
err = CreateVethPair(name1, name2, 0)
if err == nil {
t.Fatal("expected error to not be nil with duplicate interface")
}
if err != netlink.ErrInterfaceExists {
t.Fatalf("expected error to be ErrInterfaceExists but received %q", err)
}
}

View File

@ -4,7 +4,6 @@ package libcontainer
import (
"fmt"
"github.com/docker/libcontainer/configs"
"io/ioutil"
"os"
"path/filepath"
@ -16,8 +15,8 @@ const oomCgroupName = "memory"
// NotifyOnOOM returns channel on which you can expect event about OOM,
// if process died without OOM this channel will be closed.
// s is current *libcontainer.State for container.
func NotifyOnOOM(s *configs.State) (<-chan struct{}, error) {
dir := s.CgroupPaths[oomCgroupName]
func NotifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
}

View File

@ -11,8 +11,6 @@ import (
"syscall"
"testing"
"time"
"github.com/docker/libcontainer/configs"
)
func TestNotifyOnOOM(t *testing.T) {
@ -29,12 +27,10 @@ func TestNotifyOnOOM(t *testing.T) {
t.Fatal(err)
}
var eventFd, oomControlFd int
st := &configs.State{
CgroupPaths: map[string]string{
"memory": memoryPath,
},
paths := map[string]string{
"memory": memoryPath,
}
ooms, err := NotifyOnOOM(st)
ooms, err := NotifyOnOOM(paths)
if err != nil {
t.Fatal("expected no error, got:", err)
}

View File

@ -1,9 +1,6 @@
package libcontainer
import (
"io"
"os/exec"
)
import "io"
// Process specifies the configuration and IO for a process inside
// a container.
@ -22,6 +19,4 @@ type Process struct {
// Stderr is a pointer to a writer which receives the standard error stream.
Stderr io.Writer
cmd *exec.Cmd
}