Sets custom namespaces for init processes
An init process can join other namespaces (pidns, ipc etc.). This leverages C code defined in the nsenter package to spawn a process with the correct namespaces and clone if necessary. This moves all setns- and cloneflags-related code to the nsenter layer, which means that we don't use Go os/exec to create a process with cloneflags and set uid/gid_map or setgroups anymore. The necessary data is passed from Go to C using a netlink binary-encoding format. With this change, setns and init processes are almost the same, which brings some opportunity for refactoring. Signed-off-by: Daniel, Dao Quang Minh <dqminh89@gmail.com> [mickael.laventure@docker.com: adapted to apply on master @ d97d5e] Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@docker.com>
This commit is contained in:
parent
d6bf4049f8
commit
42d5d04801
|
@ -23,6 +23,7 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/criurpc"
|
"github.com/opencontainers/runc/libcontainer/criurpc"
|
||||||
"github.com/opencontainers/runc/libcontainer/utils"
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
"github.com/syndtr/gocapability/capability"
|
||||||
"github.com/vishvananda/netlink/nl"
|
"github.com/vishvananda/netlink/nl"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -268,37 +269,40 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||||
t := "_LIBCONTAINER_INITTYPE=" + string(initStandard)
|
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||||
cloneFlags := c.config.Namespaces.CloneFlags()
|
nsMaps := make(map[configs.NamespaceType]string)
|
||||||
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
|
for _, ns := range c.config.Namespaces {
|
||||||
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
|
if ns.Path != "" {
|
||||||
// user mappings are not supported
|
nsMaps[ns.Type] = ns.Path
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
enableSetgroups(cmd.SysProcAttr)
|
|
||||||
// Default to root user when user namespaces are enabled.
|
|
||||||
if cmd.SysProcAttr.Credential == nil {
|
|
||||||
cmd.SysProcAttr.Credential = &syscall.Credential{}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cmd.Env = append(cmd.Env, t)
|
_, sharePidns := nsMaps[configs.NEWPID]
|
||||||
cmd.SysProcAttr.Cloneflags = cloneFlags
|
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
return &initProcess{
|
return &initProcess{
|
||||||
cmd: cmd,
|
cmd: cmd,
|
||||||
childPipe: childPipe,
|
childPipe: childPipe,
|
||||||
parentPipe: parentPipe,
|
parentPipe: parentPipe,
|
||||||
manager: c.cgroupManager,
|
manager: c.cgroupManager,
|
||||||
config: c.newInitConfig(p),
|
config: c.newInitConfig(p),
|
||||||
container: c,
|
container: c,
|
||||||
process: p,
|
process: p,
|
||||||
|
bootstrapData: data,
|
||||||
|
sharePidns: sharePidns,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||||
|
state, err := c.currentState()
|
||||||
|
if err != nil {
|
||||||
|
return nil, newSystemError(err)
|
||||||
|
}
|
||||||
// for setns process, we dont have to set cloneflags as the process namespaces
|
// for setns process, we dont have to set cloneflags as the process namespaces
|
||||||
// will only be set via setns syscall
|
// will only be set via setns syscall
|
||||||
data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath)
|
data, err := c.bootstrapData(0, state.NamespacePaths, p.consolePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -1069,28 +1073,6 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||||
return state, nil
|
return state, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// bootstrapData encodes the necessary data in netlink binary format as a io.Reader.
|
|
||||||
// Consumer can write the data to a bootstrap program such as one that uses
|
|
||||||
// nsenter package to bootstrap the container's init process correctly, i.e. with
|
|
||||||
// correct namespaces, uid/gid mapping etc.
|
|
||||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) {
|
|
||||||
// create the netlink message
|
|
||||||
r := nl.NewNetlinkRequest(int(InitMsg), 0)
|
|
||||||
// write pid
|
|
||||||
r.AddData(&Int32msg{
|
|
||||||
Type: PidAttr,
|
|
||||||
Value: uint32(pid),
|
|
||||||
})
|
|
||||||
// write console path
|
|
||||||
if consolePath != "" {
|
|
||||||
r.AddData(&Bytemsg{
|
|
||||||
Type: ConsolePathAttr,
|
|
||||||
Value: []byte(consolePath),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return bytes.NewReader(r.Serialize()), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// orderNamespacePaths sorts namespace paths into a list of paths that we
|
// orderNamespacePaths sorts namespace paths into a list of paths that we
|
||||||
// can setns in order.
|
// can setns in order.
|
||||||
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
||||||
|
@ -1126,3 +1108,92 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
|
||||||
}
|
}
|
||||||
return paths, nil
|
return paths, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
|
||||||
|
data := bytes.NewBuffer(nil)
|
||||||
|
for _, im := range idMap {
|
||||||
|
line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
|
||||||
|
if _, err := data.WriteString(line); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return data.Bytes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bootstrapData encodes the necessary data in netlink binary format
|
||||||
|
// as a io.Reader.
|
||||||
|
// Consumer can write the data to a bootstrap program
|
||||||
|
// such as one that uses nsenter package to bootstrap the container's
|
||||||
|
// init process correctly, i.e. with correct namespaces, uid/gid
|
||||||
|
// mapping etc.
|
||||||
|
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) {
|
||||||
|
// create the netlink message
|
||||||
|
r := nl.NewNetlinkRequest(int(InitMsg), 0)
|
||||||
|
|
||||||
|
// write cloneFlags
|
||||||
|
r.AddData(&Int32msg{
|
||||||
|
Type: CloneFlagsAttr,
|
||||||
|
Value: uint32(cloneFlags),
|
||||||
|
})
|
||||||
|
|
||||||
|
// write console path
|
||||||
|
if consolePath != "" {
|
||||||
|
r.AddData(&Bytemsg{
|
||||||
|
Type: ConsolePathAttr,
|
||||||
|
Value: []byte(consolePath),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// write custom namespace paths
|
||||||
|
if len(nsMaps) > 0 {
|
||||||
|
nsPaths, err := c.orderNamespacePaths(nsMaps)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
r.AddData(&Bytemsg{
|
||||||
|
Type: NsPathsAttr,
|
||||||
|
Value: []byte(strings.Join(nsPaths, ",")),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// write namespace paths only when we are not joining an existing user ns
|
||||||
|
_, joinExistingUser := nsMaps[configs.NEWUSER]
|
||||||
|
if !joinExistingUser {
|
||||||
|
// write uid mappings
|
||||||
|
if len(c.config.UidMappings) > 0 {
|
||||||
|
b, err := encodeIDMapping(c.config.UidMappings)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
r.AddData(&Bytemsg{
|
||||||
|
Type: UidmapAttr,
|
||||||
|
Value: b,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// write gid mappings
|
||||||
|
if len(c.config.GidMappings) > 0 {
|
||||||
|
b, err := encodeIDMapping(c.config.UidMappings)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
r.AddData(&Bytemsg{
|
||||||
|
Type: GidmapAttr,
|
||||||
|
Value: b,
|
||||||
|
})
|
||||||
|
// check if we have CAP_SETGID to setgroup properly
|
||||||
|
pid, err := capability.NewPid(os.Getpid())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
|
||||||
|
r.AddData(&Boolmsg{
|
||||||
|
Type: SetgroupAttr,
|
||||||
|
Value: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytes.NewReader(r.Serialize()), nil
|
||||||
|
}
|
||||||
|
|
|
@ -185,25 +185,6 @@ func syncParentHooks(pipe io.ReadWriter) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
|
||||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
|
||||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
|
||||||
for _, ns := range namespaces {
|
|
||||||
if ns.Path != "" {
|
|
||||||
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
|
|
||||||
f.Close()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||||
func setupUser(config *initConfig) error {
|
func setupUser(config *initConfig) error {
|
||||||
// Set up defaults.
|
// Set up defaults.
|
||||||
|
|
|
@ -12,8 +12,12 @@ import (
|
||||||
// The number is randomly chosen to not conflict with known netlink types
|
// The number is randomly chosen to not conflict with known netlink types
|
||||||
const (
|
const (
|
||||||
InitMsg uint16 = 62000
|
InitMsg uint16 = 62000
|
||||||
PidAttr uint16 = 27281
|
CloneFlagsAttr uint16 = 27281
|
||||||
ConsolePathAttr uint16 = 27282
|
ConsolePathAttr uint16 = 27282
|
||||||
|
NsPathsAttr uint16 = 27283
|
||||||
|
UidmapAttr uint16 = 27284
|
||||||
|
GidmapAttr uint16 = 27285
|
||||||
|
SetgroupAttr uint16 = 27286
|
||||||
// When syscall.NLA_HDRLEN is in gccgo, take this out.
|
// When syscall.NLA_HDRLEN is in gccgo, take this out.
|
||||||
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
|
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
|
||||||
)
|
)
|
||||||
|
@ -60,3 +64,25 @@ func (msg *Bytemsg) Serialize() []byte {
|
||||||
func (msg *Bytemsg) Len() int {
|
func (msg *Bytemsg) Len() int {
|
||||||
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
|
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Boolmsg struct {
|
||||||
|
Type uint16
|
||||||
|
Value bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (msg *Boolmsg) Serialize() []byte {
|
||||||
|
buf := make([]byte, msg.Len())
|
||||||
|
native := nl.NativeEndian()
|
||||||
|
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||||
|
native.PutUint16(buf[2:4], msg.Type)
|
||||||
|
if msg.Value {
|
||||||
|
buf[4] = 1
|
||||||
|
} else {
|
||||||
|
buf[4] = 0
|
||||||
|
}
|
||||||
|
return buf
|
||||||
|
}
|
||||||
|
|
||||||
|
func (msg *Boolmsg) Len() int {
|
||||||
|
return syscall_NLA_HDRLEN + 1
|
||||||
|
}
|
||||||
|
|
|
@ -3,7 +3,9 @@ package nsenter
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -18,35 +20,51 @@ type pid struct {
|
||||||
Pid int `json:"Pid"`
|
Pid int `json:"Pid"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNsenterAlivePid(t *testing.T) {
|
func TestNsenterValidPaths(t *testing.T) {
|
||||||
args := []string{"nsenter-exec"}
|
args := []string{"nsenter-exec"}
|
||||||
parent, child, err := newPipe()
|
parent, child, err := newPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to create pipe %v", err)
|
t.Fatalf("failed to create pipe %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespaces := []string{
|
||||||
|
// join pid ns of the current process
|
||||||
|
fmt.Sprintf("/proc/%d/ns/pid", os.Getpid()),
|
||||||
|
}
|
||||||
cmd := &exec.Cmd{
|
cmd := &exec.Cmd{
|
||||||
Path: os.Args[0],
|
Path: os.Args[0],
|
||||||
Args: args,
|
Args: args,
|
||||||
ExtraFiles: []*os.File{child},
|
ExtraFiles: []*os.File{child},
|
||||||
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
|
Env: []string{"_LIBCONTAINER_INITPIPE=3"},
|
||||||
|
Stdout: os.Stdout,
|
||||||
|
Stderr: os.Stderr,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
t.Fatalf("nsenter failed to start %v", err)
|
t.Fatalf("nsenter failed to start %v", err)
|
||||||
}
|
}
|
||||||
|
// write cloneFlags
|
||||||
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
||||||
r.AddData(&libcontainer.Int32msg{
|
r.AddData(&libcontainer.Int32msg{
|
||||||
Type: libcontainer.PidAttr,
|
Type: libcontainer.CloneFlagsAttr,
|
||||||
Value: uint32(os.Getpid()),
|
Value: uint32(syscall.CLONE_NEWNET),
|
||||||
|
})
|
||||||
|
r.AddData(&libcontainer.Bytemsg{
|
||||||
|
Type: libcontainer.NsPathsAttr,
|
||||||
|
Value: []byte(strings.Join(namespaces, ",")),
|
||||||
})
|
})
|
||||||
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
decoder := json.NewDecoder(parent)
|
decoder := json.NewDecoder(parent)
|
||||||
var pid *pid
|
var pid *pid
|
||||||
|
|
||||||
if err := decoder.Decode(&pid); err != nil {
|
if err := decoder.Decode(&pid); err != nil {
|
||||||
|
dir, _ := ioutil.ReadDir(fmt.Sprintf("/proc/%d/ns", os.Getpid()))
|
||||||
|
for _, d := range dir {
|
||||||
|
t.Log(d.Name())
|
||||||
|
}
|
||||||
t.Fatalf("%v", err)
|
t.Fatalf("%v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,70 +78,43 @@ func TestNsenterAlivePid(t *testing.T) {
|
||||||
p.Wait()
|
p.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNsenterInvalidPid(t *testing.T) {
|
func TestNsenterInvalidPaths(t *testing.T) {
|
||||||
args := []string{"nsenter-exec"}
|
args := []string{"nsenter-exec"}
|
||||||
parent, child, err := newPipe()
|
parent, child, err := newPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to create pipe %v", err)
|
t.Fatalf("failed to create pipe %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespaces := []string{
|
||||||
|
// join pid ns of the current process
|
||||||
|
fmt.Sprintf("/proc/%d/ns/pid", -1),
|
||||||
|
}
|
||||||
cmd := &exec.Cmd{
|
cmd := &exec.Cmd{
|
||||||
Path: os.Args[0],
|
Path: os.Args[0],
|
||||||
Args: args,
|
Args: args,
|
||||||
ExtraFiles: []*os.File{child},
|
ExtraFiles: []*os.File{child},
|
||||||
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
|
Env: []string{"_LIBCONTAINER_INITPIPE=3"},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
t.Fatal("nsenter exits with a zero exit status")
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
// write cloneFlags
|
||||||
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
||||||
r.AddData(&libcontainer.Int32msg{
|
r.AddData(&libcontainer.Int32msg{
|
||||||
Type: libcontainer.PidAttr,
|
Type: libcontainer.CloneFlagsAttr,
|
||||||
Value: 0,
|
Value: uint32(syscall.CLONE_NEWNET),
|
||||||
|
})
|
||||||
|
r.AddData(&libcontainer.Bytemsg{
|
||||||
|
Type: libcontainer.NsPathsAttr,
|
||||||
|
Value: []byte(strings.Join(namespaces, ",")),
|
||||||
})
|
})
|
||||||
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Wait(); err == nil {
|
if err := cmd.Wait(); err == nil {
|
||||||
t.Fatal("nsenter exits with a zero exit status")
|
t.Fatalf("nsenter exits with a zero exit status")
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNsenterDeadPid(t *testing.T) {
|
|
||||||
deadCmd := exec.Command("true")
|
|
||||||
if err := deadCmd.Run(); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
args := []string{"nsenter-exec"}
|
|
||||||
parent, child, err := newPipe()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create pipe %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd := &exec.Cmd{
|
|
||||||
Path: os.Args[0],
|
|
||||||
Args: args,
|
|
||||||
ExtraFiles: []*os.File{child},
|
|
||||||
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
|
||||||
t.Fatal("nsenter exits with a zero exit status")
|
|
||||||
}
|
|
||||||
|
|
||||||
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
|
||||||
r.AddData(&libcontainer.Int32msg{
|
|
||||||
Type: libcontainer.PidAttr,
|
|
||||||
Value: uint32(deadCmd.Process.Pid),
|
|
||||||
})
|
|
||||||
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := cmd.Wait(); err == nil {
|
|
||||||
t.Fatal("nsenter exits with a zero exit status")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include <linux/limits.h>
|
#include <linux/limits.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
|
@ -16,6 +15,14 @@
|
||||||
#include <setjmp.h>
|
#include <setjmp.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
#include <endian.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
// netlink related
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <linux/netlink.h>
|
||||||
|
|
||||||
#include <bits/sockaddr.h>
|
#include <bits/sockaddr.h>
|
||||||
#include <linux/netlink.h>
|
#include <linux/netlink.h>
|
||||||
|
@ -57,166 +64,246 @@ int setns(int fd, int nstype)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
|
static int clone_parent(jmp_buf * env, int flags) __attribute__ ((noinline));
|
||||||
static int clone_parent(jmp_buf * env)
|
static int clone_parent(jmp_buf * env, int flags)
|
||||||
{
|
{
|
||||||
struct clone_arg ca;
|
struct clone_arg ca;
|
||||||
int child;
|
int child;
|
||||||
|
|
||||||
ca.env = env;
|
ca.env = env;
|
||||||
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
|
child =
|
||||||
|
clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD | flags,
|
||||||
|
&ca);
|
||||||
return child;
|
return child;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fetch the init pipe descriptor handed over by the parent through the
// _LIBCONTAINER_INITPIPE environment variable. The pipe is used to read the
// bootstrap data, and to write the pid to once nsexec has finished setting up
// the environment.
//
// Returns -1 when the variable is not set. If the value is not exactly the
// decimal rendering of an int, an error is reported and the process exits
// with status 1.
static int get_init_pipe()
{
	const char *raw = getenv("_LIBCONTAINER_INITPIPE");
	char roundtrip[PATH_MAX];
	int fd;

	if (!raw)
		return -1;

	// Convert and print back: any difference means the value held something
	// other than a plain integer.
	fd = atoi(raw);
	snprintf(roundtrip, sizeof(roundtrip), "%d", fd);
	if (strcmp(raw, roundtrip) == 0)
		return fd;

	pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
	exit(1);
}
|
||||||
|
|
||||||
|
// num_namespaces counts the entries in nspaths, a NUL-terminated,
// comma-separated list of namespace paths to setns into. The result is the
// number of commas plus one, so an empty string yields 1.
static int num_namespaces(char *nspaths)
{
	int count = 1;
	char *p;

	for (p = nspaths; *p != '\0'; p++)
		if (*p == ',')
			count++;

	return count;
}
|
||||||
|
|
||||||
static uint32_t readint32(char *buf)
|
static uint32_t readint32(char *buf)
|
||||||
{
|
{
|
||||||
return *(uint32_t *) buf;
|
return *(uint32_t *) buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// readint8 returns the first byte at buf interpreted as an unsigned 8-bit
// value.
static uint8_t readint8(char *buf)
{
	const uint8_t *byte = (const uint8_t *)buf;

	return byte[0];
}
|
||||||
|
|
||||||
|
// writedata writes len bytes of data, beginning at offset start, to fd.
//
// The bytes are written in chunks of at most 1024 bytes, and a short write
// is continued from where it stopped. A write interrupted by a signal
// (EINTR) is retried. Any other write error is reported and the process
// exits with status 1.
//
// NOTE(review): the kernel accepts only one write to /proc/<pid>/uid_map and
// gid_map, so a mapping larger than one chunk would presumably fail on the
// second write -- confirm against user_namespaces(7).
static void writedata(int fd, char *data, int start, int len)
{
	int written = 0;

	while (written < len) {
		size_t chunk = len - written;
		ssize_t n;	// signed, so a -1 error return from write() is visible

		if (chunk > 1024)
			chunk = 1024;

		n = write(fd, data + start + written, chunk);
		if (n < 0) {
			if (errno == EINTR)
				continue;
			pr_perror("failed to write data to %d", fd);
			exit(1);
		}
		written += n;
	}
}
|
||||||
|
|
||||||
// list of known message types we want to send to bootstrap program
|
// list of known message types we want to send to bootstrap program
|
||||||
// These are defined in libcontainer/message_linux.go
|
// These are defined in libcontainer/message_linux.go
|
||||||
#define INIT_MSG 62000
|
#define INIT_MSG 62000
|
||||||
#define PID_ATTR 27281
|
#define CLONE_FLAGS_ATTR 27281
|
||||||
#define CONSOLE_PATH_ATTR 27282
|
#define CONSOLE_PATH_ATTR 27282
|
||||||
|
#define NS_PATHS_ATTR 27283
|
||||||
|
#define UIDMAP_ATTR 27284
|
||||||
|
#define GIDMAP_ATTR 27285
|
||||||
|
#define SETGROUP_ATTR 27286
|
||||||
|
|
||||||
void nsexec()
|
void nsexec()
|
||||||
{
|
{
|
||||||
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
|
|
||||||
const int num = sizeof(namespaces) / sizeof(char *);
|
|
||||||
jmp_buf env;
|
jmp_buf env;
|
||||||
char buf[PATH_MAX], *val;
|
int pipenum;
|
||||||
int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1;
|
|
||||||
pid_t pid = 0;
|
|
||||||
|
|
||||||
// if we dont have INITTYPE or this is the init process, skip the bootstrap process
|
// if we dont have init pipe, then just return to the parent
|
||||||
val = getenv("_LIBCONTAINER_INITTYPE");
|
pipenum = get_init_pipe();
|
||||||
if (val == NULL || strcmp(val, "standard") == 0) {
|
if (pipenum == -1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (strcmp(val, "setns") != 0) {
|
// Retrieve the netlink header
|
||||||
pr_perror("Invalid inittype %s", val);
|
struct nlmsghdr nl_msg_hdr;
|
||||||
|
int len;
|
||||||
|
|
||||||
|
if ((len = read(pipenum, &nl_msg_hdr, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
|
||||||
|
pr_perror("Failed to read netlink header, got %d instead of %d",
|
||||||
|
len, NLMSG_HDRLEN);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
val = getenv("_LIBCONTAINER_INITPIPE");
|
if (nl_msg_hdr.nlmsg_type == NLMSG_ERROR) {
|
||||||
if (val == NULL) {
|
pr_perror("failed to read netlink message");
|
||||||
pr_perror("Child pipe not found");
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
pipenum = atoi(val);
|
if (nl_msg_hdr.nlmsg_type != INIT_MSG) {
|
||||||
snprintf(buf, sizeof(buf), "%d", pipenum);
|
pr_perror("unexpected msg type %d", nl_msg_hdr.nlmsg_type);
|
||||||
if (strcmp(val, buf)) {
|
|
||||||
pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
// Retrieve data
|
||||||
|
int nl_total_size = NLMSG_PAYLOAD(&nl_msg_hdr, 0);
|
||||||
|
char data[nl_total_size];
|
||||||
|
|
||||||
char nlbuf[NLMSG_HDRLEN];
|
if ((len = read(pipenum, data, nl_total_size)) != nl_total_size) {
|
||||||
struct nlmsghdr *nh;
|
pr_perror
|
||||||
if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
|
("Failed to read netlink payload, got %d instead of %d",
|
||||||
pr_perror("Failed to read netlink header, got %d", n);
|
len, nl_total_size);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
// Process the passed attributes
|
||||||
nh = (struct nlmsghdr *)nlbuf;
|
|
||||||
if (nh->nlmsg_type == NLMSG_ERROR) {
|
|
||||||
pr_perror("Invalid netlink header message");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
if (nh->nlmsg_type != INIT_MSG) {
|
|
||||||
pr_perror("Unexpected netlink message type %d", nh->nlmsg_type);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
// read the netlink payload
|
|
||||||
len = NLMSG_PAYLOAD(nh, 0);
|
|
||||||
char data[len];
|
|
||||||
if ((n = read(pipenum, data, len)) != len) {
|
|
||||||
pr_perror("Failed to read netlink payload, got %d", n);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int start = 0;
|
int start = 0;
|
||||||
struct nlattr *attr;
|
uint32_t cloneflags = -1;
|
||||||
while (start < len) {
|
uint8_t is_setgroup = 0;
|
||||||
int payload_len;
|
int consolefd = -1;
|
||||||
attr = (struct nlattr *)((void *)data + start);
|
int uidmap_start = -1, uidmap_len = -1;
|
||||||
|
int gidmap_start = -1, gidmap_len = -1;
|
||||||
|
int payload_len;
|
||||||
|
struct nlattr *nlattr;
|
||||||
|
|
||||||
|
while (start < nl_total_size) {
|
||||||
|
nlattr = (struct nlattr *)(data + start);
|
||||||
start += NLA_HDRLEN;
|
start += NLA_HDRLEN;
|
||||||
payload_len = attr->nla_len - NLA_HDRLEN;
|
payload_len = nlattr->nla_len - NLA_HDRLEN;
|
||||||
switch (attr->nla_type) {
|
|
||||||
case PID_ATTR:
|
if (nlattr->nla_type == CLONE_FLAGS_ATTR) {
|
||||||
pid = (pid_t) readint32(data + start);
|
cloneflags = readint32(data + start);
|
||||||
break;
|
} else if (nlattr->nla_type == CONSOLE_PATH_ATTR) {
|
||||||
case CONSOLE_PATH_ATTR:
|
// get the console path before setns because it may change mnt namespace
|
||||||
consolefd = open((char *)data + start, O_RDWR);
|
consolefd = open(data + start, O_RDWR);
|
||||||
if (consolefd < 0) {
|
if (consolefd < 0) {
|
||||||
pr_perror("Failed to open console %s", (char *)data + start);
|
pr_perror("Failed to open console %s",
|
||||||
|
data + start);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
break;
|
} else if (nlattr->nla_type == NS_PATHS_ATTR) {
|
||||||
|
char nspaths[payload_len + 1];
|
||||||
|
|
||||||
|
strncpy(nspaths, data + start, payload_len);
|
||||||
|
nspaths[payload_len] = '\0';
|
||||||
|
|
||||||
|
// if custom namespaces are required, open all descriptors and perform
|
||||||
|
// setns on them
|
||||||
|
int nslen = num_namespaces(nspaths);
|
||||||
|
int fds[nslen];
|
||||||
|
char *nslist[nslen];
|
||||||
|
int i;
|
||||||
|
char *ns, *saveptr;
|
||||||
|
|
||||||
|
for (i = 0; i < nslen; i++) {
|
||||||
|
char *str = NULL;
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
str = nspaths;
|
||||||
|
}
|
||||||
|
ns = strtok_r(str, ",", &saveptr);
|
||||||
|
if (ns == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fds[i] = open(ns, O_RDONLY);
|
||||||
|
if (fds[i] == -1) {
|
||||||
|
pr_perror("Failed to open %s", ns);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
nslist[i] = ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nslen; i++) {
|
||||||
|
if (setns(fds[i], 0) != 0) {
|
||||||
|
pr_perror("Failed to setns to %s",
|
||||||
|
nslist[i]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fds[i]);
|
||||||
|
}
|
||||||
|
} else if (nlattr->nla_type == UIDMAP_ATTR) {
|
||||||
|
uidmap_len = payload_len;
|
||||||
|
uidmap_start = start;
|
||||||
|
} else if (nlattr->nla_type == GIDMAP_ATTR) {
|
||||||
|
gidmap_len = payload_len;
|
||||||
|
gidmap_start = start;
|
||||||
|
} else if (nlattr->nla_type == SETGROUP_ATTR) {
|
||||||
|
is_setgroup = readint8(data + start);
|
||||||
|
} else {
|
||||||
|
pr_perror("unknown netlink message type %d",
|
||||||
|
nlattr->nla_type);
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
start += NLA_ALIGN(payload_len);
|
start += NLA_ALIGN(payload_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
// required pid to be passed
|
// required clone_flags to be passed
|
||||||
if (pid == 0) {
|
if (cloneflags == -1) {
|
||||||
pr_perror("missing pid");
|
pr_perror("missing clone_flags");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
// prepare sync pipe between parent and child. We need this to let the child
|
||||||
/* Check that the specified process exists */
|
// know that the parent has finished setting up
|
||||||
snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid);
|
int syncpipe[2] = { -1, -1 };
|
||||||
tfd = open(buf, O_DIRECTORY | O_RDONLY);
|
if (pipe(syncpipe) != 0) {
|
||||||
if (tfd == -1) {
|
pr_perror("failed to setup sync pipe between parent and child");
|
||||||
pr_perror("Failed to open \"%s\"", buf);
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
};
|
||||||
|
|
||||||
self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY);
|
|
||||||
if (self_tfd == -1) {
|
|
||||||
pr_perror("Failed to open /proc/self/ns");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < num; i++) {
|
|
||||||
struct stat st;
|
|
||||||
struct stat self_st;
|
|
||||||
int fd;
|
|
||||||
|
|
||||||
/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
|
|
||||||
if (fstatat(tfd, namespaces[i], &st, 0) == -1) {
|
|
||||||
// Ignore nonexistent namespaces.
|
|
||||||
if (errno == ENOENT)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Skip namespaces we're already part of */
|
|
||||||
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
fd = openat(tfd, namespaces[i], O_RDONLY);
|
|
||||||
if (fd == -1) {
|
|
||||||
pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
// Set the namespace.
|
|
||||||
if (setns(fd, 0) == -1) {
|
|
||||||
pr_perror("Failed to setns for %s", namespaces[i]);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
close(fd);
|
|
||||||
}
|
|
||||||
|
|
||||||
close(self_tfd);
|
|
||||||
close(tfd);
|
|
||||||
|
|
||||||
if (setjmp(env) == 1) {
|
if (setjmp(env) == 1) {
|
||||||
// Child
|
// Child
|
||||||
|
uint8_t s;
|
||||||
|
|
||||||
|
// close the writing side of pipe
|
||||||
|
close(syncpipe[1]);
|
||||||
|
|
||||||
|
// sync with parent
|
||||||
|
if (read(syncpipe[0], &s, 1) != 1 || s != 1) {
|
||||||
|
pr_perror("failed to read sync byte from parent");
|
||||||
|
exit(1);
|
||||||
|
};
|
||||||
|
|
||||||
if (setsid() == -1) {
|
if (setsid() == -1) {
|
||||||
pr_perror("setsid failed");
|
pr_perror("setsid failed");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (consolefd != -1) {
|
if (consolefd != -1) {
|
||||||
if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
|
if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
|
||||||
pr_perror("ioctl TIOCSCTTY failed");
|
pr_perror("ioctl TIOCSCTTY failed");
|
||||||
|
@ -243,19 +330,75 @@ void nsexec()
|
||||||
// We must fork to actually enter the PID namespace, use CLONE_PARENT
|
// We must fork to actually enter the PID namespace, use CLONE_PARENT
|
||||||
// so the child can have the right parent, and we don't need to forward
|
// so the child can have the right parent, and we don't need to forward
|
||||||
// the child's exit code or resend its death signal.
|
// the child's exit code or resend its death signal.
|
||||||
child = clone_parent(&env);
|
int child = clone_parent(&env, cloneflags);
|
||||||
if (child < 0) {
|
if (child < 0) {
|
||||||
pr_perror("Unable to fork");
|
pr_perror("Unable to fork");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
// if uid_map and gid_map were specified, writes the data to /proc files
|
||||||
|
if (uidmap_start > 0 && uidmap_len > 0) {
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
if (snprintf(buf, sizeof(buf), "/proc/%d/uid_map", child) < 0) {
|
||||||
|
pr_perror("failed to construct uid_map file for %d",
|
||||||
|
child);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child);
|
int fd = open(buf, O_RDWR);
|
||||||
|
writedata(fd, data, uidmap_start, uidmap_len);
|
||||||
|
}
|
||||||
|
|
||||||
if (write(pipenum, buf, len) != len) {
|
if (gidmap_start > 0 && gidmap_len > 0) {
|
||||||
|
if (is_setgroup == 1) {
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
if (snprintf
|
||||||
|
(buf, sizeof(buf), "/proc/%d/setgroups",
|
||||||
|
child) < 0) {
|
||||||
|
pr_perror
|
||||||
|
("failed to construct setgroups file for %d",
|
||||||
|
child);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int fd = open(buf, O_RDWR);
|
||||||
|
if (write(fd, "allow", 5) != 5) {
|
||||||
|
// If the kernel is too old to support /proc/PID/setgroups,
|
||||||
|
// write will return ENOENT; this is OK.
|
||||||
|
if (errno != ENOENT) {
|
||||||
|
pr_perror("failed to write allow to %s",
|
||||||
|
buf);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// write gid mappings
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
if (snprintf(buf, sizeof(buf), "/proc/%d/gid_map", child) < 0) {
|
||||||
|
pr_perror("failed to construct gid_map file for %d",
|
||||||
|
child);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int fd = open(buf, O_RDWR);
|
||||||
|
writedata(fd, data, gidmap_start, gidmap_len);
|
||||||
|
}
|
||||||
|
// Send the sync signal to the child
|
||||||
|
close(syncpipe[0]);
|
||||||
|
uint8_t s = 1;
|
||||||
|
if (write(syncpipe[1], &s, 1) != 1) {
|
||||||
|
pr_perror("failed to write sync byte to child");
|
||||||
|
exit(1);
|
||||||
|
};
|
||||||
|
|
||||||
|
// parent to finish the bootstrap process
|
||||||
|
char child_data[PATH_MAX];
|
||||||
|
len =
|
||||||
|
snprintf(child_data, sizeof(child_data), "{ \"pid\" : %d }\n",
|
||||||
|
child);
|
||||||
|
if (write(pipenum, child_data, len) != len) {
|
||||||
pr_perror("Unable to send a child pid");
|
pr_perror("Unable to send a child pid");
|
||||||
kill(child, SIGKILL);
|
kill(child, SIGKILL);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,14 +167,16 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
type initProcess struct {
|
type initProcess struct {
|
||||||
cmd *exec.Cmd
|
cmd *exec.Cmd
|
||||||
parentPipe *os.File
|
parentPipe *os.File
|
||||||
childPipe *os.File
|
childPipe *os.File
|
||||||
config *initConfig
|
config *initConfig
|
||||||
manager cgroups.Manager
|
manager cgroups.Manager
|
||||||
container *linuxContainer
|
container *linuxContainer
|
||||||
fds []string
|
fds []string
|
||||||
process *Process
|
process *Process
|
||||||
|
bootstrapData io.Reader
|
||||||
|
sharePidns bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *initProcess) pid() int {
|
func (p *initProcess) pid() int {
|
||||||
|
@ -185,15 +187,49 @@ func (p *initProcess) externalDescriptors() []string {
|
||||||
return p.fds
|
return p.fds
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *initProcess) start() (err error) {
|
// execSetns runs the process that executes C code to perform the setns calls
|
||||||
|
// because setns support requires the C process to fork off a child and perform the setns
|
||||||
|
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||||
|
// over the provided pipe.
|
||||||
|
// This is called by initProcess.start function
|
||||||
|
func (p *initProcess) execSetns() error {
|
||||||
|
status, err := p.cmd.Process.Wait()
|
||||||
|
if err != nil {
|
||||||
|
p.cmd.Wait()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !status.Success() {
|
||||||
|
p.cmd.Wait()
|
||||||
|
return &exec.ExitError{ProcessState: status}
|
||||||
|
}
|
||||||
|
var pid *pid
|
||||||
|
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||||
|
p.cmd.Wait()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
process, err := os.FindProcess(pid.Pid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
p.cmd.Process = process
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *initProcess) start() error {
|
||||||
defer p.parentPipe.Close()
|
defer p.parentPipe.Close()
|
||||||
err = p.cmd.Start()
|
err := p.cmd.Start()
|
||||||
p.process.ops = p
|
p.process.ops = p
|
||||||
p.childPipe.Close()
|
p.childPipe.Close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.process.ops = nil
|
p.process.ops = nil
|
||||||
return newSystemError(err)
|
return newSystemError(err)
|
||||||
}
|
}
|
||||||
|
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := p.execSetns(); err != nil {
|
||||||
|
return newSystemError(err)
|
||||||
|
}
|
||||||
// Save the standard descriptor names before the container process
|
// Save the standard descriptor names before the container process
|
||||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||||
// we won't know at checkpoint time which file descriptor to look up.
|
// we won't know at checkpoint time which file descriptor to look up.
|
||||||
|
@ -317,7 +353,7 @@ func (p *initProcess) wait() (*os.ProcessState, error) {
|
||||||
return p.cmd.ProcessState, err
|
return p.cmd.ProcessState, err
|
||||||
}
|
}
|
||||||
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
||||||
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 {
|
if p.sharePidns {
|
||||||
killCgroupProcesses(p.manager)
|
killCgroupProcesses(p.manager)
|
||||||
}
|
}
|
||||||
return p.cmd.ProcessState, nil
|
return p.cmd.ProcessState, nil
|
||||||
|
|
|
@ -55,10 +55,6 @@ func (l *linuxStandardInit) Init() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// join any namespaces via a path to the namespace fd if provided
|
|
||||||
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
var console *linuxConsole
|
var console *linuxConsole
|
||||||
if l.config.Console != "" {
|
if l.config.Console != "" {
|
||||||
console = newConsoleFromPath(l.config.Console)
|
console = newConsoleFromPath(l.config.Console)
|
||||||
|
@ -66,9 +62,6 @@ func (l *linuxStandardInit) Init() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if _, err := syscall.Setsid(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if console != nil {
|
if console != nil {
|
||||||
if err := system.Setctty(); err != nil {
|
if err := system.Setctty(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
Loading…
Reference in New Issue