setns: replace env with netlink for bootstrap data
Replace passing the pid and console path via environment variables with passing them in a netlink message over an established pipe. This change requires us to set _LIBCONTAINER_INITTYPE and _LIBCONTAINER_INITPIPE in the environment of the bootstrap process, as we only send the bootstrap data for the setns process right now. When the init and setns bootstrap processes are unified (i.e., init uses nsexec instead of Go to clone the new process), we can remove _LIBCONTAINER_INITTYPE. Note: - We read the nlmsghdr first, before reading the content, so we can get the total length of the payload and allocate the buffer properly instead of allocating one large buffer. - We check the number of bytes read against the number wanted; it is an error if we fail to read the desired number of bytes from the pipe into the buffer. Signed-off-by: Daniel, Dao Quang Minh <dqminh89@gmail.com>
This commit is contained in:
parent
d914bf7347
commit
7d423cb7a1
|
@ -3,8 +3,10 @@
|
||||||
package libcontainer
|
package libcontainer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
@ -19,6 +21,7 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/criurpc"
|
"github.com/opencontainers/runc/libcontainer/criurpc"
|
||||||
|
"github.com/vishvananda/netlink/nl"
|
||||||
)
|
)
|
||||||
|
|
||||||
const stdioFdCount = 3
|
const stdioFdCount = 3
|
||||||
|
@ -274,12 +277,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||||
cmd.Env = append(cmd.Env,
|
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=setns")
|
||||||
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
|
// for setns process, we dont have to set cloneflags as the process namespaces
|
||||||
"_LIBCONTAINER_INITTYPE=setns",
|
// will only be set via setns syscall
|
||||||
)
|
data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath)
|
||||||
if p.consolePath != "" {
|
if err != nil {
|
||||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath)
|
return nil, err
|
||||||
}
|
}
|
||||||
// TODO: set on container for process management
|
// TODO: set on container for process management
|
||||||
return &setnsProcess{
|
return &setnsProcess{
|
||||||
|
@ -289,6 +292,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
||||||
parentPipe: parentPipe,
|
parentPipe: parentPipe,
|
||||||
config: c.newInitConfig(p),
|
config: c.newInitConfig(p),
|
||||||
process: p,
|
process: p,
|
||||||
|
bootstrapData: data,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1021,3 +1025,25 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||||
}
|
}
|
||||||
return state, nil
|
return state, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bootstrapData encodes the necessary data in netlink binary format as a io.Reader.
|
||||||
|
// Consumer can write the data to a bootstrap program such as one that uses
|
||||||
|
// nsenter package to bootstrap the container's init process correctly, i.e. with
|
||||||
|
// correct namespaces, uid/gid mapping etc.
|
||||||
|
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) {
|
||||||
|
// create the netlink message
|
||||||
|
r := nl.NewNetlinkRequest(int(InitMsg), 0)
|
||||||
|
// write pid
|
||||||
|
r.AddData(&Int32msg{
|
||||||
|
Type: PidAttr,
|
||||||
|
Value: uint32(pid),
|
||||||
|
})
|
||||||
|
// write console path
|
||||||
|
if consolePath != "" {
|
||||||
|
r.AddData(&Bytemsg{
|
||||||
|
Type: ConsolePathAttr,
|
||||||
|
Value: []byte(consolePath),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return bytes.NewReader(r.Serialize()), nil
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
// +build linux
|
||||||
|
|
||||||
|
package libcontainer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/vishvananda/netlink/nl"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Identifiers used in the netlink-formatted message sent to the bootstrap
// program. The numeric values are arbitrary; they were picked so that they do
// not conflict with known netlink types.

// InitMsg is the netlink message type of the bootstrap message.
const InitMsg uint16 = 62000

// Attribute types that may appear in the payload of an InitMsg message.
const (
	// PidAttr tags the attribute that carries a process id.
	PidAttr uint16 = 27281
	// ConsolePathAttr tags the attribute that carries a console path.
	ConsolePathAttr uint16 = 27282
)
|
||||||
|
|
||||||
|
type Int32msg struct {
|
||||||
|
Type uint16
|
||||||
|
Value uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// int32msg has the following representation
|
||||||
|
// | nlattr len | nlattr type |
|
||||||
|
// | uint32 value |
|
||||||
|
func (msg *Int32msg) Serialize() []byte {
|
||||||
|
buf := make([]byte, msg.Len())
|
||||||
|
native := nl.NativeEndian()
|
||||||
|
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||||
|
native.PutUint16(buf[2:4], msg.Type)
|
||||||
|
native.PutUint32(buf[4:8], msg.Value)
|
||||||
|
return buf
|
||||||
|
}
|
||||||
|
|
||||||
|
func (msg *Int32msg) Len() int {
|
||||||
|
return syscall.NLA_HDRLEN + 4
|
||||||
|
}
|
||||||
|
|
||||||
|
// bytemsg has the following representation
|
||||||
|
// | nlattr len | nlattr type |
|
||||||
|
// | value | pad |
|
||||||
|
type Bytemsg struct {
|
||||||
|
Type uint16
|
||||||
|
Value []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func (msg *Bytemsg) Serialize() []byte {
|
||||||
|
l := msg.Len()
|
||||||
|
buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1))
|
||||||
|
native := nl.NativeEndian()
|
||||||
|
native.PutUint16(buf[0:2], uint16(l))
|
||||||
|
native.PutUint16(buf[2:4], msg.Type)
|
||||||
|
copy(buf[4:], msg.Value)
|
||||||
|
return buf
|
||||||
|
}
|
||||||
|
|
||||||
|
func (msg *Bytemsg) Len() int {
|
||||||
|
return syscall.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
|
||||||
|
}
|
|
@ -1,12 +1,17 @@
|
||||||
package nsenter
|
package nsenter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
|
"syscall"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer"
|
||||||
|
"github.com/vishvananda/netlink/nl"
|
||||||
)
|
)
|
||||||
|
|
||||||
type pid struct {
|
type pid struct {
|
||||||
|
@ -15,7 +20,7 @@ type pid struct {
|
||||||
|
|
||||||
func TestNsenterAlivePid(t *testing.T) {
|
func TestNsenterAlivePid(t *testing.T) {
|
||||||
args := []string{"nsenter-exec"}
|
args := []string{"nsenter-exec"}
|
||||||
r, w, err := os.Pipe()
|
parent, child, err := newPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to create pipe %v", err)
|
t.Fatalf("failed to create pipe %v", err)
|
||||||
}
|
}
|
||||||
|
@ -23,16 +28,22 @@ func TestNsenterAlivePid(t *testing.T) {
|
||||||
cmd := &exec.Cmd{
|
cmd := &exec.Cmd{
|
||||||
Path: os.Args[0],
|
Path: os.Args[0],
|
||||||
Args: args,
|
Args: args,
|
||||||
ExtraFiles: []*os.File{w},
|
ExtraFiles: []*os.File{child},
|
||||||
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"},
|
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
t.Fatalf("nsenter failed to start %v", err)
|
t.Fatalf("nsenter failed to start %v", err)
|
||||||
}
|
}
|
||||||
w.Close()
|
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
||||||
|
r.AddData(&libcontainer.Int32msg{
|
||||||
decoder := json.NewDecoder(r)
|
Type: libcontainer.PidAttr,
|
||||||
|
Value: uint32(os.Getpid()),
|
||||||
|
})
|
||||||
|
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
decoder := json.NewDecoder(parent)
|
||||||
var pid *pid
|
var pid *pid
|
||||||
|
|
||||||
if err := decoder.Decode(&pid); err != nil {
|
if err := decoder.Decode(&pid); err != nil {
|
||||||
|
@ -51,34 +62,67 @@ func TestNsenterAlivePid(t *testing.T) {
|
||||||
|
|
||||||
func TestNsenterInvalidPid(t *testing.T) {
|
func TestNsenterInvalidPid(t *testing.T) {
|
||||||
args := []string{"nsenter-exec"}
|
args := []string{"nsenter-exec"}
|
||||||
|
parent, child, err := newPipe()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create pipe %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
cmd := &exec.Cmd{
|
cmd := &exec.Cmd{
|
||||||
Path: os.Args[0],
|
Path: os.Args[0],
|
||||||
Args: args,
|
Args: args,
|
||||||
Env: []string{"_LIBCONTAINER_INITPID=-1"},
|
ExtraFiles: []*os.File{child},
|
||||||
|
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
|
||||||
}
|
}
|
||||||
|
|
||||||
err := cmd.Run()
|
if err := cmd.Start(); err != nil {
|
||||||
if err == nil {
|
t.Fatal("nsenter exits with a zero exit status")
|
||||||
|
}
|
||||||
|
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
||||||
|
r.AddData(&libcontainer.Int32msg{
|
||||||
|
Type: libcontainer.PidAttr,
|
||||||
|
Value: 0,
|
||||||
|
})
|
||||||
|
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cmd.Wait(); err == nil {
|
||||||
t.Fatal("nsenter exits with a zero exit status")
|
t.Fatal("nsenter exits with a zero exit status")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNsenterDeadPid(t *testing.T) {
|
func TestNsenterDeadPid(t *testing.T) {
|
||||||
dead_cmd := exec.Command("true")
|
deadCmd := exec.Command("true")
|
||||||
if err := dead_cmd.Run(); err != nil {
|
if err := deadCmd.Run(); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
args := []string{"nsenter-exec"}
|
args := []string{"nsenter-exec"}
|
||||||
|
parent, child, err := newPipe()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create pipe %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
cmd := &exec.Cmd{
|
cmd := &exec.Cmd{
|
||||||
Path: os.Args[0],
|
Path: os.Args[0],
|
||||||
Args: args,
|
Args: args,
|
||||||
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)},
|
ExtraFiles: []*os.File{child},
|
||||||
|
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
|
||||||
}
|
}
|
||||||
|
|
||||||
err := cmd.Run()
|
if err := cmd.Start(); err != nil {
|
||||||
if err == nil {
|
t.Fatal("nsenter exits with a zero exit status")
|
||||||
|
}
|
||||||
|
|
||||||
|
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
||||||
|
r.AddData(&libcontainer.Int32msg{
|
||||||
|
Type: libcontainer.PidAttr,
|
||||||
|
Value: uint32(deadCmd.Process.Pid),
|
||||||
|
})
|
||||||
|
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cmd.Wait(); err == nil {
|
||||||
t.Fatal("nsenter exits with a zero exit status")
|
t.Fatal("nsenter exits with a zero exit status")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -89,3 +133,11 @@ func init() {
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// newPipe creates a connected pair of AF_LOCAL stream sockets, both created
// with SOCK_CLOEXEC, and wraps each end in an *os.File. The second descriptor
// of the pair is returned as parent (file name "parent") and the first as
// child (file name "child"). On failure both files are nil and err is the
// error from the socketpair call.
func newPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, err := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if err != nil {
		return nil, nil, err
	}

	parent = os.NewFile(uintptr(pair[1]), "parent")
	child = os.NewFile(uintptr(pair[0]), "child")
	return parent, child, nil
}
|
||||||
|
|
|
@ -17,6 +17,11 @@
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
|
||||||
|
#include <linux/netlink.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
|
||||||
/* All arguments should be above stack, because it grows down */
|
/* All arguments should be above stack, because it grows down */
|
||||||
struct clone_arg {
|
struct clone_arg {
|
||||||
/*
|
/*
|
||||||
|
@ -63,24 +68,33 @@ static int clone_parent(jmp_buf * env)
|
||||||
return child;
|
return child;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * readint32 returns the 32-bit unsigned value stored in the first four bytes
 * of buf, in host byte order. buf must point to at least four readable bytes.
 *
 * The bytes are copied out with memcpy instead of being read through a cast
 * pointer: buf is a plain char pointer into a byte buffer, so it carries no
 * alignment guarantee for uint32_t, and dereferencing a cast pointer would
 * also break C's aliasing rules.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
|
||||||
|
|
||||||
|
// list of known message types we want to send to bootstrap program
|
||||||
|
// These are defined in libcontainer/message_linux.go
|
||||||
|
#define INIT_MSG 62000
|
||||||
|
#define PID_ATTR 27281
|
||||||
|
#define CONSOLE_PATH_ATTR 27282
|
||||||
|
|
||||||
void nsexec()
|
void nsexec()
|
||||||
{
|
{
|
||||||
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
|
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
|
||||||
const int num = sizeof(namespaces) / sizeof(char *);
|
const int num = sizeof(namespaces) / sizeof(char *);
|
||||||
jmp_buf env;
|
jmp_buf env;
|
||||||
char buf[PATH_MAX], *val;
|
char buf[PATH_MAX], *val;
|
||||||
int i, tfd, self_tfd, child, len, pipenum, consolefd = -1;
|
int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1;
|
||||||
pid_t pid;
|
pid_t pid = 0;
|
||||||
char *console;
|
|
||||||
|
|
||||||
val = getenv("_LIBCONTAINER_INITPID");
|
// if we dont have INITTYPE or this is the init process, skip the bootstrap process
|
||||||
if (val == NULL)
|
val = getenv("_LIBCONTAINER_INITTYPE");
|
||||||
|
if (val == NULL || strcmp(val, "standard") == 0) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
pid = atoi(val);
|
if (strcmp(val, "setns") != 0) {
|
||||||
snprintf(buf, sizeof(buf), "%d", pid);
|
pr_perror("Invalid inittype %s", val);
|
||||||
if (strcmp(val, buf)) {
|
|
||||||
pr_perror("Unable to parse _LIBCONTAINER_INITPID");
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,7 +103,6 @@ void nsexec()
|
||||||
pr_perror("Child pipe not found");
|
pr_perror("Child pipe not found");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
pipenum = atoi(val);
|
pipenum = atoi(val);
|
||||||
snprintf(buf, sizeof(buf), "%d", pipenum);
|
snprintf(buf, sizeof(buf), "%d", pipenum);
|
||||||
if (strcmp(val, buf)) {
|
if (strcmp(val, buf)) {
|
||||||
|
@ -97,13 +110,56 @@ void nsexec()
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
console = getenv("_LIBCONTAINER_CONSOLE_PATH");
|
char nlbuf[NLMSG_HDRLEN];
|
||||||
if (console != NULL) {
|
struct nlmsghdr *nh;
|
||||||
consolefd = open(console, O_RDWR);
|
if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
|
||||||
if (consolefd < 0) {
|
pr_perror("Failed to read netlink header, got %d", n);
|
||||||
pr_perror("Failed to open console %s", console);
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nh = (struct nlmsghdr *)nlbuf;
|
||||||
|
if (nh->nlmsg_type == NLMSG_ERROR) {
|
||||||
|
pr_perror("Invalid netlink header message");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (nh->nlmsg_type != INIT_MSG) {
|
||||||
|
pr_perror("Unexpected netlink message type %d", nh->nlmsg_type);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
// read the netlink payload
|
||||||
|
len = NLMSG_PAYLOAD(nh, 0);
|
||||||
|
char data[len];
|
||||||
|
if ((n = read(pipenum, data, len)) != len) {
|
||||||
|
pr_perror("Failed to read netlink payload, got %d", n);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int start = 0;
|
||||||
|
struct nlattr *attr;
|
||||||
|
while (start < len) {
|
||||||
|
int payload_len;
|
||||||
|
attr = (struct nlattr *)((void *)data + start);
|
||||||
|
start += NLA_HDRLEN;
|
||||||
|
payload_len = attr->nla_len - NLA_HDRLEN;
|
||||||
|
switch (attr->nla_type) {
|
||||||
|
case PID_ATTR:
|
||||||
|
pid = (pid_t) readint32(data + start);
|
||||||
|
break;
|
||||||
|
case CONSOLE_PATH_ATTR:
|
||||||
|
consolefd = open((char *)data + start, O_RDWR);
|
||||||
|
if (consolefd < 0) {
|
||||||
|
pr_perror("Failed to open console %s", (char *)data + start);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
start += NLA_ALIGN(payload_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// required pid to be passed
|
||||||
|
if (pid == 0) {
|
||||||
|
pr_perror("missing pid");
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check that the specified process exists */
|
/* Check that the specified process exists */
|
||||||
|
@ -133,15 +189,13 @@ void nsexec()
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip namespaces we're already part of */
|
/* Skip namespaces we're already part of */
|
||||||
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 &&
|
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) {
|
||||||
st.st_ino == self_st.st_ino) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd = openat(tfd, namespaces[i], O_RDONLY);
|
fd = openat(tfd, namespaces[i], O_RDONLY);
|
||||||
if (fd == -1) {
|
if (fd == -1) {
|
||||||
pr_perror("Failed to open ns file %s for ns %s", buf,
|
pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
|
||||||
namespaces[i]);
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
// Set the namespace.
|
// Set the namespace.
|
||||||
|
|
Loading…
Reference in New Issue