setns: replace env with netlink for bootstrap data

Replace passing the pid and console path via environment variables with passing
them as a netlink message over an established pipe.

This change requires us to set _LIBCONTAINER_INITTYPE and
_LIBCONTAINER_INITPIPE in the environment of the bootstrap process, as we
only send the bootstrap data for the setns process right now. When the init and
setns bootstrap processes are unified (i.e., init uses nsexec instead of Go to
clone the new process), we can remove _LIBCONTAINER_INITTYPE.

Note:
- we read nlmsghdr first before reading the content so we can get the total
  length of the payload and allocate buffer properly instead of allocating
  one large buffer.

- check read bytes vs the wanted number. It's an error if we failed to read
  the desired number of bytes from the pipe into the buffer.

Signed-off-by: Daniel, Dao Quang Minh <dqminh89@gmail.com>
This commit is contained in:
Daniel, Dao Quang Minh 2015-10-17 15:35:36 +00:00
parent d914bf7347
commit 7d423cb7a1
4 changed files with 248 additions and 56 deletions

View File

@ -3,8 +3,10 @@
package libcontainer package libcontainer
import ( import (
"bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
@ -19,6 +21,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc" "github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/vishvananda/netlink/nl"
) )
const stdioFdCount = 3 const stdioFdCount = 3
@ -274,12 +277,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
} }
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) { func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=setns")
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()), // for setns process, we dont have to set cloneflags as the process namespaces
"_LIBCONTAINER_INITTYPE=setns", // will only be set via setns syscall
) data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath)
if p.consolePath != "" { if err != nil {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath) return nil, err
} }
// TODO: set on container for process management // TODO: set on container for process management
return &setnsProcess{ return &setnsProcess{
@ -289,6 +292,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
parentPipe: parentPipe, parentPipe: parentPipe,
config: c.newInitConfig(p), config: c.newInitConfig(p),
process: p, process: p,
bootstrapData: data,
}, nil }, nil
} }
@ -1021,3 +1025,25 @@ func (c *linuxContainer) currentState() (*State, error) {
} }
return state, nil return state, nil
} }
// bootstrapData builds the bootstrap payload as a serialized netlink request
// of type InitMsg and returns it wrapped in an io.Reader. A consumer can copy
// the bytes to a bootstrap program (such as one built on the nsenter package)
// so that it can set the process up correctly.
//
// The request always carries the pid as a PidAttr attribute. A ConsolePathAttr
// attribute is appended only when consolePath is non-empty. cloneFlags is
// accepted but is not encoded into the message by this implementation. The
// returned error is always nil.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) {
	req := nl.NewNetlinkRequest(int(InitMsg), 0)

	// The pid attribute is mandatory.
	pidAttr := &Int32msg{
		Type:  PidAttr,
		Value: uint32(pid),
	}
	req.AddData(pidAttr)

	// The console path is optional and omitted entirely when unset.
	if len(consolePath) > 0 {
		pathAttr := &Bytemsg{
			Type:  ConsolePathAttr,
			Value: []byte(consolePath),
		}
		req.AddData(pathAttr)
	}

	payload := req.Serialize()
	return bytes.NewReader(payload), nil
}

View File

@ -0,0 +1,60 @@
// +build linux
package libcontainer
import (
"syscall"
"github.com/vishvananda/netlink/nl"
)
// Message and attribute type identifiers sent to the bootstrap program.
// The numbers are arbitrary and were picked so that they do not conflict with
// known netlink types. The C bootstrap code defines the same numeric values,
// so both sides have to be changed together.
const (
// InitMsg is the netlink message type of the bootstrap request.
InitMsg uint16 = 62000
// PidAttr is the attribute type used for the pid value.
PidAttr uint16 = 27281
// ConsolePathAttr is the attribute type used for the console path.
ConsolePathAttr uint16 = 27282
)
// Int32msg is a netlink attribute that carries a single 32-bit value.
//
// It is encoded in native byte order as:
//
//	| nlattr len (uint16) | nlattr type (uint16) |
//	| value (uint32)                             |
type Int32msg struct {
	Type  uint16
	Value uint32
}

// Serialize returns the encoded attribute: the length and type header
// followed by the 32-bit value.
func (msg *Int32msg) Serialize() []byte {
	size := msg.Len()
	order := nl.NativeEndian()

	out := make([]byte, size)
	order.PutUint16(out[0:2], uint16(size))
	order.PutUint16(out[2:4], msg.Type)
	order.PutUint32(out[4:8], msg.Value)
	return out
}

// Len returns the encoded size in bytes: the attribute header plus the
// 4-byte value.
func (msg *Int32msg) Len() int {
	const valueSize = 4
	return syscall.NLA_HDRLEN + valueSize
}
// Bytemsg is a netlink attribute that carries a byte string followed by a
// terminating NUL byte.
//
// It is encoded in native byte order as:
//
//	| nlattr len (uint16) | nlattr type (uint16) |
//	| value ... NUL                        | pad |
type Bytemsg struct {
	Type  uint16
	Value []byte
}

// Serialize returns the encoded attribute. The buffer is rounded up to a
// multiple of NLA_ALIGNTO, while the length field records the unpadded size
// reported by Len. The bytes after the copied value are left zeroed, which
// provides both the NUL terminator and the padding. The length is stored
// after conversion to uint16.
func (msg *Bytemsg) Serialize() []byte {
	const mask = syscall.NLA_ALIGNTO - 1

	attrLen := msg.Len()
	padded := (attrLen + mask) &^ mask
	order := nl.NativeEndian()

	out := make([]byte, padded)
	order.PutUint16(out[0:2], uint16(attrLen))
	order.PutUint16(out[2:4], msg.Type)
	copy(out[4:], msg.Value)
	return out
}

// Len returns the unpadded encoded size in bytes: the attribute header, the
// value, and one byte for the NUL terminator.
func (msg *Bytemsg) Len() int {
	const terminator = 1
	return syscall.NLA_HDRLEN + len(msg.Value) + terminator
}

View File

@ -1,12 +1,17 @@
package nsenter package nsenter
import ( import (
"bytes"
"encoding/json" "encoding/json"
"fmt" "io"
"os" "os"
"os/exec" "os/exec"
"strings" "strings"
"syscall"
"testing" "testing"
"github.com/opencontainers/runc/libcontainer"
"github.com/vishvananda/netlink/nl"
) )
type pid struct { type pid struct {
@ -15,7 +20,7 @@ type pid struct {
func TestNsenterAlivePid(t *testing.T) { func TestNsenterAlivePid(t *testing.T) {
args := []string{"nsenter-exec"} args := []string{"nsenter-exec"}
r, w, err := os.Pipe() parent, child, err := newPipe()
if err != nil { if err != nil {
t.Fatalf("failed to create pipe %v", err) t.Fatalf("failed to create pipe %v", err)
} }
@ -23,16 +28,22 @@ func TestNsenterAlivePid(t *testing.T) {
cmd := &exec.Cmd{ cmd := &exec.Cmd{
Path: os.Args[0], Path: os.Args[0],
Args: args, Args: args,
ExtraFiles: []*os.File{w}, ExtraFiles: []*os.File{child},
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"}, Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
} }
if err := cmd.Start(); err != nil { if err := cmd.Start(); err != nil {
t.Fatalf("nsenter failed to start %v", err) t.Fatalf("nsenter failed to start %v", err)
} }
w.Close() r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
r.AddData(&libcontainer.Int32msg{
decoder := json.NewDecoder(r) Type: libcontainer.PidAttr,
Value: uint32(os.Getpid()),
})
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
t.Fatal(err)
}
decoder := json.NewDecoder(parent)
var pid *pid var pid *pid
if err := decoder.Decode(&pid); err != nil { if err := decoder.Decode(&pid); err != nil {
@ -51,34 +62,67 @@ func TestNsenterAlivePid(t *testing.T) {
func TestNsenterInvalidPid(t *testing.T) { func TestNsenterInvalidPid(t *testing.T) {
args := []string{"nsenter-exec"} args := []string{"nsenter-exec"}
parent, child, err := newPipe()
if err != nil {
t.Fatalf("failed to create pipe %v", err)
}
cmd := &exec.Cmd{ cmd := &exec.Cmd{
Path: os.Args[0], Path: os.Args[0],
Args: args, Args: args,
Env: []string{"_LIBCONTAINER_INITPID=-1"}, ExtraFiles: []*os.File{child},
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
} }
err := cmd.Run() if err := cmd.Start(); err != nil {
if err == nil { t.Fatal("nsenter exits with a zero exit status")
}
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
r.AddData(&libcontainer.Int32msg{
Type: libcontainer.PidAttr,
Value: 0,
})
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
t.Fatal(err)
}
if err := cmd.Wait(); err == nil {
t.Fatal("nsenter exits with a zero exit status") t.Fatal("nsenter exits with a zero exit status")
} }
} }
func TestNsenterDeadPid(t *testing.T) { func TestNsenterDeadPid(t *testing.T) {
dead_cmd := exec.Command("true") deadCmd := exec.Command("true")
if err := dead_cmd.Run(); err != nil { if err := deadCmd.Run(); err != nil {
t.Fatal(err) t.Fatal(err)
} }
args := []string{"nsenter-exec"} args := []string{"nsenter-exec"}
parent, child, err := newPipe()
if err != nil {
t.Fatalf("failed to create pipe %v", err)
}
cmd := &exec.Cmd{ cmd := &exec.Cmd{
Path: os.Args[0], Path: os.Args[0],
Args: args, Args: args,
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)}, ExtraFiles: []*os.File{child},
Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"},
} }
err := cmd.Run() if err := cmd.Start(); err != nil {
if err == nil { t.Fatal("nsenter exits with a zero exit status")
}
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
r.AddData(&libcontainer.Int32msg{
Type: libcontainer.PidAttr,
Value: uint32(deadCmd.Process.Pid),
})
if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil {
t.Fatal(err)
}
if err := cmd.Wait(); err == nil {
t.Fatal("nsenter exits with a zero exit status") t.Fatal("nsenter exits with a zero exit status")
} }
} }
@ -89,3 +133,11 @@ func init() {
} }
return return
} }
// newPipe creates a connected pair of AF_LOCAL stream sockets with
// close-on-exec set and wraps both ends as *os.File. The second descriptor of
// the pair is returned as parent (named "parent") and the first as child
// (named "child"). On failure both files are nil and the socketpair error is
// returned.
func newPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, sockErr := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if sockErr != nil {
		return nil, nil, sockErr
	}

	parentFile := os.NewFile(uintptr(pair[1]), "parent")
	childFile := os.NewFile(uintptr(pair[0]), "child")
	return parentFile, childFile, nil
}

View File

@ -17,6 +17,11 @@
#include <sched.h> #include <sched.h>
#include <signal.h> #include <signal.h>
#include <linux/netlink.h>
#include <linux/types.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
/* All arguments should be above stack, because it grows down */ /* All arguments should be above stack, because it grows down */
struct clone_arg { struct clone_arg {
/* /*
@ -63,24 +68,33 @@ static int clone_parent(jmp_buf * env)
return child; return child;
} }
/*
 * readint32 returns the 32-bit value stored at buf, in host byte order.
 *
 * The bytes are copied out with memcpy instead of dereferencing a cast
 * pointer: buf is a char pointer with no alignment guarantee for uint32_t,
 * and reading a char buffer through a uint32_t lvalue violates the C
 * aliasing rules.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
// Netlink message and attribute type identifiers that the bootstrap code
// expects to receive. The values must stay identical to InitMsg, PidAttr and
// ConsolePathAttr defined in libcontainer/message_linux.go.
#define INIT_MSG 62000
#define PID_ATTR 27281
#define CONSOLE_PATH_ATTR 27282
void nsexec() void nsexec()
{ {
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" }; char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
const int num = sizeof(namespaces) / sizeof(char *); const int num = sizeof(namespaces) / sizeof(char *);
jmp_buf env; jmp_buf env;
char buf[PATH_MAX], *val; char buf[PATH_MAX], *val;
int i, tfd, self_tfd, child, len, pipenum, consolefd = -1; int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1;
pid_t pid; pid_t pid = 0;
char *console;
val = getenv("_LIBCONTAINER_INITPID"); // if we dont have INITTYPE or this is the init process, skip the bootstrap process
if (val == NULL) val = getenv("_LIBCONTAINER_INITTYPE");
if (val == NULL || strcmp(val, "standard") == 0) {
return; return;
}
pid = atoi(val); if (strcmp(val, "setns") != 0) {
snprintf(buf, sizeof(buf), "%d", pid); pr_perror("Invalid inittype %s", val);
if (strcmp(val, buf)) {
pr_perror("Unable to parse _LIBCONTAINER_INITPID");
exit(1); exit(1);
} }
@ -89,7 +103,6 @@ void nsexec()
pr_perror("Child pipe not found"); pr_perror("Child pipe not found");
exit(1); exit(1);
} }
pipenum = atoi(val); pipenum = atoi(val);
snprintf(buf, sizeof(buf), "%d", pipenum); snprintf(buf, sizeof(buf), "%d", pipenum);
if (strcmp(val, buf)) { if (strcmp(val, buf)) {
@ -97,13 +110,56 @@ void nsexec()
exit(1); exit(1);
} }
console = getenv("_LIBCONTAINER_CONSOLE_PATH"); char nlbuf[NLMSG_HDRLEN];
if (console != NULL) { struct nlmsghdr *nh;
consolefd = open(console, O_RDWR); if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
if (consolefd < 0) { pr_perror("Failed to read netlink header, got %d", n);
pr_perror("Failed to open console %s", console);
exit(1); exit(1);
} }
nh = (struct nlmsghdr *)nlbuf;
if (nh->nlmsg_type == NLMSG_ERROR) {
pr_perror("Invalid netlink header message");
exit(1);
}
if (nh->nlmsg_type != INIT_MSG) {
pr_perror("Unexpected netlink message type %d", nh->nlmsg_type);
exit(1);
}
// read the netlink payload
len = NLMSG_PAYLOAD(nh, 0);
char data[len];
if ((n = read(pipenum, data, len)) != len) {
pr_perror("Failed to read netlink payload, got %d", n);
exit(1);
}
int start = 0;
struct nlattr *attr;
while (start < len) {
int payload_len;
attr = (struct nlattr *)((void *)data + start);
start += NLA_HDRLEN;
payload_len = attr->nla_len - NLA_HDRLEN;
switch (attr->nla_type) {
case PID_ATTR:
pid = (pid_t) readint32(data + start);
break;
case CONSOLE_PATH_ATTR:
consolefd = open((char *)data + start, O_RDWR);
if (consolefd < 0) {
pr_perror("Failed to open console %s", (char *)data + start);
exit(1);
}
break;
}
start += NLA_ALIGN(payload_len);
}
// required pid to be passed
if (pid == 0) {
pr_perror("missing pid");
exit(1);
} }
/* Check that the specified process exists */ /* Check that the specified process exists */
@ -133,15 +189,13 @@ void nsexec()
} }
/* Skip namespaces we're already part of */ /* Skip namespaces we're already part of */
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) {
st.st_ino == self_st.st_ino) {
continue; continue;
} }
fd = openat(tfd, namespaces[i], O_RDONLY); fd = openat(tfd, namespaces[i], O_RDONLY);
if (fd == -1) { if (fd == -1) {
pr_perror("Failed to open ns file %s for ns %s", buf, pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
namespaces[i]);
exit(1); exit(1);
} }
// Set the namespace. // Set the namespace.