2015-01-26 16:33:56 +08:00
|
|
|
#define _GNU_SOURCE
|
2014-12-23 06:06:22 +08:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include <linux/limits.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
2015-03-05 08:04:20 +08:00
|
|
|
#include <sys/ioctl.h>
|
2014-12-23 06:06:22 +08:00
|
|
|
#include <fcntl.h>
|
|
|
|
#include <signal.h>
|
2015-01-26 16:33:56 +08:00
|
|
|
#include <setjmp.h>
|
|
|
|
#include <sched.h>
|
|
|
|
#include <signal.h>
|
|
|
|
|
2015-10-17 23:35:36 +08:00
|
|
|
#include <linux/netlink.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
|
2015-01-26 16:33:56 +08:00
|
|
|
/*
 * Argument block handed to clone(). Everything the child needs is placed
 * above the stack area, because the stack grows down.
 */
struct clone_arg {
	/*
	 * Scratch space in which clone() can locate its arguments
	 * and the return code.
	 */
	char stack[4096] __attribute__ ((aligned(16)));
	/* Zero-sized marker for the address just past the end of `stack`. */
	char stack_ptr[0];
	/* Jump buffer that child_func() longjmp()s into. */
	jmp_buf *env;
};
|
|
|
|
|
2015-03-05 08:04:20 +08:00
|
|
|
/*
 * Print a printf-style message to stderr, prefixed with "nsenter: " and
 * followed by ": %m" and a newline. %m is a glibc printf extension that
 * expands to the strerror() text for the current errno, so the suffix is
 * only meaningful right after a failing call that set errno.
 */
#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)
|
|
|
|
|
2015-01-26 16:33:56 +08:00
|
|
|
static int child_func(void *_arg)
|
|
|
|
{
|
2015-02-07 04:48:57 +08:00
|
|
|
struct clone_arg *arg = (struct clone_arg *)_arg;
|
2015-01-26 16:33:56 +08:00
|
|
|
longjmp(*arg->env, 1);
|
|
|
|
}
|
2014-12-23 06:06:22 +08:00
|
|
|
|
|
|
|
/*
 * glibc releases older than 2.14 (namely glibc-2.12) do not ship a setns()
 * wrapper, so provide one on top of the raw system call for those builds.
 */
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include "syscall.h"

/* Fall back to the __NR_setns number when SYS_setns is not defined. */
#if !defined(SYS_setns) && defined(__NR_setns)
#define SYS_setns __NR_setns
#endif

#if defined(SYS_setns)
/* Thin wrapper that forwards both arguments to the setns system call. */
int setns(int fd, int nstype)
{
	return syscall(SYS_setns, fd, nstype);
}
#endif
#endif
|
|
|
|
|
2015-02-07 04:48:57 +08:00
|
|
|
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
|
|
|
|
static int clone_parent(jmp_buf * env)
|
2015-01-26 16:33:56 +08:00
|
|
|
{
|
|
|
|
struct clone_arg ca;
|
|
|
|
int child;
|
|
|
|
|
|
|
|
ca.env = env;
|
|
|
|
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
|
|
|
|
|
|
|
|
return child;
|
|
|
|
}
|
|
|
|
|
2015-10-17 23:35:36 +08:00
|
|
|
/*
 * Read a 32-bit unsigned integer, in host byte order, from the first four
 * bytes at buf.
 *
 * The bytes are copied out with memcpy() rather than dereferencing a casted
 * pointer, so buf does not need to be 4-byte aligned and no strict-aliasing
 * rule is violated. The caller must guarantee that at least
 * sizeof(uint32_t) bytes are readable at buf.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
|
|
|
|
|
|
|
|
// Netlink message and attribute types understood by this bootstrap code.
// The values must match the ones defined in libcontainer/message_linux.go.
#define INIT_MSG          62000	// message type of the bootstrap message
#define PID_ATTR          27281	// attribute carrying the target pid
#define CONSOLE_PATH_ATTR 27282	// attribute carrying the console path
|
|
|
|
|
2014-12-23 06:06:22 +08:00
|
|
|
void nsexec()
|
|
|
|
{
|
2015-08-09 00:30:55 +08:00
|
|
|
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
|
2014-12-23 06:06:22 +08:00
|
|
|
const int num = sizeof(namespaces) / sizeof(char *);
|
2015-01-26 16:33:56 +08:00
|
|
|
jmp_buf env;
|
2014-12-23 06:06:22 +08:00
|
|
|
char buf[PATH_MAX], *val;
|
2015-10-17 23:35:36 +08:00
|
|
|
int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1;
|
|
|
|
pid_t pid = 0;
|
2014-12-23 06:06:22 +08:00
|
|
|
|
2015-10-17 23:35:36 +08:00
|
|
|
// if we dont have INITTYPE or this is the init process, skip the bootstrap process
|
|
|
|
val = getenv("_LIBCONTAINER_INITTYPE");
|
|
|
|
if (val == NULL || strcmp(val, "standard") == 0) {
|
2014-12-23 06:06:22 +08:00
|
|
|
return;
|
2015-10-17 23:35:36 +08:00
|
|
|
}
|
|
|
|
if (strcmp(val, "setns") != 0) {
|
|
|
|
pr_perror("Invalid inittype %s", val);
|
2014-12-23 06:06:22 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2015-04-11 02:23:09 +08:00
|
|
|
val = getenv("_LIBCONTAINER_INITPIPE");
|
|
|
|
if (val == NULL) {
|
|
|
|
pr_perror("Child pipe not found");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
pipenum = atoi(val);
|
|
|
|
snprintf(buf, sizeof(buf), "%d", pipenum);
|
|
|
|
if (strcmp(val, buf)) {
|
|
|
|
pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2015-10-17 23:35:36 +08:00
|
|
|
char nlbuf[NLMSG_HDRLEN];
|
|
|
|
struct nlmsghdr *nh;
|
|
|
|
if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
|
|
|
|
pr_perror("Failed to read netlink header, got %d", n);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
nh = (struct nlmsghdr *)nlbuf;
|
|
|
|
if (nh->nlmsg_type == NLMSG_ERROR) {
|
|
|
|
pr_perror("Invalid netlink header message");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (nh->nlmsg_type != INIT_MSG) {
|
|
|
|
pr_perror("Unexpected netlink message type %d", nh->nlmsg_type);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
// read the netlink payload
|
|
|
|
len = NLMSG_PAYLOAD(nh, 0);
|
|
|
|
char data[len];
|
|
|
|
if ((n = read(pipenum, data, len)) != len) {
|
|
|
|
pr_perror("Failed to read netlink payload, got %d", n);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
int start = 0;
|
|
|
|
struct nlattr *attr;
|
|
|
|
while (start < len) {
|
|
|
|
int payload_len;
|
|
|
|
attr = (struct nlattr *)((void *)data + start);
|
|
|
|
start += NLA_HDRLEN;
|
|
|
|
payload_len = attr->nla_len - NLA_HDRLEN;
|
|
|
|
switch (attr->nla_type) {
|
|
|
|
case PID_ATTR:
|
|
|
|
pid = (pid_t) readint32(data + start);
|
|
|
|
break;
|
|
|
|
case CONSOLE_PATH_ATTR:
|
|
|
|
consolefd = open((char *)data + start, O_RDWR);
|
|
|
|
if (consolefd < 0) {
|
|
|
|
pr_perror("Failed to open console %s", (char *)data + start);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
break;
|
2015-03-05 08:04:20 +08:00
|
|
|
}
|
2015-10-17 23:35:36 +08:00
|
|
|
start += NLA_ALIGN(payload_len);
|
|
|
|
}
|
|
|
|
|
|
|
|
// required pid to be passed
|
|
|
|
if (pid == 0) {
|
|
|
|
pr_perror("missing pid");
|
|
|
|
exit(1);
|
2015-03-05 08:04:20 +08:00
|
|
|
}
|
|
|
|
|
2014-12-23 06:06:22 +08:00
|
|
|
/* Check that the specified process exists */
|
|
|
|
snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid);
|
|
|
|
tfd = open(buf, O_DIRECTORY | O_RDONLY);
|
|
|
|
if (tfd == -1) {
|
2015-01-26 18:56:13 +08:00
|
|
|
pr_perror("Failed to open \"%s\"", buf);
|
2014-12-23 06:06:22 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2015-08-09 00:30:55 +08:00
|
|
|
self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY);
|
|
|
|
if (self_tfd == -1) {
|
|
|
|
pr_perror("Failed to open /proc/self/ns");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2014-12-23 06:06:22 +08:00
|
|
|
for (i = 0; i < num; i++) {
|
|
|
|
struct stat st;
|
2015-08-09 00:30:55 +08:00
|
|
|
struct stat self_st;
|
2014-12-23 06:06:22 +08:00
|
|
|
int fd;
|
|
|
|
|
|
|
|
/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
|
2015-08-09 00:30:55 +08:00
|
|
|
if (fstatat(tfd, namespaces[i], &st, 0) == -1) {
|
2014-12-23 06:06:22 +08:00
|
|
|
// Ignore nonexistent namespaces.
|
|
|
|
if (errno == ENOENT)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-08-09 00:30:55 +08:00
|
|
|
/* Skip namespaces we're already part of */
|
2015-10-17 23:35:36 +08:00
|
|
|
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) {
|
2015-08-09 00:30:55 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-12-23 06:06:22 +08:00
|
|
|
fd = openat(tfd, namespaces[i], O_RDONLY);
|
|
|
|
if (fd == -1) {
|
2015-10-17 23:35:36 +08:00
|
|
|
pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
|
2014-12-23 06:06:22 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
// Set the namespace.
|
|
|
|
if (setns(fd, 0) == -1) {
|
2015-01-26 18:56:13 +08:00
|
|
|
pr_perror("Failed to setns for %s", namespaces[i]);
|
2014-12-23 06:06:22 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
close(fd);
|
|
|
|
}
|
|
|
|
|
2015-08-09 00:30:55 +08:00
|
|
|
close(self_tfd);
|
|
|
|
close(tfd);
|
|
|
|
|
2015-01-26 16:33:56 +08:00
|
|
|
if (setjmp(env) == 1) {
|
2015-04-14 13:55:04 +08:00
|
|
|
// Child
|
|
|
|
|
2015-03-05 08:04:20 +08:00
|
|
|
if (setsid() == -1) {
|
|
|
|
pr_perror("setsid failed");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (consolefd != -1) {
|
|
|
|
if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
|
|
|
|
pr_perror("ioctl TIOCSCTTY failed");
|
|
|
|
exit(1);
|
|
|
|
}
|
2015-06-10 06:19:47 +08:00
|
|
|
if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
|
2015-03-05 08:04:20 +08:00
|
|
|
pr_perror("Failed to dup 0");
|
|
|
|
exit(1);
|
|
|
|
}
|
2015-06-10 06:19:47 +08:00
|
|
|
if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
|
2015-03-05 08:04:20 +08:00
|
|
|
pr_perror("Failed to dup 1");
|
|
|
|
exit(1);
|
|
|
|
}
|
2015-06-10 06:19:47 +08:00
|
|
|
if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
|
2015-03-05 08:04:20 +08:00
|
|
|
pr_perror("Failed to dup 2");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
2015-01-26 16:33:56 +08:00
|
|
|
// Finish executing, let the Go runtime take over.
|
|
|
|
return;
|
|
|
|
}
|
2015-04-14 13:55:04 +08:00
|
|
|
// Parent
|
2015-01-26 16:33:56 +08:00
|
|
|
|
2015-04-14 13:55:04 +08:00
|
|
|
// We must fork to actually enter the PID namespace, use CLONE_PARENT
|
|
|
|
// so the child can have the right parent, and we don't need to forward
|
|
|
|
// the child's exit code or resend its death signal.
|
2015-01-26 16:33:56 +08:00
|
|
|
child = clone_parent(&env);
|
2014-12-23 06:06:22 +08:00
|
|
|
if (child < 0) {
|
2015-01-26 18:56:13 +08:00
|
|
|
pr_perror("Unable to fork");
|
2014-12-23 06:06:22 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2015-01-26 16:33:56 +08:00
|
|
|
len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child);
|
|
|
|
|
2015-04-11 02:23:09 +08:00
|
|
|
if (write(pipenum, buf, len) != len) {
|
2015-01-26 16:33:56 +08:00
|
|
|
pr_perror("Unable to send a child pid");
|
|
|
|
kill(child, SIGKILL);
|
2014-12-23 06:06:22 +08:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2015-01-26 16:33:56 +08:00
|
|
|
exit(0);
|
2014-12-23 06:06:22 +08:00
|
|
|
}
|