diff --git a/namespaces/execin.go b/namespaces/execin.go index 2424f2de..cff1cca7 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -16,6 +16,10 @@ import ( "github.com/docker/libcontainer/system" ) +type pid struct { + Pid int `json:"Pid"` +} + // ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { @@ -36,13 +40,38 @@ func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, containe } child.Close() + s, err := cmd.Process.Wait() + if err != nil { + return -1, err + } + if !s.Success() { + return -1, &exec.ExitError{s} + } + + decoder := json.NewDecoder(parent) + var pid *pid + + if err := decoder.Decode(&pid); err != nil { + return -1, err + } + + p, err := os.FindProcess(pid.Pid) + if err != nil { + return -1, err + } + terminate := func(terr error) (int, error) { // TODO: log the errors for kill and wait - cmd.Process.Kill() - cmd.Wait() + p.Kill() + p.Wait() return -1, terr } + // Enter cgroups with the pid decoded from parent, not cmd.Process.Pid. + if err := EnterCgroups(state, pid.Pid); err != nil { + return terminate(err) + } + encoder := json.NewEncoder(parent) if err := encoder.Encode(container); err != nil { @@ -58,12 +87,7 @@ func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, containe return terminate(err) } - // Enter cgroups. 
- if err := EnterCgroups(state, cmd.Process.Pid); err != nil { - return terminate(err) - } - - return cmd.Process.Pid, nil + return pid.Pid, nil } // Finalize entering into a container and execute a specified command diff --git a/namespaces/nsenter/nsenter.go b/namespaces/nsenter/nsenter.go index e0ade6eb..880dca83 100644 --- a/namespaces/nsenter/nsenter.go +++ b/namespaces/nsenter/nsenter.go @@ -3,7 +3,9 @@ package nsenter /* -__attribute__((constructor)) init() { +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init() { nsexec(); } */ diff --git a/namespaces/nsenter/nsexec.c b/namespaces/nsenter/nsexec.c index 90c293d0..426dfc57 100644 --- a/namespaces/nsenter/nsexec.c +++ b/namespaces/nsenter/nsexec.c @@ -1,3 +1,4 @@ +#define _GNU_SOURCE #include #include #include @@ -11,13 +12,32 @@ #include #include #include +#include +#include +#include + +/* Fields read by the child must sit above stack[], because the stack grows down */ +struct clone_arg { + /* + * Scratch space for clone() to place the child's arguments and + * retcode; stack_ptr marks its top and is passed as the child stack. + */ + char stack[4096] __attribute__((aligned (8))); + char stack_ptr[0]; + jmp_buf *env; +}; + +static int child_func(void *_arg) +{ + struct clone_arg *arg = (struct clone_arg *) _arg; + longjmp(*arg->env, 1); +} #define pr_perror(fmt, ...) 
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__) // Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 #define _GNU_SOURCE -#include #include "syscall.h" #ifdef SYS_setns int setns(int fd, int nstype) @@ -27,12 +47,25 @@ int setns(int fd, int nstype) #endif #endif +static int clone_parent(jmp_buf *env) __attribute__ ((noinline)); +static int clone_parent(jmp_buf *env) +{ + struct clone_arg ca; + int child; + + ca.env = env; + child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); + + return child; +} + void nsexec() { char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; const int num = sizeof(namespaces) / sizeof(char *); + jmp_buf env; char buf[PATH_MAX], *val; - int child, i, tfd; + int i, tfd, child, len; pid_t pid; val = getenv("_LIBCONTAINER_INITPID"); @@ -78,31 +111,24 @@ void nsexec() close(fd); } - child = fork(); + if (setjmp(env) == 1) { + // Resumed via longjmp() from child_func(): return and let the Go runtime take over. + return; + } + + child = clone_parent(&env); if (child < 0) { pr_perror("Unable to fork"); exit(1); } - // We must fork to actually enter the PID namespace. - if (child == 0) { - // Finish executing, let the Go runtime take over. - return; - } else { - // Parent, wait for the child. - int status = 0; - if (waitpid(child, &status, 0) == -1) { - pr_perror("Failed to waitpid"); - exit(1); - } - // Forward the child's exit code or re-send its death signal. - if (WIFEXITED(status)) { - exit(WEXITSTATUS(status)); - } else if (WIFSIGNALED(status)) { - kill(getpid(), WTERMSIG(status)); - } + len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child); + + if (write(3, buf, len) != len) { + pr_perror("Unable to send a child pid"); + kill(child, SIGKILL); exit(1); } - return; + exit(0); }