From 1a380ac436d47020819bf2398705f7528515844a Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 01:05:56 +0300 Subject: [PATCH 1/5] nsinit: remove tricks around nsenter If we really need these commands, we need to expand the API. Signed-off-by: Andrey Vagin --- nsinit/exec.go | 14 -------- nsinit/main.go | 28 ---------------- nsinit/nsenter.go | 84 ----------------------------------------------- 3 files changed, 126 deletions(-) delete mode 100644 nsinit/nsenter.go diff --git a/nsinit/exec.go b/nsinit/exec.go index 266f5935..6c98c0f3 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -6,7 +6,6 @@ import ( "log" "os" "syscall" - "text/tabwriter" "github.com/codegangsta/cli" "github.com/docker/libcontainer" @@ -30,19 +29,6 @@ var execCommand = cli.Command{ } func execAction(context *cli.Context) { - if context.Bool("list") { - w := tabwriter.NewWriter(os.Stdout, 10, 1, 3, ' ', 0) - fmt.Fprint(w, "NAME\tUSAGE\n") - - for k, f := range argvs { - fmt.Fprintf(w, "%s\t%s\n", k, f.Usage) - } - - w.Flush() - - return - } - var exitCode int process := &libcontainer.ProcessConfig{ diff --git a/nsinit/main.go b/nsinit/main.go index 561ce3a9..d1e4bf1e 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -3,43 +3,15 @@ package main import ( "log" "os" - "strings" "github.com/codegangsta/cli" ) var ( logPath = os.Getenv("log") - argvs = make(map[string]*rFunc) ) -func init() { - argvs["exec"] = &rFunc{ - Usage: "execute a process inside an existing container", - Action: nsenterExec, - } - - argvs["mknod"] = &rFunc{ - Usage: "mknod a device inside an existing container", - Action: nsenterMknod, - } - - argvs["ip"] = &rFunc{ - Usage: "display the container's network interfaces", - Action: nsenterIp, - } -} - func main() { - // we need to check our argv 0 for any registred functions to run instead of the - // normal cli code path - f, exists := argvs[strings.TrimPrefix(os.Args[0], "nsenter-")] - if exists { - runFunc(f) - - return - } - app := cli.NewApp() app.Name =
"nsinit" diff --git a/nsinit/nsenter.go b/nsinit/nsenter.go deleted file mode 100644 index 8365215e..00000000 --- a/nsinit/nsenter.go +++ /dev/null @@ -1,84 +0,0 @@ -package main - -import ( - "fmt" - "log" - "net" - "os" - "strconv" - "strings" - "text/tabwriter" - - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/devices" - "github.com/docker/libcontainer/mount/nodes" - "github.com/docker/libcontainer/namespaces" - _ "github.com/docker/libcontainer/namespaces/nsenter" -) - -// nsenterExec exec's a process inside an existing container -func nsenterExec(config *configs.Config, args []string) { - if err := namespaces.FinalizeSetns(config, args); err != nil { - log.Fatalf("failed to nsenter: %s", err) - } -} - -// nsenterMknod runs mknod inside an existing container -// -// mknod -func nsenterMknod(config *configs.Config, args []string) { - if len(args) != 4 { - log.Fatalf("expected mknod to have 4 arguments not %d", len(args)) - } - - t := rune(args[1][0]) - - major, err := strconv.Atoi(args[2]) - if err != nil { - log.Fatal(err) - } - - minor, err := strconv.Atoi(args[3]) - if err != nil { - log.Fatal(err) - } - - n := &devices.Device{ - Path: args[0], - Type: t, - MajorNumber: int64(major), - MinorNumber: int64(minor), - } - - if err := nodes.CreateDeviceNode("/", n); err != nil { - log.Fatal(err) - } -} - -// nsenterIp displays the network interfaces inside a container's net namespace -func nsenterIp(config *configs.Config, args []string) { - interfaces, err := net.Interfaces() - if err != nil { - log.Fatal(err) - } - - w := tabwriter.NewWriter(os.Stdout, 10, 1, 3, ' ', 0) - fmt.Fprint(w, "NAME\tMTU\tMAC\tFLAG\tADDRS\n") - - for _, iface := range interfaces { - addrs, err := iface.Addrs() - if err != nil { - log.Fatal(err) - } - - o := []string{} - - for _, a := range addrs { - o = append(o, a.String()) - } - - fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n", iface.Name, iface.MTU, iface.HardwareAddr, iface.Flags, strings.Join(o, ",")) - } - - 
w.Flush() -} From 13841ef37da97bfaaed4e14a6c18638a53a62d01 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 16:09:35 +0300 Subject: [PATCH 2/5] new-api: return the Running state only if the init process is alive Signed-off-by: Andrey Vagin --- linux_container.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/linux_container.go b/linux_container.go index bbd0cb1d..ea202df4 100644 --- a/linux_container.go +++ b/linux_container.go @@ -34,7 +34,27 @@ func (c *linuxContainer) Config() *configs.Config { } func (c *linuxContainer) RunState() (configs.RunState, error) { - return configs.Destroyed, nil // FIXME return a real state + if c.state.InitPid <= 0 { + return configs.Destroyed, nil + } + + // return Running if the init process is alive + err := syscall.Kill(c.state.InitPid, 0) + if err != nil { + errn, y := err.(syscall.Errno) + if !y { + return 0, err + } + + if errn == syscall.ESRCH { + return configs.Destroyed, nil + } + return 0, err + } + + //FIXME get a cgroup state to check other states + + return configs.Running, nil } func (c *linuxContainer) Processes() ([]int, error) { From 11ce56a9e07e88f41f0455d0a1e4fdec7d779850 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 16:10:22 +0300 Subject: [PATCH 3/5] new-api: clean up startInitProcess() Signed-off-by: Andrey Vagin --- linux_container.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux_container.go b/linux_container.go index ea202df4..555e93b8 100644 --- a/linux_container.go +++ b/linux_container.go @@ -124,7 +124,7 @@ func (c *linuxContainer) updateStateFile() error { } func (c *linuxContainer) startInitProcess(config *ProcessConfig) error { - cmd := exec.Command(c.initArgs[0], append(c.initArgs[1:], config.Args...)...) + cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
cmd.Stdin = config.Stdin cmd.Stdout = config.Stdout cmd.Stderr = config.Stderr From d572094b75e169e7ca9cbd2ca11d0d8336d72b04 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 01:06:22 +0300 Subject: [PATCH 4/5] new-api: execute a process inside an existing container A new constructor function (like nsenter) is added in this patch. This function gets its arguments from environment variables and its behaviour doesn't depend on command line arguments. A program which calls factory.StartInitialization() must import the nsenter package. It looks ugly, but I don't know another way to enter a container from Go code. Signed-off-by: Andrey Vagin --- linux_container.go | 46 +++++++------- linux_factory.go | 5 ++ namespaces/execin.go | 104 ++++++++++++++++++------------- namespaces/nsenter/nsenter.go | 1 + namespaces/nsenter/nsexec.c | 114 ++++++++++++++++++++++++++++++++++ nsinit/init.go | 1 + 6 files changed, 203 insertions(+), 68 deletions(-) create mode 100644 namespaces/nsenter/nsexec.c diff --git a/linux_container.go b/linux_container.go index 555e93b8..9189be2e 100644 --- a/linux_container.go +++ b/linux_container.go @@ -41,12 +41,7 @@ func (c *linuxContainer) RunState() (configs.RunState, error) { // return Running if the init process is alive err := syscall.Kill(c.state.InitPid, 0) if err != nil { - errn, y := err.(syscall.Errno) - if !y { - return 0, err - } - - if errn == syscall.ESRCH { + if err == syscall.ESRCH { return configs.Destroyed, nil } return 0, err @@ -82,18 +77,32 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { return stats, nil } -func (c *linuxContainer) StartProcess(pconfig *ProcessConfig) (int, error) { +func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { state, err := c.RunState() if err != nil { return -1, err } - if state != configs.Destroyed { - glog.Info("start new container process") - panic("not implemented") + cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
+ cmd.Stdin = config.Stdin + cmd.Stdout = config.Stdout + cmd.Stderr = config.Stderr + + cmd.Env = config.Env + cmd.Dir = c.config.RootFs + + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} } - if err := c.startInitProcess(pconfig); err != nil { + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + + if state != configs.Destroyed { + glog.Info("start new container process") + return namespaces.ExecIn(config.Args, config.Env, cmd, c.config, c.state) + } + + if err := c.startInitProcess(cmd, config); err != nil { return -1, err } @@ -123,21 +132,8 @@ func (c *linuxContainer) updateStateFile() error { return nil } -func (c *linuxContainer) startInitProcess(config *ProcessConfig) error { - cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) - cmd.Stdin = config.Stdin - cmd.Stdout = config.Stdout - cmd.Stderr = config.Stderr - - cmd.Env = config.Env - cmd.Dir = c.config.RootFs - - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} - } - +func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces)) - cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state) if err != nil { diff --git a/linux_factory.go b/linux_factory.go index ecef9dcd..10e464ec 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -167,5 +167,10 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { pipe := os.NewFile(uintptr(pipefd), "pipe") + pid := os.Getenv("_LIBCONTAINER_INITPID") + if pid != "" { + return namespaces.InitIn(pipe) + } + return namespaces.Init(pipe) } diff --git a/namespaces/execin.go b/namespaces/execin.go index 2b63b8c6..5d2708ac 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -5,12 +5,9 @@ package namespaces import ( "encoding/json" "fmt" - "io" + 
"io/ioutil" "os" "os/exec" - "path/filepath" - "strconv" - "syscall" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" @@ -19,27 +16,10 @@ import ( "github.com/docker/libcontainer/system" ) -// ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the +// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. -func ExecIn(container *configs.Config, state *configs.State, userArgs []string, initPath, action string, - stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) { - - args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)} - - if console != "" { - args = append(args, "--console", console) - } - - cmd := &exec.Cmd{ - Path: initPath, - Args: append(args, append([]string{"--"}, userArgs...)...), - } - - if filepath.Base(initPath) == initPath { - if lp, err := exec.LookPath(initPath); err == nil { - cmd.Path = lp - } - } +func ExecIn(args []string, env []string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { + var err error parent, child, err := newInitPipe() if err != nil { @@ -47,13 +27,8 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string, } defer parent.Close() - // Note: these are only used in non-tty mode - // if there is a tty for the container it will be opened within the namespace and the - // fds will be duped to stdin, stdiout, and stderr - cmd.Stdin = stdin - cmd.Stdout = stdout - cmd.Stderr = stderr cmd.ExtraFiles = []*os.File{child} + cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid)) if err := cmd.Start(); err != nil { child.Close() @@ -68,6 +43,20 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string, return -1, terr } + encoder := json.NewEncoder(parent) + + 
if err := encoder.Encode(container); err != nil { + return terminate(err) + } + + process := processArgs{ + Env: append(env[0:], container.Env...), + Args: args, + } + if err := encoder.Encode(process); err != nil { + return terminate(err) + } + // Enter cgroups. if err := EnterCgroups(state, cmd.Process.Pid); err != nil { return terminate(err) @@ -77,21 +66,54 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string, return terminate(err) } - if startCallback != nil { - startCallback(cmd) + return cmd.Process.Pid, nil +} + +// Finalize entering into a container and execute a specified command +func InitIn(pipe *os.File) (err error) { + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. + if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. + ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } + } + // ensure that this pipe is always closed + pipe.Close() + }() + + decoder := json.NewDecoder(pipe) + + var container *configs.Config + if err := decoder.Decode(&container); err != nil { + return err } - if err := cmd.Wait(); err != nil { - if _, ok := err.(*exec.ExitError); !ok { - return -1, err - } + var process *processArgs + if err := decoder.Decode(&process); err != nil { + return err } - return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil + + if err := FinalizeSetns(container); err != nil { + return err + } + + if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil { + return err + } + + panic("unreachable") } // Finalize expects that the setns calls have been setup and that is has joined an // existing namespace -func FinalizeSetns(container *configs.Config, args []string) error { +func FinalizeSetns(container 
*configs.Config) error { // clear the current processes env and replace it with the environment defined on the container if err := LoadContainerEnvironment(container); err != nil { return err @@ -111,11 +133,7 @@ func FinalizeSetns(container *configs.Config, args []string) error { } } - if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { - return err - } - - panic("unreachable") + return nil } func EnterCgroups(state *configs.State, pid int) error { diff --git a/namespaces/nsenter/nsenter.go b/namespaces/nsenter/nsenter.go index 7d21e8e5..39471614 100644 --- a/namespaces/nsenter/nsenter.go +++ b/namespaces/nsenter/nsenter.go @@ -5,6 +5,7 @@ package nsenter /* __attribute__((constructor)) init() { nsenter(); + nsexec(); } */ import "C" diff --git a/namespaces/nsenter/nsexec.c b/namespaces/nsenter/nsexec.c new file mode 100644 index 00000000..95498bf0 --- /dev/null +++ b/namespaces/nsenter/nsexec.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) +#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 +#define _GNU_SOURCE +#include +#include "syscall.h" +#ifdef SYS_setns +int setns(int fd, int nstype) +{ + return syscall(SYS_setns, fd, nstype); +} +#endif +#endif + +void nsexec() +{ + char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; + const int num = sizeof(namespaces) / sizeof(char *); + char buf[PATH_MAX], *val; + int child, i, tfd; + pid_t pid; + + val = getenv("_LIBCONTAINER_INITPID"); + if (val == NULL) + return; + + pid = atoi(val); + snprintf(buf, sizeof(buf), "%d", pid); + if (strcmp(val, buf)) { + fprintf(stderr, "Unable to parse _LIBCONTAINER_INITPID"); + exit(1); + } + + /* Check that the specified process exists */ + snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid); + tfd = open(buf, O_DIRECTORY | O_RDONLY); + if (tfd == -1) { + fprintf(stderr, + "nsenter: 
Failed to open \"%s\" with error: \"%s\"\n", + buf, strerror(errno)); + exit(1); + } + + for (i = 0; i < num; i++) { + struct stat st; + int fd; + + /* Symlinks on all namespaces exist for dead processes, but they can't be opened */ + if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) { + // Ignore nonexistent namespaces. + if (errno == ENOENT) + continue; + } + + fd = openat(tfd, namespaces[i], O_RDONLY); + if (fd == -1) { + fprintf(stderr, + "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", + buf, namespaces[i], strerror(errno)); + exit(1); + } + // Set the namespace. + if (setns(fd, 0) == -1) { + fprintf(stderr, + "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", + namespaces[i], strerror(errno)); + exit(1); + } + close(fd); + } + + child = fork(); + if (child < 0) { + fprintf(stderr, "Unable to fork: %s", strerror(errno)); + exit(1); + } + // We must fork to actually enter the PID namespace. + if (child == 0) { + // Finish executing, let the Go runtime take over. + return; + } else { + // Parent, wait for the child. + int status = 0; + if (waitpid(child, &status, 0) == -1) { + fprintf(stderr, + "nsenter: Failed to waitpid with error: \"%s\"\n", + strerror(errno)); + exit(1); + } + // Forward the child's exit code or re-send its death signal. + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + + exit(1); + } + + return; +} diff --git a/nsinit/init.go b/nsinit/init.go index 08836139..bf59345a 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -5,6 +5,7 @@ import ( "github.com/codegangsta/cli" "github.com/docker/libcontainer" + _ "github.com/docker/libcontainer/namespaces/nsenter" ) var ( From 195a08efbce00f1a121df96945281fa1d2a115ab Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 24 Dec 2014 11:25:00 +0300 Subject: [PATCH 5/5] new-api: set Cloneflags in namespace.Exec() This place looks more suitable. 
Signed-off-by: Andrey Vagin --- linux_container.go | 2 -- namespaces/exec.go | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/linux_container.go b/linux_container.go index 9189be2e..7845f8db 100644 --- a/linux_container.go +++ b/linux_container.go @@ -133,8 +133,6 @@ func (c *linuxContainer) updateStateFile() error { } func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { - cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces)) - err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state) if err != nil { return err diff --git a/namespaces/exec.go b/namespaces/exec.go index 0822154d..1d7914a0 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -31,9 +31,10 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con return err } defer parent.Close() - command.ExtraFiles = []*os.File{child} + command.Dir = container.RootFs + command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces)) if err := command.Start(); err != nil { child.Close()