diff --git a/.gitignore b/.gitignore index fecde9ca..dab07a18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ vendor/pkg /runc +contrib/cmd/recvtty/recvtty Godeps/_workspace/src/github.com/opencontainers/runc man/man8 release diff --git a/Dockerfile b/Dockerfile index c3a23a4f..05687663 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,8 @@ RUN mkdir -p /go/src/github.com/mvdan \ # setup a playground for us to spawn containers in ENV ROOTFS /busybox RUN mkdir -p ${ROOTFS} \ - && curl -o- -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' | tar -C ${ROOTFS} -xf - + && curl -o- -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz' | tar xfJC - ${ROOTFS} + COPY script/tmpmount / WORKDIR /go/src/github.com/opencontainers/runc diff --git a/Makefile b/Makefile index 5028338b..5f22c703 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ -.PHONY: dbuild man \ +.PHONY: all shell dbuild man \ localtest localunittest localintegration \ test unittest integration +SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$') PREFIX := $(DESTDIR)/usr/local BINDIR := $(PREFIX)/sbin GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) @@ -25,13 +26,23 @@ VERSION := ${shell cat ./VERSION} SHELL := $(shell command -v bash 2>/dev/null) -all: $(RUNC_LINK) +.DEFAULT: runc + +runc: $(SOURCES) | $(RUNC_LINK) go build -i -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -tags "$(BUILDTAGS)" -o runc . -static: $(RUNC_LINK) - CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o runc . +all: runc recvtty -release: $(RUNC_LINK) +recvtty: contrib/cmd/recvtty/recvtty + +contrib/cmd/recvtty/recvtty: $(SOURCES) | $(RUNC_LINK) + go build -i -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty + +static: $(SOURCES) | $(RUNC_LINK) + CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o runc . + CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty + +release: $(RUNC_LINK) | $(RUNC_LINK) @flag_list=(seccomp selinux apparmor static ambient); \ unset expression; \ for flag in "$${flag_list[@]}"; do \ @@ -61,7 +72,7 @@ $(RUNC_LINK): ln -sfn $(CURDIR) $(RUNC_LINK) dbuild: runcimage - docker run --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make + docker run --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all lint: go vet ./... @@ -91,6 +102,9 @@ integration: runcimage localintegration: all bats -t tests/integration${TESTFLAGS} +shell: all + docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash + install: install -D -m0755 runc $(BINDIR)/runc @@ -112,6 +126,7 @@ uninstall-man: clean: rm -f runc + rm -f contrib/cmd/recvtty/recvtty rm -f $(RUNC_LINK) rm -rf $(GOPATH)/pkg rm -rf $(RELEASE_DIR) diff --git a/contrib/cmd/recvtty/recvtty.go b/contrib/cmd/recvtty/recvtty.go new file mode 100644 index 00000000..196b3283 --- /dev/null +++ b/contrib/cmd/recvtty/recvtty.go @@ -0,0 +1,247 @@ +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "io" + "io/ioutil" + "net" + "os" + "strings" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/urfave/cli" +) + +// version will be populated by the Makefile, read from +// VERSION file of the source code. +var version = "" + +// gitCommit will be the hash that the binary was built from +// and will be populated by the Makefile +var gitCommit = "" + +const ( + usage = `Open Container Initiative contrib/cmd/recvtty + +recvtty is a reference implementation of a consumer of runC's --console-socket +API. It has two main modes of operation: + + * single: Only permit one terminal to be sent to the socket, which is + then hooked up to the stdio of the recvtty process. This is useful + for rudimentary shell management of a container. + + * null: Permit as many terminals to be sent to the socket, but they + are read to /dev/null. This is used for testing, and imitates the + old runC API's --console=/dev/pts/ptmx hack which would allow for a + similar trick. This is probably not what you want to use, unless + you're doing something like our bats integration tests. + +To use recvtty, just specify a socket path at which you want to receive +terminals: + + $ recvtty [--mode ] socket.sock +` +) + +func bail(err error) { + fmt.Fprintf(os.Stderr, "[recvtty] fatal error: %v\n", err) + os.Exit(1) +} + +func handleSingle(path string) error { + // Open a socket. + ln, err := net.Listen("unix", path) + if err != nil { + return err + } + defer ln.Close() + + // We only accept a single connection, since we can only really have + // one reader for os.Stdin. Plus this is all a PoC. + conn, err := ln.Accept() + if err != nil { + return err + } + defer conn.Close() + + // Close ln, to allow for other instances to take over. + ln.Close() + + // Get the fd of the connection. + unixconn, ok := conn.(*net.UnixConn) + if !ok { + return fmt.Errorf("failed to cast to unixconn") + } + + socket, err := unixconn.File() + if err != nil { + return err + } + defer socket.Close() + + // Get the master file descriptor from runC. + master, err := utils.RecvFd(socket) + if err != nil { + return err + } + + // Print the file descriptor tag. + ti, err := libcontainer.GetTerminalInfo(master.Name()) + if err != nil { + return err + } + fmt.Printf("[recvtty] received masterfd: container '%s'\n", ti.ContainerID) + + // Copy from our stdio to the master fd. + quitChan := make(chan struct{}) + go func() { + io.Copy(os.Stdout, master) + quitChan <- struct{}{} + }() + go func() { + io.Copy(master, os.Stdin) + quitChan <- struct{}{} + }() + + // Only close the master fd once we've stopped copying. + <-quitChan + master.Close() + return nil +} + +func handleNull(path string) error { + // Open a socket. + ln, err := net.Listen("unix", path) + if err != nil { + return err + } + defer ln.Close() + + // As opposed to handleSingle we accept as many connections as we get, but + // we don't interact with Stdin at all (and we copy stdout to /dev/null). + for { + conn, err := ln.Accept() + if err != nil { + return err + } + go func(conn net.Conn) { + // Don't leave references lying around. + defer conn.Close() + + // Get the fd of the connection. + unixconn, ok := conn.(*net.UnixConn) + if !ok { + return + } + + socket, err := unixconn.File() + if err != nil { + return + } + defer socket.Close() + + // Get the master file descriptor from runC. + master, err := utils.RecvFd(socket) + if err != nil { + return + } + + // Print the file descriptor tag. + ti, err := libcontainer.GetTerminalInfo(master.Name()) + if err != nil { + bail(err) + } + fmt.Printf("[recvtty] received masterfd: container '%s'\n", ti.ContainerID) + + // Just do a dumb copy to /dev/null. + devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0) + if err != nil { + // TODO: Handle this nicely. + return + } + + io.Copy(devnull, master) + devnull.Close() + }(conn) + } +} + +func main() { + app := cli.NewApp() + app.Name = "recvtty" + app.Usage = usage + + // Set version to be the same as runC. + var v []string + if version != "" { + v = append(v, version) + } + if gitCommit != "" { + v = append(v, fmt.Sprintf("commit: %s", gitCommit)) + } + app.Version = strings.Join(v, "\n") + + // Set the flags. + app.Flags = []cli.Flag{ + cli.StringFlag{ + Name: "mode, m", + Value: "single", + Usage: "Mode of operation (single or null)", + }, + cli.StringFlag{ + Name: "pid-file", + Value: "", + Usage: "Path to write daemon process ID to", + }, + } + + app.Action = func(ctx *cli.Context) error { + args := ctx.Args() + if len(args) != 1 { + return fmt.Errorf("need to specify a single socket path") + } + path := ctx.Args()[0] + + pidPath := ctx.String("pid-file") + if pidPath != "" { + pid := fmt.Sprintf("%d\n", os.Getpid()) + if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil { + return err + } + } + + switch ctx.String("mode") { + case "single": + if err := handleSingle(path); err != nil { + return err + } + case "null": + if err := handleNull(path); err != nil { + return err + } + default: + return fmt.Errorf("need to select a valid mode: %s", ctx.String("mode")) + } + return nil + } + if err := app.Run(os.Args); err != nil { + bail(err) + } +} diff --git a/create.go b/create.go index 9d8a52b3..a91b7746 100644 --- a/create.go +++ b/create.go @@ -30,9 +30,9 @@ command(s) that get executed on start, edit the args parameter of the spec. See Usage: `path to the root of the bundle directory, defaults to the current directory`, }, cli.StringFlag{ - Name: "console", + Name: "console-socket", Value: "", - Usage: "specify the pty slave path for use with the container", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", }, cli.StringFlag{ Name: "pid-file", diff --git a/exec.go b/exec.go index e49e904b..540ffeb6 100644 --- a/exec.go +++ b/exec.go @@ -26,12 +26,12 @@ Where "" is the name for the instance of the container and EXAMPLE: For example, if the container is configured to run the linux ps command the following will output a list of processes running in the container: - + # runc exec ps`, Flags: []cli.Flag{ cli.StringFlag{ - Name: "console", - Usage: "specify the pty slave path for use with the container", + Name: "console-socket", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", }, cli.StringFlag{ Name: "cwd", @@ -131,7 +131,7 @@ func execProcess(context *cli.Context) (int, error) { enableSubreaper: false, shouldDestroy: false, container: container, - console: context.String("console"), + consoleSocket: context.String("console-socket"), detach: detach, pidFile: context.String("pid-file"), } diff --git a/libcontainer/console.go b/libcontainer/console.go index b25e1eb4..d19043f4 100644 --- a/libcontainer/console.go +++ b/libcontainer/console.go @@ -1,6 +1,11 @@ package libcontainer -import "io" +import ( + "encoding/json" + "fmt" + "io" + "os" +) // Console represents a pseudo TTY. type Console interface { @@ -10,5 +15,59 @@ type Console interface { Path() string // Fd returns the fd for the master of the pty. - Fd() uintptr + File() *os.File +} + +const ( + TerminalInfoVersion uint32 = 201610041 + TerminalInfoType uint8 = 'T' +) + +// TerminalInfo is the structure which is passed as the non-ancillary data +// in the sendmsg(2) call when runc is run with --console-socket. It +// contains some information about the container which the console master fd +// relates to (to allow for consumers to use a single unix socket to handle +// multiple containers). This structure will probably move to runtime-spec +// at some point. But for now it lies in libcontainer. +type TerminalInfo struct { + // Version of the API. + Version uint32 `json:"version"` + + // Type of message (future proofing). + Type uint8 `json:"type"` + + // Container contains the ID of the container. + ContainerID string `json:"container_id"` +} + +func (ti *TerminalInfo) String() string { + encoded, err := json.Marshal(*ti) + if err != nil { + panic(err) + } + return string(encoded) +} + +func NewTerminalInfo(containerId string) *TerminalInfo { + return &TerminalInfo{ + Version: TerminalInfoVersion, + Type: TerminalInfoType, + ContainerID: containerId, + } +} + +func GetTerminalInfo(encoded string) (*TerminalInfo, error) { + ti := new(TerminalInfo) + if err := json.Unmarshal([]byte(encoded), ti); err != nil { + return nil, err + } + + if ti.Type != TerminalInfoType { + return nil, fmt.Errorf("terminal info: incorrect type in payload (%q): %q", TerminalInfoType, ti.Type) + } + if ti.Version != TerminalInfoVersion { + return nil, fmt.Errorf("terminal info: incorrect version in payload (%q): %q", TerminalInfoVersion, ti.Version) + } + + return ti, nil } diff --git a/libcontainer/console_freebsd.go b/libcontainer/console_freebsd.go index 300e34c4..b7166a31 100644 --- a/libcontainer/console_freebsd.go +++ b/libcontainer/console_freebsd.go @@ -6,8 +6,8 @@ import ( "errors" ) -// NewConsole returns an initialized console that can be used within a container by copying bytes +// newConsole returns an initialized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. -func NewConsole(uid, gid int) (Console, error) { +func newConsole() (Console, error) { return nil, errors.New("libcontainer console is not supported on FreeBSD") } diff --git a/libcontainer/console_linux.go b/libcontainer/console_linux.go index 5c8769b2..6e38b462 100644 --- a/libcontainer/console_linux.go +++ b/libcontainer/console_linux.go @@ -3,16 +3,13 @@ package libcontainer import ( "fmt" "os" - "path/filepath" "syscall" "unsafe" - - "github.com/opencontainers/runc/libcontainer/label" ) -// NewConsole returns an initialized console that can be used within a container by copying bytes +// newConsole returns an initialized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. -func NewConsole(uid, gid int) (Console, error) { +func newConsole() (Console, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { return nil, err @@ -27,34 +24,20 @@ func NewConsole(uid, gid int) (Console, error) { if err := unlockpt(master); err != nil { return nil, err } - if err := os.Chmod(console, 0600); err != nil { - return nil, err - } - if err := os.Chown(console, uid, gid); err != nil { - return nil, err - } return &linuxConsole{ slavePath: console, master: master, }, nil } -// newConsoleFromPath is an internal function returning an initialized console for use inside -// a container's MNT namespace. -func newConsoleFromPath(slavePath string) *linuxConsole { - return &linuxConsole{ - slavePath: slavePath, - } -} - // linuxConsole is a linux pseudo TTY for use within a container. type linuxConsole struct { master *os.File slavePath string } -func (c *linuxConsole) Fd() uintptr { - return c.master.Fd() +func (c *linuxConsole) File() *os.File { + return c.master } func (c *linuxConsole) Path() string { @@ -78,21 +61,17 @@ func (c *linuxConsole) Close() error { // mount initializes the console inside the rootfs mounting with the specified mount label // and applying the correct ownership of the console. -func (c *linuxConsole) mount(rootfs, mountLabel string) error { +func (c *linuxConsole) mount() error { oldMask := syscall.Umask(0000) defer syscall.Umask(oldMask) - if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil { - return err - } - dest := filepath.Join(rootfs, "/dev/console") - f, err := os.Create(dest) + f, err := os.Create("/dev/console") if err != nil && !os.IsExist(err) { return err } if f != nil { f.Close() } - return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "") + return syscall.Mount(c.slavePath, "/dev/console", "bind", syscall.MS_BIND, "") } // dupStdio opens the slavePath for the console and dups the fds to the current diff --git a/libcontainer/console_solaris.go b/libcontainer/console_solaris.go index e90ca0d8..e5ca5459 100644 --- a/libcontainer/console_solaris.go +++ b/libcontainer/console_solaris.go @@ -4,8 +4,8 @@ import ( "errors" ) -// NewConsole returns an initialized console that can be used within a container by copying bytes +// newConsole returns an initialized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. -func NewConsole(uid, gid int) (Console, error) { +func newConsole() (Console, error) { return nil, errors.New("libcontainer console is not supported on Solaris") } diff --git a/libcontainer/console_windows.go b/libcontainer/console_windows.go index 2baf3137..c61e866a 100644 --- a/libcontainer/console_windows.go +++ b/libcontainer/console_windows.go @@ -1,7 +1,7 @@ package libcontainer -// NewConsole returns an initialized console that can be used within a container -func NewConsole(uid, gid int) (Console, error) { +// newConsole returns an initialized console that can be used within a container +func newConsole() (Console, error) { return &windowsConsole{}, nil } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 53d6f78e..8a7184ae 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -342,10 +342,11 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c } } _, sharePidns := nsMaps[configs.NEWPID] - data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "") + data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps) if err != nil { return nil, err } + p.consoleChan = make(chan *os.File, 1) return &initProcess{ cmd: cmd, childPipe: childPipe, @@ -368,11 +369,12 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, } // for setns process, we don't have to set cloneflags as the process namespaces // will only be set via setns syscall - data, err := c.bootstrapData(0, state.NamespacePaths, p.consolePath) + data, err := c.bootstrapData(0, state.NamespacePaths) if err != nil { return nil, err } // TODO: set on container for process management + p.consoleChan = make(chan *os.File, 1) return &setnsProcess{ cmd: cmd, cgroupPaths: c.cgroupManager.GetPaths(), @@ -393,7 +395,6 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { User: process.User, AdditionalGroups: process.AdditionalGroups, Cwd: process.Cwd, - Console: process.consolePath, Capabilities: process.Capabilities, PassedFilesCount: len(process.ExtraFiles), ContainerId: c.ID(), @@ -415,6 +416,17 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + /* + * TODO: This should not be automatically computed. We should implement + * this as a field in libcontainer.Process, and then we only dup the + * new console over the file descriptors which were not explicitly + * set with process.Std{in,out,err}. The reason I've left this as-is + * is because the GetConsole() interface is new, there's no need to + * polish this interface right now. + */ + if process.Stdin == nil && process.Stdout == nil && process.Stderr == nil { + cfg.CreateConsole = true + } return cfg } @@ -1281,7 +1293,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) { // such as one that uses nsenter package to bootstrap the container's // init process correctly, i.e. with correct namespaces, uid/gid // mapping etc. -func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) { +func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) { // create the netlink message r := nl.NewNetlinkRequest(int(InitMsg), 0) @@ -1291,14 +1303,6 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Value: uint32(cloneFlags), }) - // write console path - if consolePath != "" { - r.AddData(&Bytemsg{ - Type: ConsolePathAttr, - Value: []byte(consolePath), - }) - } - // write custom namespace paths if len(nsMaps) > 0 { nsPaths, err := c.orderNamespacePaths(nsMaps) diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 6e2bf3ad..fbaaf639 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -253,12 +253,9 @@ func (l *LinuxFactory) StartInitialization() (err error) { // send it back to the parent process in the form of an initError. // If container's init successed, syscall.Exec will not return, hence // this defer function will never be called. - if _, ok := i.(*linuxStandardInit); ok { - // Synchronisation only necessary for standard init. - if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil { - fmt.Fprintln(os.Stderr, err) - return - } + if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil { + fmt.Fprintln(os.Stderr, err) + return } if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil { fmt.Fprintln(os.Stderr, err) diff --git a/libcontainer/generic_error.go b/libcontainer/generic_error.go index de37715c..6e7de2fe 100644 --- a/libcontainer/generic_error.go +++ b/libcontainer/generic_error.go @@ -9,20 +9,6 @@ import ( "github.com/opencontainers/runc/libcontainer/stacktrace" ) -type syncType uint8 - -const ( - procReady syncType = iota - procError - procRun - procHooks - procResume -) - -type syncT struct { - Type syncType `json:"type"` -} - var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} Code: {{.ECode}} {{if .Message }} diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index aeccadd9..e763bc15 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -54,12 +54,12 @@ type initConfig struct { User string `json:"user"` AdditionalGroups []string `json:"additional_groups"` Config *configs.Config `json:"config"` - Console string `json:"console"` Networks []*network `json:"network"` PassedFilesCount int `json:"passed_files_count"` ContainerId string `json:"containerid"` Rlimits []configs.Rlimit `json:"rlimits"` ExecFifoPath string `json:"start_pipe_path"` + CreateConsole bool `json:"create_console"` } type initer interface { @@ -77,6 +77,7 @@ func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error) switch t { case initSetns: return &linuxSetnsInit{ + pipe: pipe, config: config, }, nil case initStandard: @@ -150,24 +151,79 @@ func finalizeNamespace(config *initConfig) error { return nil } +// setupConsole sets up the console from inside the container, and sends the +// master pty fd to the config.Pipe (using cmsg). This is done to ensure that +// consoles are scoped to a container properly (see runc#814 and the many +// issues related to that). This has to be run *after* we've pivoted to the new +// rootfs (and the users' configuration is entirely set up). +func setupConsole(pipe *os.File, config *initConfig, mount bool) error { + // At this point, /dev/ptmx points to something that we would expect. We + // used to change the owner of the slave path, but since the /dev/pts mount + // can have gid=X set (at the users' option). So touching the owner of the + // slave PTY is not necessary, as the kernel will handle that for us. Note + // however, that setupUser (specifically fixStdioPermissions) *will* change + // the UID owner of the console to be the user the process will run as (so + // they can actually control their console). + console, err := newConsole() + if err != nil { + return err + } + // After we return from here, we don't need the console anymore. + defer console.Close() + + linuxConsole, ok := console.(*linuxConsole) + if !ok { + return fmt.Errorf("failed to cast console to *linuxConsole") + } + + // Mount the console inside our rootfs. + if mount { + if err := linuxConsole.mount(); err != nil { + return err + } + } + + if err := writeSync(pipe, procConsole); err != nil { + return err + } + + // We need to have a two-way synchronisation here. Though it might seem + // pointless, it's important to make sure that the sendmsg(2) payload + // doesn't get swallowed by an out-of-place read(2) [which happens if the + // syscalls get reordered so that sendmsg(2) is before the other side's + // read(2) of procConsole]. + if err := readSync(pipe, procConsoleReq); err != nil { + return err + } + + // While we can access console.master, using the API is a good idea. + if err := utils.SendFd(pipe, linuxConsole.File()); err != nil { + return err + } + + // Make sure the other side recieved the fd. + if err := readSync(pipe, procConsoleAck); err != nil { + return err + } + + // Now, dup over all the things. + return linuxConsole.dupStdio() +} + // syncParentReady sends to the given pipe a JSON payload which indicates that // the init is ready to Exec the child process. It then waits for the parent to // indicate that it is cleared to Exec. func syncParentReady(pipe io.ReadWriter) error { // Tell parent. - if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil { + if err := writeSync(pipe, procReady); err != nil { return err } + // Wait for parent to give the all-clear. - var procSync syncT - if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { - if err == io.EOF { - return fmt.Errorf("parent closed synchronisation channel") - } - if procSync.Type != procRun { - return fmt.Errorf("invalid synchronisation flag from parent") - } + if err := readSync(pipe, procRun); err != nil { + return err } + return nil } @@ -176,19 +232,15 @@ func syncParentReady(pipe io.ReadWriter) error { // indicate that it is cleared to resume. func syncParentHooks(pipe io.ReadWriter) error { // Tell parent. - if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil { + if err := writeSync(pipe, procHooks); err != nil { return err } + // Wait for parent to give the all-clear. - var procSync syncT - if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { - if err == io.EOF { - return fmt.Errorf("parent closed synchronisation channel") - } - if procSync.Type != procResume { - return fmt.Errorf("invalid synchronisation flag from parent") - } + if err := readSync(pipe, procResume); err != nil { + return err } + return nil } @@ -262,11 +314,17 @@ func fixStdioPermissions(u *user.ExecUser) error { if err := syscall.Fstat(int(fd), &s); err != nil { return err } - // skip chown of /dev/null if it was used as one of the STDIO fds. + // Skip chown of /dev/null if it was used as one of the STDIO fds. if s.Rdev == null.Rdev { continue } - if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil { + // We only change the uid owner (as it is possible for the mount to + // prefer a different gid, and there's no reason for us to change it). + // The reason why we don't just leave the default uid=X mount setup is + // that users expect to be able to actually use their console. Without + // this code, you couldn't effectively run as a non-root user inside a + // container and also have a console set up. + if err := syscall.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil { return err } } diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index 2ba98d64..7e5d9e1e 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -43,7 +43,7 @@ func testExecPS(t *testing.T, userns bool) { config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) } - buffers, exitCode, err := runContainer(config, "", "ps") + buffers, exitCode, err := runContainer(config, "", "ps", "-o", "pid,user,comm") if err != nil { t.Fatalf("%s: %s", buffers, err) } diff --git a/libcontainer/integration/execin_test.go b/libcontainer/integration/execin_test.go index f25ef357..971ad166 100644 --- a/libcontainer/integration/execin_test.go +++ b/libcontainer/integration/execin_test.go @@ -279,15 +279,15 @@ func TestExecInTTY(t *testing.T) { Args: []string{"ps"}, Env: standardEnvironment, } - console, err := ps.NewConsole(0, 0) + err = container.Run(ps) + ok(t, err) + console, err := ps.GetConsole() copy := make(chan struct{}) go func() { io.Copy(&stdout, console) close(copy) }() ok(t, err) - err = container.Run(ps) - ok(t, err) select { case <-time.After(5 * time.Second): t.Fatal("Waiting for copy timed out") diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 400bd362..a189c724 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -11,13 +11,12 @@ import ( // list of known message types we want to send to bootstrap program // The number is randomly chosen to not conflict with known netlink types const ( - InitMsg uint16 = 62000 - CloneFlagsAttr uint16 = 27281 - ConsolePathAttr uint16 = 27282 - NsPathsAttr uint16 = 27283 - UidmapAttr uint16 = 27284 - GidmapAttr uint16 = 27285 - SetgroupAttr uint16 = 27286 + InitMsg uint16 = 62000 + CloneFlagsAttr uint16 = 27281 + NsPathsAttr uint16 = 27282 + UidmapAttr uint16 = 27283 + GidmapAttr uint16 = 27284 + SetgroupAttr uint16 = 27285 // When syscall.NLA_HDRLEN is in gccgo, take this out. syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1) ) diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index b7a7b3ff..5c6b95e1 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -71,7 +71,6 @@ struct nlconfig_t { char *namespaces; size_t namespaces_len; uint8_t is_setgroup; - int consolefd; }; /* @@ -80,11 +79,10 @@ struct nlconfig_t { */ #define INIT_MSG 62000 #define CLONE_FLAGS_ATTR 27281 -#define CONSOLE_PATH_ATTR 27282 -#define NS_PATHS_ATTR 27283 -#define UIDMAP_ATTR 27284 -#define GIDMAP_ATTR 27285 -#define SETGROUP_ATTR 27286 +#define NS_PATHS_ATTR 27282 +#define UIDMAP_ATTR 27283 +#define GIDMAP_ATTR 27284 +#define SETGROUP_ATTR 27285 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -306,7 +304,6 @@ static void nl_parse(int fd, struct nlconfig_t *config) /* Parse the netlink payload. */ config->data = data; - config->consolefd = -1; while (current < data + size) { struct nlattr *nlattr = (struct nlattr *)current; size_t payload_len = nlattr->nla_len - NLA_HDRLEN; @@ -319,15 +316,6 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; - case CONSOLE_PATH_ATTR: - /* - * We open the console here because we currently evaluate console - * paths from the *host* namespaces. - */ - config->consolefd = open(current, O_RDWR); - if (config->consolefd < 0) - bail("failed to open console %s", current); - break; case NS_PATHS_ATTR: config->namespaces = current; config->namespaces_len = payload_len; @@ -722,7 +710,6 @@ void nsexec(void) * We're inside the child now, having jumped from the * start_child() code after forking in the parent. */ - int consolefd = config.consolefd; enum sync_t s; /* We're in a child and thus need to tell the parent if we die. */ @@ -743,17 +730,6 @@ void nsexec(void) if (setgroups(0, NULL) < 0) bail("setgroups failed"); - if (consolefd != -1) { - if (ioctl(consolefd, TIOCSCTTY, 0) < 0) - bail("ioctl TIOCSCTTY failed"); - if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) - bail("failed to dup stdin"); - if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) - bail("failed to dup stdout"); - if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) - bail("failed to dup stderr"); - } - s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with patent: write(SYNC_CHILD_READY)"); diff --git a/libcontainer/process.go b/libcontainer/process.go index 334add57..859d2708 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -47,8 +47,9 @@ type Process struct { // ExtraFiles specifies additional open files to be inherited by the container ExtraFiles []*os.File - // consolePath is the path to the console allocated to the container. - consolePath string + // consoleChan provides the masterfd console. + // TODO: Make this persistent in Process. + consoleChan chan *os.File // Capabilities specify the capabilities to keep when executing the process inside the container // All capabilities not specified will be dropped from the processes capability mask @@ -105,21 +106,14 @@ type IO struct { Stderr io.ReadCloser } -// NewConsole creates new console for process and returns it -func (p *Process) NewConsole(rootuid, rootgid int) (Console, error) { - console, err := NewConsole(rootuid, rootgid) - if err != nil { - return nil, err +func (p *Process) GetConsole() (Console, error) { + consoleFd, ok := <-p.consoleChan + if !ok { + return nil, fmt.Errorf("failed to get console from process") } - p.consolePath = console.Path() - return console, nil -} -// ConsoleFromPath sets the process's console with the path provided -func (p *Process) ConsoleFromPath(path string) error { - if p.consolePath != "" { - return newGenericError(fmt.Errorf("console path already exists for process"), ConsoleExists) - } - p.consolePath = path - return nil + // TODO: Fix this so that it used the console API. + return &linuxConsole{ + master: consoleFd, + }, nil } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 4b54e4b2..927d4742 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -100,15 +100,42 @@ func (p *setnsProcess) start() (err error) { return newSystemErrorWithCause(err, "writing config to pipe") } + ierr := parseSync(p.parentPipe, func(sync *syncT) error { + switch sync.Type { + case procConsole: + if err := writeSync(p.parentPipe, procConsoleReq); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'request fd'") + } + + masterFile, err := utils.RecvFd(p.parentPipe) + if err != nil { + return newSystemErrorWithCause(err, "getting master pty from child pipe") + } + + if p.process.consoleChan == nil { + // TODO: Don't panic here, do something more sane. + panic("consoleChan is nil") + } + p.process.consoleChan <- masterFile + + if err := writeSync(p.parentPipe, procConsoleAck); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'ack fd'") + } + case procReady: + // This shouldn't happen. + panic("unexpected procReady in setns") + case procHooks: + // This shouldn't happen. + panic("unexpected procHooks in setns") + default: + return newSystemError(fmt.Errorf("invalid JSON payload from child")) + } + return nil + }) + if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "calling shutdown on init pipe") } - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *genericError - if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { - return newSystemErrorWithCause(err, "decoding init error from pipe") - } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() @@ -270,22 +297,31 @@ func (p *initProcess) start() error { return newSystemErrorWithCause(err, "sending config to init process") } var ( - procSync syncT sentRun bool sentResume bool - ierr *genericError ) - dec := json.NewDecoder(p.parentPipe) -loop: - for { - if err := dec.Decode(&procSync); err != nil { - if err == io.EOF { - break loop + ierr := parseSync(p.parentPipe, func(sync *syncT) error { + switch sync.Type { + case procConsole: + if err := writeSync(p.parentPipe, procConsoleReq); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'request fd'") + } + + masterFile, err := utils.RecvFd(p.parentPipe) + if err != nil { + return newSystemErrorWithCause(err, "getting master pty from child pipe") + } + + if p.process.consoleChan == nil { + // TODO: Don't panic here, do something more sane. + panic("consoleChan is nil") + } + p.process.consoleChan <- masterFile + + if err := writeSync(p.parentPipe, procConsoleAck); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'ack fd'") } - return newSystemErrorWithCause(err, "decoding sync type from init pipe") - } - switch procSync.Type { case procReady: if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for ready process") @@ -316,8 +352,8 @@ loop: } } // Sync with child. - if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil { - return newSystemErrorWithCause(err, "writing syncT run type") + if err := writeSync(p.parentPipe, procRun); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'run'") } sentRun = true case procHooks: @@ -336,25 +372,17 @@ loop: } } // Sync with child. - if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil { - return newSystemErrorWithCause(err, "writing syncT resume type") + if err := writeSync(p.parentPipe, procResume); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'resume'") } sentResume = true - case procError: - // wait for the child process to fully complete and receive an error message - // if one was encoutered - if err := dec.Decode(&ierr); err != nil && err != io.EOF { - return newSystemErrorWithCause(err, "decoding proc error from init") - } - if ierr != nil { - break loop - } - // Programmer error. - panic("No error following JSON procError payload.") default: return newSystemError(fmt.Errorf("invalid JSON payload from child")) } - } + + return nil + }) + if !sentRun { return newSystemErrorWithCause(ierr, "container init") } @@ -364,6 +392,7 @@ loop: if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "shutting down init pipe") } + // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() @@ -440,6 +469,8 @@ func getPipeFds(pid int) ([]string, error) { dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") for i := 0; i < 3; i++ { + // XXX: This breaks if the path is not a valid symlink (which can + // happen in certain particularly unlucky mount namespace setups). f := filepath.Join(dirPath, strconv.Itoa(i)) target, err := os.Readlink(f) if err != nil { @@ -450,8 +481,10 @@ func getPipeFds(pid int) ([]string, error) { return fds, nil } -// InitializeIO creates pipes for use with the process's STDIO -// and returns the opposite side for each +// InitializeIO creates pipes for use with the process's stdio and returns the +// opposite side for each. Do not use this if you want to have a pseudoterminal +// set up for you by libcontainer (TODO: fix that too). +// TODO: This is mostly unnecessary, and should be handled by clients. func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { var fds []uintptr i = &IO{} diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index d4f8595f..cc5a2c1f 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -36,9 +36,11 @@ func needsSetupDev(config *configs.Config) bool { return true } -// setupRootfs sets up the devices, mount points, and filesystems for use inside a -// new mount namespace. -func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) { +// prepareRootfs sets up the devices, mount points, and filesystems for use +// inside a new mount namespace. It doesn't set anything as ro or pivot_root, +// because console setup happens inside the caller. You must call +// finalizeRootfs in order to finish the rootfs setup. +func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) { if err := prepareRoot(config); err != nil { return newSystemErrorWithCause(err, "preparing rootfs") } @@ -50,6 +52,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit return newSystemErrorWithCause(err, "running premount command") } } + if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil { return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination) } @@ -60,17 +63,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit } } } + if setupDev { if err := createDevices(config); err != nil { return newSystemErrorWithCause(err, "creating device nodes") } - if err := setupPtmx(config, console); err != nil { + if err := setupPtmx(config); err != nil { return newSystemErrorWithCause(err, "setting up ptmx") } if err := setupDevSymlinks(config.Rootfs); err != nil { return newSystemErrorWithCause(err, "setting up /dev symlinks") } } + // Signal the parent to run the pre-start hooks. // The hooks are run after the mounts are setup, but before we switch to the new // root, so that the old root is still available in the hooks for any mount @@ -78,9 +83,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit if err := syncParentHooks(pipe); err != nil { return err } + + // The reason these operations are done here rather than in finalizeRootfs + // is because the console-handling code gets quite sticky if we have to set + // up the console before doing the pivot_root(2). This is because the + // Console API has to also work with the ExecIn case, which means that the + // API must be able to deal with being inside as well as outside the + // container. It's just cleaner to do this here (at the expense of the + // operation not being perfectly split). + if err := syscall.Chdir(config.Rootfs); err != nil { return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs) } + if config.NoPivotRoot { err = msMoveRoot(config.Rootfs) } else { @@ -89,11 +104,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit if err != nil { return newSystemErrorWithCause(err, "jailing process inside rootfs") } + if setupDev { if err := reOpenDevNull(); err != nil { return newSystemErrorWithCause(err, "reopening /dev/null inside container") } } + + return nil +} + +// finalizeRootfs actually switches the root of the process and sets anything +// to ro if necessary. You must call prepareRootfs first. +func finalizeRootfs(config *configs.Config) (err error) { // remount dev as ro if specified for _, m := range config.Mounts { if libcontainerUtils.CleanPath(m.Destination) == "/dev" { @@ -105,12 +128,14 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit break } } + // set rootfs ( / ) as readonly if config.Readonlyfs { if err := setReadonly(); err != nil { return newSystemErrorWithCause(err, "setting rootfs as readonly") } } + syscall.Umask(0022) return nil } @@ -578,7 +603,7 @@ func setReadonly() error { return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") } -func setupPtmx(config *configs.Config, console *linuxConsole) error { +func setupPtmx(config *configs.Config) error { ptmx := filepath.Join(config.Rootfs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err @@ -586,9 +611,6 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error { if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } - if console != nil { - return console.mount(config.Rootfs, config.MountLabel) - } return nil } diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 2a8f3452..2e80450d 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -16,6 +16,7 @@ import ( // linuxSetnsInit performs the container's initialization for running a new process // inside an existing container. type linuxSetnsInit struct { + pipe *os.File config *initConfig } @@ -30,6 +31,14 @@ func (l *linuxSetnsInit) Init() error { return err } } + if l.config.CreateConsole { + if err := setupConsole(l.pipe, l.config, false); err != nil { + return err + } + if err := system.Setctty(); err != nil { + return err + } + } if l.config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 9c19ce7a..08887c39 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -4,7 +4,6 @@ package libcontainer import ( "fmt" - "io" "os" "os/exec" "syscall" @@ -18,7 +17,7 @@ import ( ) type linuxStandardInit struct { - pipe io.ReadWriteCloser + pipe *os.File parentPid int stateDirFD int config *initConfig @@ -59,18 +58,6 @@ func (l *linuxStandardInit) Init() error { } } - var console *linuxConsole - if l.config.Console != "" { - console = newConsoleFromPath(l.config.Console) - if err := console.dupStdio(); err != nil { - return err - } - } - if console != nil { - if err := system.Setctty(); err != nil { - return err - } - } if err := setupNetwork(l.config); err != nil { return err } @@ -79,12 +66,33 @@ func (l *linuxStandardInit) Init() error { } label.Init() - // InitializeMountNamespace() can be executed only for a new mount namespace + + // prepareRootfs() can be executed only for a new mount namespace. if l.config.Config.Namespaces.Contains(configs.NEWNS) { - if err := setupRootfs(l.config.Config, console, l.pipe); err != nil { + if err := prepareRootfs(l.pipe, l.config.Config); err != nil { return err } } + + // Set up the console. This has to be done *before* we finalize the rootfs, + // but *after* we've given the user the chance to set up all of the mounts + // they wanted. + if l.config.CreateConsole { + if err := setupConsole(l.pipe, l.config, true); err != nil { + return err + } + if err := system.Setctty(); err != nil { + return err + } + } + + // Finish the rootfs setup. + if l.config.Config.Namespaces.Contains(configs.NEWNS) { + if err := finalizeRootfs(l.config.Config); err != nil { + return err + } + } + if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err diff --git a/libcontainer/sync.go b/libcontainer/sync.go new file mode 100644 index 00000000..4016cd0d --- /dev/null +++ b/libcontainer/sync.go @@ -0,0 +1,110 @@ +package libcontainer + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/opencontainers/runc/libcontainer/utils" +) + +type syncType string + +// Constants that are used for synchronisation between the parent and child +// during container setup. They come in pairs (with procError being a generic +// response which is followed by a &genericError). +// +// [ child ] <-> [ parent ] +// +// procHooks --> [run hooks] +// <-- procResume +// +// procConsole --> +// <-- procConsoleReq +// [send(fd)] --> [recv(fd)] +// <-- procConsoleAck +// +// procReady --> [final setup] +// <-- procRun +const ( + procError syncType = "procError" + procReady syncType = "procReady" + procRun syncType = "procRun" + procHooks syncType = "procHooks" + procResume syncType = "procResume" + procConsole syncType = "procConsole" + procConsoleReq syncType = "procConsoleReq" + procConsoleAck syncType = "procConsoleAck" +) + +type syncT struct { + Type syncType `json:"type"` +} + +// writeSync is used to write to a synchronisation pipe. An error is returned +// if there was a problem writing the payload. +func writeSync(pipe io.Writer, sync syncType) error { + if err := utils.WriteJSON(pipe, syncT{sync}); err != nil { + return err + } + return nil +} + +// readSync is used to read from a synchronisation pipe. An error is returned +// if we got a genericError, the pipe was closed, or we got an unexpected flag. +func readSync(pipe io.Reader, expected syncType) error { + var procSync syncT + if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { + if err == io.EOF { + return fmt.Errorf("parent closed synchronisation channel") + } + + if procSync.Type == procError { + var ierr genericError + + if err := json.NewDecoder(pipe).Decode(&ierr); err != nil { + return fmt.Errorf("failed reading error from parent: %v", err) + } + + return &ierr + } + + if procSync.Type != expected { + return fmt.Errorf("invalid synchronisation flag from parent") + } + } + return nil +} + +// parseSync runs the given callback function on each syncT received from the +// child. It will return once io.EOF is returned from the given pipe. +func parseSync(pipe io.Reader, fn func(*syncT) error) error { + dec := json.NewDecoder(pipe) + for { + var sync syncT + if err := dec.Decode(&sync); err != nil { + if err == io.EOF { + break + } + return err + } + + // We handle this case outside fn for cleanliness reasons. + var ierr *genericError + if sync.Type == procError { + if err := dec.Decode(&ierr); err != nil && err != io.EOF { + return newSystemErrorWithCause(err, "decoding proc error from init") + } + if ierr != nil { + return ierr + } + // Programmer error. + panic("No error following JSON procError payload.") + } + + if err := fn(&sync); err != nil { + return err + } + } + return nil +} diff --git a/libcontainer/utils/cmsg.c b/libcontainer/utils/cmsg.c new file mode 100644 index 00000000..e77ca69f --- /dev/null +++ b/libcontainer/utils/cmsg.c @@ -0,0 +1,148 @@ +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cmsg.h" + +#define error(fmt, ...) \ + ({ \ + fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \ + errno = ECOMM; \ + goto err; /* return value */ \ + }) + +/* + * Sends a file descriptor along the sockfd provided. Returns the return + * value of sendmsg(2). Any synchronisation and preparation of state + * should be done external to this (we expect the other side to be in + * recvfd() in the code). + */ +ssize_t sendfd(int sockfd, struct file_t file) +{ + struct msghdr msg = {0}; + struct iovec iov[1] = {0}; + struct cmsghdr *cmsg; + int *fdptr; + int ret; + + union { + char buf[CMSG_SPACE(sizeof(file.fd))]; + struct cmsghdr align; + } u; + + /* + * We need to send some other data along with the ancillary data, + * otherwise the other side won't recieve any data. This is very + * well-hidden in the documentation (and only applies to + * SOCK_STREAM). See the bottom part of unix(7). + */ + iov[0].iov_base = file.name; + iov[0].iov_len = strlen(file.name) + 1; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + fdptr = (int *) CMSG_DATA(cmsg); + memcpy(fdptr, &file.fd, sizeof(int)); + + return sendmsg(sockfd, &msg, 0); +} + +/* + * Receives a file descriptor from the sockfd provided. Returns the file + * descriptor as sent from sendfd(). It will return the file descriptor + * or die (literally) trying. Any synchronisation and preparation of + * state should be done external to this (we expect the other side to be + * in sendfd() in the code). + */ +struct file_t recvfd(int sockfd) +{ + struct msghdr msg = {0}; + struct iovec iov[1] = {0}; + struct cmsghdr *cmsg; + struct file_t file = {0}; + int *fdptr; + int olderrno; + + union { + char buf[CMSG_SPACE(sizeof(file.fd))]; + struct cmsghdr align; + } u; + + /* Allocate a buffer. */ + /* TODO: Make this dynamic with MSG_PEEK. */ + file.name = malloc(TAG_BUFFER); + if (!file.name) + error("recvfd: failed to allocate file.tag buffer\n"); + + /* + * We need to "recieve" the non-ancillary data even though we don't + * plan to use it at all. Otherwise, things won't work as expected. + * See unix(7) and other well-hidden documentation. + */ + iov[0].iov_base = file.name; + iov[0].iov_len = TAG_BUFFER; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + ssize_t ret = recvmsg(sockfd, &msg, 0); + if (ret < 0) + goto err; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg) + error("recvfd: got NULL from CMSG_FIRSTHDR"); + if (cmsg->cmsg_level != SOL_SOCKET) + error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level); + if (cmsg->cmsg_type != SCM_RIGHTS) + error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type); + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len); + + fdptr = (int *) CMSG_DATA(cmsg); + if (!fdptr || *fdptr < 0) + error("recvfd: recieved invalid pointer"); + + file.fd = *fdptr; + return file; + +err: + olderrno = errno; + free(file.name); + errno = olderrno; + return (struct file_t){0}; +} diff --git a/libcontainer/utils/cmsg.go b/libcontainer/utils/cmsg.go new file mode 100644 index 00000000..5e614ac8 --- /dev/null +++ b/libcontainer/utils/cmsg.go @@ -0,0 +1,57 @@ +// +build linux + +package utils + +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +#include +#include +#include "cmsg.h" +*/ +import "C" + +import ( + "os" + "unsafe" +) + +// RecvFd waits for a file descriptor to be sent over the given AF_UNIX +// socket. The file name of the remote file descriptor will be recreated +// locally (it is sent as non-auxilliary data in the same payload). +func RecvFd(socket *os.File) (*os.File, error) { + file, err := C.recvfd(C.int(socket.Fd())) + if err != nil { + return nil, err + } + defer C.free(unsafe.Pointer(file.name)) + return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil +} + +// SendFd sends a file descriptor over the given AF_UNIX socket. In +// addition, the file.Name() of the given file will also be sent as +// non-auxilliary data in the same payload (allowing to send contextual +// information for a file descriptor). +func SendFd(socket, file *os.File) error { + var cfile C.struct_file_t + cfile.fd = C.int(file.Fd()) + cfile.name = C.CString(file.Name()) + defer C.free(unsafe.Pointer(cfile.name)) + + _, err := C.sendfd(C.int(socket.Fd()), cfile) + return err +} diff --git a/libcontainer/utils/cmsg.h b/libcontainer/utils/cmsg.h new file mode 100644 index 00000000..3fe76425 --- /dev/null +++ b/libcontainer/utils/cmsg.h @@ -0,0 +1,36 @@ +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if !defined(CMSG_H) +#define CMSG_H + +#include + +/* TODO: Implement this properly with MSG_PEEK. */ +#define TAG_BUFFER 4096 + +/* This mirrors Go's (*os.File). */ +struct file_t { + char *name; + int fd; +}; + +struct file_t recvfd(int sockfd); +ssize_t sendfd(int sockfd, struct file_t file); + +#endif /* !defined(CMSG_H) */ diff --git a/restore.go b/restore.go index 7de15242..2977ab0a 100644 --- a/restore.go +++ b/restore.go @@ -158,15 +158,16 @@ func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Co defer destroy(container) } process := &libcontainer.Process{} - tty, err := setupIO(process, rootuid, rootgid, "", false, detach) + tty, err := setupIO(process, rootuid, rootgid, false, detach) if err != nil { return -1, err } - defer tty.Close() - handler := newSignalHandler(tty, !context.Bool("no-subreaper")) + handler := newSignalHandler(!context.Bool("no-subreaper")) if err := container.Restore(process, options); err != nil { return -1, err } + // We don't need to do a tty.recvtty because config.Terminal is always false. + defer tty.Close() if err := tty.ClosePostStart(); err != nil { return -1, err } @@ -180,7 +181,7 @@ func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Co if detach { return 0, nil } - return handler.forward(process) + return handler.forward(process, tty) } func criuOptions(context *cli.Context) *libcontainer.CriuOpts { diff --git a/run.go b/run.go index 64b986bd..a5d42abb 100644 --- a/run.go +++ b/run.go @@ -32,9 +32,9 @@ command(s) that get executed on start, edit the args parameter of the spec. See Usage: `path to the root of the bundle directory, defaults to the current directory`, }, cli.StringFlag{ - Name: "console", + Name: "console-socket", Value: "", - Usage: "specify the pty slave path for use with the container", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", }, cli.BoolFlag{ Name: "detach, d", diff --git a/signals.go b/signals.go index 5eee44e7..f3dcf179 100644 --- a/signals.go +++ b/signals.go @@ -17,7 +17,7 @@ const signalBufferSize = 2048 // newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals // while still forwarding all other signals to the process. -func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler { +func newSignalHandler(enableSubreaper bool) *signalHandler { if enableSubreaper { // set us as the subreaper before registering the signal handler for the container if err := system.SetSubreaper(1); err != nil { @@ -30,7 +30,6 @@ func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler { // handle all signals for the process. signal.Notify(s) return &signalHandler{ - tty: tty, signals: s, } } @@ -44,12 +43,11 @@ type exit struct { type signalHandler struct { signals chan os.Signal - tty *tty } // forward handles the main signal event loop forwarding, resizing, or reaping depending // on the signal received. -func (h *signalHandler) forward(process *libcontainer.Process) (int, error) { +func (h *signalHandler) forward(process *libcontainer.Process, tty *tty) (int, error) { // make sure we know the pid of our main process so that we can return // after it dies. pid1, err := process.Pid() @@ -57,11 +55,11 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) { return -1, err } // perform the initial tty resize. - h.tty.resize() + tty.resize() for s := range h.signals { switch s { case syscall.SIGWINCH: - h.tty.resize() + tty.resize() case syscall.SIGCHLD: exits, err := h.reap() if err != nil { diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 73eff1d0..9cd95a7d 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -43,7 +43,7 @@ EOF sed -i "s/\(\"resources\": {\)/\1\n${DATA}/" ${BUSYBOX_BUNDLE}/config.json # run a detached busybox to work with - runc run -d --console /dev/pts/ptmx test_cgroups_kmem + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem [ "$status" -eq 0 ] wait_for_container 15 1 test_cgroups_kmem @@ -61,7 +61,7 @@ EOF sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json # run a detached busybox to work with - runc run -d --console /dev/pts/ptmx test_cgroups_kmem + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem [ "$status" -eq 0 ] wait_for_container 15 1 test_cgroups_kmem diff --git a/tests/integration/create.bats b/tests/integration/create.bats index c3527955..abd4da24 100644 --- a/tests/integration/create.bats +++ b/tests/integration/create.bats @@ -12,7 +12,7 @@ function teardown() { } @test "runc create" { - runc create --console /dev/pts/ptmx test_busybox + runc create --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created @@ -25,7 +25,7 @@ function teardown() { } @test "runc create exec" { - runc create --console /dev/pts/ptmx test_busybox + runc create --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created @@ -33,6 +33,8 @@ function teardown() { runc exec test_busybox true [ "$status" -eq 0 ] + testcontainer test_busybox created + # start the command runc start test_busybox [ "$status" -eq 0 ] @@ -41,7 +43,7 @@ function teardown() { } @test "runc create --pid-file" { - runc create --pid-file pid.txt --console /dev/pts/ptmx test_busybox + runc create --pid-file pid.txt --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created @@ -67,7 +69,7 @@ function teardown() { run cd pid_file [ "$status" -eq 0 ] - runc create --pid-file pid.txt -b $BUSYBOX_BUNDLE --console /dev/pts/ptmx test_busybox + runc create --pid-file pid.txt -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox created diff --git a/tests/integration/delete.bats b/tests/integration/delete.bats index 78f982c5..61a9c4c9 100644 --- a/tests/integration/delete.bats +++ b/tests/integration/delete.bats @@ -13,7 +13,7 @@ function teardown() { @test "runc delete" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -34,7 +34,7 @@ function teardown() { @test "runc delete --force" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -51,13 +51,13 @@ function teardown() { @test "run delete with multi-containers" { # create busybox1 detached - runc create --console /dev/pts/ptmx test_busybox1 + runc create --console-socket $CONSOLE_SOCKET test_busybox1 [ "$status" -eq 0 ] testcontainer test_busybox1 created # run busybox2 detached - runc run -d --console /dev/pts/ptmx test_busybox2 + runc run -d --console-socket $CONSOLE_SOCKET test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 @@ -86,20 +86,20 @@ function teardown() { @test "run delete --force with multi-containers" { # create busybox1 detached - runc create --console /dev/pts/ptmx test_busybox1 + runc create --console-socket $CONSOLE_SOCKET test_busybox1 [ "$status" -eq 0 ] testcontainer test_busybox1 created # run busybox2 detached - runc run -d --console /dev/pts/ptmx test_busybox2 + runc run -d --console-socket $CONSOLE_SOCKET test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 testcontainer test_busybox2 running # delete both test_busybox1 and test_busybox2 container - runc delete --force test_busybox1 test_busybox2 + runc delete --force test_busybox1 test_busybox2 runc state test_busybox1 [ "$status" -ne 0 ] diff --git a/tests/integration/events.bats b/tests/integration/events.bats index 18855d5e..182b721b 100644 --- a/tests/integration/events.bats +++ b/tests/integration/events.bats @@ -13,7 +13,7 @@ function teardown() { @test "events --stats" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -28,7 +28,7 @@ function teardown() { @test "events --interval default " { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -55,7 +55,7 @@ function teardown() { @test "events --interval 1s " { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -81,7 +81,7 @@ function teardown() { @test "events --interval 100ms " { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state diff --git a/tests/integration/exec.bats b/tests/integration/exec.bats index 23d5c24f..ba60ea17 100644 --- a/tests/integration/exec.bats +++ b/tests/integration/exec.bats @@ -13,7 +13,7 @@ function teardown() { @test "runc exec" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -26,7 +26,7 @@ function teardown() { @test "runc exec --pid-file" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -53,7 +53,7 @@ function teardown() { [ "$status" -eq 0 ] # run busybox detached - runc run -d -b $BUSYBOX_BUNDLE --console /dev/pts/ptmx test_busybox + runc run -d -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -74,7 +74,7 @@ function teardown() { @test "runc exec ls -la" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -88,7 +88,7 @@ function teardown() { @test "runc exec ls -la with --cwd" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -100,7 +100,7 @@ function teardown() { @test "runc exec --env" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -113,7 +113,7 @@ function teardown() { @test "runc exec --user" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox diff --git a/tests/integration/help.bats b/tests/integration/help.bats index e63b0c11..ca404f34 100644 --- a/tests/integration/help.bats +++ b/tests/integration/help.bats @@ -64,11 +64,11 @@ load helpers runc start -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ start+ ]] - + runc run -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ run+ ]] - + runc state -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ state+ ]] diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index b5b83298..1903eda1 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -3,6 +3,7 @@ # Root directory of integration tests. INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")") RUNC="${INTEGRATION_ROOT}/../../runc" +RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty" GOPATH="${INTEGRATION_ROOT}/../../../.." # Test data path. @@ -17,7 +18,7 @@ HELLO_IMAGE="$TESTDATA/hello-world.tar" HELLO_BUNDLE="$BATS_TMPDIR/hello-world" # CRIU PATH -CRIU="/usr/local/sbin/criu" +CRIU="$(which criu)" # Kernel version KERNEL_VERSION="$(uname -r)" @@ -28,6 +29,9 @@ KERNEL_MINOR="${KERNEL_MINOR%%.*}" # Root state path. ROOT="$BATS_TMPDIR/runc" +# Path to console socket. +CONSOLE_SOCKET="$BATS_TMPDIR/console.sock" + # Cgroup mount CGROUP_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\/ { print $5; exit }') @@ -142,14 +146,31 @@ function testcontainer() { [[ "${output}" == *"$2"* ]] } +function setup_recvtty() { + # We need to start recvtty in the background, so we double fork in the shell. + ("$RECVTTY" --pid-file "$BATS_TMPDIR/recvtty.pid" --mode null "$CONSOLE_SOCKET" &) & +} + +function teardown_recvtty() { + # When we kill recvtty, the container will also be killed. + if [ -f "$BATS_TMPDIR/recvtty.pid" ]; then + kill -9 $(cat "$BATS_TMPDIR/recvtty.pid") + fi + + # Clean up the files that might be left over. + rm -f "$BATS_TMPDIR/recvtty.pid" + rm -f "$CONSOLE_SOCKET" +} + function setup_busybox() { + setup_recvtty run mkdir "$BUSYBOX_BUNDLE" run mkdir "$BUSYBOX_BUNDLE"/rootfs if [ -e "/testdata/busybox.tar" ]; then BUSYBOX_IMAGE="/testdata/busybox.tar" fi if [ ! -e $BUSYBOX_IMAGE ]; then - curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' + curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz' fi tar -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE" cd "$BUSYBOX_BUNDLE" @@ -157,6 +178,7 @@ function setup_busybox() { } function setup_hello() { + setup_recvtty run mkdir "$HELLO_BUNDLE" run mkdir "$HELLO_BUNDLE"/rootfs tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" @@ -185,12 +207,14 @@ function teardown_running_container_inroot() { function teardown_busybox() { cd "$INTEGRATION_ROOT" + teardown_recvtty teardown_running_container test_busybox run rm -f -r "$BUSYBOX_BUNDLE" } function teardown_hello() { cd "$INTEGRATION_ROOT" + teardown_recvtty teardown_running_container test_hello run rm -f -r "$HELLO_BUNDLE" } diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats index e0d89e94..a049de65 100644 --- a/tests/integration/kill.bats +++ b/tests/integration/kill.bats @@ -15,7 +15,7 @@ function teardown() { @test "kill detached busybox" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state diff --git a/tests/integration/list.bats b/tests/integration/list.bats index 41c9e10d..302728a5 100644 --- a/tests/integration/list.bats +++ b/tests/integration/list.bats @@ -19,15 +19,15 @@ function teardown() { @test "list" { # run a few busyboxes detached - ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box1 + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box1 [ "$status" -eq 0 ] wait_for_container_inroot 15 1 test_box1 $HELLO_BUNDLE - ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box2 + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box2 [ "$status" -eq 0 ] wait_for_container_inroot 15 1 test_box2 $HELLO_BUNDLE - ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box3 + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box3 [ "$status" -eq 0 ] wait_for_container_inroot 15 1 test_box3 $HELLO_BUNDLE diff --git a/tests/integration/mask.bats b/tests/integration/mask.bats index d645b512..074b0f2e 100644 --- a/tests/integration/mask.bats +++ b/tests/integration/mask.bats @@ -20,7 +20,7 @@ function teardown() { @test "mask paths [file]" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -40,7 +40,7 @@ function teardown() { @test "mask paths [directory]" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox diff --git a/tests/integration/pause.bats b/tests/integration/pause.bats index 02642f43..e657d0a1 100644 --- a/tests/integration/pause.bats +++ b/tests/integration/pause.bats @@ -13,7 +13,7 @@ function teardown() { @test "runc pause and resume" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox @@ -35,13 +35,13 @@ function teardown() { @test "runc pause and resume with multi-container" { # run test_busybox1 detached - runc run -d --console /dev/pts/ptmx test_busybox1 + runc run -d --console-socket $CONSOLE_SOCKET test_busybox1 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox1 # run test_busybox2 detached - runc run -d --console /dev/pts/ptmx test_busybox2 + runc run -d --console-socket $CONSOLE_SOCKET test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 @@ -74,27 +74,27 @@ function teardown() { @test "runc pause and resume with nonexist container" { # run test_busybox1 detached - runc run -d --console /dev/pts/ptmx test_busybox1 + runc run -d --console-socket $CONSOLE_SOCKET test_busybox1 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox1 # run test_busybox2 detached - runc run -d --console /dev/pts/ptmx test_busybox2 + runc run -d --console-socket $CONSOLE_SOCKET test_busybox2 [ "$status" -eq 0 ] wait_for_container 15 1 test_busybox2 - # pause test_busybox1, test_busybox2 and nonexistant container - runc pause test_busybox1 test_busybox2 nonexistant + # pause test_busybox1, test_busybox2 and nonexistent container + runc pause test_busybox1 test_busybox2 nonexistent [ "$status" -ne 0 ] # test state of test_busybox1 and test_busybox2 is paused testcontainer test_busybox1 paused testcontainer test_busybox2 paused - # resume test_busybox1, test_busybox2 and nonexistant container - runc resume test_busybox1 test_busybox2 nonexistant + # resume test_busybox1, test_busybox2 and nonexistent container + runc resume test_busybox1 test_busybox2 nonexistent [ "$status" -ne 0 ] # test state of two containers is back to running diff --git a/tests/integration/ps.bats b/tests/integration/ps.bats index 75f955f9..7a200150 100644 --- a/tests/integration/ps.bats +++ b/tests/integration/ps.bats @@ -13,7 +13,7 @@ function teardown() { @test "ps" { # start busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -29,7 +29,7 @@ function teardown() { @test "ps -f json" { # start busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -44,7 +44,7 @@ function teardown() { @test "ps -e -x" { # start busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state diff --git a/tests/integration/root.bats b/tests/integration/root.bats index a2cb377c..ee132917 100644 --- a/tests/integration/root.bats +++ b/tests/integration/root.bats @@ -15,11 +15,11 @@ function teardown() { @test "global --root" { # run busybox detached using $HELLO_BUNDLE for state - ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_dotbox + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_dotbox [ "$status" -eq 0 ] # run busybox detached in default root - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state of the busyboxes are only in their respective root path diff --git a/tests/integration/start.bats b/tests/integration/start.bats index 502103c9..cd33dee5 100644 --- a/tests/integration/start.bats +++ b/tests/integration/start.bats @@ -12,12 +12,12 @@ function teardown() { } @test "runc start" { - runc create --console /dev/pts/ptmx test_busybox1 + runc create --console-socket $CONSOLE_SOCKET test_busybox1 [ "$status" -eq 0 ] testcontainer test_busybox1 created - runc create --console /dev/pts/ptmx test_busybox2 + runc create --console-socket $CONSOLE_SOCKET test_busybox2 [ "$status" -eq 0 ] testcontainer test_busybox2 created diff --git a/tests/integration/start_detached.bats b/tests/integration/start_detached.bats index 02ca90d1..605fde22 100644 --- a/tests/integration/start_detached.bats +++ b/tests/integration/start_detached.bats @@ -13,7 +13,7 @@ function teardown() { @test "runc run detached" { # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -29,7 +29,7 @@ function teardown() { sed -i 's;"gid": 0;"gid": 100;g' config.json # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -40,7 +40,7 @@ function teardown() { @test "runc run detached --pid-file" { # run busybox detached - runc run --pid-file pid.txt -d --console /dev/pts/ptmx test_busybox + runc run --pid-file pid.txt -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state @@ -64,7 +64,7 @@ function teardown() { [ "$status" -eq 0 ] # run busybox detached - runc run --pid-file pid.txt -d -b $BUSYBOX_BUNDLE --console /dev/pts/ptmx test_busybox + runc run --pid-file pid.txt -d -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state diff --git a/tests/integration/state.bats b/tests/integration/state.bats index 0073a2a3..eed2eb3c 100644 --- a/tests/integration/state.bats +++ b/tests/integration/state.bats @@ -16,7 +16,7 @@ function teardown() { [ "$status" -ne 0 ] # run busybox detached - runc run -d --console /dev/pts/ptmx test_busybox + runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] # check state diff --git a/tests/integration/tty.bats b/tests/integration/tty.bats new file mode 100644 index 00000000..b9a1f108 --- /dev/null +++ b/tests/integration/tty.bats @@ -0,0 +1,113 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc run [tty ptsname]" { + # Replace sh script with readlink. + sed -i 's|"sh"|"sh", "-c", "for file in /proc/self/fd/[012]; do readlink $file; done"|' config.json + + # run busybox + runc run test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ /dev/pts/+ ]] + [[ ${lines[1]} =~ /dev/pts/+ ]] + [[ ${lines[2]} =~ /dev/pts/+ ]] +} + +@test "runc run [tty owner]" { + # Replace sh script with stat. + sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json + + # run busybox + runc run test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 0 ]] + # This is set by the default config.json (it corresponds to the standard tty group). + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc run [tty owner] ({u,g}id != 0)" { + # replace "uid": 0 with "uid": 1000 + # and do a similar thing for gid. + sed -i 's;"uid": 0;"uid": 1000;g' config.json + sed -i 's;"gid": 0;"gid": 100;g' config.json + + # Replace sh script with stat. + sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json + + # run busybox + runc run test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 1000 ]] + # This is set by the default config.json (it corresponds to the standard tty group). + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc exec [tty ptsname]" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + wait_for_container 15 1 test_busybox + + # make sure we're running + testcontainer test_busybox running + + # run the exec + runc exec test_busybox sh -c 'for file in /proc/self/fd/[012]; do readlink $file; done' + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ /dev/pts/+ ]] + [[ ${lines[1]} =~ /dev/pts/+ ]] + [[ ${lines[2]} =~ /dev/pts/+ ]] +} + +@test "runc exec [tty owner]" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + wait_for_container 15 1 test_busybox + + # make sure we're running + testcontainer test_busybox running + + # run the exec + runc exec test_busybox sh -c 'stat -c %u:%g $(tty) | tr : \\n' + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 0 ]] + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc exec [tty owner] ({u,g}id != 0)" { + # replace "uid": 0 with "uid": 1000 + # and do a similar thing for gid. + sed -i 's;"uid": 0;"uid": 1000;g' config.json + sed -i 's;"gid": 0;"gid": 100;g' config.json + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + wait_for_container 15 1 test_busybox + + # make sure we're running + testcontainer test_busybox running + + # run the exec + runc exec test_busybox sh -c 'stat -c %u:%g $(tty) | tr : \\n' + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 1000 ]] + [[ ${lines[1]} =~ 5 ]] +} diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 4d84cb2d..6790e0b4 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -51,7 +51,7 @@ function check_cgroup_value() { @test "update" { requires cgroups_kmem # run a few busyboxes detached - runc run -d --console /dev/pts/ptmx test_update + runc run -d --console-socket $CONSOLE_SOCKET test_update [ "$status" -eq 0 ] wait_for_container 15 1 test_update diff --git a/tty.go b/tty.go index 5a76ebe3..a3244342 100644 --- a/tty.go +++ b/tty.go @@ -7,11 +7,27 @@ import ( "io" "os" "sync" + "syscall" "github.com/docker/docker/pkg/term" "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/utils" ) +type tty struct { + console libcontainer.Console + state *term.State + closers []io.Closer + postStart []io.Closer + wg sync.WaitGroup +} + +func (t *tty) copyIO(w io.Writer, r io.ReadCloser) { + defer t.wg.Done() + io.Copy(w, r) + r.Close() +} + // setup standard pipes so that the TTY of the calling runc process // is not inherited by the container. func createStdioPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) { @@ -46,45 +62,56 @@ func createStdioPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, erro return t, nil } -func (t *tty) copyIO(w io.Writer, r io.ReadCloser) { - defer t.wg.Done() - io.Copy(w, r) - r.Close() -} - -func createTty(p *libcontainer.Process, rootuid, rootgid int, consolePath string) (*tty, error) { - if consolePath != "" { - if err := p.ConsoleFromPath(consolePath); err != nil { - return nil, err +func dupStdio(process *libcontainer.Process, rootuid, rootgid int) error { + process.Stdin = os.Stdin + process.Stdout = os.Stdout + process.Stderr = os.Stderr + for _, fd := range []uintptr{ + os.Stdin.Fd(), + os.Stdout.Fd(), + os.Stderr.Fd(), + } { + if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil { + return err } - return &tty{}, nil } - console, err := p.NewConsole(rootuid, rootgid) - if err != nil { - return nil, err - } - go io.Copy(console, os.Stdin) - go io.Copy(os.Stdout, console) - - state, err := term.SetRawTerminal(os.Stdin.Fd()) - if err != nil { - return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err) - } - return &tty{ - console: console, - state: state, - closers: []io.Closer{ - console, - }, - }, nil + return nil } -type tty struct { - console libcontainer.Console - state *term.State - closers []io.Closer - postStart []io.Closer - wg sync.WaitGroup +func (t *tty) recvtty(process *libcontainer.Process, detach bool) error { + console, err := process.GetConsole() + if err != nil { + return err + } + + if !detach { + go io.Copy(console, os.Stdin) + t.wg.Add(1) + go t.copyIO(os.Stdout, console) + + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + return fmt.Errorf("failed to set the terminal from the stdin: %v", err) + } + t.state = state + } + + t.console = console + t.closers = []io.Closer{console} + return nil +} + +func (t *tty) sendtty(socket *os.File, ti *libcontainer.TerminalInfo) error { + if t.console == nil { + return fmt.Errorf("tty.console not set") + } + + // Create a fake file to contain the terminal info. + console := os.NewFile(t.console.File().Fd(), ti.String()) + if err := utils.SendFd(socket, console); err != nil { + return err + } + return nil } // ClosePostStart closes any fds that are provided to the container and dup2'd @@ -122,5 +149,5 @@ func (t *tty) resize() error { if err != nil { return err } - return term.SetWinsize(t.console.Fd(), ws) + return term.SetWinsize(t.console.File().Fd(), ws) } diff --git a/utils_linux.go b/utils_linux.go index 94c520fb..336955db 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -5,6 +5,7 @@ package main import ( "errors" "fmt" + "net" "os" "path/filepath" "strconv" @@ -94,22 +95,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { return lp, nil } -func dupStdio(process *libcontainer.Process, rootuid, rootgid int) error { - process.Stdin = os.Stdin - process.Stdout = os.Stdout - process.Stderr = os.Stderr - for _, fd := range []uintptr{ - os.Stdin.Fd(), - os.Stdout.Fd(), - os.Stderr.Fd(), - } { - if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil { - return err - } - } - return nil -} - // If systemd is supporting sd_notify protocol, this function will add support // for sd_notify protocol from within the container. func setupSdNotify(spec *specs.Spec, notifySocket string) { @@ -123,23 +108,27 @@ func destroy(container libcontainer.Container) { } } -// setupIO sets the proper IO on the process depending on the configuration -// If there is a nil error then there must be a non nil tty returned -func setupIO(process *libcontainer.Process, rootuid, rootgid int, console string, createTTY, detach bool) (*tty, error) { - // detach and createTty will not work unless a console path is passed - // so error out here before changing any terminal settings - if createTTY && detach && console == "" { - return nil, fmt.Errorf("cannot allocate tty if runc will detach") - } +// setupIO modifies the given process config according to the options. +func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool) (*tty, error) { + // This is entirely handled by recvtty. if createTTY { - return createTty(process, rootuid, rootgid, console) + process.Stdin = nil + process.Stdout = nil + process.Stderr = nil + return &tty{}, nil } + + // When we detach, we just dup over stdio and call it a day. There's no + // requirement that we set up anything nice for our caller or the + // container. if detach { if err := dupStdio(process, rootuid, rootgid); err != nil { return nil, err } return &tty{}, nil } + + // XXX: This doesn't sit right with me. It's ugly. return createStdioPipes(process, rootuid, rootgid) } @@ -192,11 +181,15 @@ type runner struct { detach bool listenFDs []*os.File pidFile string - console string + consoleSocket string container libcontainer.Container create bool } +func (r *runner) terminalinfo() *libcontainer.TerminalInfo { + return libcontainer.NewTerminalInfo(r.container.ID()) +} + func (r *runner) run(config *specs.Process) (int, error) { process, err := newProcess(*config) if err != nil { @@ -207,31 +200,90 @@ func (r *runner) run(config *specs.Process) (int, error) { process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1") process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...) } + rootuid, err := r.container.Config().HostUID() if err != nil { r.destroy() return -1, err } + rootgid, err := r.container.Config().HostGID() if err != nil { r.destroy() return -1, err } - tty, err := setupIO(process, rootuid, rootgid, r.console, config.Terminal, r.detach || r.create) - if err != nil { + + detach := r.detach || r.create + + // Check command-line for sanity. + if detach && config.Terminal && r.consoleSocket == "" { r.destroy() - return -1, err + return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket") } - handler := newSignalHandler(tty, r.enableSubreaper) + // XXX: Should we change this? + if (!detach || !config.Terminal) && r.consoleSocket != "" { + r.destroy() + return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty") + } + startFn := r.container.Start if !r.create { startFn = r.container.Run } - defer tty.Close() + // Setting up IO is a two stage process. We need to modify process to deal + // with detaching containers, and then we get a tty after the container has + // started. + handler := newSignalHandler(r.enableSubreaper) + tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach) + if err != nil { + r.destroy() + return -1, err + } if err := startFn(process); err != nil { r.destroy() return -1, err } + if config.Terminal { + if err := tty.recvtty(process, r.detach || r.create); err != nil { + r.terminate(process) + r.destroy() + return -1, err + } + } + defer tty.Close() + + if config.Terminal && detach { + conn, err := net.Dial("unix", r.consoleSocket) + if err != nil { + r.terminate(process) + r.destroy() + return -1, err + } + defer conn.Close() + + unixconn, ok := conn.(*net.UnixConn) + if !ok { + r.terminate(process) + r.destroy() + return -1, fmt.Errorf("casting to UnixConn failed") + } + + socket, err := unixconn.File() + if err != nil { + r.terminate(process) + r.destroy() + return -1, err + } + defer socket.Close() + + err = tty.sendtty(socket, r.terminalinfo()) + if err != nil { + r.terminate(process) + r.destroy() + return -1, err + } + } + if err := tty.ClosePostStart(); err != nil { r.terminate(process) r.destroy() @@ -244,10 +296,10 @@ func (r *runner) run(config *specs.Process) (int, error) { return -1, err } } - if r.detach || r.create { + if detach { return 0, nil } - status, err := handler.forward(process) + status, err := handler.forward(process, tty) if err != nil { r.terminate(process) } @@ -298,7 +350,7 @@ func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, e shouldDestroy: true, container: container, listenFDs: listenFDs, - console: context.String("console"), + consoleSocket: context.String("console-socket"), detach: context.Bool("detach"), pidFile: context.String("pid-file"), create: create,