Merge pull request #1018 from cyphar/console-rewrite

Consoles, consoles, consoles.
This commit is contained in:
Mrunal Patel 2016-12-07 14:37:19 -08:00 committed by GitHub
commit 34f23cb99c
51 changed files with 1311 additions and 355 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
vendor/pkg
/runc
contrib/cmd/recvtty/recvtty
Godeps/_workspace/src/github.com/opencontainers/runc
man/man8
release

View File

@ -48,7 +48,8 @@ RUN mkdir -p /go/src/github.com/mvdan \
# setup a playground for us to spawn containers in
ENV ROOTFS /busybox
RUN mkdir -p ${ROOTFS} \
&& curl -o- -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar' | tar -C ${ROOTFS} -xf -
&& curl -o- -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz' | tar xfJC - ${ROOTFS}
COPY script/tmpmount /
WORKDIR /go/src/github.com/opencontainers/runc

View File

@ -1,7 +1,8 @@
.PHONY: dbuild man \
.PHONY: all shell dbuild man \
localtest localunittest localintegration \
test unittest integration
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
PREFIX := $(DESTDIR)/usr/local
BINDIR := $(PREFIX)/sbin
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
@ -25,13 +26,23 @@ VERSION := ${shell cat ./VERSION}
SHELL := $(shell command -v bash 2>/dev/null)
all: $(RUNC_LINK)
.DEFAULT: runc
runc: $(SOURCES) | $(RUNC_LINK)
go build -i -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -tags "$(BUILDTAGS)" -o runc .
static: $(RUNC_LINK)
CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o runc .
all: runc recvtty
release: $(RUNC_LINK)
recvtty: contrib/cmd/recvtty/recvtty
contrib/cmd/recvtty/recvtty: $(SOURCES) | $(RUNC_LINK)
go build -i -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
static: $(SOURCES) | $(RUNC_LINK)
CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o runc .
CGO_ENABLED=1 go build -i -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION}" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
release: $(RUNC_LINK) | $(RUNC_LINK)
@flag_list=(seccomp selinux apparmor static ambient); \
unset expression; \
for flag in "$${flag_list[@]}"; do \
@ -61,7 +72,7 @@ $(RUNC_LINK):
ln -sfn $(CURDIR) $(RUNC_LINK)
dbuild: runcimage
docker run --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make
docker run --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
lint:
go vet ./...
@ -91,6 +102,9 @@ integration: runcimage
localintegration: all
bats -t tests/integration${TESTFLAGS}
shell: all
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
install:
install -D -m0755 runc $(BINDIR)/runc
@ -112,6 +126,7 @@ uninstall-man:
clean:
rm -f runc
rm -f contrib/cmd/recvtty/recvtty
rm -f $(RUNC_LINK)
rm -rf $(GOPATH)/pkg
rm -rf $(RELEASE_DIR)

View File

@ -0,0 +1,247 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strings"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/urfave/cli"
)
// version will be populated by the Makefile, read from
// VERSION file of the source code.
var version = ""
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
const (
usage = `Open Container Initiative contrib/cmd/recvtty
recvtty is a reference implementation of a consumer of runC's --console-socket
API. It has two main modes of operation:
* single: Only permit one terminal to be sent to the socket, which is
then hooked up to the stdio of the recvtty process. This is useful
for rudimentary shell management of a container.
* null: Permit as many terminals to be sent to the socket, but they
are read to /dev/null. This is used for testing, and imitates the
old runC API's --console=/dev/pts/ptmx hack which would allow for a
similar trick. This is probably not what you want to use, unless
you're doing something like our bats integration tests.
To use recvtty, just specify a socket path at which you want to receive
terminals:
$ recvtty [--mode <single|null>] socket.sock
`
)
func bail(err error) {
fmt.Fprintf(os.Stderr, "[recvtty] fatal error: %v\n", err)
os.Exit(1)
}
func handleSingle(path string) error {
// Open a socket.
ln, err := net.Listen("unix", path)
if err != nil {
return err
}
defer ln.Close()
// We only accept a single connection, since we can only really have
// one reader for os.Stdin. Plus this is all a PoC.
conn, err := ln.Accept()
if err != nil {
return err
}
defer conn.Close()
// Close ln, to allow for other instances to take over.
ln.Close()
// Get the fd of the connection.
unixconn, ok := conn.(*net.UnixConn)
if !ok {
return fmt.Errorf("failed to cast to unixconn")
}
socket, err := unixconn.File()
if err != nil {
return err
}
defer socket.Close()
// Get the master file descriptor from runC.
master, err := utils.RecvFd(socket)
if err != nil {
return err
}
// Print the file descriptor tag.
ti, err := libcontainer.GetTerminalInfo(master.Name())
if err != nil {
return err
}
fmt.Printf("[recvtty] received masterfd: container '%s'\n", ti.ContainerID)
// Copy from our stdio to the master fd.
quitChan := make(chan struct{})
go func() {
io.Copy(os.Stdout, master)
quitChan <- struct{}{}
}()
go func() {
io.Copy(master, os.Stdin)
quitChan <- struct{}{}
}()
// Only close the master fd once we've stopped copying.
<-quitChan
master.Close()
return nil
}
func handleNull(path string) error {
// Open a socket.
ln, err := net.Listen("unix", path)
if err != nil {
return err
}
defer ln.Close()
// As opposed to handleSingle we accept as many connections as we get, but
// we don't interact with Stdin at all (and we copy stdout to /dev/null).
for {
conn, err := ln.Accept()
if err != nil {
return err
}
go func(conn net.Conn) {
// Don't leave references lying around.
defer conn.Close()
// Get the fd of the connection.
unixconn, ok := conn.(*net.UnixConn)
if !ok {
return
}
socket, err := unixconn.File()
if err != nil {
return
}
defer socket.Close()
// Get the master file descriptor from runC.
master, err := utils.RecvFd(socket)
if err != nil {
return
}
// Print the file descriptor tag.
ti, err := libcontainer.GetTerminalInfo(master.Name())
if err != nil {
bail(err)
}
fmt.Printf("[recvtty] received masterfd: container '%s'\n", ti.ContainerID)
// Just do a dumb copy to /dev/null.
devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
// TODO: Handle this nicely.
return
}
io.Copy(devnull, master)
devnull.Close()
}(conn)
}
}
func main() {
app := cli.NewApp()
app.Name = "recvtty"
app.Usage = usage
// Set version to be the same as runC.
var v []string
if version != "" {
v = append(v, version)
}
if gitCommit != "" {
v = append(v, fmt.Sprintf("commit: %s", gitCommit))
}
app.Version = strings.Join(v, "\n")
// Set the flags.
app.Flags = []cli.Flag{
cli.StringFlag{
Name: "mode, m",
Value: "single",
Usage: "Mode of operation (single or null)",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "Path to write daemon process ID to",
},
}
app.Action = func(ctx *cli.Context) error {
args := ctx.Args()
if len(args) != 1 {
return fmt.Errorf("need to specify a single socket path")
}
path := ctx.Args()[0]
pidPath := ctx.String("pid-file")
if pidPath != "" {
pid := fmt.Sprintf("%d\n", os.Getpid())
if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil {
return err
}
}
switch ctx.String("mode") {
case "single":
if err := handleSingle(path); err != nil {
return err
}
case "null":
if err := handleNull(path); err != nil {
return err
}
default:
return fmt.Errorf("need to select a valid mode: %s", ctx.String("mode"))
}
return nil
}
if err := app.Run(os.Args); err != nil {
bail(err)
}
}

View File

@ -30,9 +30,9 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Usage: `path to the root of the bundle directory, defaults to the current directory`,
},
cli.StringFlag{
Name: "console",
Name: "console-socket",
Value: "",
Usage: "specify the pty slave path for use with the container",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pid-file",

View File

@ -26,12 +26,12 @@ Where "<container-id>" is the name for the instance of the container and
EXAMPLE:
For example, if the container is configured to run the linux ps command the
following will output a list of processes running in the container:
# runc exec <container-id> ps`,
Flags: []cli.Flag{
cli.StringFlag{
Name: "console",
Usage: "specify the pty slave path for use with the container",
Name: "console-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "cwd",
@ -131,7 +131,7 @@ func execProcess(context *cli.Context) (int, error) {
enableSubreaper: false,
shouldDestroy: false,
container: container,
console: context.String("console"),
consoleSocket: context.String("console-socket"),
detach: detach,
pidFile: context.String("pid-file"),
}

View File

@ -1,6 +1,11 @@
package libcontainer
import "io"
import (
"encoding/json"
"fmt"
"io"
"os"
)
// Console represents a pseudo TTY.
type Console interface {
@ -10,5 +15,59 @@ type Console interface {
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
File() *os.File
}
const (
TerminalInfoVersion uint32 = 201610041
TerminalInfoType uint8 = 'T'
)
// TerminalInfo is the structure which is passed as the non-ancillary data
// in the sendmsg(2) call when runc is run with --console-socket. It
// contains some information about the container which the console master fd
// relates to (to allow for consumers to use a single unix socket to handle
// multiple containers). This structure will probably move to runtime-spec
// at some point. But for now it lies in libcontainer.
type TerminalInfo struct {
// Version of the API.
Version uint32 `json:"version"`
// Type of message (future proofing).
Type uint8 `json:"type"`
// Container contains the ID of the container.
ContainerID string `json:"container_id"`
}
func (ti *TerminalInfo) String() string {
encoded, err := json.Marshal(*ti)
if err != nil {
panic(err)
}
return string(encoded)
}
func NewTerminalInfo(containerId string) *TerminalInfo {
return &TerminalInfo{
Version: TerminalInfoVersion,
Type: TerminalInfoType,
ContainerID: containerId,
}
}
func GetTerminalInfo(encoded string) (*TerminalInfo, error) {
ti := new(TerminalInfo)
if err := json.Unmarshal([]byte(encoded), ti); err != nil {
return nil, err
}
if ti.Type != TerminalInfoType {
return nil, fmt.Errorf("terminal info: incorrect type in payload (%q): %q", TerminalInfoType, ti.Type)
}
if ti.Version != TerminalInfoVersion {
return nil, fmt.Errorf("terminal info: incorrect version in payload (%q): %q", TerminalInfoVersion, ti.Version)
}
return ti, nil
}

View File

@ -6,8 +6,8 @@ import (
"errors"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View File

@ -3,16 +3,13 @@ package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/opencontainers/runc/libcontainer/label"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
func newConsole() (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
@ -27,34 +24,20 @@ func NewConsole(uid, gid int) (Console, error) {
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) Fd() uintptr {
return c.master.Fd()
func (c *linuxConsole) File() *os.File {
return c.master
}
func (c *linuxConsole) Path() string {
@ -78,21 +61,17 @@ func (c *linuxConsole) Close() error {
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
func (c *linuxConsole) mount() error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
return syscall.Mount(c.slavePath, "/dev/console", "bind", syscall.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current

View File

@ -4,8 +4,8 @@ import (
"errors"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View File

@ -1,7 +1,7 @@
package libcontainer
// NewConsole returns an initialized console that can be used within a container
func NewConsole(uid, gid int) (Console, error) {
// newConsole returns an initialized console that can be used within a container
func newConsole() (Console, error) {
return &windowsConsole{}, nil
}

View File

@ -342,10 +342,11 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "")
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
if err != nil {
return nil, err
}
p.consoleChan = make(chan *os.File, 1)
return &initProcess{
cmd: cmd,
childPipe: childPipe,
@ -368,11 +369,12 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
}
// for setns process, we don't have to set cloneflags as the process namespaces
// will only be set via setns syscall
data, err := c.bootstrapData(0, state.NamespacePaths, p.consolePath)
data, err := c.bootstrapData(0, state.NamespacePaths)
if err != nil {
return nil, err
}
// TODO: set on container for process management
p.consoleChan = make(chan *os.File, 1)
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
@ -393,7 +395,6 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
User: process.User,
AdditionalGroups: process.AdditionalGroups,
Cwd: process.Cwd,
Console: process.consolePath,
Capabilities: process.Capabilities,
PassedFilesCount: len(process.ExtraFiles),
ContainerId: c.ID(),
@ -415,6 +416,17 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
if len(process.Rlimits) > 0 {
cfg.Rlimits = process.Rlimits
}
/*
* TODO: This should not be automatically computed. We should implement
* this as a field in libcontainer.Process, and then we only dup the
* new console over the file descriptors which were not explicitly
* set with process.Std{in,out,err}. The reason I've left this as-is
* is because the GetConsole() interface is new, there's no need to
* polish this interface right now.
*/
if process.Stdin == nil && process.Stdout == nil && process.Stderr == nil {
cfg.CreateConsole = true
}
return cfg
}
@ -1281,7 +1293,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
// such as one that uses nsenter package to bootstrap the container's
// init process correctly, i.e. with correct namespaces, uid/gid
// mapping etc.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) {
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
// create the netlink message
r := nl.NewNetlinkRequest(int(InitMsg), 0)
@ -1291,14 +1303,6 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Value: uint32(cloneFlags),
})
// write console path
if consolePath != "" {
r.AddData(&Bytemsg{
Type: ConsolePathAttr,
Value: []byte(consolePath),
})
}
// write custom namespace paths
if len(nsMaps) > 0 {
nsPaths, err := c.orderNamespacePaths(nsMaps)

View File

@ -253,12 +253,9 @@ func (l *LinuxFactory) StartInitialization() (err error) {
// send it back to the parent process in the form of an initError.
// If container's init successed, syscall.Exec will not return, hence
// this defer function will never be called.
if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init.
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
fmt.Fprintln(os.Stderr, err)

View File

@ -9,20 +9,6 @@ import (
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
type syncType uint8
const (
procReady syncType = iota
procError
procRun
procHooks
procResume
)
type syncT struct {
Type syncType `json:"type"`
}
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}

View File

@ -54,12 +54,12 @@ type initConfig struct {
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
ExecFifoPath string `json:"start_pipe_path"`
CreateConsole bool `json:"create_console"`
}
type initer interface {
@ -77,6 +77,7 @@ func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error)
switch t {
case initSetns:
return &linuxSetnsInit{
pipe: pipe,
config: config,
}, nil
case initStandard:
@ -150,24 +151,79 @@ func finalizeNamespace(config *initConfig) error {
return nil
}
// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
func setupConsole(pipe *os.File, config *initConfig, mount bool) error {
// At this point, /dev/ptmx points to something that we would expect. We
// used to change the owner of the slave path, but since the /dev/pts mount
// can have gid=X set (at the users' option). So touching the owner of the
// slave PTY is not necessary, as the kernel will handle that for us. Note
// however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so
// they can actually control their console).
console, err := newConsole()
if err != nil {
return err
}
// After we return from here, we don't need the console anymore.
defer console.Close()
linuxConsole, ok := console.(*linuxConsole)
if !ok {
return fmt.Errorf("failed to cast console to *linuxConsole")
}
// Mount the console inside our rootfs.
if mount {
if err := linuxConsole.mount(); err != nil {
return err
}
}
if err := writeSync(pipe, procConsole); err != nil {
return err
}
// We need to have a two-way synchronisation here. Though it might seem
// pointless, it's important to make sure that the sendmsg(2) payload
// doesn't get swallowed by an out-of-place read(2) [which happens if the
// syscalls get reordered so that sendmsg(2) is before the other side's
// read(2) of procConsole].
if err := readSync(pipe, procConsoleReq); err != nil {
return err
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(pipe, linuxConsole.File()); err != nil {
return err
}
// Make sure the other side recieved the fd.
if err := readSync(pipe, procConsoleAck); err != nil {
return err
}
// Now, dup over all the things.
return linuxConsole.dupStdio()
}
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
if err := writeSync(pipe, procReady); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procRun {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procRun); err != nil {
return err
}
return nil
}
@ -176,19 +232,15 @@ func syncParentReady(pipe io.ReadWriter) error {
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procResume {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procResume); err != nil {
return err
}
return nil
}
@ -262,11 +314,17 @@ func fixStdioPermissions(u *user.ExecUser) error {
if err := syscall.Fstat(int(fd), &s); err != nil {
return err
}
// skip chown of /dev/null if it was used as one of the STDIO fds.
// Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
// We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := syscall.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
return err
}
}

View File

@ -43,7 +43,7 @@ func testExecPS(t *testing.T, userns bool) {
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
}
buffers, exitCode, err := runContainer(config, "", "ps")
buffers, exitCode, err := runContainer(config, "", "ps", "-o", "pid,user,comm")
if err != nil {
t.Fatalf("%s: %s", buffers, err)
}

View File

@ -279,15 +279,15 @@ func TestExecInTTY(t *testing.T) {
Args: []string{"ps"},
Env: standardEnvironment,
}
console, err := ps.NewConsole(0, 0)
err = container.Run(ps)
ok(t, err)
console, err := ps.GetConsole()
copy := make(chan struct{})
go func() {
io.Copy(&stdout, console)
close(copy)
}()
ok(t, err)
err = container.Run(ps)
ok(t, err)
select {
case <-time.After(5 * time.Second):
t.Fatal("Waiting for copy timed out")

View File

@ -11,13 +11,12 @@ import (
// list of known message types we want to send to bootstrap program
// The number is randomly chosen to not conflict with known netlink types
const (
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
ConsolePathAttr uint16 = 27282
NsPathsAttr uint16 = 27283
UidmapAttr uint16 = 27284
GidmapAttr uint16 = 27285
SetgroupAttr uint16 = 27286
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
// When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
)

View File

@ -71,7 +71,6 @@ struct nlconfig_t {
char *namespaces;
size_t namespaces_len;
uint8_t is_setgroup;
int consolefd;
};
/*
@ -80,11 +79,10 @@ struct nlconfig_t {
*/
#define INIT_MSG 62000
#define CLONE_FLAGS_ATTR 27281
#define CONSOLE_PATH_ATTR 27282
#define NS_PATHS_ATTR 27283
#define UIDMAP_ATTR 27284
#define GIDMAP_ATTR 27285
#define SETGROUP_ATTR 27286
#define NS_PATHS_ATTR 27282
#define UIDMAP_ATTR 27283
#define GIDMAP_ATTR 27284
#define SETGROUP_ATTR 27285
/*
* Use the raw syscall for versions of glibc which don't include a function for
@ -306,7 +304,6 @@ static void nl_parse(int fd, struct nlconfig_t *config)
/* Parse the netlink payload. */
config->data = data;
config->consolefd = -1;
while (current < data + size) {
struct nlattr *nlattr = (struct nlattr *)current;
size_t payload_len = nlattr->nla_len - NLA_HDRLEN;
@ -319,15 +316,6 @@ static void nl_parse(int fd, struct nlconfig_t *config)
case CLONE_FLAGS_ATTR:
config->cloneflags = readint32(current);
break;
case CONSOLE_PATH_ATTR:
/*
* We open the console here because we currently evaluate console
* paths from the *host* namespaces.
*/
config->consolefd = open(current, O_RDWR);
if (config->consolefd < 0)
bail("failed to open console %s", current);
break;
case NS_PATHS_ATTR:
config->namespaces = current;
config->namespaces_len = payload_len;
@ -722,7 +710,6 @@ void nsexec(void)
* We're inside the child now, having jumped from the
* start_child() code after forking in the parent.
*/
int consolefd = config.consolefd;
enum sync_t s;
/* We're in a child and thus need to tell the parent if we die. */
@ -743,17 +730,6 @@ void nsexec(void)
if (setgroups(0, NULL) < 0)
bail("setgroups failed");
if (consolefd != -1) {
if (ioctl(consolefd, TIOCSCTTY, 0) < 0)
bail("ioctl TIOCSCTTY failed");
if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO)
bail("failed to dup stdin");
if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO)
bail("failed to dup stdout");
if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO)
bail("failed to dup stderr");
}
s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with patent: write(SYNC_CHILD_READY)");

View File

@ -47,8 +47,9 @@ type Process struct {
// ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File
// consolePath is the path to the console allocated to the container.
consolePath string
// consoleChan provides the masterfd console.
// TODO: Make this persistent in Process.
consoleChan chan *os.File
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
@ -105,21 +106,14 @@ type IO struct {
Stderr io.ReadCloser
}
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid, rootgid int) (Console, error) {
console, err := NewConsole(rootuid, rootgid)
if err != nil {
return nil, err
func (p *Process) GetConsole() (Console, error) {
consoleFd, ok := <-p.consoleChan
if !ok {
return nil, fmt.Errorf("failed to get console from process")
}
p.consolePath = console.Path()
return console, nil
}
// ConsoleFromPath sets the process's console with the path provided
func (p *Process) ConsoleFromPath(path string) error {
if p.consolePath != "" {
return newGenericError(fmt.Errorf("console path already exists for process"), ConsoleExists)
}
p.consolePath = path
return nil
// TODO: Fix this so that it used the console API.
return &linuxConsole{
master: consoleFd,
}, nil
}

View File

@ -100,15 +100,42 @@ func (p *setnsProcess) start() (err error) {
return newSystemErrorWithCause(err, "writing config to pipe")
}
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procConsole:
if err := writeSync(p.parentPipe, procConsoleReq); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'request fd'")
}
masterFile, err := utils.RecvFd(p.parentPipe)
if err != nil {
return newSystemErrorWithCause(err, "getting master pty from child pipe")
}
if p.process.consoleChan == nil {
// TODO: Don't panic here, do something more sane.
panic("consoleChan is nil")
}
p.process.consoleChan <- masterFile
if err := writeSync(p.parentPipe, procConsoleAck); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'ack fd'")
}
case procReady:
// This shouldn't happen.
panic("unexpected procReady in setns")
case procHooks:
// This shouldn't happen.
panic("unexpected procHooks in setns")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
return nil
})
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding init error from pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
@ -270,22 +297,31 @@ func (p *initProcess) start() error {
return newSystemErrorWithCause(err, "sending config to init process")
}
var (
procSync syncT
sentRun bool
sentResume bool
ierr *genericError
)
dec := json.NewDecoder(p.parentPipe)
loop:
for {
if err := dec.Decode(&procSync); err != nil {
if err == io.EOF {
break loop
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procConsole:
if err := writeSync(p.parentPipe, procConsoleReq); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'request fd'")
}
masterFile, err := utils.RecvFd(p.parentPipe)
if err != nil {
return newSystemErrorWithCause(err, "getting master pty from child pipe")
}
if p.process.consoleChan == nil {
// TODO: Don't panic here, do something more sane.
panic("consoleChan is nil")
}
p.process.consoleChan <- masterFile
if err := writeSync(p.parentPipe, procConsoleAck); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'ack fd'")
}
return newSystemErrorWithCause(err, "decoding sync type from init pipe")
}
switch procSync.Type {
case procReady:
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
@ -316,8 +352,8 @@ loop:
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
return newSystemErrorWithCause(err, "writing syncT run type")
if err := writeSync(p.parentPipe, procRun); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'run'")
}
sentRun = true
case procHooks:
@ -336,25 +372,17 @@ loop:
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
return newSystemErrorWithCause(err, "writing syncT resume type")
if err := writeSync(p.parentPipe, procResume); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'resume'")
}
sentResume = true
case procError:
// wait for the child process to fully complete and receive an error message
// if one was encoutered
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
break loop
}
// Programmer error.
panic("No error following JSON procError payload.")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
}
return nil
})
if !sentRun {
return newSystemErrorWithCause(ierr, "container init")
}
@ -364,6 +392,7 @@ loop:
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
@ -440,6 +469,8 @@ func getPipeFds(pid int) ([]string, error) {
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ {
// XXX: This breaks if the path is not a valid symlink (which can
// happen in certain particularly unlucky mount namespace setups).
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
@ -450,8 +481,10 @@ func getPipeFds(pid int) ([]string, error) {
return fds, nil
}
// InitializeIO creates pipes for use with the process's STDIO
// and returns the opposite side for each
// InitializeIO creates pipes for use with the process's stdio and returns the
// opposite side for each. Do not use this if you want to have a pseudoterminal
// set up for you by libcontainer (TODO: fix that too).
// TODO: This is mostly unnecessary, and should be handled by clients.
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}

View File

@ -36,9 +36,11 @@ func needsSetupDev(config *configs.Config) bool {
return true
}
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro or pivot_root,
// because console setup happens inside the caller. You must call
// finalizeRootfs in order to finish the rootfs setup.
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemErrorWithCause(err, "preparing rootfs")
}
@ -50,6 +52,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
}
@ -60,17 +63,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemErrorWithCause(err, "creating device nodes")
}
if err := setupPtmx(config, console); err != nil {
if err := setupPtmx(config); err != nil {
return newSystemErrorWithCause(err, "setting up ptmx")
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemErrorWithCause(err, "setting up /dev symlinks")
}
}
// Signal the parent to run the pre-start hooks.
// The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount
@ -78,9 +83,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
if err := syncParentHooks(pipe); err != nil {
return err
}
// The reason these operations are done here rather than in finalizeRootfs
// is because the console-handling code gets quite sticky if we have to set
// up the console before doing the pivot_root(2). This is because the
// Console API has to also work with the ExecIn case, which means that the
// API must be able to deal with being inside as well as outside the
// container. It's just cleaner to do this here (at the expense of the
// operation not being perfectly split).
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
@ -89,11 +104,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
if err != nil {
return newSystemErrorWithCause(err, "jailing process inside rootfs")
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
}
}
return nil
}
// finalizeRootfs actually switches the root of the process and sets anything
// to ro if necessary. You must call prepareRootfs first.
func finalizeRootfs(config *configs.Config) (err error) {
// remount dev as ro if specified
for _, m := range config.Mounts {
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
@ -105,12 +128,14 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
break
}
}
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemErrorWithCause(err, "setting rootfs as readonly")
}
}
syscall.Umask(0022)
return nil
}
@ -578,7 +603,7 @@ func setReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
func setupPtmx(config *configs.Config, console *linuxConsole) error {
func setupPtmx(config *configs.Config) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
@ -586,9 +611,6 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != nil {
return console.mount(config.Rootfs, config.MountLabel)
}
return nil
}

View File

@ -16,6 +16,7 @@ import (
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
pipe *os.File
config *initConfig
}
@ -30,6 +31,14 @@ func (l *linuxSetnsInit) Init() error {
return err
}
}
if l.config.CreateConsole {
if err := setupConsole(l.pipe, l.config, false); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err

View File

@ -4,7 +4,6 @@ package libcontainer
import (
"fmt"
"io"
"os"
"os/exec"
"syscall"
@ -18,7 +17,7 @@ import (
)
type linuxStandardInit struct {
pipe io.ReadWriteCloser
pipe *os.File
parentPid int
stateDirFD int
config *initConfig
@ -59,18 +58,6 @@ func (l *linuxStandardInit) Init() error {
}
}
var console *linuxConsole
if l.config.Console != "" {
console = newConsoleFromPath(l.config.Console)
if err := console.dupStdio(); err != nil {
return err
}
}
if console != nil {
if err := system.Setctty(); err != nil {
return err
}
}
if err := setupNetwork(l.config); err != nil {
return err
}
@ -79,12 +66,33 @@ func (l *linuxStandardInit) Init() error {
}
label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace
// prepareRootfs() can be executed only for a new mount namespace.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config, console, l.pipe); err != nil {
if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
return err
}
}
// Set up the console. This has to be done *before* we finalize the rootfs,
// but *after* we've given the user the chance to set up all of the mounts
// they wanted.
if l.config.CreateConsole {
if err := setupConsole(l.pipe, l.config, true); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
// Finish the rootfs setup.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := finalizeRootfs(l.config.Config); err != nil {
return err
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
return err

110
libcontainer/sync.go Normal file
View File

@ -0,0 +1,110 @@
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"github.com/opencontainers/runc/libcontainer/utils"
)
type syncType string
// Constants that are used for synchronisation between the parent and child
// during container setup. They come in pairs (with procError being a generic
// response which is followed by a &genericError).
//
// [ child ] <-> [ parent ]
//
// procHooks --> [run hooks]
// <-- procResume
//
// procConsole -->
// <-- procConsoleReq
// [send(fd)] --> [recv(fd)]
// <-- procConsoleAck
//
// procReady --> [final setup]
// <-- procRun
const (
procError syncType = "procError"
procReady syncType = "procReady"
procRun syncType = "procRun"
procHooks syncType = "procHooks"
procResume syncType = "procResume"
procConsole syncType = "procConsole"
procConsoleReq syncType = "procConsoleReq"
procConsoleAck syncType = "procConsoleAck"
)
type syncT struct {
Type syncType `json:"type"`
}
// writeSync is used to write to a synchronisation pipe. An error is returned
// if there was a problem writing the payload.
func writeSync(pipe io.Writer, sync syncType) error {
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
return err
}
return nil
}
// readSync is used to read from a synchronisation pipe. An error is returned
// if we got a genericError, the pipe was closed, or we got an unexpected flag.
func readSync(pipe io.Reader, expected syncType) error {
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type == procError {
var ierr genericError
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
return fmt.Errorf("failed reading error from parent: %v", err)
}
return &ierr
}
if procSync.Type != expected {
return fmt.Errorf("invalid synchronisation flag from parent")
}
}
return nil
}
// parseSync runs the given callback function on each syncT received from the
// child. It will return once io.EOF is returned from the given pipe.
func parseSync(pipe io.Reader, fn func(*syncT) error) error {
dec := json.NewDecoder(pipe)
for {
var sync syncT
if err := dec.Decode(&sync); err != nil {
if err == io.EOF {
break
}
return err
}
// We handle this case outside fn for cleanliness reasons.
var ierr *genericError
if sync.Type == procError {
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
return ierr
}
// Programmer error.
panic("No error following JSON procError payload.")
}
if err := fn(&sync); err != nil {
return err
}
}
return nil
}

148
libcontainer/utils/cmsg.c Normal file
View File

@ -0,0 +1,148 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include "cmsg.h"
#define error(fmt, ...) \
({ \
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
errno = ECOMM; \
goto err; /* return value */ \
})
/*
* Sends a file descriptor along the sockfd provided. Returns the return
* value of sendmsg(2). Any synchronisation and preparation of state
* should be done external to this (we expect the other side to be in
* recvfd() in the code).
*/
ssize_t sendfd(int sockfd, struct file_t file)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
int *fdptr;
int ret;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/*
* We need to send some other data along with the ancillary data,
* otherwise the other side won't recieve any data. This is very
* well-hidden in the documentation (and only applies to
* SOCK_STREAM). See the bottom part of unix(7).
*/
iov[0].iov_base = file.name;
iov[0].iov_len = strlen(file.name) + 1;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
fdptr = (int *) CMSG_DATA(cmsg);
memcpy(fdptr, &file.fd, sizeof(int));
return sendmsg(sockfd, &msg, 0);
}
/*
* Receives a file descriptor from the sockfd provided. Returns the file
* descriptor as sent from sendfd(). It will return the file descriptor
* or die (literally) trying. Any synchronisation and preparation of
* state should be done external to this (we expect the other side to be
* in sendfd() in the code).
*/
struct file_t recvfd(int sockfd)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
struct file_t file = {0};
int *fdptr;
int olderrno;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/* Allocate a buffer. */
/* TODO: Make this dynamic with MSG_PEEK. */
file.name = malloc(TAG_BUFFER);
if (!file.name)
error("recvfd: failed to allocate file.tag buffer\n");
/*
* We need to "recieve" the non-ancillary data even though we don't
* plan to use it at all. Otherwise, things won't work as expected.
* See unix(7) and other well-hidden documentation.
*/
iov[0].iov_base = file.name;
iov[0].iov_len = TAG_BUFFER;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
ssize_t ret = recvmsg(sockfd, &msg, 0);
if (ret < 0)
goto err;
cmsg = CMSG_FIRSTHDR(&msg);
if (!cmsg)
error("recvfd: got NULL from CMSG_FIRSTHDR");
if (cmsg->cmsg_level != SOL_SOCKET)
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
if (cmsg->cmsg_type != SCM_RIGHTS)
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len);
fdptr = (int *) CMSG_DATA(cmsg);
if (!fdptr || *fdptr < 0)
error("recvfd: recieved invalid pointer");
file.fd = *fdptr;
return file;
err:
olderrno = errno;
free(file.name);
errno = olderrno;
return (struct file_t){0};
}

View File

@ -0,0 +1,57 @@
// +build linux
package utils
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
#include <errno.h>
#include <stdlib.h>
#include "cmsg.h"
*/
import "C"
import (
"os"
"unsafe"
)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxilliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
file, err := C.recvfd(C.int(socket.Fd()))
if err != nil {
return nil, err
}
defer C.free(unsafe.Pointer(file.name))
return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
// addition, the file.Name() of the given file will also be sent as
// non-auxilliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket, file *os.File) error {
var cfile C.struct_file_t
cfile.fd = C.int(file.Fd())
cfile.name = C.CString(file.Name())
defer C.free(unsafe.Pointer(cfile.name))
_, err := C.sendfd(C.int(socket.Fd()), cfile)
return err
}

36
libcontainer/utils/cmsg.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if !defined(CMSG_H)
#define CMSG_H
#include <sys/types.h>
/* TODO: Implement this properly with MSG_PEEK. */
#define TAG_BUFFER 4096
/* This mirrors Go's (*os.File). */
struct file_t {
char *name;
int fd;
};
struct file_t recvfd(int sockfd);
ssize_t sendfd(int sockfd, struct file_t file);
#endif /* !defined(CMSG_H) */

View File

@ -158,15 +158,16 @@ func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Co
defer destroy(container)
}
process := &libcontainer.Process{}
tty, err := setupIO(process, rootuid, rootgid, "", false, detach)
tty, err := setupIO(process, rootuid, rootgid, false, detach)
if err != nil {
return -1, err
}
defer tty.Close()
handler := newSignalHandler(tty, !context.Bool("no-subreaper"))
handler := newSignalHandler(!context.Bool("no-subreaper"))
if err := container.Restore(process, options); err != nil {
return -1, err
}
// We don't need to do a tty.recvtty because config.Terminal is always false.
defer tty.Close()
if err := tty.ClosePostStart(); err != nil {
return -1, err
}
@ -180,7 +181,7 @@ func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Co
if detach {
return 0, nil
}
return handler.forward(process)
return handler.forward(process, tty)
}
func criuOptions(context *cli.Context) *libcontainer.CriuOpts {

4
run.go
View File

@ -32,9 +32,9 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Usage: `path to the root of the bundle directory, defaults to the current directory`,
},
cli.StringFlag{
Name: "console",
Name: "console-socket",
Value: "",
Usage: "specify the pty slave path for use with the container",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.BoolFlag{
Name: "detach, d",

View File

@ -17,7 +17,7 @@ const signalBufferSize = 2048
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
// while still forwarding all other signals to the process.
func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
func newSignalHandler(enableSubreaper bool) *signalHandler {
if enableSubreaper {
// set us as the subreaper before registering the signal handler for the container
if err := system.SetSubreaper(1); err != nil {
@ -30,7 +30,6 @@ func newSignalHandler(tty *tty, enableSubreaper bool) *signalHandler {
// handle all signals for the process.
signal.Notify(s)
return &signalHandler{
tty: tty,
signals: s,
}
}
@ -44,12 +43,11 @@ type exit struct {
type signalHandler struct {
signals chan os.Signal
tty *tty
}
// forward handles the main signal event loop forwarding, resizing, or reaping depending
// on the signal received.
func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
func (h *signalHandler) forward(process *libcontainer.Process, tty *tty) (int, error) {
// make sure we know the pid of our main process so that we can return
// after it dies.
pid1, err := process.Pid()
@ -57,11 +55,11 @@ func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
return -1, err
}
// perform the initial tty resize.
h.tty.resize()
tty.resize()
for s := range h.signals {
switch s {
case syscall.SIGWINCH:
h.tty.resize()
tty.resize()
case syscall.SIGCHLD:
exits, err := h.reap()
if err != nil {

View File

@ -43,7 +43,7 @@ EOF
sed -i "s/\(\"resources\": {\)/\1\n${DATA}/" ${BUSYBOX_BUNDLE}/config.json
# run a detached busybox to work with
runc run -d --console /dev/pts/ptmx test_cgroups_kmem
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem
[ "$status" -eq 0 ]
wait_for_container 15 1 test_cgroups_kmem
@ -61,7 +61,7 @@ EOF
sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json
# run a detached busybox to work with
runc run -d --console /dev/pts/ptmx test_cgroups_kmem
runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem
[ "$status" -eq 0 ]
wait_for_container 15 1 test_cgroups_kmem

View File

@ -12,7 +12,7 @@ function teardown() {
}
@test "runc create" {
runc create --console /dev/pts/ptmx test_busybox
runc create --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
testcontainer test_busybox created
@ -25,7 +25,7 @@ function teardown() {
}
@test "runc create exec" {
runc create --console /dev/pts/ptmx test_busybox
runc create --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
testcontainer test_busybox created
@ -33,6 +33,8 @@ function teardown() {
runc exec test_busybox true
[ "$status" -eq 0 ]
testcontainer test_busybox created
# start the command
runc start test_busybox
[ "$status" -eq 0 ]
@ -41,7 +43,7 @@ function teardown() {
}
@test "runc create --pid-file" {
runc create --pid-file pid.txt --console /dev/pts/ptmx test_busybox
runc create --pid-file pid.txt --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
testcontainer test_busybox created
@ -67,7 +69,7 @@ function teardown() {
run cd pid_file
[ "$status" -eq 0 ]
runc create --pid-file pid.txt -b $BUSYBOX_BUNDLE --console /dev/pts/ptmx test_busybox
runc create --pid-file pid.txt -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
testcontainer test_busybox created

View File

@ -13,7 +13,7 @@ function teardown() {
@test "runc delete" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -34,7 +34,7 @@ function teardown() {
@test "runc delete --force" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -51,13 +51,13 @@ function teardown() {
@test "run delete with multi-containers" {
# create busybox1 detached
runc create --console /dev/pts/ptmx test_busybox1
runc create --console-socket $CONSOLE_SOCKET test_busybox1
[ "$status" -eq 0 ]
testcontainer test_busybox1 created
# run busybox2 detached
runc run -d --console /dev/pts/ptmx test_busybox2
runc run -d --console-socket $CONSOLE_SOCKET test_busybox2
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox2
@ -86,20 +86,20 @@ function teardown() {
@test "run delete --force with multi-containers" {
# create busybox1 detached
runc create --console /dev/pts/ptmx test_busybox1
runc create --console-socket $CONSOLE_SOCKET test_busybox1
[ "$status" -eq 0 ]
testcontainer test_busybox1 created
# run busybox2 detached
runc run -d --console /dev/pts/ptmx test_busybox2
runc run -d --console-socket $CONSOLE_SOCKET test_busybox2
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox2
testcontainer test_busybox2 running
# delete both test_busybox1 and test_busybox2 container
runc delete --force test_busybox1 test_busybox2
runc delete --force test_busybox1 test_busybox2
runc state test_busybox1
[ "$status" -ne 0 ]

View File

@ -13,7 +13,7 @@ function teardown() {
@test "events --stats" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -28,7 +28,7 @@ function teardown() {
@test "events --interval default " {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -55,7 +55,7 @@ function teardown() {
@test "events --interval 1s " {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -81,7 +81,7 @@ function teardown() {
@test "events --interval 100ms " {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state

View File

@ -13,7 +13,7 @@ function teardown() {
@test "runc exec" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -26,7 +26,7 @@ function teardown() {
@test "runc exec --pid-file" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -53,7 +53,7 @@ function teardown() {
[ "$status" -eq 0 ]
# run busybox detached
runc run -d -b $BUSYBOX_BUNDLE --console /dev/pts/ptmx test_busybox
runc run -d -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -74,7 +74,7 @@ function teardown() {
@test "runc exec ls -la" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -88,7 +88,7 @@ function teardown() {
@test "runc exec ls -la with --cwd" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -100,7 +100,7 @@ function teardown() {
@test "runc exec --env" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -113,7 +113,7 @@ function teardown() {
@test "runc exec --user" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox

View File

@ -64,11 +64,11 @@ load helpers
runc start -h
[ "$status" -eq 0 ]
[[ ${lines[1]} =~ runc\ start+ ]]
runc run -h
[ "$status" -eq 0 ]
[[ ${lines[1]} =~ runc\ run+ ]]
runc state -h
[ "$status" -eq 0 ]
[[ ${lines[1]} =~ runc\ state+ ]]

View File

@ -3,6 +3,7 @@
# Root directory of integration tests.
INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")")
RUNC="${INTEGRATION_ROOT}/../../runc"
RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty"
GOPATH="${INTEGRATION_ROOT}/../../../.."
# Test data path.
@ -17,7 +18,7 @@ HELLO_IMAGE="$TESTDATA/hello-world.tar"
HELLO_BUNDLE="$BATS_TMPDIR/hello-world"
# CRIU PATH
CRIU="/usr/local/sbin/criu"
CRIU="$(which criu)"
# Kernel version
KERNEL_VERSION="$(uname -r)"
@ -28,6 +29,9 @@ KERNEL_MINOR="${KERNEL_MINOR%%.*}"
# Root state path.
ROOT="$BATS_TMPDIR/runc"
# Path to console socket.
CONSOLE_SOCKET="$BATS_TMPDIR/console.sock"
# Cgroup mount
CGROUP_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\<MEMORY\>/ { print $5; exit }')
@ -142,14 +146,31 @@ function testcontainer() {
[[ "${output}" == *"$2"* ]]
}
function setup_recvtty() {
# We need to start recvtty in the background, so we double fork in the shell.
("$RECVTTY" --pid-file "$BATS_TMPDIR/recvtty.pid" --mode null "$CONSOLE_SOCKET" &) &
}
function teardown_recvtty() {
# When we kill recvtty, the container will also be killed.
if [ -f "$BATS_TMPDIR/recvtty.pid" ]; then
kill -9 $(cat "$BATS_TMPDIR/recvtty.pid")
fi
# Clean up the files that might be left over.
rm -f "$BATS_TMPDIR/recvtty.pid"
rm -f "$CONSOLE_SOCKET"
}
function setup_busybox() {
setup_recvtty
run mkdir "$BUSYBOX_BUNDLE"
run mkdir "$BUSYBOX_BUNDLE"/rootfs
if [ -e "/testdata/busybox.tar" ]; then
BUSYBOX_IMAGE="/testdata/busybox.tar"
fi
if [ ! -e $BUSYBOX_IMAGE ]; then
curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.11/rootfs.tar'
curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz'
fi
tar -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE"
cd "$BUSYBOX_BUNDLE"
@ -157,6 +178,7 @@ function setup_busybox() {
}
function setup_hello() {
setup_recvtty
run mkdir "$HELLO_BUNDLE"
run mkdir "$HELLO_BUNDLE"/rootfs
tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE"
@ -185,12 +207,14 @@ function teardown_running_container_inroot() {
function teardown_busybox() {
cd "$INTEGRATION_ROOT"
teardown_recvtty
teardown_running_container test_busybox
run rm -f -r "$BUSYBOX_BUNDLE"
}
function teardown_hello() {
cd "$INTEGRATION_ROOT"
teardown_recvtty
teardown_running_container test_hello
run rm -f -r "$HELLO_BUNDLE"
}

View File

@ -15,7 +15,7 @@ function teardown() {
@test "kill detached busybox" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state

View File

@ -19,15 +19,15 @@ function teardown() {
@test "list" {
# run a few busyboxes detached
ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box1
ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box1
[ "$status" -eq 0 ]
wait_for_container_inroot 15 1 test_box1 $HELLO_BUNDLE
ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box2
ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box2
[ "$status" -eq 0 ]
wait_for_container_inroot 15 1 test_box2 $HELLO_BUNDLE
ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_box3
ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box3
[ "$status" -eq 0 ]
wait_for_container_inroot 15 1 test_box3 $HELLO_BUNDLE

View File

@ -20,7 +20,7 @@ function teardown() {
@test "mask paths [file]" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -40,7 +40,7 @@ function teardown() {
@test "mask paths [directory]" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox

View File

@ -13,7 +13,7 @@ function teardown() {
@test "runc pause and resume" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox
@ -35,13 +35,13 @@ function teardown() {
@test "runc pause and resume with multi-container" {
# run test_busybox1 detached
runc run -d --console /dev/pts/ptmx test_busybox1
runc run -d --console-socket $CONSOLE_SOCKET test_busybox1
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox1
# run test_busybox2 detached
runc run -d --console /dev/pts/ptmx test_busybox2
runc run -d --console-socket $CONSOLE_SOCKET test_busybox2
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox2
@ -74,27 +74,27 @@ function teardown() {
@test "runc pause and resume with nonexist container" {
# run test_busybox1 detached
runc run -d --console /dev/pts/ptmx test_busybox1
runc run -d --console-socket $CONSOLE_SOCKET test_busybox1
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox1
# run test_busybox2 detached
runc run -d --console /dev/pts/ptmx test_busybox2
runc run -d --console-socket $CONSOLE_SOCKET test_busybox2
[ "$status" -eq 0 ]
wait_for_container 15 1 test_busybox2
# pause test_busybox1, test_busybox2 and nonexistant container
runc pause test_busybox1 test_busybox2 nonexistant
# pause test_busybox1, test_busybox2 and nonexistent container
runc pause test_busybox1 test_busybox2 nonexistent
[ "$status" -ne 0 ]
# test state of test_busybox1 and test_busybox2 is paused
testcontainer test_busybox1 paused
testcontainer test_busybox2 paused
# resume test_busybox1, test_busybox2 and nonexistant container
runc resume test_busybox1 test_busybox2 nonexistant
# resume test_busybox1, test_busybox2 and nonexistent container
runc resume test_busybox1 test_busybox2 nonexistent
[ "$status" -ne 0 ]
# test state of two containers is back to running

View File

@ -13,7 +13,7 @@ function teardown() {
@test "ps" {
# start busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -29,7 +29,7 @@ function teardown() {
@test "ps -f json" {
# start busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -44,7 +44,7 @@ function teardown() {
@test "ps -e -x" {
# start busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state

View File

@ -15,11 +15,11 @@ function teardown() {
@test "global --root" {
# run busybox detached using $HELLO_BUNDLE for state
ROOT=$HELLO_BUNDLE runc run -d --console /dev/pts/ptmx test_dotbox
ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_dotbox
[ "$status" -eq 0 ]
# run busybox detached in default root
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state of the busyboxes are only in their respective root path

View File

@ -12,12 +12,12 @@ function teardown() {
}
@test "runc start" {
runc create --console /dev/pts/ptmx test_busybox1
runc create --console-socket $CONSOLE_SOCKET test_busybox1
[ "$status" -eq 0 ]
testcontainer test_busybox1 created
runc create --console /dev/pts/ptmx test_busybox2
runc create --console-socket $CONSOLE_SOCKET test_busybox2
[ "$status" -eq 0 ]
testcontainer test_busybox2 created

View File

@ -13,7 +13,7 @@ function teardown() {
@test "runc run detached" {
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -29,7 +29,7 @@ function teardown() {
sed -i 's;"gid": 0;"gid": 100;g' config.json
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -40,7 +40,7 @@ function teardown() {
@test "runc run detached --pid-file" {
# run busybox detached
runc run --pid-file pid.txt -d --console /dev/pts/ptmx test_busybox
runc run --pid-file pid.txt -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
@ -64,7 +64,7 @@ function teardown() {
[ "$status" -eq 0 ]
# run busybox detached
runc run --pid-file pid.txt -d -b $BUSYBOX_BUNDLE --console /dev/pts/ptmx test_busybox
runc run --pid-file pid.txt -d -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state

View File

@ -16,7 +16,7 @@ function teardown() {
[ "$status" -ne 0 ]
# run busybox detached
runc run -d --console /dev/pts/ptmx test_busybox
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state

113
tests/integration/tty.bats Normal file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env bats
load helpers
function setup() {
teardown_busybox
setup_busybox
}
function teardown() {
teardown_busybox
}
@test "runc run [tty ptsname]" {
# Replace sh script with readlink.
sed -i 's|"sh"|"sh", "-c", "for file in /proc/self/fd/[012]; do readlink $file; done"|' config.json
# run busybox
runc run test_busybox
[ "$status" -eq 0 ]
[[ ${lines[0]} =~ /dev/pts/+ ]]
[[ ${lines[1]} =~ /dev/pts/+ ]]
[[ ${lines[2]} =~ /dev/pts/+ ]]
}
@test "runc run [tty owner]" {
# Replace sh script with stat.
sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json
# run busybox
runc run test_busybox
[ "$status" -eq 0 ]
[[ ${lines[0]} =~ 0 ]]
# This is set by the default config.json (it corresponds to the standard tty group).
[[ ${lines[1]} =~ 5 ]]
}
@test "runc run [tty owner] ({u,g}id != 0)" {
# replace "uid": 0 with "uid": 1000
# and do a similar thing for gid.
sed -i 's;"uid": 0;"uid": 1000;g' config.json
sed -i 's;"gid": 0;"gid": 100;g' config.json
# Replace sh script with stat.
sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json
# run busybox
runc run test_busybox
[ "$status" -eq 0 ]
[[ ${lines[0]} =~ 1000 ]]
# This is set by the default config.json (it corresponds to the standard tty group).
[[ ${lines[1]} =~ 5 ]]
}
@test "runc exec [tty ptsname]" {
# run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
wait_for_container 15 1 test_busybox
# make sure we're running
testcontainer test_busybox running
# run the exec
runc exec test_busybox sh -c 'for file in /proc/self/fd/[012]; do readlink $file; done'
[ "$status" -eq 0 ]
[[ ${lines[0]} =~ /dev/pts/+ ]]
[[ ${lines[1]} =~ /dev/pts/+ ]]
[[ ${lines[2]} =~ /dev/pts/+ ]]
}
@test "runc exec [tty owner]" {
# run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
wait_for_container 15 1 test_busybox
# make sure we're running
testcontainer test_busybox running
# run the exec
runc exec test_busybox sh -c 'stat -c %u:%g $(tty) | tr : \\n'
[ "$status" -eq 0 ]
[[ ${lines[0]} =~ 0 ]]
[[ ${lines[1]} =~ 5 ]]
}
@test "runc exec [tty owner] ({u,g}id != 0)" {
# replace "uid": 0 with "uid": 1000
# and do a similar thing for gid.
sed -i 's;"uid": 0;"uid": 1000;g' config.json
sed -i 's;"gid": 0;"gid": 100;g' config.json
# run busybox detached
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
[ "$status" -eq 0 ]
# check state
wait_for_container 15 1 test_busybox
# make sure we're running
testcontainer test_busybox running
# run the exec
runc exec test_busybox sh -c 'stat -c %u:%g $(tty) | tr : \\n'
[ "$status" -eq 0 ]
[[ ${lines[0]} =~ 1000 ]]
[[ ${lines[1]} =~ 5 ]]
}

View File

@ -51,7 +51,7 @@ function check_cgroup_value() {
@test "update" {
requires cgroups_kmem
# run a few busyboxes detached
runc run -d --console /dev/pts/ptmx test_update
runc run -d --console-socket $CONSOLE_SOCKET test_update
[ "$status" -eq 0 ]
wait_for_container 15 1 test_update

99
tty.go
View File

@ -7,11 +7,27 @@ import (
"io"
"os"
"sync"
"syscall"
"github.com/docker/docker/pkg/term"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
)
type tty struct {
console libcontainer.Console
state *term.State
closers []io.Closer
postStart []io.Closer
wg sync.WaitGroup
}
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
defer t.wg.Done()
io.Copy(w, r)
r.Close()
}
// setup standard pipes so that the TTY of the calling runc process
// is not inherited by the container.
func createStdioPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) {
@ -46,45 +62,56 @@ func createStdioPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, erro
return t, nil
}
func (t *tty) copyIO(w io.Writer, r io.ReadCloser) {
defer t.wg.Done()
io.Copy(w, r)
r.Close()
}
func createTty(p *libcontainer.Process, rootuid, rootgid int, consolePath string) (*tty, error) {
if consolePath != "" {
if err := p.ConsoleFromPath(consolePath); err != nil {
return nil, err
func dupStdio(process *libcontainer.Process, rootuid, rootgid int) error {
process.Stdin = os.Stdin
process.Stdout = os.Stdout
process.Stderr = os.Stderr
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stdout.Fd(),
os.Stderr.Fd(),
} {
if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
return err
}
return &tty{}, nil
}
console, err := p.NewConsole(rootuid, rootgid)
if err != nil {
return nil, err
}
go io.Copy(console, os.Stdin)
go io.Copy(os.Stdout, console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
if err != nil {
return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err)
}
return &tty{
console: console,
state: state,
closers: []io.Closer{
console,
},
}, nil
return nil
}
type tty struct {
console libcontainer.Console
state *term.State
closers []io.Closer
postStart []io.Closer
wg sync.WaitGroup
func (t *tty) recvtty(process *libcontainer.Process, detach bool) error {
console, err := process.GetConsole()
if err != nil {
return err
}
if !detach {
go io.Copy(console, os.Stdin)
t.wg.Add(1)
go t.copyIO(os.Stdout, console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
if err != nil {
return fmt.Errorf("failed to set the terminal from the stdin: %v", err)
}
t.state = state
}
t.console = console
t.closers = []io.Closer{console}
return nil
}
func (t *tty) sendtty(socket *os.File, ti *libcontainer.TerminalInfo) error {
if t.console == nil {
return fmt.Errorf("tty.console not set")
}
// Create a fake file to contain the terminal info.
console := os.NewFile(t.console.File().Fd(), ti.String())
if err := utils.SendFd(socket, console); err != nil {
return err
}
return nil
}
// ClosePostStart closes any fds that are provided to the container and dup2'd
@ -122,5 +149,5 @@ func (t *tty) resize() error {
if err != nil {
return err
}
return term.SetWinsize(t.console.Fd(), ws)
return term.SetWinsize(t.console.File().Fd(), ws)
}

View File

@ -5,6 +5,7 @@ package main
import (
"errors"
"fmt"
"net"
"os"
"path/filepath"
"strconv"
@ -94,22 +95,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
return lp, nil
}
func dupStdio(process *libcontainer.Process, rootuid, rootgid int) error {
process.Stdin = os.Stdin
process.Stdout = os.Stdout
process.Stderr = os.Stderr
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stdout.Fd(),
os.Stderr.Fd(),
} {
if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
return err
}
}
return nil
}
// If systemd is supporting sd_notify protocol, this function will add support
// for sd_notify protocol from within the container.
func setupSdNotify(spec *specs.Spec, notifySocket string) {
@ -123,23 +108,27 @@ func destroy(container libcontainer.Container) {
}
}
// setupIO sets the proper IO on the process depending on the configuration
// If there is a nil error then there must be a non nil tty returned
func setupIO(process *libcontainer.Process, rootuid, rootgid int, console string, createTTY, detach bool) (*tty, error) {
// detach and createTty will not work unless a console path is passed
// so error out here before changing any terminal settings
if createTTY && detach && console == "" {
return nil, fmt.Errorf("cannot allocate tty if runc will detach")
}
// setupIO modifies the given process config according to the options.
func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool) (*tty, error) {
// This is entirely handled by recvtty.
if createTTY {
return createTty(process, rootuid, rootgid, console)
process.Stdin = nil
process.Stdout = nil
process.Stderr = nil
return &tty{}, nil
}
// When we detach, we just dup over stdio and call it a day. There's no
// requirement that we set up anything nice for our caller or the
// container.
if detach {
if err := dupStdio(process, rootuid, rootgid); err != nil {
return nil, err
}
return &tty{}, nil
}
// XXX: This doesn't sit right with me. It's ugly.
return createStdioPipes(process, rootuid, rootgid)
}
@ -192,11 +181,15 @@ type runner struct {
detach bool
listenFDs []*os.File
pidFile string
console string
consoleSocket string
container libcontainer.Container
create bool
}
func (r *runner) terminalinfo() *libcontainer.TerminalInfo {
return libcontainer.NewTerminalInfo(r.container.ID())
}
func (r *runner) run(config *specs.Process) (int, error) {
process, err := newProcess(*config)
if err != nil {
@ -207,31 +200,90 @@ func (r *runner) run(config *specs.Process) (int, error) {
process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1")
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
}
rootuid, err := r.container.Config().HostUID()
if err != nil {
r.destroy()
return -1, err
}
rootgid, err := r.container.Config().HostGID()
if err != nil {
r.destroy()
return -1, err
}
tty, err := setupIO(process, rootuid, rootgid, r.console, config.Terminal, r.detach || r.create)
if err != nil {
detach := r.detach || r.create
// Check command-line for sanity.
if detach && config.Terminal && r.consoleSocket == "" {
r.destroy()
return -1, err
return -1, fmt.Errorf("cannot allocate tty if runc will detach without setting console socket")
}
handler := newSignalHandler(tty, r.enableSubreaper)
// XXX: Should we change this?
if (!detach || !config.Terminal) && r.consoleSocket != "" {
r.destroy()
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
}
startFn := r.container.Start
if !r.create {
startFn = r.container.Run
}
defer tty.Close()
// Setting up IO is a two stage process. We need to modify process to deal
// with detaching containers, and then we get a tty after the container has
// started.
handler := newSignalHandler(r.enableSubreaper)
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach)
if err != nil {
r.destroy()
return -1, err
}
if err := startFn(process); err != nil {
r.destroy()
return -1, err
}
if config.Terminal {
if err := tty.recvtty(process, r.detach || r.create); err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
}
defer tty.Close()
if config.Terminal && detach {
conn, err := net.Dial("unix", r.consoleSocket)
if err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
defer conn.Close()
unixconn, ok := conn.(*net.UnixConn)
if !ok {
r.terminate(process)
r.destroy()
return -1, fmt.Errorf("casting to UnixConn failed")
}
socket, err := unixconn.File()
if err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
defer socket.Close()
err = tty.sendtty(socket, r.terminalinfo())
if err != nil {
r.terminate(process)
r.destroy()
return -1, err
}
}
if err := tty.ClosePostStart(); err != nil {
r.terminate(process)
r.destroy()
@ -244,10 +296,10 @@ func (r *runner) run(config *specs.Process) (int, error) {
return -1, err
}
}
if r.detach || r.create {
if detach {
return 0, nil
}
status, err := handler.forward(process)
status, err := handler.forward(process, tty)
if err != nil {
r.terminate(process)
}
@ -298,7 +350,7 @@ func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, e
shouldDestroy: true,
container: container,
listenFDs: listenFDs,
console: context.String("console"),
consoleSocket: context.String("console-socket"),
detach: context.Bool("detach"),
pidFile: context.String("pid-file"),
create: create,