Update console and mount handling for user namespaces

This updates the console handling to chown the console on creation to
the root user within the container.

This also moves mount setup from the userns sidecar process into the
main init processes. Devices are first created with mknod; if that
fails with EPERM, the device is bind mounted from the host into the
container instead.  This prevents the access issues seen when the
sidecar process mknods the device for the user namespace, which
returned an EPERM when writing to /dev/null.

This also adds some error handling for init processes and nsinit updates
with added flags for testing and other functions.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2015-02-17 21:37:02 -08:00
parent cc42996625
commit 339edce03e
8 changed files with 178 additions and 92 deletions

View File

@ -12,13 +12,9 @@ import (
"github.com/docker/libcontainer/label"
)
const (
containerConsolePath string = "/dev/console"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole() (Console, error) {
func NewConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
@ -30,6 +26,12 @@ func NewConsole() (Console, error) {
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
@ -78,16 +80,10 @@ func (c *linuxConsole) Close() error {
func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := os.Chmod(c.slavePath, 0600); err != nil {
return err
}
if err := os.Chown(c.slavePath, uid, gid); err != nil {
return err
}
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, containerConsolePath)
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err

View File

@ -35,7 +35,7 @@ var baseMounts = []*configs.Mount{
Destination: "/dev/pts",
Device: "devpts",
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=620,gid=5",
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
}
@ -43,32 +43,29 @@ var baseMounts = []*configs.Mount{
// new mount namespace.
func setupRootfs(config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return err
return newSystemError(err)
}
for _, m := range append(baseMounts, config.Mounts...) {
if err := mount(m, config.Rootfs, config.MountLabel); err != nil {
return err
return newSystemError(err)
}
}
if err := createDevices(config); err != nil {
return err
return newSystemError(err)
}
if err := setupPtmx(config); err != nil {
return err
return newSystemError(err)
}
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
// Re-open them inside this namespace.
// FIXME: Need to fix this for user namespaces.
if !config.Namespaces.Contains(configs.NEWUSER) {
if err := reOpenDevNull(config.Rootfs); err != nil {
return err
}
// re-open them inside this namespace.
if err := reOpenDevNull(config.Rootfs); err != nil {
return newSystemError(err)
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return err
return newSystemError(err)
}
if err := syscall.Chdir(config.Rootfs); err != nil {
return err
return newSystemError(err)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
@ -76,11 +73,11 @@ func setupRootfs(config *configs.Config) (err error) {
err = pivotRoot(config.Rootfs, config.PivotDir)
}
if err != nil {
return err
return newSystemError(err)
}
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return err
return newSystemError(err)
}
}
syscall.Umask(0022)
@ -209,6 +206,28 @@ func createDeviceNode(rootfs string, node *configs.Device) error {
if err := os.MkdirAll(parent, 0755); err != nil {
return err
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
}
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
if err == syscall.EPERM {
f, err := os.Create(dest)
if err != nil {
if os.IsExist(err) {
return nil
}
return err
}
f.Close()
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
}
}
return nil
}
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c':
@ -218,13 +237,10 @@ func createDeviceNode(rootfs string, node *configs.Device) error {
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
return fmt.Errorf("mknod %s %s", node.Path, err)
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid)
}
return nil
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
}
func prepareRoot(config *configs.Config) error {
@ -251,16 +267,8 @@ func setupPtmx(config *configs.Config) error {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if config.Console != "" {
uid, err := config.HostUID()
if err != nil {
return err
}
gid, err := config.HostGID()
if err != nil {
return err
}
console := newConsoleFromPath(config.Console)
return console.mount(config.Rootfs, config.MountLabel, uid, gid)
return console.mount(config.Rootfs, config.MountLabel, 0, 0)
}
return nil
}

View File

@ -6,6 +6,7 @@ import (
"syscall"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/system"
)
@ -17,63 +18,69 @@ type linuxUsernsInit struct {
func (l *linuxUsernsInit) Init() error {
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
return err
return newSystemError(err)
}
consolePath := l.config.Config.Console
if consolePath != "" {
// We use the containerConsolePath here, because the console has already been
// setup by the side car process for the user namespace scenario.
console := newConsoleFromPath(containerConsolePath)
console := newConsoleFromPath(consolePath)
if err := console.dupStdio(); err != nil {
return err
return newSystemError(err)
}
}
if _, err := syscall.Setsid(); err != nil {
return err
return newSystemError(err)
}
if consolePath != "" {
if err := system.Setctty(); err != nil {
return err
return newSystemError(err)
}
}
if l.config.Cwd == "" {
l.config.Cwd = "/"
}
if err := setupRlimits(l.config.Config); err != nil {
return err
return newSystemError(err)
}
// InitializeMountNamespace() can be executed only for a new mount namespace
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config); err != nil {
return newSystemError(err)
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
return err
return newSystemError(err)
}
}
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
return err
return newSystemError(err)
}
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
return err
return newSystemError(err)
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := remountReadonly(path); err != nil {
return err
return newSystemError(err)
}
}
for _, path := range l.config.Config.MaskPaths {
if err := maskFile(path); err != nil {
return err
return newSystemError(err)
}
}
pdeath, err := system.GetParentDeathSignal()
if err != nil {
return err
return newSystemError(err)
}
if err := finalizeNamespace(l.config); err != nil {
return err
return newSystemError(err)
}
// finalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := pdeath.Restore(); err != nil {
return err
return newSystemError(err)
}
// Signal self if parent is already dead. Does nothing if running in a new
// PID namespace, as Getppid will always return 0.

View File

@ -2,11 +2,6 @@
package libcontainer
import (
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/label"
)
// linuxUsernsSideCar is run to setup mounts and networking related operations
// for a user namespace enabled process as a user namespace root doesn't
// have permissions to perform these operations.
@ -24,12 +19,5 @@ func (l *linuxUsernsSideCar) Init() error {
if err := setupRoute(l.config.Config); err != nil {
return err
}
label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config); err != nil {
return err
}
}
return nil
}

View File

@ -7,10 +7,13 @@ import (
"math"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/utils"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
@ -28,7 +31,17 @@ var createFlags = []cli.Flag{
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
}
var configCommand = cli.Command{
@ -72,10 +85,11 @@ func modify(config *configs.Config, context *cli.Context) {
config.AppArmorProfile = context.String("apparmor-profile")
config.ProcessLabel = context.String("process-label")
config.MountLabel = context.String("mount-label")
config.Rootfs = context.String("rootfs")
userns_uid := context.Int("userns-root-uid")
if userns_uid != 0 {
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
config.Namespaces.Add(configs.NEWUSER, "")
config.UidMappings = []configs.IDMap{
{ContainerID: 0, HostID: userns_uid, Size: 1},
{ContainerID: 1, HostID: 1, Size: userns_uid - 1},
@ -86,6 +100,84 @@ func modify(config *configs.Config, context *cli.Context) {
{ContainerID: 1, HostID: 1, Size: userns_uid - 1},
{ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid},
}
for _, node := range config.Devices {
node.Uid = uint32(userns_uid)
node.Gid = uint32(userns_uid)
}
}
for _, rawBind := range context.StringSlice("bind") {
mount := &configs.Mount{
Device: "bind",
Flags: syscall.MS_BIND | syscall.MS_REC,
}
parts := strings.SplitN(rawBind, ":", 3)
switch len(parts) {
default:
logrus.Fatalf("invalid bind mount %s", rawBind)
case 2:
mount.Source, mount.Destination = parts[0], parts[1]
case 3:
mount.Source, mount.Destination = parts[0], parts[1]
switch parts[2] {
case "ro":
mount.Flags |= syscall.MS_RDONLY
case "rw":
default:
logrus.Fatalf("invalid bind mount mode %s", parts[2])
}
}
config.Mounts = append(config.Mounts, mount)
}
for _, tmpfs := range context.StringSlice("tmpfs") {
config.Mounts = append(config.Mounts, &configs.Mount{
Device: "tmpfs",
Destination: tmpfs,
Flags: syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV,
})
}
for flag, value := range map[string]configs.NamespaceType{
"net": configs.NEWNET,
"mnt": configs.NEWNS,
"pid": configs.NEWPID,
"ipc": configs.NEWIPC,
"uts": configs.NEWUTS,
} {
switch v := context.String(flag); v {
case "host":
config.Namespaces.Remove(value)
case "", "private":
if !config.Namespaces.Contains(value) {
config.Namespaces.Add(value, "")
}
if v == "net" {
config.Networks = []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
}
}
default:
config.Namespaces.Remove(value)
config.Namespaces.Add(value, v)
}
}
if bridge := context.String("veth-bridge"); bridge != "" {
hostName, err := utils.GenerateRandomName("veth", 7)
if err != nil {
logrus.Fatal(err)
}
network := &configs.Network{
Type: "veth",
Name: "eth0",
Bridge: bridge,
Address: context.String("veth-address"),
Gateway: context.String("veth-gateway"),
Mtu: context.Int("veth-mtu"),
HostInterfaceName: hostName,
}
config.Networks = append(config.Networks, network)
}
}
@ -155,13 +247,6 @@ func getTemplate() *configs.Config {
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,

View File

@ -5,7 +5,6 @@ import (
"os/signal"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/utils"
@ -27,12 +26,12 @@ var execCommand = cli.Command{
cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"},
cli.BoolFlag{Name: "create", Usage: "create the container's configuration on the fly with arguments"},
cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"},
cli.StringFlag{Name: "cwd", Value: "", Usage: "set the current working dir"},
cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"},
}, createFlags...),
}
func execAction(context *cli.Context) {
entry := log.WithField("parent", "nsinit")
factory, err := loadFactory(context)
if err != nil {
fatal(err)
@ -44,7 +43,6 @@ func execAction(context *cli.Context) {
created := false
container, err := factory.Load(context.String("id"))
if err != nil {
entry.Debug("creating container")
config, err := loadConfig(context)
if err != nil {
tty.Close()
@ -53,7 +51,6 @@ func execAction(context *cli.Context) {
if tty.console != nil {
config.Console = tty.console.Path()
}
created = true
if container, err = factory.Create(context.String("id"), config); err != nil {
tty.Close()
@ -65,11 +62,14 @@ func execAction(context *cli.Context) {
Args: context.Args(),
Env: context.StringSlice("env"),
User: context.String("user"),
Cwd: context.String("cwd"),
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
tty.attach(process)
if err := tty.attach(process); err != nil {
fatal(err)
}
pid, err := container.Start(process)
if err != nil {
tty.Close()

View File

@ -18,10 +18,10 @@ var initCommand = cli.Command{
runtime.LockOSThread()
factory, err := libcontainer.New("")
if err != nil {
log.Fatal(err)
fatal(err)
}
if err := factory.StartInitialization(3); err != nil {
log.Fatal(err)
fatal(err)
}
panic("This line should never been executed")
},

View File

@ -11,19 +11,13 @@ import (
func newTty(context *cli.Context) (*tty, error) {
if context.Bool("tty") {
console, err := libcontainer.NewConsole()
if err != nil {
return nil, err
}
go io.Copy(console, os.Stdin)
go io.Copy(os.Stdout, console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
rootid := context.Int("userns-root-uid")
console, err := libcontainer.NewConsole(rootid, rootid)
if err != nil {
return nil, err
}
return &tty{
console: console,
state: state,
}, nil
}
return &tty{}, nil
@ -44,12 +38,20 @@ func (t *tty) Close() error {
return nil
}
func (t *tty) attach(process *libcontainer.Process) {
func (t *tty) attach(process *libcontainer.Process) error {
if t.console != nil {
go io.Copy(t.console, os.Stdin)
go io.Copy(os.Stdout, t.console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
if err != nil {
return err
}
t.state = state
process.Stderr = nil
process.Stdout = nil
process.Stdin = nil
}
return nil
}
func (t *tty) resize() error {