Merge pull request #385 from crosbymichael/userns-updates

Make usernamespaces work without sidecar process
This commit is contained in:
Mrunal Patel 2015-02-18 16:20:49 -08:00
commit d06a2dab9f
14 changed files with 184 additions and 266 deletions

View File

@ -57,11 +57,3 @@ type Error interface {
// Returns the error code for this error.
Code() ErrorCode
}
// initError is a minimal, JSON-serializable error payload. The message is
// emitted under the "message" key and omitted entirely when it is empty.
type initError struct {
	Message string `json:"message,omitempty"`
}

// Error satisfies the error interface by returning the stored message verbatim.
func (e initError) Error() string {
	return e.Message
}

View File

@ -11,8 +11,8 @@ import (
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Err }}
Message: {{.Err.Error}}
{{if .Message }}
Message: {{.Message}}
{{end}}
Frames:{{range $i, $frame := .Stack.Frames}}
---
@ -28,6 +28,7 @@ func newGenericError(err error, c ErrorCode) Error {
return &genericError{
Timestamp: time.Now(),
Err: err,
Message: err.Error(),
ECode: c,
Stack: stacktrace.Capture(1),
}
@ -41,6 +42,7 @@ func newSystemError(err error) Error {
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Message: err.Error(),
Stack: stacktrace.Capture(1),
}
}
@ -48,12 +50,13 @@ func newSystemError(err error) Error {
type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error
Err error `json:"-"`
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Err)
return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Message)
}
func (e *genericError) Code() ErrorCode {

View File

@ -12,13 +12,9 @@ import (
"github.com/docker/libcontainer/label"
)
const (
containerConsolePath string = "/dev/console"
)
// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole() (Console, error) {
func NewConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
@ -30,6 +26,12 @@ func NewConsole() (Console, error) {
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
@ -78,16 +80,10 @@ func (c *linuxConsole) Close() error {
func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := os.Chmod(c.slavePath, 0600); err != nil {
return err
}
if err := os.Chown(c.slavePath, uid, gid); err != nil {
return err
}
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, containerConsolePath)
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err

View File

@ -147,7 +147,6 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
if cmd.SysProcAttr.Credential == nil {
cmd.SysProcAttr.Credential = &syscall.Credential{}
}
t = "_LIBCONTAINER_INITTYPE=userns"
}
cmd.Env = append(cmd.Env, t)
cmd.SysProcAttr.Cloneflags = cloneFlags

View File

@ -166,9 +166,7 @@ func (l *LinuxFactory) StartInitialization(pipefd uintptr) (err error) {
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
if err := json.NewEncoder(pipe).Encode(initError{
Message: err.Error(),
}); err != nil {
if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil {
panic(err)
}
}

View File

@ -21,10 +21,8 @@ import (
type initType string
const (
initSetns initType = "setns"
initStandard initType = "standard"
initUserns initType = "userns"
initUsernsSetup initType = "userns_setup"
initSetns initType = "setns"
initStandard initType = "standard"
)
type pid struct {
@ -67,14 +65,6 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
return &linuxSetnsInit{
config: config,
}, nil
case initUserns:
return &linuxUsernsInit{
config: config,
}, nil
case initUsernsSetup:
return &linuxUsernsSideCar{
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
config: config,

View File

@ -4,13 +4,11 @@ package libcontainer
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/system"
)
@ -145,28 +143,12 @@ func (p *initProcess) start() error {
if err := p.createNetworkInterfaces(); err != nil {
return newSystemError(err)
}
// Start the setup process to setup the init process
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 {
parent, err := p.newUsernsSetupProcess()
if err != nil {
return newSystemError(err)
}
if err := parent.start(); err != nil {
if err := parent.terminate(); err != nil {
log.Warn(err)
}
return err
}
if _, err := parent.wait(); err != nil {
return newSystemError(err)
}
}
if err := p.sendConfig(); err != nil {
return newSystemError(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encountered
var ierr *initError
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
}
@ -229,26 +211,6 @@ func (p *initProcess) createNetworkInterfaces() error {
return nil
}
// newUsernsSetupProcess builds the parentProcess that re-runs p's command in
// "userns_setup" mode.
//
// The new command reuses p's argv and working directory, receives the child
// end of a freshly created pipe as its only extra file, and is told which
// process to act on through the _LIBCONTAINER_INITPID environment variable.
// The result is a *setnsProcess sharing p's config. If the pipe cannot be
// created, the failure is returned wrapped by newSystemError.
func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) {
	parentPipe, childPipe, err := newPipe()
	if err != nil {
		return nil, newSystemError(err)
	}
	cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...)
	cmd.ExtraFiles = []*os.File{childPipe}
	cmd.Dir = p.cmd.Dir
	cmd.Env = append(cmd.Env,
		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", p.pid()),
		// Constant value with no formatting verbs, so a plain literal is
		// used instead of fmt.Sprintf.
		"_LIBCONTAINER_INITTYPE=userns_setup",
	)
	return &setnsProcess{
		cmd:        cmd,
		childPipe:  childPipe,
		parentPipe: parentPipe,
		config:     p.config,
	}, nil
}
func (p *initProcess) signal(s os.Signal) error {
return p.cmd.Process.Signal(s)
}

View File

@ -35,7 +35,7 @@ var baseMounts = []*configs.Mount{
Destination: "/dev/pts",
Device: "devpts",
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=620,gid=5",
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
}
@ -43,32 +43,29 @@ var baseMounts = []*configs.Mount{
// new mount namespace.
func setupRootfs(config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return err
return newSystemError(err)
}
for _, m := range append(baseMounts, config.Mounts...) {
if err := mount(m, config.Rootfs, config.MountLabel); err != nil {
return err
return newSystemError(err)
}
}
if err := createDevices(config); err != nil {
return err
return newSystemError(err)
}
if err := setupPtmx(config); err != nil {
return err
return newSystemError(err)
}
// stdin, stdout and stderr could be pointing to /dev/null from parent namespace.
// Re-open them inside this namespace.
// FIXME: Need to fix this for user namespaces.
if !config.Namespaces.Contains(configs.NEWUSER) {
if err := reOpenDevNull(config.Rootfs); err != nil {
return err
}
// re-open them inside this namespace.
if err := reOpenDevNull(config.Rootfs); err != nil {
return newSystemError(err)
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return err
return newSystemError(err)
}
if err := syscall.Chdir(config.Rootfs); err != nil {
return err
return newSystemError(err)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
@ -76,11 +73,11 @@ func setupRootfs(config *configs.Config) (err error) {
err = pivotRoot(config.Rootfs, config.PivotDir)
}
if err != nil {
return err
return newSystemError(err)
}
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return err
return newSystemError(err)
}
}
syscall.Umask(0022)
@ -202,13 +199,32 @@ func createDevices(config *configs.Config) error {
// Creates the device node in the rootfs of the container.
func createDeviceNode(rootfs string, node *configs.Device) error {
var (
dest = filepath.Join(rootfs, node.Path)
parent = filepath.Dir(dest)
)
if err := os.MkdirAll(parent, 0755); err != nil {
dest := filepath.Join(rootfs, node.Path)
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
}
if err != syscall.EPERM {
return err
}
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
}
return nil
}
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c':
@ -218,13 +234,10 @@ func createDeviceNode(rootfs string, node *configs.Device) error {
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
return fmt.Errorf("mknod %s %s", node.Path, err)
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid)
}
return nil
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
}
func prepareRoot(config *configs.Config) error {
@ -251,16 +264,8 @@ func setupPtmx(config *configs.Config) error {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if config.Console != "" {
uid, err := config.HostUID()
if err != nil {
return err
}
gid, err := config.HostGID()
if err != nil {
return err
}
console := newConsoleFromPath(config.Console)
return console.mount(config.Rootfs, config.MountLabel, uid, gid)
return console.mount(config.Rootfs, config.MountLabel, 0, 0)
}
return nil
}

View File

@ -1,84 +0,0 @@
// +build linux
package libcontainer
import (
"syscall"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/system"
)
// linuxUsernsInit carries the initConfig consumed by its Init method, which
// handles the user namespace scenario.
type linuxUsernsInit struct {
// config is the init configuration read throughout Init.
config *initConfig
}
// Init runs the initialization sequence for the user namespace scenario and
// finishes by calling system.Execv with the configured args and environment.
//
// The steps run strictly in this order: join existing namespaces, call
// dupStdio on the console (only when a console path is configured), create a
// new session, set the controlling tty (console only), apply rlimits, set the
// hostname (when non-empty), apply the AppArmor profile and process label,
// remount the read-only paths, mask the masked paths, finalize the namespace
// and restore the parent death signal. The first failing step aborts the
// sequence and its error is returned unchanged.
func (l *linuxUsernsInit) Init() error {
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
return err
}
consolePath := l.config.Config.Console
if consolePath != "" {
// We use the containerConsolePath here, because the console has already been
// setup by the side car process for the user namespace scenario.
console := newConsoleFromPath(containerConsolePath)
if err := console.dupStdio(); err != nil {
return err
}
}
// Create a new session; Setctty below is only attempted when a console is
// configured.
if _, err := syscall.Setsid(); err != nil {
return err
}
if consolePath != "" {
if err := system.Setctty(); err != nil {
return err
}
}
// Default an unset Cwd to "/".
if l.config.Cwd == "" {
l.config.Cwd = "/"
}
if err := setupRlimits(l.config.Config); err != nil {
return err
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
return err
}
}
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
return err
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := remountReadonly(path); err != nil {
return err
}
}
for _, path := range l.config.Config.MaskPaths {
if err := maskFile(path); err != nil {
return err
}
}
// Capture the parent death signal before finalizeNamespace so it can be
// restored afterwards (see the comment below).
pdeath, err := system.GetParentDeathSignal()
if err != nil {
return err
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
// finalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := pdeath.Restore(); err != nil {
return err
}
// Signal self if parent is already dead. Does nothing if running in a new
// PID namespace, as Getppid will always return 0.
if syscall.Getppid() == 1 {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
// NOTE(review): Args[0] is indexed without a length check; an empty Args
// would panic here — confirm callers always supply a command.
return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env)
}

View File

@ -1,35 +0,0 @@
// +build linux
package libcontainer
import (
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/label"
)
// linuxUsernsSideCar is run to set up mounts and networking related operations
// for a user namespace enabled process, as a user namespace root doesn't
// have permissions to perform these operations.
// The setup process joins all the namespaces of the user namespace enabled init
// except the user namespace, so it runs as root in the root user namespace
// to perform these operations.
type linuxUsernsSideCar struct {
// config is the init configuration read by Init.
config *initConfig
}
// Init configures networking and routes, initializes labeling, and then sets
// up the rootfs when the configuration requests a new mount namespace. The
// first error encountered is returned as-is.
func (s *linuxUsernsSideCar) Init() error {
	cfg := s.config.Config
	if err := setupNetwork(s.config); err != nil {
		return err
	}
	if err := setupRoute(cfg); err != nil {
		return err
	}
	label.Init()
	// The rootfs is only set up when a new mount namespace was requested.
	if !cfg.Namespaces.Contains(configs.NEWNS) {
		return nil
	}
	return setupRootfs(cfg)
}

View File

@ -7,10 +7,13 @@ import (
"math"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/utils"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
@ -28,7 +31,18 @@ var createFlags = []cli.Flag{
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
}
var configCommand = cli.Command{
@ -72,10 +86,11 @@ func modify(config *configs.Config, context *cli.Context) {
config.AppArmorProfile = context.String("apparmor-profile")
config.ProcessLabel = context.String("process-label")
config.MountLabel = context.String("mount-label")
config.Rootfs = context.String("rootfs")
userns_uid := context.Int("userns-root-uid")
if userns_uid != 0 {
config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER})
config.Namespaces.Add(configs.NEWUSER, "")
config.UidMappings = []configs.IDMap{
{ContainerID: 0, HostID: userns_uid, Size: 1},
{ContainerID: 1, HostID: 1, Size: userns_uid - 1},
@ -86,6 +101,87 @@ func modify(config *configs.Config, context *cli.Context) {
{ContainerID: 1, HostID: 1, Size: userns_uid - 1},
{ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid},
}
for _, node := range config.Devices {
node.Uid = uint32(userns_uid)
node.Gid = uint32(userns_uid)
}
}
for _, rawBind := range context.StringSlice("bind") {
mount := &configs.Mount{
Device: "bind",
Flags: syscall.MS_BIND | syscall.MS_REC,
}
parts := strings.SplitN(rawBind, ":", 3)
switch len(parts) {
default:
logrus.Fatalf("invalid bind mount %s", rawBind)
case 2:
mount.Source, mount.Destination = parts[0], parts[1]
case 3:
mount.Source, mount.Destination = parts[0], parts[1]
switch parts[2] {
case "ro":
mount.Flags |= syscall.MS_RDONLY
case "rw":
default:
logrus.Fatalf("invalid bind mount mode %s", parts[2])
}
}
config.Mounts = append(config.Mounts, mount)
}
for _, tmpfs := range context.StringSlice("tmpfs") {
config.Mounts = append(config.Mounts, &configs.Mount{
Device: "tmpfs",
Destination: tmpfs,
Flags: syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV,
})
}
for flag, value := range map[string]configs.NamespaceType{
"net": configs.NEWNET,
"mnt": configs.NEWNS,
"pid": configs.NEWPID,
"ipc": configs.NEWIPC,
"uts": configs.NEWUTS,
} {
switch v := context.String(flag); v {
case "host":
config.Namespaces.Remove(value)
case "", "private":
if !config.Namespaces.Contains(value) {
config.Namespaces.Add(value, "")
}
if flag == "net" {
config.Networks = []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
}
}
if flag == "uts" {
config.Hostname = context.String("hostname")
}
default:
config.Namespaces.Remove(value)
config.Namespaces.Add(value, v)
}
}
if bridge := context.String("veth-bridge"); bridge != "" {
hostName, err := utils.GenerateRandomName("veth", 7)
if err != nil {
logrus.Fatal(err)
}
network := &configs.Network{
Type: "veth",
Name: "eth0",
Bridge: bridge,
Address: context.String("veth-address"),
Gateway: context.String("veth-gateway"),
Mtu: context.Int("veth-mtu"),
HostInterfaceName: hostName,
}
config.Networks = append(config.Networks, network)
}
}
@ -126,8 +222,7 @@ func getTemplate() *configs.Config {
AllowAllDevices: false,
AllowedDevices: configs.DefaultAllowedDevices,
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "nsinit",
Devices: configs.DefaultAutoCreatedDevices,
MaskPaths: []string{
"/proc/kcore",
},
@ -155,13 +250,6 @@ func getTemplate() *configs.Config {
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,

View File

@ -5,7 +5,6 @@ import (
"os/signal"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/utils"
@ -27,33 +26,34 @@ var execCommand = cli.Command{
cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"},
cli.BoolFlag{Name: "create", Usage: "create the container's configuration on the fly with arguments"},
cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"},
cli.StringFlag{Name: "cwd", Value: "", Usage: "set the current working dir"},
cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"},
}, createFlags...),
}
func execAction(context *cli.Context) {
entry := log.WithField("parent", "nsinit")
factory, err := loadFactory(context)
if err != nil {
fatal(err)
}
tty, err := newTty(context)
config, err := loadConfig(context)
if err != nil {
fatal(err)
}
rootuid, err := config.HostUID()
if err != nil {
fatal(err)
}
tty, err := newTty(context, rootuid)
if err != nil {
fatal(err)
}
created := false
container, err := factory.Load(context.String("id"))
if err != nil {
entry.Debug("creating container")
config, err := loadConfig(context)
if err != nil {
tty.Close()
fatal(err)
}
if tty.console != nil {
config.Console = tty.console.Path()
}
created = true
if container, err = factory.Create(context.String("id"), config); err != nil {
tty.Close()
@ -65,11 +65,14 @@ func execAction(context *cli.Context) {
Args: context.Args(),
Env: context.StringSlice("env"),
User: context.String("user"),
Cwd: context.String("cwd"),
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
tty.attach(process)
if err := tty.attach(process); err != nil {
fatal(err)
}
pid, err := container.Start(process)
if err != nil {
tty.Close()

View File

@ -18,10 +18,10 @@ var initCommand = cli.Command{
runtime.LockOSThread()
factory, err := libcontainer.New("")
if err != nil {
log.Fatal(err)
fatal(err)
}
if err := factory.StartInitialization(3); err != nil {
log.Fatal(err)
fatal(err)
}
panic("This line should never been executed")
},

View File

@ -9,21 +9,14 @@ import (
"github.com/docker/libcontainer"
)
func newTty(context *cli.Context) (*tty, error) {
func newTty(context *cli.Context, rootuid int) (*tty, error) {
if context.Bool("tty") {
console, err := libcontainer.NewConsole()
if err != nil {
return nil, err
}
go io.Copy(console, os.Stdin)
go io.Copy(os.Stdout, console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
console, err := libcontainer.NewConsole(rootuid, rootuid)
if err != nil {
return nil, err
}
return &tty{
console: console,
state: state,
}, nil
}
return &tty{}, nil
@ -44,12 +37,20 @@ func (t *tty) Close() error {
return nil
}
func (t *tty) attach(process *libcontainer.Process) {
func (t *tty) attach(process *libcontainer.Process) error {
if t.console != nil {
go io.Copy(t.console, os.Stdin)
go io.Copy(os.Stdout, t.console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
if err != nil {
return err
}
t.state = state
process.Stderr = nil
process.Stdout = nil
process.Stdin = nil
}
return nil
}
func (t *tty) resize() error {