From f103de57ec89137d86b77584f676b760ec7c7ba4 Mon Sep 17 00:00:00 2001
From: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
Date: Sat, 13 Jan 2018 16:39:28 +0900
Subject: [PATCH] main: support rootless mode in userns

Running rootless containers inside a user namespace (userns) is useful
for mounting filesystems (e.g. overlay) with a mapped euid of 0, but
without actual root privileges.

Usage: (Note that `unshare --mount` requires `--map-root-user`)

  user$ mkdir lower upper work rootfs
  user$ curl http://dl-cdn.alpinelinux.org/alpine/v3.7/releases/x86_64/alpine-minirootfs-3.7.0-x86_64.tar.gz | tar Cxz ./lower || ( true; echo "mknod errors were ignored" )
  user$ unshare --mount --map-root-user
  mappedroot# runc spec --rootless
  mappedroot# sed -i 's/"readonly": true/"readonly": false/g' config.json
  mappedroot# mount -t overlay -o lowerdir=./lower,upperdir=./upper,workdir=./work overlayfs ./rootfs
  mappedroot# runc run foo

Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
---
 checkpoint.go                      |  6 +++++-
 libcontainer/system/linux.go       | 17 +++++++++++++++++
 libcontainer/system/unsupported.go | 12 +++++++++++-
 main.go                            | 11 ++++++++++-
 ps.go                              |  6 +++++-
 restore.go                         |  6 +++++-
 utils.go                           | 11 +++++++++++
 utils_linux.go                     | 27 +++++++++++++++++++++++----
 8 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/checkpoint.go b/checkpoint.go
index e7c12674..61e58351 100644
--- a/checkpoint.go
+++ b/checkpoint.go
@@ -44,7 +44,11 @@ checkpointed.`,
 			return err
 		}
 		// XXX: Currently this is untested with rootless containers.
-		if isRootless() {
+		rootless, err := isRootless(context)
+		if err != nil {
+			return err
+		}
+		if rootless {
 			return fmt.Errorf("runc checkpoint requires root")
 		}
 
diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go
index 70260ae2..8d353d98 100644
--- a/libcontainer/system/linux.go
+++ b/libcontainer/system/linux.go
@@ -3,6 +3,7 @@
 package system
 
 import (
+	"os"
 	"os/exec"
 	"syscall" // only for exec
 	"unsafe"
@@ -121,6 +122,22 @@ func UIDMapInUserNS(uidmap []user.IDMap) bool {
 	return true
 }
 
+// GetParentNSeuid translates the caller's euid into the parent user namespace;
+// the euid is returned as-is when the map is unreadable or does not cover it
+func GetParentNSeuid() int {
+	uid := os.Geteuid()
+	mappings, err := user.CurrentProcessUIDMap()
+	if err != nil {
+		return uid // the kernel-provided map file only exists if user namespaces are supported
+	}
+	for _, m := range mappings {
+		if off := uid - m.ID; off >= 0 && off < m.Count {
+			return m.ParentID + off
+		}
+	}
+	return uid
+}
+
 // SetSubreaper sets the value i as the subreaper setting for the calling process
 func SetSubreaper(i int) error {
 	return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
diff --git a/libcontainer/system/unsupported.go b/libcontainer/system/unsupported.go
index a8bea302..b94be74a 100644
--- a/libcontainer/system/unsupported.go
+++ b/libcontainer/system/unsupported.go
@@ -2,7 +2,11 @@
 
 package system
 
-import "github.com/opencontainers/runc/libcontainer/user"
+import (
+	"os"
+
+	"github.com/opencontainers/runc/libcontainer/user"
+)
 
 // RunningInUserNS is a stub for non-Linux systems
 // Always returns false
@@ -15,3 +19,9 @@ func RunningInUserNS() bool {
 func UIDMapInUserNS(uidmap []user.IDMap) bool {
 	return false
 }
+
+// GetParentNSeuid is a stub for non-Linux systems
+// Always returns os.Geteuid() unchanged, since no uid mapping is applied here
+func GetParentNSeuid() int {
+	return os.Geteuid()
+}
diff --git a/main.go b/main.go
index 1b9728c5..278399a5 100644
--- a/main.go
+++ b/main.go
@@ -63,7 +63,11 @@ func main() {
 	app.Version = strings.Join(v, "\n")
 
 	root := "/run/runc"
-	if os.Geteuid() != 0 {
+	rootless, err := isRootless(nil)
+	if err != nil {
+		fatal(err)
+	}
+	if rootless {
 		runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
 		if runtimeDir != "" {
 			root = runtimeDir + "/runc"
@@ -108,6 +112,11 @@ func main() {
 			Name:  "systemd-cgroup",
 			Usage: "enable systemd cgroup support, expects cgroupsPath to be of form \"slice:prefix:name\" for e.g. \"system.slice:runc:434234\"",
 		},
+		cli.StringFlag{
+			Name:  "rootless",
+			Value: "auto",
+			Usage: "enable rootless mode ('true', 'false', or 'auto')",
+		},
 	}
 	app.Commands = []cli.Command{
 		checkpointCommand,
diff --git a/ps.go b/ps.go
index 6e0c7376..eec9d5f5 100644
--- a/ps.go
+++ b/ps.go
@@ -29,7 +29,11 @@ var psCommand = cli.Command{
 			return err
 		}
 		// XXX: Currently not supported with rootless containers.
-		if isRootless() {
+		rootless, err := isRootless(context)
+		if err != nil {
+			return err
+		}
+		if rootless {
 			return fmt.Errorf("runc ps requires root")
 		}
 
diff --git a/restore.go b/restore.go
index 362be62d..724157da 100644
--- a/restore.go
+++ b/restore.go
@@ -96,7 +96,11 @@ using the runc checkpoint command.`,
 			return err
 		}
 		// XXX: Currently this is untested with rootless containers.
-		if isRootless() {
+		rootless, err := isRootless(context)
+		if err != nil {
+			return err
+		}
+		if rootless {
 			return fmt.Errorf("runc restore requires root")
 		}
 
diff --git a/utils.go b/utils.go
index 8ed1a88e..5165336f 100644
--- a/utils.go
+++ b/utils.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"strconv"
+	"strings"
 
 	"github.com/opencontainers/runtime-spec/specs-go"
 
@@ -81,3 +83,12 @@ func revisePidFile(context *cli.Context) error {
 	}
 	return context.Set("pid-file", pidFile)
 }
+
+// parseBoolOrAuto parses s via strconv.ParseBool; "" and "auto" (any case) yield (nil, nil)
+func parseBoolOrAuto(s string) (*bool, error) {
+	if s == "" || strings.EqualFold(s, "auto") {
+		return nil, nil
+	}
+	v, err := strconv.ParseBool(s)
+	return &v, err
+}
diff --git a/utils_linux.go b/utils_linux.go
index 84731c84..2b7ddd58 100644
--- a/utils_linux.go
+++ b/utils_linux.go
@@ -16,6 +16,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/specconv"
+	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/utils"
 	"github.com/opencontainers/runtime-spec/specs-go"
 
@@ -217,19 +218,37 @@ func createPidFile(path string, process *libcontainer.Process) error {
 	return os.Rename(tmpName, path)
 }
 
-// XXX: Currently we autodetect rootless mode.
-func isRootless() bool {
-	return os.Geteuid() != 0
+// isRootless reports whether rootless mode applies. An explicit boolean in the
+// global "rootless" flag wins; with a nil context, or an empty/"auto" value,
+// the result is derived from the euid within the parent user namespace.
+func isRootless(context *cli.Context) (bool, error) {
+	if context != nil {
+		forced, err := parseBoolOrAuto(context.GlobalString("rootless"))
+		switch {
+		case err != nil:
+			return false, err
+		case forced != nil:
+			return *forced, nil
+		}
+	}
+	// os.Geteuid() == 0 may still require rootless mode, especially when
+	// running within userns, so system.GetParentNSeuid() is used instead.
+	// TODO(AkihiroSuda): how to support nested userns?
+	return system.GetParentNSeuid() != 0, nil
+}
 
 func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) {
+	rootless, err := isRootless(context)
+	if err != nil {
+		return nil, err
+	}
 	config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
 		CgroupName:       id,
 		UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
 		NoPivotRoot:      context.Bool("no-pivot"),
 		NoNewKeyring:     context.Bool("no-new-keyring"),
 		Spec:             spec,
-		Rootless:         isRootless(),
+		Rootless:         rootless,
 	})
 	if err != nil {
 		return nil, err