From fdf85e35b3d4e45666b8fdb9d5f2d4dd5f911d28 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 20 Jul 2017 17:44:06 +0200 Subject: [PATCH 1/8] main: honor XDG_RUNTIME_DIR for rootless containers Signed-off-by: Giuseppe Scrivano --- main.go | 11 ++++++++++- man/runc.8.md | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 3eb2a3c1..4642335e 100644 --- a/main.go +++ b/main.go @@ -61,6 +61,15 @@ func main() { } v = append(v, fmt.Sprintf("spec: %s", specs.Version)) app.Version = strings.Join(v, "\n") + + root := "/run/runc" + if os.Geteuid() != 0 { + runtimeDir := os.Getenv("XDG_RUNTIME_DIR") + if runtimeDir != "" { + root = runtimeDir + "/runc" + } + } + app.Flags = []cli.Flag{ cli.BoolFlag{ Name: "debug", @@ -78,7 +87,7 @@ func main() { }, cli.StringFlag{ Name: "root", - Value: "/run/runc", + Value: root, Usage: "root directory for storage of container state (this should be located in tmpfs)", }, cli.StringFlag{ diff --git a/man/runc.8.md b/man/runc.8.md index b5a8c54f..6c6d7a55 100644 --- a/man/runc.8.md +++ b/man/runc.8.md @@ -50,7 +50,7 @@ value for "bundle" is the current directory. --debug enable debug output for logging --log value set the log file path where internal debug information is written (default: "/dev/null") --log-format value set the format used by logs ('text' (default), or 'json') (default: "text") - --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc") + --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers) --criu value path to the criu binary used for checkpoint and restore (default: "criu") --systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234" --help, -h show help From d8b669400adc5807db3aa23d8da8df6a8f90fb70 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 20 Jul 2017 19:33:01 +0200 Subject: [PATCH 2/8] rootless: allow multiple user/group mappings Take advantage of the newuidmap/newgidmap tools to allow multiple users/groups to be mapped into the new user namespace in the rootless case. Signed-off-by: Giuseppe Scrivano [ rebased to handle intelrdt changes. ] Signed-off-by: Aleksa Sarai --- libcontainer/configs/validate/rootless.go | 61 +++++++------- libcontainer/container_linux.go | 10 +++ libcontainer/factory_linux.go | 30 +++++++ libcontainer/message_linux.go | 2 + libcontainer/nsenter/nsexec.c | 96 ++++++++++++++++++++--- utils_linux.go | 13 ++- 6 files changed, 171 insertions(+), 41 deletions(-) diff --git a/libcontainer/configs/validate/rootless.go b/libcontainer/configs/validate/rootless.go index 0cebfaf8..92bd167c 100644 --- a/libcontainer/configs/validate/rootless.go +++ b/libcontainer/configs/validate/rootless.go @@ -36,37 +36,27 @@ func (v *ConfigValidator) rootless(config *configs.Config) error { return nil } -func rootlessMappings(config *configs.Config) error { - rootuid, err := config.HostRootUID() - if err != nil { - return fmt.Errorf("failed to get root uid from uidMappings: %v", err) +func hasIDMapping(id int, mappings []configs.IDMap) bool { + for _, m := range mappings { + if id >= m.ContainerID && id < m.ContainerID+m.Size { + return true + } } + return false +} + +func rootlessMappings(config *configs.Config) error { if euid := geteuid(); euid != 0 { if !config.Namespaces.Contains(configs.NEWUSER) { return fmt.Errorf("rootless containers require user namespaces") } - if rootuid != euid { - return fmt.Errorf("rootless containers cannot map container root to a different host user") - } } - rootgid, err := config.HostRootGID() - if err != nil { - return fmt.Errorf("failed to get root gid from gidMappings: %v", err) + if len(config.UidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one UID mapping") } - - // Similar to the above test, we need to make sure that we aren't trying to - // map to a group ID that we don't have the right to be. - if rootgid != getegid() { - return fmt.Errorf("rootless containers cannot map container root to a different host group") - } - - // We can only map one user and group inside a container (our own). - if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 { - return fmt.Errorf("rootless containers cannot map more than one user") - } - if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 { - return fmt.Errorf("rootless containers cannot map more than one group") + if len(config.GidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one UID mapping") } return nil @@ -104,11 +94,28 @@ func rootlessMount(config *configs.Config) error { // Check that the options list doesn't contain any uid= or gid= entries // that don't resolve to root. for _, opt := range strings.Split(mount.Data, ",") { - if strings.HasPrefix(opt, "uid=") && opt != "uid=0" { - return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0") + if strings.HasPrefix(opt, "uid=") { + var uid int + n, err := fmt.Sscanf(opt, "uid=%d", &uid) + if n != 1 || err != nil { + // Ignore unknown mount options. + continue + } + if !hasIDMapping(uid, config.UidMappings) { + return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers") + } } - if strings.HasPrefix(opt, "gid=") && opt != "gid=0" { - return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0") + + if strings.HasPrefix(opt, "gid=") { + var gid int + n, err := fmt.Sscanf(opt, "gid=%d", &gid) + if n != 1 || err != nil { + // Ignore unknown mount options. + continue + } + if !hasIDMapping(gid, config.GidMappings) { + return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers") + } } } } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 836dd297..3623f7da 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -44,6 +44,8 @@ type linuxContainer struct { initProcess parentProcess initProcessStartTime uint64 criuPath string + newuidmapPath string + newgidmapPath string m sync.Mutex criuVersion int state containerState @@ -1707,6 +1709,10 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { + r.AddData(&Bytemsg{ + Type: UidmapPathAttr, + Value: []byte(c.newuidmapPath), + }) b, err := encodeIDMapping(c.config.UidMappings) if err != nil { return nil, err @@ -1729,6 +1735,10 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na }) // The following only applies if we are root. if !c.config.Rootless { + r.AddData(&Bytemsg{ + Type: GidmapPathAttr, + Value: []byte(c.newgidmapPath), + }) // check if we have CAP_SETGID to setgroup properly pid, err := capability.NewPid(os.Getpid()) if err != nil { diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 3395f8c2..89deb96e 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -140,6 +140,9 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { } Cgroupfs(l) for _, opt := range options { + if opt == nil { + continue + } if err := opt(l); err != nil { return nil, err } @@ -160,6 +163,11 @@ type LinuxFactory struct { // containers. CriuPath string + // New{u,g}uidmapPath is the path to the binaries used for mapping with + // rootless containers. + NewuidmapPath string + NewgidmapPath string + // Validator provides validation to container configurations. Validator validate.Validator @@ -201,6 +209,8 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err config: config, initArgs: l.InitArgs, criuPath: l.CriuPath, + newuidmapPath: l.NewuidmapPath, + newgidmapPath: l.NewgidmapPath, cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), } c.intelRdtManager = nil @@ -236,6 +246,8 @@ func (l *LinuxFactory) Load(id string) (Container, error) { config: &state.Config, initArgs: l.InitArgs, criuPath: l.CriuPath, + newuidmapPath: l.NewuidmapPath, + newgidmapPath: l.NewgidmapPath, cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), root: containerRoot, created: state.Created, @@ -349,3 +361,21 @@ func (l *LinuxFactory) validateID(id string) error { return nil } + +// NewuidmapPath returns an option func to configure a LinuxFactory with the +// provided .. +func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.NewuidmapPath = newuidmapPath + return nil + } +} + +// NewgidmapPath returns an option func to configure a LinuxFactory with the +// provided .. +func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.NewgidmapPath = newgidmapPath + return nil + } +} diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 8829b71a..ab453cde 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -18,6 +18,8 @@ const ( SetgroupAttr uint16 = 27285 OomScoreAdjAttr uint16 = 27286 RootlessAttr uint16 = 27287 + UidmapPathAttr uint16 = 27288 + GidmapPathAttr uint16 = 27289 ) type Int32msg struct { diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 6814a5ab..e1716269 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -1,3 +1,4 @@ + #define _GNU_SOURCE #include #include @@ -19,6 +20,8 @@ #include #include #include +#include + #include #include @@ -69,6 +72,10 @@ struct nlconfig_t { size_t uidmap_len; char *gidmap; size_t gidmap_len; + char *uidmappath; + size_t uidmappath_len; + char *gidmappath; + size_t gidmappath_len; char *namespaces; size_t namespaces_len; uint8_t is_setgroup; @@ -89,6 +96,8 @@ struct nlconfig_t { #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 #define ROOTLESS_ATTR 27287 +#define UIDMAPPATH_ATTR 27288 +#define GIDMAPPATH_ATTR 27289 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -191,22 +200,81 @@ static void update_setgroups(int pid, enum policy_t setgroup) } } -static void update_uidmap(int pid, char *map, size_t map_len) +static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) { - if (map == NULL || map_len <= 0) - return; + int child = fork(); + if (child < 0) + bail("failed to fork"); - if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) - bail("failed to update /proc/%d/uid_map", pid); + if (child == 0) { +#define MAX_ARGV 20 + char *argv[MAX_ARGV]; + char pid_fmt[16]; + int argc = 0; + char *next; + + snprintf (pid_fmt, 16, "%d", pid); + + argv[argc++] = (char *) app; + argv[argc++] = pid_fmt; + /* + * Convert the map string into a list of argument that + * newuidmap/newgidmap can understand. + */ + + while (argc < MAX_ARGV) { + if (*map == '\0') { + argv[argc++] = NULL; + break; + } + argv[argc++] = map; + next = strpbrk (map, "\n "); + if (next == NULL) + break; + *next++ = '\0'; + map = next + strspn(next, "\n "); + } + execvp (app, argv); + } + else { + int status; + while (true) { + if (waitpid(child, &status, 0) < 0) { + if (errno == EINTR) + continue; + bail("failed to waitpid"); + } + if (WIFEXITED(status) || WIFSIGNALED(status)) + return WEXITSTATUS(status); + } + } + return -1; } -static void update_gidmap(int pid, char *map, size_t map_len) +static void update_uidmap(const char *path, int pid, char *map, size_t map_len) { if (map == NULL || map_len <= 0) return; - if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) - bail("failed to update /proc/%d/gid_map", pid); + if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) { + if(errno != EPERM) + bail("failed to update /proc/%d/gid_map", pid); + if (try_mapping_tool (path, pid, map, map_len)) + bail("failed to use newuid map on %d", pid); + } +} + +static void update_gidmap(const char *path, int pid, char *map, size_t map_len) +{ + if (map == NULL || map_len <= 0) + return; + + if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) { + if(errno != EPERM) + bail("failed to update /proc/%d/gid_map", pid); + if (try_mapping_tool (path, pid, map, map_len)) + bail("failed to use newgid map on %d", pid); + } } static void update_oom_score_adj(char *data, size_t len) @@ -350,6 +418,14 @@ static void nl_parse(int fd, struct nlconfig_t *config) config->gidmap = current; config->gidmap_len = payload_len; break; + case UIDMAPPATH_ATTR: + config->uidmappath = current; + config->uidmappath_len = payload_len; + break; + case GIDMAPPATH_ATTR: + config->gidmappath = current; + config->gidmappath_len = payload_len; + break; case SETGROUP_ATTR: config->is_setgroup = readint8(current); break; @@ -596,8 +672,8 @@ void nsexec(void) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ - update_uidmap(child, config.uidmap, config.uidmap_len); - update_gidmap(child, config.gidmap, config.gidmap_len); + update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len); + update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len); s = SYNC_USERMAP_ACK; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { diff --git a/utils_linux.go b/utils_linux.go index 791e52d3..f1bb5944 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -43,11 +43,16 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available") } } - if intelrdt.IsEnabled() { - intelRdtManager := libcontainer.IntelRdtFs - return libcontainer.New(abs, cgroupManager, intelRdtManager, libcontainer.CriuPath(context.GlobalString("criu"))) + + intelRdtManager := libcontainer.IntelRdtFs + if !intelrdt.IsEnabled() { + intelRdtManager = nil } - return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu"))) + + return libcontainer.New(abs, cgroupManager, intelRdtManager, + libcontainer.CriuPath(context.GlobalString("criu")), + libcontainer.NewuidmapPath("newuidmap"), + libcontainer.NewgidmapPath("newgidmap")) } // getContainer returns the specified container instance by loading it from state From 3282f5a7c1d9d0792841abac850209d221a89340 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 20 Jul 2017 19:46:08 +0200 Subject: [PATCH 3/8] tests: fix for rootless multiple uids/gids Signed-off-by: Giuseppe Scrivano --- .../configs/validate/rootless_test.go | 68 +++++++------------ 1 file changed, 24 insertions(+), 44 deletions(-) diff --git a/libcontainer/configs/validate/rootless_test.go b/libcontainer/configs/validate/rootless_test.go index 23d678d9..f802f8cb 100644 --- a/libcontainer/configs/validate/rootless_test.go +++ b/libcontainer/configs/validate/rootless_test.go @@ -66,28 +66,6 @@ func TestValidateRootlessMappingUid(t *testing.T) { if err := validator.Validate(config); err == nil { t.Errorf("Expected error to occur if no uid mappings provided") } - - config = rootlessConfig() - config.UidMappings[0].HostID = geteuid() + 1 - if err := validator.Validate(config); err == nil { - t.Errorf("Expected error to occur if geteuid() != mapped uid") - } - - config = rootlessConfig() - config.UidMappings[0].Size = 1024 - if err := validator.Validate(config); err == nil { - t.Errorf("Expected error to occur if more than one uid mapped") - } - - config = rootlessConfig() - config.UidMappings = append(config.UidMappings, configs.IDMap{ - HostID: geteuid() + 1, - ContainerID: 0, - Size: 1, - }) - if err := validator.Validate(config); err == nil { - t.Errorf("Expected error to occur if more than one uid extent mapped") - } } func TestValidateRootlessMappingGid(t *testing.T) { @@ -98,28 +76,6 @@ func TestValidateRootlessMappingGid(t *testing.T) { if err := validator.Validate(config); err == nil { t.Errorf("Expected error to occur if no gid mappings provided") } - - config = rootlessConfig() - config.GidMappings[0].HostID = getegid() + 1 - if err := validator.Validate(config); err == nil { - t.Errorf("Expected error to occur if getegid() != mapped gid") - } - - config = rootlessConfig() - config.GidMappings[0].Size = 1024 - if err := validator.Validate(config); err == nil { - t.Errorf("Expected error to occur if more than one gid mapped") - } - - config = rootlessConfig() - config.GidMappings = append(config.GidMappings, configs.IDMap{ - HostID: getegid() + 1, - ContainerID: 0, - Size: 1, - }) - if err := validator.Validate(config); err == nil { - t.Errorf("Expected error to occur if more than one gid extent mapped") - } } /* rootlessMount() */ @@ -149,6 +105,18 @@ func TestValidateRootlessMountUid(t *testing.T) { if err := validator.Validate(config); err != nil { t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err) } + + config.Mounts[0].Data = "uid=2" + config.UidMappings[0].Size = 10 + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting uid=2 in mount options and UidMapping[0].size is 10") + } + + config.Mounts[0].Data = "uid=20" + config.UidMappings[0].Size = 10 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting uid=20 in mount options and UidMapping[0].size is 10") + } } func TestValidateRootlessMountGid(t *testing.T) { @@ -176,6 +144,18 @@ func TestValidateRootlessMountGid(t *testing.T) { if err := validator.Validate(config); err != nil { t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err) } + + config.Mounts[0].Data = "gid=5" + config.GidMappings[0].Size = 10 + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting gid=5 in mount options and GidMapping[0].size is 10") + } + + config.Mounts[0].Data = "gid=11" + config.GidMappings[0].Size = 10 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting gid=11 in mount options and GidMapping[0].size is 10") + } } /* rootlessCgroup() */ From 6097ce74d8da6653a9cc607b10ae70b56496af57 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 6 Sep 2017 00:25:17 +1000 Subject: [PATCH 4/8] nsenter: correctly handle newgidmap path for rootless containers After quite a bit of debugging, I found that previous versions of this patchset did not include newgidmap in a rootless setting. Fix this by passing it whenever group mappings are applied, and also providing some better checking for try_mapping_tool. This commit also includes some stylistic improvements. Signed-off-by: Aleksa Sarai --- libcontainer/container_linux.go | 16 +++++---- libcontainer/nsenter/nsexec.c | 58 ++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 3623f7da..be2e5f4f 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1709,10 +1709,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { - r.AddData(&Bytemsg{ - Type: UidmapPathAttr, - Value: []byte(c.newuidmapPath), - }) + if c.config.Rootless && c.newuidmapPath != "" { + r.AddData(&Bytemsg{ + Type: UidmapPathAttr, + Value: []byte(c.newuidmapPath), + }) + } b, err := encodeIDMapping(c.config.UidMappings) if err != nil { return nil, err @@ -1733,12 +1735,14 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Type: GidmapAttr, Value: b, }) - // The following only applies if we are root. - if !c.config.Rootless { + if c.config.Rootless && c.newgidmapPath != "" { r.AddData(&Bytemsg{ Type: GidmapPathAttr, Value: []byte(c.newgidmapPath), }) + } + // The following only applies if we are root. + if !c.config.Rootless { // check if we have CAP_SETGID to setgroup properly pid, err := capability.NewPid(os.Getpid()) if err != nil { diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index e1716269..aea9b8c6 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -67,21 +67,27 @@ struct clone_t { struct nlconfig_t { char *data; + + /* Process settings. */ uint32_t cloneflags; + char *oom_score_adj; + size_t oom_score_adj_len; + + /* User namespace settings.*/ char *uidmap; size_t uidmap_len; char *gidmap; size_t gidmap_len; + char *namespaces; + size_t namespaces_len; + uint8_t is_setgroup; + + /* Rootless container settings.*/ + uint8_t is_rootless; char *uidmappath; size_t uidmappath_len; char *gidmappath; size_t gidmappath_len; - char *namespaces; - size_t namespaces_len; - uint8_t is_setgroup; - uint8_t is_rootless; - char *oom_score_adj; - size_t oom_score_adj_len; }; /* @@ -202,18 +208,29 @@ static void update_setgroups(int pid, enum policy_t setgroup) static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) { - int child = fork(); + int child; + + /* + * If @app is NULL, execvp will segfault. Just check it here and bail (if + * we're in this path, the caller is already getting desparate and there + * isn't a backup to this failing). This usually would be a configuration + * or programming issue. + */ + if (!app) + bail("mapping tool not present"); + + child = fork(); if (child < 0) bail("failed to fork"); - if (child == 0) { + if (!child) { #define MAX_ARGV 20 char *argv[MAX_ARGV]; char pid_fmt[16]; int argc = 0; char *next; - snprintf (pid_fmt, 16, "%d", pid); + snprintf(pid_fmt, 16, "%d", pid); argv[argc++] = (char *) app; argv[argc++] = pid_fmt; @@ -228,16 +245,18 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) break; } argv[argc++] = map; - next = strpbrk (map, "\n "); + next = strpbrk(map, "\n "); if (next == NULL) break; *next++ = '\0'; map = next + strspn(next, "\n "); } - execvp (app, argv); - } - else { + + execvp(app, argv); + bail("failed to execvp"); + } else { int status; + while (true) { if (waitpid(child, &status, 0) < 0) { if (errno == EINTR) @@ -246,8 +265,9 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) } if (WIFEXITED(status) || WIFSIGNALED(status)) return WEXITSTATUS(status); - } + } } + return -1; } @@ -257,9 +277,9 @@ static void update_uidmap(const char *path, int pid, char *map, size_t map_len) return; if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) { - if(errno != EPERM) - bail("failed to update /proc/%d/gid_map", pid); - if (try_mapping_tool (path, pid, map, map_len)) + if (errno != EPERM) + bail("failed to update /proc/%d/uid_map", pid); + if (try_mapping_tool(path, pid, map, map_len)) bail("failed to use newuid map on %d", pid); } } @@ -270,9 +290,9 @@ static void update_gidmap(const char *path, int pid, char *map, size_t map_len) return; if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) { - if(errno != EPERM) + if (errno != EPERM) bail("failed to update /proc/%d/gid_map", pid); - if (try_mapping_tool (path, pid, map, map_len)) + if (try_mapping_tool(path, pid, map, map_len)) bail("failed to use newgid map on %d", pid); } } From 969bb49cc36e65246bc7a77d96a8f05d06916388 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 6 Sep 2017 22:13:47 +1000 Subject: [PATCH 5/8] nsenter: do not resolve path in nsexec context With the addition of our new{uid,gid}map support, we used to call execvp(3) from inside nsexec. This would mean that the path resolution for the binaries would happen in nsexec. Move the resolution to the initial setup code, and pass the absolute path to nsexec. Signed-off-by: Aleksa Sarai --- libcontainer/nsenter/nsexec.c | 7 ++++--- utils_linux.go | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index aea9b8c6..a6a107e6 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -211,7 +211,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) int child; /* - * If @app is NULL, execvp will segfault. Just check it here and bail (if + * If @app is NULL, execve will segfault. Just check it here and bail (if * we're in this path, the caller is already getting desparate and there * isn't a backup to this failing). This usually would be a configuration * or programming issue. @@ -226,6 +226,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) if (!child) { #define MAX_ARGV 20 char *argv[MAX_ARGV]; + char *envp[] = {NULL}; char pid_fmt[16]; int argc = 0; char *next; @@ -252,8 +253,8 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) map = next + strspn(next, "\n "); } - execvp(app, argv); - bail("failed to execvp"); + execve(app, argv, envp); + bail("failed to execv"); } else { int status; diff --git a/utils_linux.go b/utils_linux.go index f1bb5944..85ec3c84 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -7,6 +7,7 @@ import ( "fmt" "net" "os" + "os/exec" "path/filepath" "strconv" @@ -35,6 +36,9 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { if err != nil { return nil, err } + + // We default to cgroupfs, and can only use systemd if the system is a + // systemd box. cgroupManager := libcontainer.Cgroupfs if context.GlobalBool("systemd-cgroup") { if systemd.UseSystemd() { @@ -49,10 +53,22 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { intelRdtManager = nil } + // We resolve the paths for {newuidmap,newgidmap} from the context of runc, + // to avoid doing a path lookup in the nsexec context. TODO: The binary + // names are not currently configurable. + newuidmap, err := exec.LookPath("newuidmap") + if err != nil { + newuidmap = "" + } + newgidmap, err := exec.LookPath("newgidmap") + if err != nil { + newgidmap = "" + } + return libcontainer.New(abs, cgroupManager, intelRdtManager, libcontainer.CriuPath(context.GlobalString("criu")), - libcontainer.NewuidmapPath("newuidmap"), - libcontainer.NewgidmapPath("newgidmap")) + libcontainer.NewuidmapPath(newuidmap), + libcontainer.NewgidmapPath(newgidmap)) } // getContainer returns the specified container instance by loading it from state From 1a5fdc1c5febeaa8efaa95e78de6c38f1229d4b7 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 7 Sep 2017 06:58:52 +1000 Subject: [PATCH 6/8] init: support setting -u with rootless containers Now that rootless containers have support for multiple uid and gid mappings, allow --user to work as expected. If the user is not mapped, an error occurs (as usual). Signed-off-by: Aleksa Sarai --- libcontainer/init_linux.go | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 2020bb7a..efdeae73 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -261,25 +261,27 @@ func setupUser(config *initConfig) error { } } + // Rather than just erroring out later in setuid(2) and setgid(2), check + // that the user is mapped here. + if _, err := config.Config.HostUID(int(execUser.Uid)); err != nil { + return fmt.Errorf("cannot set uid to unmapped user in user namespace") + } + if _, err := config.Config.HostGID(int(execUser.Gid)); err != nil { + return fmt.Errorf("cannot set gid to unmapped user in user namespace") + } + if config.Rootless { - if execUser.Uid != 0 { - return fmt.Errorf("cannot run as a non-root user in a rootless container") - } - - if execUser.Gid != 0 { - return fmt.Errorf("cannot run as a non-root group in a rootless container") - } - - // We cannot set any additional groups in a rootless container and thus we - // bail if the user asked us to do so. TODO: We currently can't do this - // earlier, but if libcontainer.Process.User was typesafe this might work. + // We cannot set any additional groups in a rootless container and thus + // we bail if the user asked us to do so. TODO: We currently can't do + // this check earlier, but if libcontainer.Process.User was typesafe + // this might work. if len(addGroups) > 0 { return fmt.Errorf("cannot set any additional groups in a rootless container") } } - // before we change to the container's user make sure that the processes STDIO - // is correctly owned by the user that we are switching to. + // Before we change to the container's user make sure that the processes + // STDIO is correctly owned by the user that we are switching to. if err := fixStdioPermissions(config, execUser); err != nil { return err } @@ -298,7 +300,6 @@ func setupUser(config *initConfig) error { if err := system.Setgid(execUser.Gid); err != nil { return err } - if err := system.Setuid(execUser.Uid); err != nil { return err } From d0aec23c7e5494d4221a78dc4ad89eb1315dafe4 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 7 Sep 2017 07:07:43 +1000 Subject: [PATCH 7/8] tests: generalise rootless runner This is necessary in order to add proper opportunistic tests, and is a placeholder until we add tests for new{uid,gid}map configurations. Signed-off-by: Aleksa Sarai --- Makefile | 5 ++--- tests/rootless.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) create mode 100755 tests/rootless.sh diff --git a/Makefile b/Makefile index 0877a070..af48ecd9 100644 --- a/Makefile +++ b/Makefile @@ -77,11 +77,10 @@ localintegration: all bats -t tests/integration${TESTFLAGS} rootlessintegration: runcimage - docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) --cap-drop=ALL -u rootless $(RUNC_IMAGE) make localintegration + docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration -# FIXME: This should not be separate from rootlessintegration's method of running. localrootlessintegration: all - sudo -u rootless -H PATH="${PATH}" bats -t tests/integration${TESTFLAGS} + tests/rootless.sh shell: all docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash diff --git a/tests/rootless.sh b/tests/rootless.sh new file mode 100755 index 00000000..2613c702 --- /dev/null +++ b/tests/rootless.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright (C) 2017 SUSE LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# rootless.sh -- Runner for rootless container tests. The purpose of this +# script is to allow for the addition (and testing) of "opportunistic" features +# to rootless containers while still testing the base features. In order to add +# a new feature, please match the existing style. Add an entry to $ALL_FEATURES, +# and add an enable_* and disable_* hook. + +ALL_FEATURES=() +ROOT="$(readlink -f "$(dirname "${BASH_SOURCE}")/..")" + +# Create a powerset of $ALL_FEATURES (the set of all subsets of $ALL_FEATURES). +# We test all of the possible combinations (as long as we don't add too many +# feature knobs this shouldn't take too long -- but the number of tested +# combinations is O(2^n)). +function powerset() { + eval printf '%s' $(printf '{,%s+}' "$@"): +} +features_powerset="$(powerset "${ALL_FEATURES[@]}")" + +# Iterate over the powerset of all features. +IFS=: +for enabled_features in $features_powerset +do + idx="$(($idx+1))" + echo "[$(printf '%.2d' "$idx")] run rootless tests ... (${enabled_features%%+})" + + unset IFS + for feature in "${ALL_FEATURES[@]}" + do + hook_func="disable_$feature" + grep -E "(^|\+)$feature(\+|$)" <<<$enabled_features &>/dev/null && hook_func="enable_$feature" + "$hook_func" + done + + # Run the test suite! + set -e + echo path: $PATH + export ROOTLESS_FEATURES="$enabled_features" + sudo -HE -u rootless PATH="$PATH" bats -t "$ROOT/tests/integration" + set +e +done From eb5bd4fa6adfe9f2e41a5a5a2463857ae5f7a0c6 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 7 Sep 2017 09:54:46 +1000 Subject: [PATCH 8/8] tests: add tests for rootless multi-mapping configurations Enable several previously disabled tests (for the idmap execution mode) for rootless containers, in addition to making all tests use the additional mappings. At the moment there's no strong need to add any additional tests purely for rootless_idmap. Signed-off-by: Aleksa Sarai --- Dockerfile | 1 + tests/integration/exec.bats | 6 ++-- tests/integration/helpers.bash | 43 +++++++++++++++++++++++---- tests/integration/spec.bats | 2 +- tests/integration/start_detached.bats | 4 +-- tests/integration/start_hello.bats | 4 +-- tests/integration/tty.bats | 16 +++++----- tests/rootless.sh | 35 +++++++++++++++++++++- 8 files changed, 88 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9d6b96fb..1b36d0d9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,7 @@ RUN apt-get update && apt-get install -y \ protobuf-c-compiler \ protobuf-compiler \ python-minimal \ + uidmap \ --no-install-recommends \ && apt-get clean diff --git a/tests/integration/exec.bats b/tests/integration/exec.bats index 70730de8..8d943aab 100644 --- a/tests/integration/exec.bats +++ b/tests/integration/exec.bats @@ -100,8 +100,8 @@ function teardown() { } @test "runc exec --user" { - # --user can't work in rootless containers - requires root + # --user can't work in rootless containers that don't have idmap. + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox @@ -110,5 +110,5 @@ function teardown() { runc exec --user 1000:1000 test_busybox id [ "$status" -eq 0 ] - [[ ${output} == "uid=1000 gid=1000" ]] + [[ "${output}" == "uid=1000 gid=1000"* ]] } diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index 29e0fef6..e44fcaa7 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -58,15 +58,41 @@ function __runc() { "$RUNC" --log /proc/self/fd/2 --root "$ROOT" "$@" } -# Wrapper for runc spec. +# Wrapper for runc spec, which takes only one argument (the bundle path). function runc_spec() { - local args="" + ! [[ "$#" > 1 ]] + + local args=() + local bundle="" if [ "$ROOTLESS" -ne 0 ]; then - args+="--rootless" + args+=("--rootless") + fi + if [ "$#" -ne 0 ]; then + bundle="$1" + args+=("--bundle" "$bundle") fi - runc spec $args "$@" + runc spec "${args[@]}" + + # Always add additional mappings if we have idmaps. + if [[ "$ROOTLESS" -ne 0 ]] && [[ "$ROOTLESS_FEATURES" == *"idmap"* ]]; then + runc_rootless_idmap "$bundle" + fi +} + +# Shortcut to add additional uids and gids, based on the values set as part of +# a rootless configuration. +function runc_rootless_idmap() { + bundle="${1:-.}" + cat "$bundle/config.json" \ + | jq '.mounts |= map((select(.type == "devpts") | .options += ["gid=5"]) // .)' \ + | jq '.linux.uidMappings |= .+ [{"hostID": '"$ROOTLESS_UIDMAP_START"', "containerID": 1000, "size": '"$ROOTLESS_UIDMAP_LENGTH"'}]' \ + | jq '.linux.gidMappings |= .+ [{"hostID": '"$ROOTLESS_GIDMAP_START"', "containerID": 100, "size": 1}]' \ + | jq '.linux.gidMappings |= .+ [{"hostID": '"$(($ROOTLESS_GIDMAP_START+10))"', "containerID": 1, "size": 20}]' \ + | jq '.linux.gidMappings |= .+ [{"hostID": '"$(($ROOTLESS_GIDMAP_START+100))"', "containerID": 1000, "size": '"$(($ROOTLESS_GIDMAP_LENGTH-1000))"'}]' \ + >"$bundle/config.json.tmp" + mv "$bundle/config.json"{.tmp,} } # Fails the current test, providing the error given. @@ -90,14 +116,19 @@ function requires() { skip "test requires ${var}" fi ;; + rootless_idmap) + if [[ "$ROOTLESS_FEATURES" != *"idmap"* ]]; then + skip "test requires ${var}" + fi + ;; cgroups_kmem) if [ ! -e "$KMEM" ]; then - skip "Test requires ${var}." + skip "Test requires ${var}" fi ;; cgroups_rt) if [ ! -e "$RT_PERIOD" ]; then - skip "Test requires ${var}." + skip "Test requires ${var}" fi ;; *) diff --git a/tests/integration/spec.bats b/tests/integration/spec.bats index 60617060..c9ba2aaa 100644 --- a/tests/integration/spec.bats +++ b/tests/integration/spec.bats @@ -51,7 +51,7 @@ function teardown() { [ ! -e "$HELLO_BUNDLE"/config.json ] # test generation of spec does not return an error - runc_spec --bundle "$HELLO_BUNDLE" + runc_spec "$HELLO_BUNDLE" [ "$status" -eq 0 ] # test generation of spec created our config.json (spec) diff --git a/tests/integration/start_detached.bats b/tests/integration/start_detached.bats index 0dc5e0d8..7f177b86 100644 --- a/tests/integration/start_detached.bats +++ b/tests/integration/start_detached.bats @@ -21,8 +21,8 @@ function teardown() { } @test "runc run detached ({u,g}id != 0)" { - # cannot start containers as another user in rootless setup - requires root + # cannot start containers as another user in rootless setup without idmap + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. diff --git a/tests/integration/start_hello.bats b/tests/integration/start_hello.bats index 2e935728..a706be27 100644 --- a/tests/integration/start_hello.bats +++ b/tests/integration/start_hello.bats @@ -21,8 +21,8 @@ function teardown() { } @test "runc run ({u,g}id != 0)" { - # cannot start containers as another user in rootless setup - requires root + # cannot start containers as another user in rootless setup without idmap + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. diff --git a/tests/integration/tty.bats b/tests/integration/tty.bats index 985a9b75..a59652bb 100644 --- a/tests/integration/tty.bats +++ b/tests/integration/tty.bats @@ -24,9 +24,9 @@ function teardown() { } @test "runc run [tty owner]" { - # tty chmod is not doable in rootless containers. + # tty chmod is not doable in rootless containers without idmap. # TODO: this can be made as a change to the gid test. - requires root + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # Replace sh script with stat. sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json @@ -40,8 +40,8 @@ function teardown() { } @test "runc run [tty owner] ({u,g}id != 0)" { - # tty chmod is not doable in rootless containers. - requires root + # tty chmod is not doable in rootless containers without idmap. + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. @@ -76,9 +76,9 @@ function teardown() { } @test "runc exec [tty owner]" { - # tty chmod is not doable in rootless containers. + # tty chmod is not doable in rootless containers without idmap. # TODO: this can be made as a change to the gid test. - requires root + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox @@ -95,8 +95,8 @@ function teardown() { } @test "runc exec [tty owner] ({u,g}id != 0)" { - # tty chmod is not doable in rootless containers. - requires root + # tty chmod is not doable in rootless containers without idmap. + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. diff --git a/tests/rootless.sh b/tests/rootless.sh index 2613c702..cb2c0647 100755 --- a/tests/rootless.sh +++ b/tests/rootless.sh @@ -19,9 +19,42 @@ # a new feature, please match the existing style. Add an entry to $ALL_FEATURES, # and add an enable_* and disable_* hook. -ALL_FEATURES=() +ALL_FEATURES=("idmap") ROOT="$(readlink -f "$(dirname "${BASH_SOURCE}")/..")" +# FEATURE: Opportunistic new{uid,gid}map support, allowing a rootless container +# to be set up with the usage of helper setuid binaries. + +function enable_idmap() { + export ROOTLESS_UIDMAP_START=100000 ROOTLESS_UIDMAP_LENGTH=65536 + export ROOTLESS_GIDMAP_START=200000 ROOTLESS_GIDMAP_LENGTH=65536 + + # Set up sub{uid,gid} mappings. + [ -e /etc/subuid.tmp ] && mv /etc/subuid{.tmp,} + ( grep -v '^rootless' /etc/subuid ; echo "rootless:$ROOTLESS_UIDMAP_START:$ROOTLESS_UIDMAP_LENGTH" ) > /etc/subuid.tmp + mv /etc/subuid{.tmp,} + [ -e /etc/subgid.tmp ] && mv /etc/subgid{.tmp,} + ( grep -v '^rootless' /etc/subgid ; echo "rootless:$ROOTLESS_GIDMAP_START:$ROOTLESS_GIDMAP_LENGTH" ) > /etc/subgid.tmp + mv /etc/subgid{.tmp,} + + # Reactivate new{uid,gid}map helpers if applicable. + [ -e /usr/bin/unused-newuidmap ] && mv /usr/bin/{unused-,}newuidmap + [ -e /usr/bin/unused-newgidmap ] && mv /usr/bin/{unused-,}newgidmap +} + +function disable_idmap() { + export ROOTLESS_UIDMAP_START ROOTLESS_UIDMAP_LENGTH + export ROOTLESS_GIDMAP_START ROOTLESS_GIDMAP_LENGTH + + # Deactivate sub{uid,gid} mappings. + [ -e /etc/subuid ] && mv /etc/subuid{,.tmp} + [ -e /etc/subgid ] && mv /etc/subgid{,.tmp} + + # Deactivate new{uid,gid}map helpers. setuid is preserved with mv(1). + [ -e /usr/bin/newuidmap ] && mv /usr/bin/{,unused-}newuidmap + [ -e /usr/bin/newgidmap ] && mv /usr/bin/{,unused-}newgidmap +} + # Create a powerset of $ALL_FEATURES (the set of all subsets of $ALL_FEATURES). # We test all of the possible combinations (as long as we don't add too many # feature knobs this shouldn't take too long -- but the number of tested