Add support for cgroup namespace
Cgroup namespace can be configured in `config.json` like the other namespaces. Here is an example: ``` "namespaces": [ { "type": "pid" }, { "type": "network" }, { "type": "ipc" }, { "type": "uts" }, { "type": "mount" }, { "type": "cgroup" } ], ``` Note that if you want to run a container that shares its cgroup namespace with another container, it is strongly recommended that you set a proper `CgroupsPath` for both containers (the second container's cgroup path must be a subdirectory of the first one's); otherwise there might be some unexpected results. Signed-off-by: Yuanhong Peng <pengyuanhong@huawei.com> Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
9a3a8a5ebf
commit
df3fa115f9
|
@ -148,6 +148,7 @@ config := &configs.Config{
|
||||||
{Type: configs.NEWPID},
|
{Type: configs.NEWPID},
|
||||||
{Type: configs.NEWUSER},
|
{Type: configs.NEWUSER},
|
||||||
{Type: configs.NEWNET},
|
{Type: configs.NEWNET},
|
||||||
|
{Type: configs.NEWCGROUP},
|
||||||
}),
|
}),
|
||||||
Cgroups: &configs.Cgroup{
|
Cgroups: &configs.Cgroup{
|
||||||
Name: "test-container",
|
Name: "test-container",
|
||||||
|
|
|
@ -21,16 +21,17 @@ Minimum requirements:
|
||||||
|
|
||||||
### Namespaces
|
### Namespaces
|
||||||
|
|
||||||
| Flag | Enabled |
|
| Flag | Enabled |
|
||||||
| ------------ | ------- |
|
| --------------- | ------- |
|
||||||
| CLONE_NEWPID | 1 |
|
| CLONE_NEWPID | 1 |
|
||||||
| CLONE_NEWUTS | 1 |
|
| CLONE_NEWUTS | 1 |
|
||||||
| CLONE_NEWIPC | 1 |
|
| CLONE_NEWIPC | 1 |
|
||||||
| CLONE_NEWNET | 1 |
|
| CLONE_NEWNET | 1 |
|
||||||
| CLONE_NEWNS | 1 |
|
| CLONE_NEWNS | 1 |
|
||||||
| CLONE_NEWUSER | 1 |
|
| CLONE_NEWUSER | 1 |
|
||||||
|
| CLONE_NEWCGROUP | 1 |
|
||||||
|
|
||||||
Namespaces are created for the container via the `clone` syscall.
|
Namespaces are created for the container via the `unshare` syscall.
|
||||||
|
|
||||||
|
|
||||||
### Filesystem
|
### Filesystem
|
||||||
|
|
|
@ -17,7 +17,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
cgroupNamePrefix = "name="
|
CgroupNamePrefix = "name="
|
||||||
CgroupProcesses = "cgroup.procs"
|
CgroupProcesses = "cgroup.procs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -156,8 +156,8 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ss[opt] = true
|
ss[opt] = true
|
||||||
if strings.HasPrefix(opt, cgroupNamePrefix) {
|
if strings.HasPrefix(opt, CgroupNamePrefix) {
|
||||||
opt = opt[len(cgroupNamePrefix):]
|
opt = opt[len(CgroupNamePrefix):]
|
||||||
}
|
}
|
||||||
m.Subsystems = append(m.Subsystems, opt)
|
m.Subsystems = append(m.Subsystems, opt)
|
||||||
numFound++
|
numFound++
|
||||||
|
@ -343,7 +343,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err
|
||||||
return p, nil
|
return p, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
|
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||||
return p, nil
|
return p, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,9 +8,6 @@ func (n *Namespace) Syscall() int {
|
||||||
return namespaceInfo[n.Type]
|
return namespaceInfo[n.Type]
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is not yet in the Go stdlib.
|
|
||||||
const syscall_CLONE_NEWCGROUP = (1 << 29)
|
|
||||||
|
|
||||||
var namespaceInfo = map[NamespaceType]int{
|
var namespaceInfo = map[NamespaceType]int{
|
||||||
NEWNET: unix.CLONE_NEWNET,
|
NEWNET: unix.CLONE_NEWNET,
|
||||||
NEWNS: unix.CLONE_NEWNS,
|
NEWNS: unix.CLONE_NEWNS,
|
||||||
|
@ -18,7 +15,7 @@ var namespaceInfo = map[NamespaceType]int{
|
||||||
NEWIPC: unix.CLONE_NEWIPC,
|
NEWIPC: unix.CLONE_NEWIPC,
|
||||||
NEWUTS: unix.CLONE_NEWUTS,
|
NEWUTS: unix.CLONE_NEWUTS,
|
||||||
NEWPID: unix.CLONE_NEWPID,
|
NEWPID: unix.CLONE_NEWPID,
|
||||||
NEWCGROUP: syscall_CLONE_NEWCGROUP,
|
NEWCGROUP: unix.CLONE_NEWCGROUP,
|
||||||
}
|
}
|
||||||
|
|
||||||
// CloneFlags parses the container's Namespaces options to set the correct
|
// CloneFlags parses the container's Namespaces options to set the correct
|
||||||
|
|
|
@ -38,6 +38,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||||
if err := v.usernamespace(config); err != nil {
|
if err := v.usernamespace(config); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if err := v.cgroupnamespace(config); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
if err := v.sysctl(config); err != nil {
|
if err := v.sysctl(config); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -116,6 +119,15 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error {
|
||||||
|
if config.Namespaces.Contains(configs.NEWCGROUP) {
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
||||||
|
return fmt.Errorf("cgroup namespaces aren't enabled in the kernel")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// sysctl validates that the specified sysctl keys are valid or not.
|
// sysctl validates that the specified sysctl keys are valid or not.
|
||||||
// /proc/sys isn't completely namespaced and depending on which namespaces
|
// /proc/sys isn't completely namespaced and depending on which namespaces
|
||||||
// are specified, a subset of sysctls are permitted.
|
// are specified, a subset of sysctls are permitted.
|
||||||
|
|
|
@ -1745,7 +1745,6 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||||
// can setns in order.
|
// can setns in order.
|
||||||
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
||||||
paths := []string{}
|
paths := []string{}
|
||||||
|
|
||||||
for _, ns := range configs.NamespaceTypes() {
|
for _, ns := range configs.NamespaceTypes() {
|
||||||
|
|
||||||
// Remove namespaces that we don't need to join.
|
// Remove namespaces that we don't need to join.
|
||||||
|
|
|
@ -1776,3 +1776,60 @@ func TestTmpfsCopyUp(t *testing.T) {
|
||||||
t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs)
|
t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCGROUPPrivate(t *testing.T) {
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
||||||
|
t.Skip("cgroupns is unsupported")
|
||||||
|
}
|
||||||
|
if testing.Short() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rootfs, err := newRootfs()
|
||||||
|
ok(t, err)
|
||||||
|
defer remove(rootfs)
|
||||||
|
|
||||||
|
l, err := os.Readlink("/proc/1/ns/cgroup")
|
||||||
|
ok(t, err)
|
||||||
|
|
||||||
|
config := newTemplateConfig(rootfs)
|
||||||
|
config.Namespaces.Add(configs.NEWCGROUP, "")
|
||||||
|
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup")
|
||||||
|
ok(t, err)
|
||||||
|
|
||||||
|
if exitCode != 0 {
|
||||||
|
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
|
||||||
|
t.Fatalf("cgroup link should be private to the container but equals host %q %q", actual, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCGROUPHost(t *testing.T) {
|
||||||
|
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
||||||
|
t.Skip("cgroupns is unsupported")
|
||||||
|
}
|
||||||
|
if testing.Short() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rootfs, err := newRootfs()
|
||||||
|
ok(t, err)
|
||||||
|
defer remove(rootfs)
|
||||||
|
|
||||||
|
l, err := os.Readlink("/proc/1/ns/cgroup")
|
||||||
|
ok(t, err)
|
||||||
|
|
||||||
|
config := newTemplateConfig(rootfs)
|
||||||
|
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup")
|
||||||
|
ok(t, err)
|
||||||
|
|
||||||
|
if exitCode != 0 {
|
||||||
|
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
|
||||||
|
t.Fatalf("cgroup link not equal to host link %q %q", actual, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -42,6 +42,12 @@ enum sync_t {
|
||||||
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Synchronisation value for cgroup namespace setup.
|
||||||
|
* The same constant is defined in process_linux.go as "createCgroupns".
|
||||||
|
*/
|
||||||
|
#define CREATECGROUPNS 0x80
|
||||||
|
|
||||||
/* longjmp() arguments. */
|
/* longjmp() arguments. */
|
||||||
#define JUMP_PARENT 0x00
|
#define JUMP_PARENT 0x00
|
||||||
#define JUMP_CHILD 0xA0
|
#define JUMP_CHILD 0xA0
|
||||||
|
@ -640,7 +646,6 @@ void nsexec(void)
|
||||||
case JUMP_PARENT:{
|
case JUMP_PARENT:{
|
||||||
int len;
|
int len;
|
||||||
pid_t child, first_child = -1;
|
pid_t child, first_child = -1;
|
||||||
char buf[JSON_MAX];
|
|
||||||
bool ready = false;
|
bool ready = false;
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
|
@ -716,6 +721,18 @@ void nsexec(void)
|
||||||
kill(child, SIGKILL);
|
kill(child, SIGKILL);
|
||||||
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Send the init_func pid back to our parent.
|
||||||
|
*
|
||||||
|
* Send the init_func pid and the pid of the first child back to our parent.
|
||||||
|
* We need to send both back because we can't reap the first child we created (CLONE_PARENT).
|
||||||
|
* It becomes the responsibility of our parent to reap the first child.
|
||||||
|
*/
|
||||||
|
len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
|
||||||
|
if (len < 0) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("unable to generate JSON for child pid");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SYNC_CHILD_READY:
|
case SYNC_CHILD_READY:
|
||||||
|
@ -759,23 +776,6 @@ void nsexec(void)
|
||||||
bail("unexpected sync value: %u", s);
|
bail("unexpected sync value: %u", s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Send the init_func pid and the pid of the first child back to our parent.
|
|
||||||
*
|
|
||||||
* We need to send both back because we can't reap the first child we created (CLONE_PARENT).
|
|
||||||
* It becomes the responsibility of our parent to reap the first child.
|
|
||||||
*/
|
|
||||||
len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
|
|
||||||
if (len < 0) {
|
|
||||||
kill(child, SIGKILL);
|
|
||||||
bail("unable to generate JSON for child pid");
|
|
||||||
}
|
|
||||||
if (write(pipenum, buf, len) != len) {
|
|
||||||
kill(child, SIGKILL);
|
|
||||||
bail("unable to send child pid to bootstrapper");
|
|
||||||
}
|
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -862,14 +862,17 @@ void nsexec(void)
|
||||||
if (setresuid(0, 0, 0) < 0)
|
if (setresuid(0, 0, 0) < 0)
|
||||||
bail("failed to become root in user namespace");
|
bail("failed to become root in user namespace");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unshare all of the namespaces. Note that we don't merge this
|
* Unshare all of the namespaces. Now, it should be noted that this
|
||||||
* with clone() because there were some old kernel versions where
|
* ordering might break in the future (especially with rootless
|
||||||
* clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do
|
* containers). But for now, it's not possible to split this into
|
||||||
* it the long way.
|
* CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
|
||||||
|
*
|
||||||
|
* Note that we don't merge this with clone() because there were
|
||||||
|
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
|
||||||
|
* was broken, so we'll just do it the long way anyway.
|
||||||
*/
|
*/
|
||||||
if (unshare(config.cloneflags) < 0)
|
if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
|
||||||
bail("failed to unshare namespaces");
|
bail("failed to unshare namespaces");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -958,6 +961,18 @@ void nsexec(void)
|
||||||
bail("setgroups failed");
|
bail("setgroups failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
|
||||||
|
if (config.cloneflags & CLONE_NEWCGROUP) {
|
||||||
|
uint8_t value;
|
||||||
|
if (read(pipenum, &value, sizeof(value)) != sizeof(value))
|
||||||
|
bail("read synchronisation value failed");
|
||||||
|
if (value == CREATECGROUPNS) {
|
||||||
|
if (unshare(CLONE_NEWCGROUP) < 0)
|
||||||
|
bail("failed to unshare cgroup namespace");
|
||||||
|
} else
|
||||||
|
bail("received unknown synchronisation value");
|
||||||
|
}
|
||||||
|
|
||||||
s = SYNC_CHILD_READY;
|
s = SYNC_CHILD_READY;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
||||||
|
|
|
@ -22,6 +22,10 @@ import (
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Synchronisation value for cgroup namespace setup.
|
||||||
|
// The same constant is defined in nsexec.c as "CREATECGROUPNS".
|
||||||
|
const createCgroupns = 0x80
|
||||||
|
|
||||||
type parentProcess interface {
|
type parentProcess interface {
|
||||||
// pid returns the pid for the running process.
|
// pid returns the pid for the running process.
|
||||||
pid() int
|
pid() int
|
||||||
|
@ -225,12 +229,17 @@ func (p *initProcess) externalDescriptors() []string {
|
||||||
return p.fds
|
return p.fds
|
||||||
}
|
}
|
||||||
|
|
||||||
// execSetns runs the process that executes C code to perform the setns calls
|
// getChildPid receives the final child's pid over the provided pipe.
|
||||||
// because setns support requires the C process to fork off a child and perform the setns
|
func (p *initProcess) getChildPid() (int, error) {
|
||||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
var pid pid
|
||||||
// over the provided pipe.
|
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||||
// This is called by initProcess.start function
|
p.cmd.Wait()
|
||||||
func (p *initProcess) execSetns() error {
|
return -1, err
|
||||||
|
}
|
||||||
|
return pid.Pid, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *initProcess) waitForChildExit(childPid int) error {
|
||||||
status, err := p.cmd.Process.Wait()
|
status, err := p.cmd.Process.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.cmd.Wait()
|
p.cmd.Wait()
|
||||||
|
@ -240,22 +249,8 @@ func (p *initProcess) execSetns() error {
|
||||||
p.cmd.Wait()
|
p.cmd.Wait()
|
||||||
return &exec.ExitError{ProcessState: status}
|
return &exec.ExitError{ProcessState: status}
|
||||||
}
|
}
|
||||||
var pid *pid
|
|
||||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
|
||||||
p.cmd.Wait()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up the zombie parent process
|
process, err := os.FindProcess(childPid)
|
||||||
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ignore the error in case the child has already been reaped for any reason
|
|
||||||
_, _ = firstChildProcess.Wait()
|
|
||||||
|
|
||||||
process, err := os.FindProcess(pid.Pid)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -297,19 +292,47 @@ func (p *initProcess) start() error {
|
||||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||||
}
|
}
|
||||||
|
childPid, err := p.getChildPid()
|
||||||
if err := p.execSetns(); err != nil {
|
if err != nil {
|
||||||
return newSystemErrorWithCause(err, "running exec setns process for init")
|
return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the standard descriptor names before the container process
|
// Save the standard descriptor names before the container process
|
||||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||||
// we won't know at checkpoint time which file descriptor to look up.
|
// we won't know at checkpoint time which file descriptor to look up.
|
||||||
fds, err := getPipeFds(p.pid())
|
fds, err := getPipeFds(childPid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
|
||||||
}
|
}
|
||||||
p.setExternalDescriptors(fds)
|
p.setExternalDescriptors(fds)
|
||||||
|
// Do this before syncing with child so that no children
|
||||||
|
// can escape the cgroup
|
||||||
|
if err := p.manager.Apply(childPid); err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||||
|
}
|
||||||
|
if p.intelRdtManager != nil {
|
||||||
|
if err := p.intelRdtManager.Apply(childPid); err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now it's time to set up the cgroup namespace
|
||||||
|
if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
|
||||||
|
if _, err := p.parentPipe.Write([]byte{createCgroupns}); err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "sending synchronization value to init process")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for our first child to exit
|
||||||
|
if err := p.waitForChildExit(childPid); err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "waiting for our first child to exit")
|
||||||
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
// TODO: calling Destroy here should not be our responsibility
|
||||||
|
p.manager.Destroy()
|
||||||
|
}
|
||||||
|
}()
|
||||||
if err := p.createNetworkInterfaces(); err != nil {
|
if err := p.createNetworkInterfaces(); err != nil {
|
||||||
return newSystemErrorWithCause(err, "creating network interfaces")
|
return newSystemErrorWithCause(err, "creating network interfaces")
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,6 +46,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||||
return newSystemErrorWithCause(err, "preparing rootfs")
|
return newSystemErrorWithCause(err, "preparing rootfs")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
|
||||||
setupDev := needsSetupDev(config)
|
setupDev := needsSetupDev(config)
|
||||||
for _, m := range config.Mounts {
|
for _, m := range config.Mounts {
|
||||||
for _, precmd := range m.PremountCmds {
|
for _, precmd := range m.PremountCmds {
|
||||||
|
@ -53,8 +54,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||||
return newSystemErrorWithCause(err, "running premount command")
|
return newSystemErrorWithCause(err, "running premount command")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
|
||||||
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
|
|
||||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
|
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,7 +182,7 @@ func mountCmd(cmd configs.Command) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||||
var (
|
var (
|
||||||
dest = m.Destination
|
dest = m.Destination
|
||||||
)
|
)
|
||||||
|
@ -319,12 +319,33 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
||||||
Data: "mode=755",
|
Data: "mode=755",
|
||||||
PropagationFlags: m.PropagationFlags,
|
PropagationFlags: m.PropagationFlags,
|
||||||
}
|
}
|
||||||
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
|
if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, b := range binds {
|
for _, b := range binds {
|
||||||
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
|
if enableCgroupns {
|
||||||
return err
|
subsystemPath := filepath.Join(rootfs, b.Destination)
|
||||||
|
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
flags := defaultMountFlags
|
||||||
|
if m.Flags&unix.MS_RDONLY != 0 {
|
||||||
|
flags = flags | unix.MS_RDONLY
|
||||||
|
}
|
||||||
|
cgroupmount := &configs.Mount{
|
||||||
|
Source: "cgroup",
|
||||||
|
Device: "cgroup",
|
||||||
|
Destination: subsystemPath,
|
||||||
|
Flags: flags,
|
||||||
|
Data: filepath.Base(subsystemPath),
|
||||||
|
}
|
||||||
|
if err := mountNewCgroup(cgroupmount); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, mc := range merged {
|
for _, mc := range merged {
|
||||||
|
@ -862,3 +883,18 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func mountNewCgroup(m *configs.Mount) error {
|
||||||
|
var (
|
||||||
|
data = m.Data
|
||||||
|
source = m.Source
|
||||||
|
)
|
||||||
|
if data == "systemd" {
|
||||||
|
data = cgroups.CgroupNamePrefix + data
|
||||||
|
source = "systemd"
|
||||||
|
}
|
||||||
|
if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue