rootfs: make pivot_root not use a temporary directory
Namely, use an undocumented feature of pivot_root(2) where pivot_root(".", ".") is actually a feature and allows you to make the old_root be tied to your /proc/self/cwd in a way that makes unmounting easy. Thanks a lot to the LXC developers which came up with this idea first. This is the first step of many to allowing runC to work with a completely read-only rootfs. Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
parent
88b4c4851e
commit
f8e6b5af5e
|
@ -85,11 +85,6 @@ type Config struct {
|
|||
// that the parent process dies.
|
||||
ParentDeathSignal int `json:"parent_death_signal"`
|
||||
|
||||
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
|
||||
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
|
||||
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
|
||||
PivotDir string `json:"pivot_dir"`
|
||||
|
||||
// Path to a directory containing the container's root filesystem.
|
||||
Rootfs string `json:"rootfs"`
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
|
|||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.Rootfs)
|
||||
} else {
|
||||
err = pivotRoot(config.Rootfs, config.PivotDir)
|
||||
err = pivotRoot(config.Rootfs)
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "jailing process inside rootfs")
|
||||
|
@ -563,48 +563,66 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func pivotRoot(rootfs, pivotBaseDir string) (err error) {
|
||||
if pivotBaseDir == "" {
|
||||
pivotBaseDir = "/"
|
||||
}
|
||||
tmpDir := filepath.Join(rootfs, pivotBaseDir)
|
||||
if err := os.MkdirAll(tmpDir, 0755); err != nil {
|
||||
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
|
||||
}
|
||||
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
|
||||
}
|
||||
defer func() {
|
||||
errVal := os.Remove(pivotDir)
|
||||
if err == nil {
|
||||
err = errVal
|
||||
}
|
||||
}()
|
||||
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
|
||||
// Make the parent mount private
|
||||
if err := rootfsParentMountPrivate(rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
// Try again
|
||||
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
}
|
||||
if err := syscall.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
|
||||
// pivotRoot will call pivot_root such that rootfs becomes the new root
|
||||
// filesystem, and everything else is cleaned up.
|
||||
func pivotRoot(rootfs string) error {
|
||||
// While the documentation may claim otherwise, pivot_root(".", ".") is
|
||||
// actually valid. What this results in is / being the new root but
|
||||
// /proc/self/cwd being the old root. Since we can play around with the cwd
|
||||
// with pivot_root this allows us to pivot without creating directories in
|
||||
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
|
||||
|
||||
// Make pivotDir rprivate to make sure any of the unmounts don't
|
||||
// propagate to parent.
|
||||
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
|
||||
oldroot, err := syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer syscall.Close(oldroot)
|
||||
|
||||
newroot, err := syscall.Open(rootfs, syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer syscall.Close(newroot)
|
||||
|
||||
// Change to the new root so that the pivot_root actually acts on it.
|
||||
if err := syscall.Fchdir(newroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
if err := syscall.PivotRoot(".", "."); err != nil {
|
||||
// Make the parent mount private
|
||||
if err := rootfsParentMountPrivate("."); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Try again
|
||||
if err := syscall.PivotRoot(".", "."); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Currently our "." is oldroot (according to the current kernel code).
|
||||
// However, purely for safety, we will fchdir(oldroot) since there isn't
|
||||
// really any guarantee from the kernel what /proc/self/cwd will be after a
|
||||
// pivot_root(2).
|
||||
|
||||
if err := syscall.Fchdir(oldroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make oldroot rprivate to make sure our unmounts don't propogate to the
|
||||
// host (and thus bork the machine).
|
||||
if err := syscall.Mount("", ".", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Switch back to our shiny new root.
|
||||
if err := syscall.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue