// +build linux package main import ( "encoding/json" "fmt" "os" "path/filepath" "runtime" "strings" "syscall" "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/specs" ) var specCommand = cli.Command{ Name: "spec", Usage: "create a new specification file", Action: func(context *cli.Context) { spec := specs.LinuxSpec{ Spec: specs.Spec{ Version: specs.Version, Platform: specs.Platform{ OS: runtime.GOOS, Arch: runtime.GOARCH, }, Root: specs.Root{ Path: "rootfs", Readonly: true, }, Process: specs.Process{ Terminal: true, User: specs.User{}, Args: []string{ "sh", }, Env: []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm", }, }, Hostname: "shell", Mounts: []specs.Mount{ { Type: "proc", Source: "proc", Destination: "/proc", Options: "", }, { Type: "tmpfs", Source: "tmpfs", Destination: "/dev", Options: "nosuid,strictatime,mode=755,size=65536k", }, { Type: "devpts", Source: "devpts", Destination: "/dev/pts", Options: "nosuid,noexec,newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Type: "tmpfs", Source: "shm", Destination: "/dev/shm", Options: "nosuid,noexec,nodev,mode=1777,size=65536k", }, { Type: "mqueue", Source: "mqueue", Destination: "/dev/mqueue", Options: "nosuid,noexec,nodev", }, { Type: "sysfs", Source: "sysfs", Destination: "/sys", Options: "nosuid,noexec,nodev", }, { Type: "cgroup", Source: "cgroup", Destination: "/sys/fs/cgroup", Options: "nosuid,noexec,nodev,relatime,ro", }, }, }, Linux: specs.Linux{ Namespaces: []specs.Namespace{ { Type: "pid", }, { Type: "network", }, { Type: "ipc", }, { Type: "uts", }, { Type: "mount", }, }, Capabilities: []string{ "AUDIT_WRITE", "KILL", "NET_BIND_SERVICE", }, Devices: []string{ "null", "random", "full", "tty", "zero", "urandom", }, Resources: specs.Resources{ Memory: specs.Memory{ Swappiness: -1, }, }, }, } data, err := json.MarshalIndent(&spec, "", "\t") if err != nil { logrus.Fatal(err) } fmt.Printf("%s", data) }, } var namespaceMapping = map[string]configs.NamespaceType{ "pid": configs.NEWPID, "network": configs.NEWNET, "mount": configs.NEWNS, "user": configs.NEWUSER, "ipc": configs.NEWIPC, "uts": configs.NEWUTS, } // loadSpec loads the specification from the provided path. // If the path is empty then the default path will be "config.json" func loadSpec(path string) (*specs.LinuxSpec, error) { if path == "" { path = "config.json" } f, err := os.Open(path) if err != nil { if os.IsNotExist(err) { return nil, fmt.Errorf("JSON specification file for %s not found", path) } return nil, err } defer f.Close() var s *specs.LinuxSpec if err := json.NewDecoder(f).Decode(&s); err != nil { return nil, err } return s, checkSpecVersion(s) } // checkSpecVersion makes sure that the spec version matches runc's while we are in the initial // development period. It is better to hard fail than have missing fields or options in the spec. func checkSpecVersion(s *specs.LinuxSpec) error { if s.Version != specs.Version { return fmt.Errorf("spec version is not compatible with implemented version %q: spec %q", specs.Version, s.Version) } return nil } func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec) (*configs.Config, error) { cwd, err := os.Getwd() if err != nil { return nil, err } rootfsPath := spec.Root.Path if !filepath.IsAbs(rootfsPath) { rootfsPath = filepath.Join(cwd, rootfsPath) } config := &configs.Config{ Rootfs: rootfsPath, Capabilities: spec.Linux.Capabilities, Readonlyfs: spec.Root.Readonly, Hostname: spec.Hostname, Privatefs: true, } for _, ns := range spec.Linux.Namespaces { t, exists := namespaceMapping[ns.Type] if !exists { return nil, fmt.Errorf("namespace %q does not exist", ns) } config.Namespaces.Add(t, ns.Path) } if config.Namespaces.Contains(configs.NEWNET) { config.Networks = []*configs.Network{ { Type: "loopback", }, } } for _, m := range spec.Mounts { config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m)) } if err := createDevices(spec, config); err != nil { return nil, err } if err := setupUserNamespace(spec, config); err != nil { return nil, err } c, err := createCgroupConfig(cgroupName, spec, config.Devices) if err != nil { return nil, err } config.Cgroups = c if config.Readonlyfs { setReadonly(config) config.MaskPaths = []string{ "/proc/kcore", } config.ReadonlyPaths = []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", } } config.Sysctl = spec.Linux.Sysctl return config, nil } func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { flags, data := parseMountOptions(m.Options) source := m.Source if m.Type == "bind" { if !filepath.IsAbs(source) { source = filepath.Join(cwd, m.Source) } } return &configs.Mount{ Device: m.Type, Source: source, Destination: m.Destination, Data: data, Flags: flags, } } func createCgroupConfig(name string, spec *specs.LinuxSpec, devices []*configs.Device) (*configs.Cgroup, error) { myCgroupPath, err := cgroups.GetThisCgroupDir("devices") if err != nil { return nil, err } c := &configs.Cgroup{ Name: name, Parent: myCgroupPath, AllowedDevices: append(devices, allowedDevices...), } r := spec.Linux.Resources c.Memory = r.Memory.Limit c.MemoryReservation = r.Memory.Reservation c.MemorySwap = r.Memory.Swap c.KernelMemory = r.Memory.Kernel c.MemorySwappiness = r.Memory.Swappiness c.CpuShares = r.CPU.Shares c.CpuQuota = r.CPU.Quota c.CpuPeriod = r.CPU.Period c.CpuRtRuntime = r.CPU.RealtimeRuntime c.CpuRtPeriod = r.CPU.RealtimePeriod c.CpusetCpus = r.CPU.Cpus c.CpusetMems = r.CPU.Mems c.BlkioThrottleReadBpsDevice = r.BlockIO.ThrottleReadBpsDevice c.BlkioThrottleWriteBpsDevice = r.BlockIO.ThrottleWriteBpsDevice c.BlkioThrottleReadIOpsDevice = r.BlockIO.ThrottleReadIOpsDevice c.BlkioThrottleWriteIOpsDevice = r.BlockIO.ThrottleWriteIOpsDevice c.BlkioWeight = r.BlockIO.Weight c.BlkioWeightDevice = r.BlockIO.WeightDevice for _, l := range r.HugepageLimits { c.HugetlbLimit = append(c.HugetlbLimit, &configs.HugepageLimit{ Pagesize: l.Pagesize, Limit: l.Limit, }) } c.OomKillDisable = r.DisableOOMKiller c.NetClsClassid = r.Network.ClassID for _, m := range r.Network.Priorities { c.NetPrioIfpriomap = append(c.NetPrioIfpriomap, &configs.IfPrioMap{ Interface: m.Name, Priority: m.Priority, }) } return c, nil } func createDevices(spec *specs.LinuxSpec, config *configs.Config) error { for _, name := range spec.Linux.Devices { d, err := devices.DeviceFromPath(filepath.Join("/dev", name), "rwm") if err != nil { return err } config.Devices = append(config.Devices, d) } return nil } func setReadonly(config *configs.Config) { for _, m := range config.Mounts { if m.Device == "sysfs" { m.Flags |= syscall.MS_RDONLY } } } func setupUserNamespace(spec *specs.LinuxSpec, config *configs.Config) error { if len(spec.Linux.UIDMappings) == 0 { return nil } config.Namespaces.Add(configs.NEWUSER, "") create := func(m specs.IDMapping) configs.IDMap { return configs.IDMap{ HostID: int(m.HostID), ContainerID: int(m.ContainerID), Size: int(m.Size), } } for _, m := range spec.Linux.UIDMappings { config.UidMappings = append(config.UidMappings, create(m)) } for _, m := range spec.Linux.GIDMappings { config.GidMappings = append(config.GidMappings, create(m)) } rootUID, err := config.HostUID() if err != nil { return err } rootGID, err := config.HostGID() if err != nil { return err } for _, node := range config.Devices { node.Uid = uint32(rootUID) node.Gid = uint32(rootGID) } return nil } // parseMountOptions parses the string and returns the flags and any mount data that // it contains. func parseMountOptions(options string) (int, string) { var ( flag int data []string ) flags := map[string]struct { clear bool flag int }{ "async": {true, syscall.MS_SYNCHRONOUS}, "atime": {true, syscall.MS_NOATIME}, "bind": {false, syscall.MS_BIND}, "defaults": {false, 0}, "dev": {true, syscall.MS_NODEV}, "diratime": {true, syscall.MS_NODIRATIME}, "dirsync": {false, syscall.MS_DIRSYNC}, "exec": {true, syscall.MS_NOEXEC}, "mand": {false, syscall.MS_MANDLOCK}, "noatime": {false, syscall.MS_NOATIME}, "nodev": {false, syscall.MS_NODEV}, "nodiratime": {false, syscall.MS_NODIRATIME}, "noexec": {false, syscall.MS_NOEXEC}, "nomand": {true, syscall.MS_MANDLOCK}, "norelatime": {true, syscall.MS_RELATIME}, "nostrictatime": {true, syscall.MS_STRICTATIME}, "nosuid": {false, syscall.MS_NOSUID}, "private": {false, syscall.MS_PRIVATE}, "rbind": {false, syscall.MS_BIND | syscall.MS_REC}, "relatime": {false, syscall.MS_RELATIME}, "remount": {false, syscall.MS_REMOUNT}, "ro": {false, syscall.MS_RDONLY}, "rprivate": {false, syscall.MS_PRIVATE | syscall.MS_REC}, "rshared": {false, syscall.MS_SHARED | syscall.MS_REC}, "rslave": {false, syscall.MS_SLAVE | syscall.MS_REC}, "runbindable": {false, syscall.MS_UNBINDABLE | syscall.MS_REC}, "rw": {true, syscall.MS_RDONLY}, "shared": {false, syscall.MS_SHARED}, "slave": {false, syscall.MS_SLAVE}, "strictatime": {false, syscall.MS_STRICTATIME}, "suid": {true, syscall.MS_NOSUID}, "sync": {false, syscall.MS_SYNCHRONOUS}, "unbindable": {false, syscall.MS_UNBINDABLE}, } for _, o := range strings.Split(options, ",") { // If the option does not exist in the flags table or the flag // is not supported on the platform, // then it is a data value for a specific fs type if f, exists := flags[o]; exists && f.flag != 0 { if f.clear { flag &= ^f.flag } else { flag |= f.flag } } else { data = append(data, o) } } return flag, strings.Join(data, ",") }