From 3ce347c35fd45aab9da0c3f94c5c4b4e0bc49d44 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 14 May 2014 15:21:44 -0700 Subject: [PATCH] Move cgroups package into libcontainer Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- cgroups/MAINTAINERS | 3 + cgroups/cgroups.go | 30 +++ cgroups/cgroups_test.go | 27 +++ cgroups/fs/apply_raw.go | 155 +++++++++++++++ cgroups/fs/blkio.go | 121 ++++++++++++ cgroups/fs/blkio_test.go | 169 ++++++++++++++++ cgroups/fs/cpu.go | 64 +++++++ cgroups/fs/cpu_test.go | 57 ++++++ cgroups/fs/cpuacct.go | 143 ++++++++++++++ cgroups/fs/cpuset.go | 108 +++++++++++ cgroups/fs/devices.go | 69 +++++++ cgroups/fs/freezer.go | 72 +++++++ cgroups/fs/memory.go | 90 +++++++++ cgroups/fs/memory_test.go | 123 ++++++++++++ cgroups/fs/perf_event.go | 24 +++ cgroups/fs/test_util.go | 75 ++++++++ cgroups/fs/utils.go | 40 ++++ cgroups/fs/utils_test.go | 68 +++++++ cgroups/systemd/apply_nosystemd.go | 17 ++ cgroups/systemd/apply_systemd.go | 296 +++++++++++++++++++++++++++++ cgroups/utils.go | 67 +++++++ container.go | 2 +- nsinit/exec.go | 6 +- nsinit/unsupported.go | 2 +- 24 files changed, 1823 insertions(+), 5 deletions(-) create mode 100644 cgroups/MAINTAINERS create mode 100644 cgroups/cgroups.go create mode 100644 cgroups/cgroups_test.go create mode 100644 cgroups/fs/apply_raw.go create mode 100644 cgroups/fs/blkio.go create mode 100644 cgroups/fs/blkio_test.go create mode 100644 cgroups/fs/cpu.go create mode 100644 cgroups/fs/cpu_test.go create mode 100644 cgroups/fs/cpuacct.go create mode 100644 cgroups/fs/cpuset.go create mode 100644 cgroups/fs/devices.go create mode 100644 cgroups/fs/freezer.go create mode 100644 cgroups/fs/memory.go create mode 100644 cgroups/fs/memory_test.go create mode 100644 cgroups/fs/perf_event.go create mode 100644 cgroups/fs/test_util.go create mode 100644 cgroups/fs/utils.go create mode 100644 cgroups/fs/utils_test.go create mode 100644 cgroups/systemd/apply_nosystemd.go create mode 100644 cgroups/systemd/apply_systemd.go create mode 100644 cgroups/utils.go diff --git a/cgroups/MAINTAINERS b/cgroups/MAINTAINERS new file mode 100644 index 00000000..30cb8bba --- /dev/null +++ b/cgroups/MAINTAINERS @@ -0,0 +1,3 @@ +Michael Crosby (@crosbymichael) +Rohit Jnagal (@rjnagal) +Victor Marmol (@vmarmol) diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go new file mode 100644 index 00000000..0f933207 --- /dev/null +++ b/cgroups/cgroups.go @@ -0,0 +1,30 @@ +package cgroups + +import ( + "errors" +) + +var ( + ErrNotFound = errors.New("mountpoint not found") +) + +type Cgroup struct { + Name string `json:"name,omitempty"` + Parent string `json:"parent,omitempty"` + + DeviceAccess bool `json:"device_access,omitempty"` // name of parent cgroup or slice + Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) + MemoryReservation int64 `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes) + MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap + CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) + CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use + Freezer string `json:"freezer,omitempty"` // set the freeze value for the process + + Slice string `json:"slice,omitempty"` // Parent slice to use for systemd +} + +type ActiveCgroup interface { + Cleanup() error +} diff --git a/cgroups/cgroups_test.go b/cgroups/cgroups_test.go new file mode 100644 index 00000000..537336a4 --- /dev/null +++ b/cgroups/cgroups_test.go @@ -0,0 +1,27 @@ +package cgroups + +import ( + "bytes" + "testing" +) + +const ( + cgroupsContents = `11:hugetlb:/ +10:perf_event:/ +9:blkio:/ +8:net_cls:/ +7:freezer:/ +6:devices:/ +5:memory:/ +4:cpuacct,cpu:/ +3:cpuset:/ +2:name=systemd:/user.slice/user-1000.slice/session-16.scope` +) + +func TestParseCgroups(t *testing.T) { + r := bytes.NewBuffer([]byte(cgroupsContents)) + _, err := parseCgroupFile("blkio", r) + if err != nil { + t.Fatal(err) + } +} diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go new file mode 100644 index 00000000..65aabcc5 --- /dev/null +++ b/cgroups/fs/apply_raw.go @@ -0,0 +1,155 @@ +package fs + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" +) + +var ( + subsystems = map[string]subsystem{ + "devices": &devicesGroup{}, + "memory": &memoryGroup{}, + "cpu": &cpuGroup{}, + "cpuset": &cpusetGroup{}, + "cpuacct": &cpuacctGroup{}, + "blkio": &blkioGroup{}, + "perf_event": &perfEventGroup{}, + "freezer": &freezerGroup{}, + } +) + +type subsystem interface { + Set(*data) error + Remove(*data) error + Stats(*data) (map[string]float64, error) +} + +type data struct { + root string + cgroup string + c *cgroups.Cgroup + pid int +} + +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + // We have two implementation of cgroups support, one is based on + // systemd and the dbus api, and one is based on raw cgroup fs operations + // following the pre-single-writer model docs at: + // http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/ + // + // we can pick any subsystem to find the root + + cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + d := &data{ + root: cgroupRoot, + cgroup: cgroup, + c: c, + pid: pid, + } + for _, sys := range subsystems { + if err := sys.Set(d); err != nil { + d.Cleanup() + return nil, err + } + } + return d, nil +} + +func GetStats(c *cgroups.Cgroup, subsystem string, pid int) (map[string]float64, error) { + cgroupRoot, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + cgroupRoot = filepath.Dir(cgroupRoot) + + if _, err := os.Stat(cgroupRoot); err != nil { + return nil, fmt.Errorf("cgroups fs not found") + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + d := &data{ + root: cgroupRoot, + cgroup: cgroup, + c: c, + pid: pid, + } + sys, exists := subsystems[subsystem] + if !exists { + return nil, fmt.Errorf("subsystem %s does not exist", subsystem) + } + return sys.Stats(d) +} + +func (raw *data) parent(subsystem string) (string, error) { + initPath, err := cgroups.GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + return filepath.Join(raw.root, subsystem, initPath), nil +} + +func (raw *data) path(subsystem string) (string, error) { + parent, err := raw.parent(subsystem) + if err != nil { + return "", err + } + return filepath.Join(parent, raw.cgroup), nil +} + +func (raw *data) join(subsystem string) (string, error) { + path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, "cgroup.procs", strconv.Itoa(raw.pid)); err != nil { + return "", err + } + return path, nil +} + +func (raw *data) Cleanup() error { + for _, sys := range subsystems { + sys.Remove(raw) + } + return nil +} + +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} diff --git a/cgroups/fs/blkio.go b/cgroups/fs/blkio.go new file mode 100644 index 00000000..213d250d --- /dev/null +++ b/cgroups/fs/blkio.go @@ -0,0 +1,121 @@ +package fs + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" +) + +type blkioGroup struct { +} + +func (s *blkioGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("blkio"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *blkioGroup) Remove(d *data) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ +func (s *blkioGroup) Stats(d *data) (map[string]float64, error) { + var ( + paramData = make(map[string]float64) + params = []string{ + "io_service_bytes_recursive", + "io_serviced_recursive", + "io_queued_recursive", + } + ) + + path, err := d.path("blkio") + if err != nil { + return nil, err + } + + k, v, err := s.getSectors(path) + if err != nil { + return nil, err + } + paramData[fmt.Sprintf("blkio.sectors_recursive:%s", k)] = v + + for _, param := range params { + f, err := os.Open(filepath.Join(path, fmt.Sprintf("blkio.%s", param))) + if err != nil { + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.Fields(sc.Text()) + switch len(fields) { + case 3: + v, err := strconv.ParseFloat(fields[2], 64) + if err != nil { + return nil, err + } + paramData[fmt.Sprintf("%s:%s:%s", param, fields[0], fields[1])] = v + case 2: + // this is the total line, skip + default: + return nil, ErrNotValidFormat + } + } + } + return paramData, nil +} + +func (s *blkioGroup) getSectors(path string) (string, float64, error) { + f, err := os.Open(filepath.Join(path, "blkio.sectors_recursive")) + if err != nil { + return "", 0, err + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + return "", 0, err + } + return getCgroupParamKeyValue(string(data)) +} diff --git a/cgroups/fs/blkio_test.go b/cgroups/fs/blkio_test.go new file mode 100644 index 00000000..5279ac43 --- /dev/null +++ b/cgroups/fs/blkio_test.go @@ -0,0 +1,169 @@ +package fs + +import ( + "testing" +) + +const ( + sectorsRecursiveContents = `8:0 1024` + serviceBytesRecursiveContents = `8:0 Read 100 +8:0 Write 400 +8:0 Sync 200 +8:0 Async 300 +8:0 Total 500 +Total 500` + servicedRecursiveContents = `8:0 Read 10 +8:0 Write 40 +8:0 Sync 20 +8:0 Async 30 +8:0 Total 50 +Total 50` + queuedRecursiveContents = `8:0 Read 1 +8:0 Write 4 +8:0 Sync 2 +8:0 Async 3 +8:0 Total 5 +Total 5` +) + +func TestBlkioStats(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &blkioGroup{} + stats, err := blkio.Stats(helper.CgroupData) + if err != nil { + t.Fatal(err) + } + + // Verify expected stats. + expectedStats := map[string]float64{ + "blkio.sectors_recursive:8:0": 1024.0, + + // Serviced bytes. + "io_service_bytes_recursive:8:0:Read": 100.0, + "io_service_bytes_recursive:8:0:Write": 400.0, + "io_service_bytes_recursive:8:0:Sync": 200.0, + "io_service_bytes_recursive:8:0:Async": 300.0, + "io_service_bytes_recursive:8:0:Total": 500.0, + + // Serviced requests. + "io_serviced_recursive:8:0:Read": 10.0, + "io_serviced_recursive:8:0:Write": 40.0, + "io_serviced_recursive:8:0:Sync": 20.0, + "io_serviced_recursive:8:0:Async": 30.0, + "io_serviced_recursive:8:0:Total": 50.0, + + // Queued requests. + "io_queued_recursive:8:0:Read": 1.0, + "io_queued_recursive:8:0:Write": 4.0, + "io_queued_recursive:8:0:Sync": 2.0, + "io_queued_recursive:8:0:Async": 3.0, + "io_queued_recursive:8:0:Total": 5.0, + } + expectStats(t, expectedStats, stats) +} + +func TestBlkioStatsNoSectorsFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + }) + + blkio := &blkioGroup{} + _, err := blkio.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsNoServiceBytesFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &blkioGroup{} + _, err := blkio.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsNoServicedFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &blkioGroup{} + _, err := blkio.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsNoQueuedFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &blkioGroup{} + _, err := blkio.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "8:0 Read 100 100", + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &blkioGroup{} + _, err := blkio.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsUnexpectedFieldType(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "8:0 Read Write", + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &blkioGroup{} + _, err := blkio.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} diff --git a/cgroups/fs/cpu.go b/cgroups/fs/cpu.go new file mode 100644 index 00000000..6a7f66c7 --- /dev/null +++ b/cgroups/fs/cpu.go @@ -0,0 +1,64 @@ +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" +) + +type cpuGroup struct { +} + +func (s *cpuGroup) Set(d *data) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := d.join("cpu") + if err != nil { + return err + } + if d.c.CpuShares != 0 { + if err := writeFile(dir, "cpu.shares", strconv.FormatInt(d.c.CpuShares, 10)); err != nil { + return err + } + } + if d.c.CpuPeriod != 0 { + if err := writeFile(dir, "cpu.cfs_period_us", strconv.FormatInt(d.c.CpuPeriod, 10)); err != nil { + return err + } + } + if d.c.CpuQuota != 0 { + if err := writeFile(dir, "cpu.cfs_quota_us", strconv.FormatInt(d.c.CpuQuota, 10)); err != nil { + return err + } + } + return nil +} + +func (s *cpuGroup) Remove(d *data) error { + return removePath(d.path("cpu")) +} + +func (s *cpuGroup) Stats(d *data) (map[string]float64, error) { + paramData := make(map[string]float64) + path, err := d.path("cpu") + if err != nil { + return nil, err + } + + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return nil, err + } + paramData[t] = v + } + return paramData, nil +} diff --git a/cgroups/fs/cpu_test.go b/cgroups/fs/cpu_test.go new file mode 100644 index 00000000..698ae921 --- /dev/null +++ b/cgroups/fs/cpu_test.go @@ -0,0 +1,57 @@ +package fs + +import ( + "testing" +) + +func TestCpuStats(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + cpuStatContent := `nr_periods 2000 + nr_throttled 200 + throttled_time 42424242424` + helper.writeFileContents(map[string]string{ + "cpu.stat": cpuStatContent, + }) + + cpu := &cpuGroup{} + stats, err := cpu.Stats(helper.CgroupData) + if err != nil { + t.Fatal(err) + } + + expected_stats := map[string]float64{ + "nr_periods": 2000.0, + "nr_throttled": 200.0, + "throttled_time": 42424242424.0, + } + expectStats(t, expected_stats, stats) +} + +func TestNoCpuStatFile(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + cpu := &cpuGroup{} + _, err := cpu.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected to fail, but did not.") + } +} + +func TestInvalidCpuStat(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + cpuStatContent := `nr_periods 2000 + nr_throttled 200 + throttled_time fortytwo` + helper.writeFileContents(map[string]string{ + "cpu.stat": cpuStatContent, + }) + + cpu := &cpuGroup{} + _, err := cpu.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failed stat parsing.") + } +} diff --git a/cgroups/fs/cpuacct.go b/cgroups/fs/cpuacct.go new file mode 100644 index 00000000..3382440d --- /dev/null +++ b/cgroups/fs/cpuacct.go @@ -0,0 +1,143 @@ +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "time" + + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" + "github.com/dotcloud/docker/pkg/system" +) + +var ( + cpuCount = float64(runtime.NumCPU()) + clockTicks = float64(system.GetClockTicks()) +) + +type cpuacctGroup struct { +} + +func (s *cpuacctGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *cpuacctGroup) Remove(d *data) error { + return removePath(d.path("cpuacct")) +} + +func (s *cpuacctGroup) Stats(d *data) (map[string]float64, error) { + var ( + startCpu, lastCpu, startSystem, lastSystem, startUsage, lastUsage float64 + percentage float64 + paramData = make(map[string]float64) + ) + path, err := d.path("cpuacct") + if startCpu, err = s.getCpuUsage(d, path); err != nil { + return nil, err + } + if startSystem, err = s.getSystemCpuUsage(d); err != nil { + return nil, err + } + startUsageTime := time.Now() + if startUsage, err = getCgroupParamFloat64(path, "cpuacct.usage"); err != nil { + return nil, err + } + // sample for 100ms + time.Sleep(100 * time.Millisecond) + if lastCpu, err = s.getCpuUsage(d, path); err != nil { + return nil, err + } + if lastSystem, err = s.getSystemCpuUsage(d); err != nil { + return nil, err + } + usageSampleDuration := time.Since(startUsageTime) + if lastUsage, err = getCgroupParamFloat64(path, "cpuacct.usage"); err != nil { + return nil, err + } + + var ( + deltaProc = lastCpu - startCpu + deltaSystem = lastSystem - startSystem + deltaUsage = lastUsage - startUsage + ) + if deltaSystem > 0.0 { + percentage = ((deltaProc / deltaSystem) * clockTicks) * cpuCount + } + // NOTE: a percentage over 100% is valid for POSIX because that means the + // processes is using multiple cores + paramData["percentage"] = percentage + + // Delta usage is in nanoseconds of CPU time so get the usage (in cores) over the sample time. + paramData["usage"] = deltaUsage / float64(usageSampleDuration.Nanoseconds()) + return paramData, nil +} + +func (s *cpuacctGroup) getProcStarttime(d *data) (float64, error) { + rawStart, err := system.GetProcessStartTime(d.pid) + if err != nil { + return 0, err + } + return strconv.ParseFloat(rawStart, 64) +} + +func (s *cpuacctGroup) getSystemCpuUsage(d *data) (float64, error) { + + f, err := os.Open("/proc/stat") + if err != nil { + return 0, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + parts := strings.Fields(sc.Text()) + switch parts[0] { + case "cpu": + if len(parts) < 8 { + return 0, fmt.Errorf("invalid number of cpu fields") + } + + var total float64 + for _, i := range parts[1:8] { + v, err := strconv.ParseFloat(i, 64) + if err != nil { + return 0.0, fmt.Errorf("Unable to convert value %s to float: %s", i, err) + } + total += v + } + return total, nil + default: + continue + } + } + return 0, fmt.Errorf("invalid stat format") +} + +func (s *cpuacctGroup) getCpuUsage(d *data, path string) (float64, error) { + cpuTotal := 0.0 + f, err := os.Open(filepath.Join(path, "cpuacct.stat")) + if err != nil { + return 0.0, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + _, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return 0.0, err + } + // set the raw data in map + cpuTotal += v + } + return cpuTotal, nil +} diff --git a/cgroups/fs/cpuset.go b/cgroups/fs/cpuset.go new file mode 100644 index 00000000..f9f65ba4 --- /dev/null +++ b/cgroups/fs/cpuset.go @@ -0,0 +1,108 @@ +package fs + +import ( + "bytes" + "io/ioutil" + "os" + "path/filepath" + "strconv" +) + +type cpusetGroup struct { +} + +func (s *cpusetGroup) Set(d *data) error { + // we don't want to join this cgroup unless it is specified + if d.c.CpusetCpus != "" { + dir, err := d.path("cpuset") + if err != nil { + return err + } + if err := s.ensureParent(dir); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := writeFile(dir, "cgroup.procs", strconv.Itoa(d.pid)); err != nil { + return err + } + if err := writeFile(dir, "cpuset.cpus", d.c.CpusetCpus); err != nil { + return err + } + } + return nil +} + +func (s *cpusetGroup) Remove(d *data) error { + return removePath(d.path("cpuset")) +} + +func (s *cpusetGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} + +func (s *cpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent ensures that the parent directory of current is created +// with the proper cpus and mems files copied from it's parent if the values +// are a file with a new line char +func (s *cpusetGroup) ensureParent(current string) error { + parent := filepath.Dir(current) + + if _, err := os.Stat(parent); err != nil { + if !os.IsNotExist(err) { + return err + } + + if err := s.ensureParent(parent); err != nil { + return err + } + } + + if err := os.MkdirAll(current, 0755); err != nil && !os.IsExist(err) { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *cpusetGroup) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *cpusetGroup) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} diff --git a/cgroups/fs/devices.go b/cgroups/fs/devices.go new file mode 100644 index 00000000..a2f91eda --- /dev/null +++ b/cgroups/fs/devices.go @@ -0,0 +1,69 @@ +package fs + +import ( + "os" +) + +type devicesGroup struct { +} + +func (s *devicesGroup) Set(d *data) error { + dir, err := d.join("devices") + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + if !d.c.DeviceAccess { + if err := writeFile(dir, "devices.deny", "a"); err != nil { + return err + } + + allow := []string{ + // allow mknod for any device + "c *:* m", + "b *:* m", + + // /dev/null, zero, full + "c 1:3 rwm", + "c 1:5 rwm", + "c 1:7 rwm", + + // consoles + "c 5:1 rwm", + "c 5:0 rwm", + "c 4:0 rwm", + "c 4:1 rwm", + + // /dev/urandom,/dev/random + "c 1:9 rwm", + "c 1:8 rwm", + + // /dev/pts/ - pts namespaces are "coming soon" + "c 136:* rwm", + "c 5:2 rwm", + + // tuntap + "c 10:200 rwm", + } + + for _, val := range allow { + if err := writeFile(dir, "devices.allow", val); err != nil { + return err + } + } + } + return nil +} + +func (s *devicesGroup) Remove(d *data) error { + return removePath(d.path("devices")) +} + +func (s *devicesGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/cgroups/fs/freezer.go b/cgroups/fs/freezer.go new file mode 100644 index 00000000..f3b1985a --- /dev/null +++ b/cgroups/fs/freezer.go @@ -0,0 +1,72 @@ +package fs + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" +) + +type freezerGroup struct { +} + +func (s *freezerGroup) Set(d *data) error { + dir, err := d.join("freezer") + if err != nil { + if err != cgroups.ErrNotFound { + return err + } + return nil + } + + if d.c.Freezer != "" { + if err := writeFile(dir, "freezer.state", d.c.Freezer); err != nil { + return err + } + } + return nil +} + +func (s *freezerGroup) Remove(d *data) error { + return removePath(d.path("freezer")) +} + +func (s *freezerGroup) Stats(d *data) (map[string]float64, error) { + var ( + paramData = make(map[string]float64) + params = []string{ + "parent_freezing", + "self_freezing", + // comment out right now because this is string "state", + } + ) + + path, err := d.path("freezer") + if err != nil { + return nil, err + } + + for _, param := range params { + f, err := os.Open(filepath.Join(path, fmt.Sprintf("freezer.%s", param))) + if err != nil { + return nil, err + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + + v, err := strconv.ParseFloat(strings.TrimSuffix(string(data), "\n"), 64) + if err != nil { + return nil, err + } + paramData[param] = v + } + return paramData, nil +} diff --git a/cgroups/fs/memory.go b/cgroups/fs/memory.go new file mode 100644 index 00000000..837640c0 --- /dev/null +++ b/cgroups/fs/memory.go @@ -0,0 +1,90 @@ +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" +) + +type memoryGroup struct { +} + +func (s *memoryGroup) Set(d *data) error { + dir, err := d.join("memory") + // only return an error for memory if it was not specified + if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) { + return err + } + defer func() { + if err != nil { + os.RemoveAll(dir) + } + }() + + // Only set values if some config was specified. + if d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0 { + if d.c.Memory != 0 { + if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(d.c.Memory, 10)); err != nil { + return err + } + } + if d.c.MemoryReservation != 0 { + if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(d.c.MemoryReservation, 10)); err != nil { + return err + } + } + // By default, MemorySwap is set to twice the size of RAM. + // If you want to omit MemorySwap, set it to `-1'. + if d.c.MemorySwap != -1 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil { + return err + } + } + } + return nil +} + +func (s *memoryGroup) Remove(d *data) error { + return removePath(d.path("memory")) +} + +func (s *memoryGroup) Stats(d *data) (map[string]float64, error) { + paramData := make(map[string]float64) + path, err := d.path("memory") + if err != nil { + return nil, err + } + + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + return nil, err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return nil, err + } + paramData[t] = v + } + + // Set memory usage and max historical usage. + params := []string{ + "usage_in_bytes", + "max_usage_in_bytes", + } + for _, param := range params { + value, err := getCgroupParamFloat64(path, fmt.Sprintf("memory.%s", param)) + if err != nil { + return nil, err + } + paramData[param] = value + } + + return paramData, nil +} diff --git a/cgroups/fs/memory_test.go b/cgroups/fs/memory_test.go new file mode 100644 index 00000000..6c1fb735 --- /dev/null +++ b/cgroups/fs/memory_test.go @@ -0,0 +1,123 @@ +package fs + +import ( + "testing" +) + +const ( + memoryStatContents = `cache 512 +rss 1024` + memoryUsageContents = "2048\n" + memoryMaxUsageContents = "4096\n" +) + +func TestMemoryStats(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &memoryGroup{} + stats, err := memory.Stats(helper.CgroupData) + if err != nil { + t.Fatal(err) + } + expectedStats := map[string]float64{"cache": 512.0, "rss": 1024.0, "usage_in_bytes": 2048.0, "max_usage_in_bytes": 4096.0} + expectStats(t, expectedStats, stats) +} + +func TestMemoryStatsNoStatFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &memoryGroup{} + _, err := memory.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsNoUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &memoryGroup{} + _, err := memory.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsNoMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + }) + + memory := &memoryGroup{} + _, err := memory.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadStatFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": "rss rss", + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &memoryGroup{} + _, err := memory.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "bad", + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &memoryGroup{} + _, err := memory.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": "bad", + }) + + memory := &memoryGroup{} + _, err := memory.Stats(helper.CgroupData) + if err == nil { + t.Fatal("Expected failure") + } +} diff --git a/cgroups/fs/perf_event.go b/cgroups/fs/perf_event.go new file mode 100644 index 00000000..063cec4b --- /dev/null +++ b/cgroups/fs/perf_event.go @@ -0,0 +1,24 @@ +package fs + +import ( + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" +) + +type perfEventGroup struct { +} + +func (s *perfEventGroup) Set(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && err != cgroups.ErrNotFound { + return err + } + return nil +} + +func (s *perfEventGroup) Remove(d *data) error { + return removePath(d.path("perf_event")) +} + +func (s *perfEventGroup) Stats(d *data) (map[string]float64, error) { + return nil, ErrNotSupportStat +} diff --git a/cgroups/fs/test_util.go b/cgroups/fs/test_util.go new file mode 100644 index 00000000..11b90b21 --- /dev/null +++ b/cgroups/fs/test_util.go @@ -0,0 +1,75 @@ +/* +Utility for testing cgroup operations. + +Creates a mock of the cgroup filesystem for the duration of the test. +*/ +package fs + +import ( + "fmt" + "io/ioutil" + "log" + "os" + "testing" +) + +type cgroupTestUtil struct { + // data to use in tests. + CgroupData *data + + // Path to the mock cgroup directory. + CgroupPath string + + // Temporary directory to store mock cgroup filesystem. + tempDir string + t *testing.T +} + +// Creates a new test util for the specified subsystem +func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { + d := &data{} + tempDir, err := ioutil.TempDir("", fmt.Sprintf("%s_cgroup_test", subsystem)) + if err != nil { + t.Fatal(err) + } + d.root = tempDir + testCgroupPath, err := d.path(subsystem) + if err != nil { + t.Fatal(err) + } + + // Ensure the full mock cgroup path exists. + err = os.MkdirAll(testCgroupPath, 0755) + if err != nil { + t.Fatal(err) + } + return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t} +} + +func (c *cgroupTestUtil) cleanup() { + os.RemoveAll(c.tempDir) +} + +// Write the specified contents on the mock of the specified cgroup files. +func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) { + for file, contents := range fileContents { + err := writeFile(c.CgroupPath, file, contents) + if err != nil { + c.t.Fatal(err) + } + } +} + +// Expect the specified stats. +func expectStats(t *testing.T, expected, actual map[string]float64) { + for stat, expectedValue := range expected { + actualValue, ok := actual[stat] + if !ok { + log.Printf("Expected stat %s to exist: %s", stat, actual) + t.Fail() + } else if actualValue != expectedValue { + log.Printf("Expected stats %s to have value %f but had %f instead", stat, expectedValue, actualValue) + t.Fail() + } + } +} diff --git a/cgroups/fs/utils.go b/cgroups/fs/utils.go new file mode 100644 index 00000000..8be65c97 --- /dev/null +++ b/cgroups/fs/utils.go @@ -0,0 +1,40 @@ +package fs + +import ( + "errors" + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +var ( + ErrNotSupportStat = errors.New("stats are not supported for subsystem") + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Parses a cgroup param and returns as name, value +// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, float64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return "", 0.0, fmt.Errorf("Unable to convert param value to float: %s", err) + } + return parts[0], value, nil + default: + return "", 0.0, ErrNotValidFormat + } +} + +// Gets a single float64 value from the specified cgroup file. +func getCgroupParamFloat64(cgroupPath, cgroupFile string) (float64, error) { + contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) + if err != nil { + return -1.0, err + } + return strconv.ParseFloat(strings.TrimSpace(string(contents)), 64) +} diff --git a/cgroups/fs/utils_test.go b/cgroups/fs/utils_test.go new file mode 100644 index 00000000..c8f1b017 --- /dev/null +++ b/cgroups/fs/utils_test.go @@ -0,0 +1,68 @@ +package fs + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +const ( + cgroupFile = "cgroup.file" + floatValue = 2048.0 + floatString = "2048" +) + +func TestGetCgroupParamsFloat64(t *testing.T) { + // Setup tempdir. + tempDir, err := ioutil.TempDir("", "cgroup_utils_test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + tempFile := filepath.Join(tempDir, cgroupFile) + + // Success. + err = ioutil.WriteFile(tempFile, []byte(floatString), 0755) + if err != nil { + t.Fatal(err) + } + value, err := getCgroupParamFloat64(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != floatValue { + t.Fatalf("Expected %f to equal %f", value, floatValue) + } + + // Success with new line. + err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755) + if err != nil { + t.Fatal(err) + } + value, err = getCgroupParamFloat64(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != floatValue { + t.Fatalf("Expected %f to equal %f", value, floatValue) + } + + // Not a float. + err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755) + if err != nil { + t.Fatal(err) + } + _, err = getCgroupParamFloat64(tempDir, cgroupFile) + if err == nil { + t.Fatal("Expecting error, got none") + } + + // Unknown file. + err = os.Remove(tempFile) + if err != nil { + t.Fatal(err) + } + _, err = getCgroupParamFloat64(tempDir, cgroupFile) + if err == nil { + t.Fatal("Expecting error, got none") + } +} diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 00000000..302fab77 --- /dev/null +++ b/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,17 @@ +// +build !linux + +package systemd + +import ( + "fmt" + + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" +) + +func UseSystemd() bool { + return false +} + +func Apply(c *Cgroup, pid int) (cgroups.ActiveCgroup, error) { + return nil, fmt.Errorf("Systemd not supported") +} diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go new file mode 100644 index 00000000..52940f6f --- /dev/null +++ b/cgroups/systemd/apply_systemd.go @@ -0,0 +1,296 @@ +// +build linux + +package systemd + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + systemd1 "github.com/coreos/go-systemd/dbus" + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" + "github.com/dotcloud/docker/pkg/systemd" + "github.com/godbus/dbus" +) + +type systemdCgroup struct { + cleanupDirs []string +} + +type DeviceAllow struct { + Node string + Permissions string +} + +var ( + connLock sync.Mutex + theConn *systemd1.Conn + hasStartTransientUnit bool +) + +func UseSystemd() bool { + if !systemd.SdBooted() { + return false + } + + connLock.Lock() + defer connLock.Unlock() + + if theConn == nil { + var err error + theConn, err = systemd1.New() + if err != nil { + return false + } + + // Assume we have StartTransientUnit + hasStartTransientUnit = true + + // But if we get UnknownMethod error we don't + if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { + hasStartTransientUnit = false + } + } + } + } + return hasStartTransientUnit +} + +func getIfaceForUnit(unitName string) string { + if strings.HasSuffix(unitName, ".scope") { + return "Scope" + } + if strings.HasSuffix(unitName, ".service") { + return "Service" + } + return "Unit" +} + +type cgroupArg struct { + File string + Value string +} + +func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { + var ( + unitName = c.Parent + "-" + c.Name + ".scope" + slice = "system.slice" + properties []systemd1.Property + cpuArgs []cgroupArg + cpusetArgs []cgroupArg + memoryArgs []cgroupArg + res systemdCgroup + ) + + // First set up things not supported by systemd + + // -1 disables memorySwap + if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { + memorySwap := c.MemorySwap + + if memorySwap == 0 { + // By default, MemorySwap is set to twice the size of RAM. + memorySwap = c.Memory * 2 + } + + memoryArgs = append(memoryArgs, cgroupArg{"memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)}) + } + + if c.CpusetCpus != "" { + cpusetArgs = append(cpusetArgs, cgroupArg{"cpuset.cpus", c.CpusetCpus}) + } + + if c.Slice != "" { + slice = c.Slice + } + + properties = append(properties, + systemd1.Property{"Slice", dbus.MakeVariant(slice)}, + systemd1.Property{"Description", dbus.MakeVariant("docker container " + c.Name)}, + systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}, + ) + + if !c.DeviceAccess { + properties = append(properties, + systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")}, + systemd1.Property{"DeviceAllow", dbus.MakeVariant([]DeviceAllow{ + {"/dev/null", "rwm"}, + {"/dev/zero", "rwm"}, + {"/dev/full", "rwm"}, + {"/dev/random", "rwm"}, + {"/dev/urandom", "rwm"}, + {"/dev/tty", "rwm"}, + {"/dev/console", "rwm"}, + {"/dev/tty0", "rwm"}, + {"/dev/tty1", "rwm"}, + {"/dev/pts/ptmx", "rwm"}, + // There is no way to add /dev/pts/* here atm, so we hack this manually below + // /dev/pts/* (how to add this?) + // Same with tuntap, which doesn't exist as a node most of the time + })}) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + systemd1.Property{"MemoryAccounting", dbus.MakeVariant(true)}, + systemd1.Property{"CPUAccounting", dbus.MakeVariant(true)}, + systemd1.Property{"BlockIOAccounting", dbus.MakeVariant(true)}) + + if c.Memory != 0 { + properties = append(properties, + systemd1.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))}) + } + // TODO: MemoryReservation and MemorySwap not available in systemd + + if c.CpuShares != 0 { + properties = append(properties, + systemd1.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))}) + } + + if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { + return nil, err + } + + // To work around the lack of /dev/pts/* support above we need to manually add these + // so, ask systemd for the cgroup used + props, err := theConn.GetUnitTypeProperties(unitName, getIfaceForUnit(unitName)) + if err != nil { + return nil, err + } + + cgroup := props["ControlGroup"].(string) + + if !c.DeviceAccess { + mountpoint, err := cgroups.FindCgroupMountpoint("devices") + if err != nil { + return nil, err + } + + path := filepath.Join(mountpoint, cgroup) + + // /dev/pts/* + if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte("c 136:* rwm"), 0700); err != nil { + return nil, err + } + // tuntap + if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte("c 10:200 rwm"), 0700); err != nil { + return nil, err + } + } + + if len(cpuArgs) != 0 { + mountpoint, err := cgroups.FindCgroupMountpoint("cpu") + if err != nil { + return nil, err + } + + path := filepath.Join(mountpoint, cgroup) + + for _, arg := range cpuArgs { + if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { + return nil, err + } + } + } + + if len(memoryArgs) != 0 { + mountpoint, err := cgroups.FindCgroupMountpoint("memory") + if err != nil { + return nil, err + } + + path := filepath.Join(mountpoint, cgroup) + + for _, arg := range memoryArgs { + if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { + return nil, err + } + } + } + + if len(cpusetArgs) != 0 { + // systemd does not atm set up the cpuset controller, so we must manually + // join it. Additionally that is a very finicky controller where each + // level must have a full setup as the default for a new directory is "no cpus", + // so we avoid using any hierarchies here, creating a toplevel directory. + mountpoint, err := cgroups.FindCgroupMountpoint("cpuset") + if err != nil { + return nil, err + } + initPath, err := cgroups.GetInitCgroupDir("cpuset") + if err != nil { + return nil, err + } + + rootPath := filepath.Join(mountpoint, initPath) + + path := filepath.Join(mountpoint, initPath, c.Parent+"-"+c.Name) + + res.cleanupDirs = append(res.cleanupDirs, path) + + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return nil, err + } + + foundCpus := false + foundMems := false + + for _, arg := range cpusetArgs { + if arg.File == "cpuset.cpus" { + foundCpus = true + } + if arg.File == "cpuset.mems" { + foundMems = true + } + if err := ioutil.WriteFile(filepath.Join(path, arg.File), []byte(arg.Value), 0700); err != nil { + return nil, err + } + } + + // These are required, if not specified inherit from parent + if !foundCpus { + s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.cpus")) + if err != nil { + return nil, err + } + + if err := ioutil.WriteFile(filepath.Join(path, "cpuset.cpus"), s, 0700); err != nil { + return nil, err + } + } + + // These are required, if not specified inherit from parent + if !foundMems { + s, err := ioutil.ReadFile(filepath.Join(rootPath, "cpuset.mems")) + if err != nil { + return nil, err + } + + if err := ioutil.WriteFile(filepath.Join(path, "cpuset.mems"), s, 0700); err != nil { + return nil, err + } + } + + if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { + return nil, err + } + } + + return &res, nil +} + +func (c *systemdCgroup) Cleanup() error { + // systemd cleans up, we don't need to do much + + for _, path := range c.cleanupDirs { + os.RemoveAll(path) + } + + return nil +} diff --git a/cgroups/utils.go b/cgroups/utils.go new file mode 100644 index 00000000..02a7f357 --- /dev/null +++ b/cgroups/utils.go @@ -0,0 +1,67 @@ +package cgroups + +import ( + "bufio" + "io" + "os" + "strings" + + "github.com/dotcloud/docker/pkg/mount" +) + +// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + mounts, err := mount.GetMounts() + if err != nil { + return "", err + } + + for _, mount := range mounts { + if mount.Fstype == "cgroup" { + for _, opt := range strings.Split(mount.VfsOpts, ",") { + if opt == subsystem { + return mount.Mountpoint, nil + } + } + } + } + return "", ErrNotFound +} + +// Returns the relative path to the cgroup docker is running in. +func GetThisCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + +func GetInitCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/1/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return parseCgroupFile(subsystem, f) +} + +func parseCgroupFile(subsystem string, r io.Reader) (string, error) { + s := bufio.NewScanner(r) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + text := s.Text() + parts := strings.Split(text, ":") + for _, subs := range strings.Split(parts[1], ",") { + if subs == subsystem { + return parts[2], nil + } + } + } + return "", ErrNotFound +} diff --git a/container.go b/container.go index 5acdff3d..7328f554 100644 --- a/container.go +++ b/container.go @@ -1,7 +1,7 @@ package libcontainer import ( - "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" ) // Context is a generic key value pair that allows diff --git a/nsinit/exec.go b/nsinit/exec.go index 5d0d772a..03e0f4b9 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -7,10 +7,10 @@ import ( "os/exec" "syscall" - "github.com/dotcloud/docker/pkg/cgroups" - "github.com/dotcloud/docker/pkg/cgroups/fs" - "github.com/dotcloud/docker/pkg/cgroups/systemd" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer/cgroups/fs" + "github.com/dotcloud/docker/pkg/libcontainer/cgroups/systemd" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/system" ) diff --git a/nsinit/unsupported.go b/nsinit/unsupported.go index 929b3dba..51509f79 100644 --- a/nsinit/unsupported.go +++ b/nsinit/unsupported.go @@ -3,8 +3,8 @@ package nsinit import ( - "github.com/dotcloud/docker/pkg/cgroups" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/cgroups" ) func Exec(container *libcontainer.Container, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {