Merge pull request #446 from cyphar/18-add-pids-controller
cgroup: add PIDs cgroup controller support
This commit is contained in:
commit
4c767d7046
|
@ -23,6 +23,7 @@ var (
|
|||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
|
@ -105,8 +106,6 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
return nil
|
||||
}
|
||||
|
||||
var c = m.Cgroups
|
||||
|
||||
d, err := getCgroupData(m.Cgroups, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -135,13 +134,6 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
paths[sys.Name()] = p
|
||||
}
|
||||
m.Paths = paths
|
||||
|
||||
if paths["cpu"] != "" {
|
||||
if err := CheckCpushares(paths["cpu"], c.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -179,15 +171,34 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
for _, sys := range subsystems {
|
||||
// We can't set this here, because after being applied, memcg doesn't
|
||||
// allow a non-empty cgroup from having its limits changed.
|
||||
if sys.Name() == "memory" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Generate fake cgroup data.
|
||||
d, err := getCgroupData(container.Cgroups, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Get the path, but don't error out if the cgroup wasn't found.
|
||||
path, err := d.path(sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -22,15 +22,10 @@ func (s *BlkioGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("blkio")
|
||||
_, err := d.join("blkio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -22,15 +22,10 @@ func (s *CpuGroup) Name() string {
|
|||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
dir, err := d.join("cpu")
|
||||
_, err := d.join("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -64,11 +64,6 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
|||
if err := s.ensureParent(dir, root); err != nil {
|
||||
return err
|
||||
}
|
||||
// the default values inherit from parent cgroup are already set in
|
||||
// s.ensureParent, cover these if we have our own
|
||||
if err := s.Set(dir, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
|
|
|
@ -15,17 +15,12 @@ func (s *DevicesGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("devices")
|
||||
_, err := d.join("devices")
|
||||
if err != nil {
|
||||
// We will return error even it's `not found` error, devices
|
||||
// cgroup is hard requirement for container's security.
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -19,15 +19,10 @@ func (s *FreezerGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("freezer")
|
||||
_, err := d.join("freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -19,15 +19,10 @@ func (s *HugetlbGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("hugetlb")
|
||||
_, err := d.join("hugetlb")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -32,7 +32,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.Set(path, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -15,15 +15,10 @@ func (s *NetClsGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("net_cls")
|
||||
_, err := d.join("net_cls")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -15,15 +15,10 @@ func (s *NetPrioGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("net_prio")
|
||||
_, err := d.join("net_prio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct {
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("pids")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := writeFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("pids"))
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
value, err := getCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = value
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
const (
|
||||
maxUnlimited = -1
|
||||
maxLimited = 1024
|
||||
)
|
||||
|
||||
func TestPidsSetMax(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.max": "max",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxLimited
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
if value != maxLimited {
|
||||
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsSetUnlimited(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
if value != "max" {
|
||||
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.current": strconv.Itoa(1337),
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
pids := &PidsGroup{}
|
||||
stats := *cgroups.NewStats()
|
||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if stats.PidsStats.Current != 1337 {
|
||||
t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current)
|
||||
}
|
||||
}
|
|
@ -49,6 +49,11 @@ type MemoryStats struct {
|
|||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStatEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
|
@ -80,6 +85,7 @@ type HugetlbStats struct {
|
|||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
|
|
|
@ -55,6 +55,7 @@ var subsystems = subsystemSet{
|
|||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
|
@ -233,7 +234,7 @@ func (m *Manager) Apply(pid int) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// we need to manually join the freezer, net_cls, net_prio and cpuset cgroup in systemd
|
||||
// we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd
|
||||
// because it does not currently support it via the dbus api.
|
||||
if err := joinFreezer(c, pid); err != nil {
|
||||
return err
|
||||
|
@ -246,6 +247,10 @@ func (m *Manager) Apply(pid int) error {
|
|||
return err
|
||||
}
|
||||
|
||||
if err := joinPids(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCpuset(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -277,13 +282,6 @@ func (m *Manager) Apply(pid int) error {
|
|||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.Paths = paths
|
||||
|
||||
if paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(paths["cpu"], c.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -330,68 +328,43 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
|||
}
|
||||
|
||||
func joinCpu(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "cpu")
|
||||
_, err := join(c, "cpu", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
if c.Resources.CpuQuota != 0 {
|
||||
if err = writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(c.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.CpuPeriod != 0 {
|
||||
if err = writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(c.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.CpuRtPeriod != 0 {
|
||||
if err = writeFile(path, "cpu.rt_period_us", strconv.FormatInt(c.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.CpuRtRuntime != 0 {
|
||||
if err = writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(c.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinFreezer(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "freezer", pid)
|
||||
_, err := join(c, "freezer", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return freezer.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetPrio(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "net_prio", pid)
|
||||
_, err := join(c, "net_prio", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
netPrio, err := subsystems.Get("net_prio")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netPrio.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetCls(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "net_cls", pid)
|
||||
_, err := join(c, "net_cls", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
netcls, err := subsystems.Get("net_cls")
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPids(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "pids", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return netcls.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
|
@ -466,16 +439,29 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
for _, sys := range subsystems {
|
||||
// We can't set this here, because after being applied, memcg doesn't
|
||||
// allow a non-empty cgroup from having its limits changed.
|
||||
if sys.Name() == "memory" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -495,17 +481,13 @@ func getUnitName(c *configs.Cgroup) string {
|
|||
// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
|
||||
// This happens at least for v208 when any sibling unit is started.
|
||||
func joinDevices(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "devices", pid)
|
||||
_, err := join(c, "devices", pid)
|
||||
// Even if it's `not found` error, we'll return err because devices cgroup
|
||||
// is hard requirement for container security.
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
devices, err := subsystems.Get("devices")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return devices.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
|
@ -518,9 +500,10 @@ func setKernelMemory(c *configs.Cgroup) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// Shamelessly copied from fs.(*MemoryGroup).Set(). This doesn't get called
|
||||
// by manager.Set, so we need to do it here.
|
||||
if c.Resources.KernelMemory > 0 {
|
||||
err = writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(c.Resources.KernelMemory, 10))
|
||||
if err != nil {
|
||||
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(c.Resources.KernelMemory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -529,21 +512,27 @@ func setKernelMemory(c *configs.Cgroup) error {
|
|||
}
|
||||
|
||||
func joinMemory(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
path, err := join(c, "memory", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
// -1 disables memoryswap
|
||||
if c.Resources.MemorySwap > 0 {
|
||||
err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Resources.MemorySwap, 10))
|
||||
if err != nil {
|
||||
// Shamelessly copied from fs.(*MemoryGroup).Set(). This doesn't include the
|
||||
// kernel memory limit (which is done in setKernelMemory). All of these are
|
||||
// not set by manager.Set, we have do do it here.
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(c.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.MemoryReservation > 0 {
|
||||
err = writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Resources.MemoryReservation, 10))
|
||||
if err != nil {
|
||||
if c.Resources.MemoryReservation != 0 {
|
||||
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.MemorySwap > 0 {
|
||||
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -552,10 +541,8 @@ func joinMemory(c *configs.Cgroup, pid int) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if c.Resources.MemorySwappiness >= 0 && c.Resources.MemorySwappiness <= 100 {
|
||||
err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.Resources.MemorySwappiness, 10))
|
||||
if err != nil {
|
||||
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(c.Resources.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if c.Resources.MemorySwappiness == -1 {
|
||||
|
@ -585,68 +572,25 @@ func joinCpuset(c *configs.Cgroup, pid int) error {
|
|||
// expects device path instead of major minor numbers, which is also confusing
|
||||
// for users. So we use fs work around for now.
|
||||
func joinBlkio(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "blkio")
|
||||
_, err := join(c, "blkio", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// systemd doesn't directly support this in the dbus properties
|
||||
if c.Resources.BlkioLeafWeight != 0 {
|
||||
if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(c.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range c.Resources.BlkioWeightDevice {
|
||||
if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinHugetlb(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "hugetlb", pid)
|
||||
_, err := join(c, "hugetlb", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
hugetlb, err := subsystems.Get("hugetlb")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return hugetlb.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPerfEvent(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "perf_event", pid)
|
||||
_, err := join(c, "perf_event", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
perfEvent, err := subsystems.Get("perf_event")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return perfEvent.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -64,6 +64,9 @@ type Resources struct {
|
|||
// MEM to use
|
||||
CpusetMems string `json:"cpuset_mems"`
|
||||
|
||||
// Process limit; set <= `0' to disable limit.
|
||||
PidsLimit int64 `json:"pids_limit"`
|
||||
|
||||
// Specifies per cgroup weight, range is from 10 to 1000.
|
||||
BlkioWeight uint16 `json:"blkio_weight"`
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
@ -226,21 +225,29 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
|||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
var i initer
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
// ensure that any data sent from the parent is consumed so it doesn't
|
||||
// receive ECONNRESET when the child writes to the pipe.
|
||||
ioutil.ReadAll(pipe)
|
||||
if _, ok := i.(*linuxStandardInit); ok {
|
||||
// Synchronisation only necessary for standard init.
|
||||
if err := json.NewEncoder(pipe).Encode(procError); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
} else {
|
||||
if err := json.NewEncoder(pipe).Encode(procStart); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
i, err := newContainerInit(it, pipe)
|
||||
i, err = newContainerInit(it, pipe)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -9,6 +9,15 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/stacktrace"
|
||||
)
|
||||
|
||||
type syncType uint8
|
||||
|
||||
const (
|
||||
procReady syncType = iota
|
||||
procError
|
||||
procStart
|
||||
procRun
|
||||
)
|
||||
|
||||
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
|
||||
Code: {{.ECode}}
|
||||
{{if .Message }}
|
||||
|
|
|
@ -5,6 +5,7 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
|
@ -73,6 +74,7 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
|||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
pipe: pipe,
|
||||
parentPid: syscall.Getppid(),
|
||||
config: config,
|
||||
}, nil
|
||||
|
@ -140,6 +142,28 @@ func finalizeNamespace(config *initConfig) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// syncParentReady sends to the given pipe a JSON payload which indicates that
|
||||
// the init is ready to Exec the child process. It then waits for the parent to
|
||||
// indicate that it is cleared to Exec.
|
||||
func syncParentReady(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := json.NewEncoder(pipe).Encode(procReady); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
var procSync syncType
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
}
|
||||
if procSync != procRun {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
|
|
|
@ -525,6 +525,75 @@ func testCpuShares(t *testing.T, systemd bool) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestPids(t *testing.T) {
|
||||
testPids(t, false)
|
||||
}
|
||||
|
||||
func TestPidsSystemd(t *testing.T) {
|
||||
if !systemd.UseSystemd() {
|
||||
t.Skip("Systemd is unsupported")
|
||||
}
|
||||
testPids(t, true)
|
||||
}
|
||||
|
||||
func testPids(t *testing.T, systemd bool) {
|
||||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
|
||||
rootfs, err := newRootfs()
|
||||
ok(t, err)
|
||||
defer remove(rootfs)
|
||||
|
||||
config := newTemplateConfig(rootfs)
|
||||
if systemd {
|
||||
config.Cgroups.Parent = "system.slice"
|
||||
}
|
||||
config.Cgroups.Resources.PidsLimit = -1
|
||||
|
||||
// Running multiple processes.
|
||||
_, ret, err := runContainer(config, "", "/bin/sh", "-c", "/bin/true | /bin/true | /bin/true | /bin/true")
|
||||
if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") {
|
||||
t.Skip("PIDs cgroup is unsupported")
|
||||
}
|
||||
ok(t, err)
|
||||
|
||||
if ret != 0 {
|
||||
t.Fatalf("expected fork() to succeed with no pids limit")
|
||||
}
|
||||
|
||||
// Enforce a permissive limit (shell + 6 * true + 3).
|
||||
config.Cgroups.Resources.PidsLimit = 10
|
||||
_, ret, err = runContainer(config, "", "/bin/sh", "-c", "/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true")
|
||||
if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") {
|
||||
t.Skip("PIDs cgroup is unsupported")
|
||||
}
|
||||
ok(t, err)
|
||||
|
||||
if ret != 0 {
|
||||
t.Fatalf("expected fork() to succeed with permissive pids limit")
|
||||
}
|
||||
|
||||
// Enforce a restrictive limit (shell + 6 * true + 3).
|
||||
config.Cgroups.Resources.PidsLimit = 10
|
||||
out, ret, err := runContainer(config, "", "/bin/sh", "-c", "/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true")
|
||||
if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") {
|
||||
t.Skip("PIDs cgroup is unsupported")
|
||||
}
|
||||
if err != nil && !strings.Contains(out.String(), "sh: can't fork") {
|
||||
ok(t, err)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
t.Fatalf("expected fork() to fail with restrictive pids limit")
|
||||
}
|
||||
|
||||
// Minimal restrictions are not really supported, due to quirks in using Go
|
||||
// due to the fact that it spawns random processes. While we do our best with
|
||||
// late setting cgroup values, it's just too unreliable with very small pids.max.
|
||||
// As such, we don't test that case. YMMV.
|
||||
}
|
||||
|
||||
func TestRunWithKernelMemory(t *testing.T) {
|
||||
testRunWithKernelMemory(t, false)
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
@ -86,6 +87,7 @@ func (p *setnsProcess) start() (err error) {
|
|||
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
@ -95,6 +97,7 @@ func (p *setnsProcess) start() (err error) {
|
|||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return newSystemError(ierr)
|
||||
|
@ -198,7 +201,6 @@ func (p *initProcess) start() (err error) {
|
|||
return newSystemError(err)
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
|
@ -229,13 +231,58 @@ func (p *initProcess) start() (err error) {
|
|||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
var (
|
||||
procSync syncType
|
||||
sentRun bool
|
||||
ierr *genericError
|
||||
)
|
||||
|
||||
loop:
|
||||
for {
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
break loop
|
||||
}
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
switch procSync {
|
||||
case procStart:
|
||||
break loop
|
||||
case procReady:
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Sync with child.
|
||||
if err := json.NewEncoder(p.parentPipe).Encode(procRun); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
sentRun = true
|
||||
case procError:
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *genericError
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
break loop
|
||||
}
|
||||
// Programmer error.
|
||||
panic("No error following JSON procError payload.")
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child"))
|
||||
}
|
||||
}
|
||||
if !sentRun {
|
||||
return newSystemError(fmt.Errorf("could not synchronise with container process"))
|
||||
}
|
||||
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return newSystemError(ierr)
|
||||
}
|
||||
return nil
|
||||
|
@ -273,8 +320,7 @@ func (p *initProcess) sendConfig() error {
|
|||
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) createNetworkInterfaces() error {
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
|
@ -14,6 +15,7 @@ import (
|
|||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
pipe io.ReadWriter
|
||||
parentPid int
|
||||
config *initConfig
|
||||
}
|
||||
|
@ -50,7 +52,6 @@ func (l *linuxStandardInit) Init() error {
|
|||
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
|
@ -75,7 +76,6 @@ func (l *linuxStandardInit) Init() error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, path := range l.config.Config.ReadonlyPaths {
|
||||
if err := remountReadonly(path); err != nil {
|
||||
return err
|
||||
|
@ -90,6 +90,12 @@ func (l *linuxStandardInit) Init() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.Seccomp != nil {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
|
|
1
spec.go
1
spec.go
|
@ -456,6 +456,7 @@ func createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*co
|
|||
c.Resources.CpuRtPeriod = r.CPU.RealtimePeriod
|
||||
c.Resources.CpusetCpus = r.CPU.Cpus
|
||||
c.Resources.CpusetMems = r.CPU.Mems
|
||||
c.Resources.PidsLimit = r.Pids.Limit
|
||||
c.Resources.BlkioWeight = r.BlockIO.Weight
|
||||
c.Resources.BlkioLeafWeight = r.BlockIO.LeafWeight
|
||||
for _, wd := range r.BlockIO.WeightDevice {
|
||||
|
|
Loading…
Reference in New Issue