Merge pull request #2343 from lifubang/updateSystemdScope

fix data inconsistency when using runc update in systemd driven cgroup
This commit is contained in:
Kir Kolyshkin 2020-04-24 23:34:19 -07:00 committed by GitHub
commit b19f9cecfe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 164 additions and 92 deletions

View File

@ -63,6 +63,59 @@ var legacySubsystems = subsystemSet{
&fs.NameGroup{GroupName: "name=systemd"},
}
// genV1ResourcesProperties translates the resource limits stored in c into
// the systemd unit properties used by the legacy (cgroup v1) manager.
//
// Besides building the property list, it applies the kernel memory limit via
// setKernelMemory when one is configured. If that call fails, its error is
// returned together with a nil property list.
func genV1ResourcesProperties(c *configs.Cgroup) ([]systemdDbus.Property, error) {
	var (
		res   = c.Resources
		props []systemdDbus.Property
	)

	if res.Memory != 0 {
		props = append(props, newProp("MemoryLimit", uint64(res.Memory)))
	}

	if res.CpuShares != 0 {
		props = append(props, newProp("CPUShares", res.CpuShares))
	}

	// systemd owns cpu.cfs_quota_us and cpu.cfs_period_us, so the quota is
	// handed over as a property rather than written to the cgroup directly.
	if res.CpuQuota != 0 && res.CpuPeriod != 0 {
		// Default to the maximum value, which corresponds to USEC_INFINITY in
		// systemd and leaves CPUQuota unbound. Always sending a value means a
		// quota can be applied now and removed again later.
		quotaPerSecUSec := uint64(math.MaxUint64)
		if res.CpuQuota > 0 {
			// systemd turns CPUQuotaPerSecUSec (microseconds per CPU second)
			// into CPUQuota, an integer percentage of a CPU. A fractional
			// percentage therefore has to be rounded up to the next 10ms
			// (1% of a second) so that child cgroups can still set the
			// cpu.cfs_quota_us they expect.
			const usecPerPercent = 10000
			quotaPerSecUSec = uint64(res.CpuQuota*1000000) / res.CpuPeriod
			if quotaPerSecUSec%usecPerPercent != 0 {
				quotaPerSecUSec = (quotaPerSecUSec/usecPerPercent + 1) * usecPerPercent
			}
		}
		props = append(props, newProp("CPUQuotaPerSecUSec", quotaPerSecUSec))
	}

	if res.BlkioWeight != 0 {
		props = append(props, newProp("BlockIOWeight", uint64(res.BlkioWeight)))
	}

	if res.PidsLimit > 0 {
		props = append(props, newProp("TasksAccounting", true))
		props = append(props, newProp("TasksMax", uint64(res.PidsLimit)))
	}

	// Kernel memory has to be set at this point: it cannot be changed once
	// processes have been attached to the cgroup.
	if res.KernelMemory != 0 {
		if err := setKernelMemory(c); err != nil {
			return nil, err
		}
	}

	return props, nil
}
func (m *LegacyManager) Apply(pid int) error {
var (
c = m.Cgroups
@ -124,55 +177,11 @@ func (m *LegacyManager) Apply(pid int) error {
properties = append(properties,
newProp("DefaultDependencies", false))
if c.Resources.Memory != 0 {
properties = append(properties,
newProp("MemoryLimit", uint64(c.Resources.Memory)))
resourcesProperties, err := genV1ResourcesProperties(c)
if err != nil {
return err
}
if c.Resources.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", c.Resources.CpuShares))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
// corresponds to USEC_INFINITY in systemd
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
// always setting a property value ensures we can apply a quota and remove it later
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
if c.Resources.CpuQuota > 0 {
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
}
}
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.BlkioWeight != 0 {
properties = append(properties,
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
}
if c.Resources.PidsLimit > 0 {
properties = append(properties,
newProp("TasksAccounting", true),
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if c.Resources.KernelMemory != 0 {
if err := setKernelMemory(c); err != nil {
return err
}
}
properties = append(properties, resourcesProperties...)
properties = append(properties, c.SystemdProps...)
dbusConnection, err := getDbusConnection()
@ -376,6 +385,18 @@ func (m *LegacyManager) Set(container *configs.Config) error {
if m.Cgroups.Paths != nil {
return nil
}
properties, err := genV1ResourcesProperties(container.Cgroups)
if err != nil {
return err
}
dbusConnection, err := getDbusConnection()
if err != nil {
return err
}
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
return err
}
for _, sys := range legacySubsystems {
// Get the subsystem path, but don't error out for not found cgroups.
path, err := getSubsystemPath(container.Cgroups, sys.Name())

View File

@ -34,6 +34,59 @@ func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) *unif
}
}
// genV2ResourcesProperties translates the resource limits stored in c into
// the systemd unit properties used by the unified (cgroup v2) manager.
//
// It returns a nil property list and the error if the memory+swap value
// cannot be converted by cgroups.ConvertMemorySwapToCgroupV2Value.
// c.Resources.KernelMemory is deliberately ignored.
func genV2ResourcesProperties(c *configs.Cgroup) ([]systemdDbus.Property, error) {
	var (
		res   = c.Resources
		props []systemdDbus.Property
	)

	if res.Memory != 0 {
		props = append(props, newProp("MemoryMax", uint64(res.Memory)))
	}

	swapMax, err := cgroups.ConvertMemorySwapToCgroupV2Value(res.MemorySwap, res.Memory)
	if err != nil {
		return nil, err
	}
	if swapMax > 0 {
		props = append(props, newProp("MemorySwapMax", uint64(swapMax)))
	}

	if res.CpuWeight != 0 {
		props = append(props, newProp("CPUWeight", res.CpuWeight))
	}

	// systemd owns cpu.cfs_quota_us and cpu.cfs_period_us, so the quota is
	// handed over as a property rather than written to the cgroup directly.
	if res.CpuQuota != 0 && res.CpuPeriod != 0 {
		// Default to the maximum value, which corresponds to USEC_INFINITY in
		// systemd and leaves CPUQuota unbound. Always sending a value means a
		// quota can be applied now and removed again later.
		quotaPerSecUSec := uint64(math.MaxUint64)
		if res.CpuQuota > 0 {
			// systemd turns CPUQuotaPerSecUSec (microseconds per CPU second)
			// into CPUQuota, an integer percentage of a CPU. A fractional
			// percentage therefore has to be rounded up to the next 10ms
			// (1% of a second) so that child cgroups can still set the
			// cpu.cfs_quota_us they expect.
			const usecPerPercent = 10000
			quotaPerSecUSec = uint64(res.CpuQuota*1000000) / res.CpuPeriod
			if quotaPerSecUSec%usecPerPercent != 0 {
				quotaPerSecUSec = (quotaPerSecUSec/usecPerPercent + 1) * usecPerPercent
			}
		}
		props = append(props, newProp("CPUQuotaPerSecUSec", quotaPerSecUSec))
	}

	if res.PidsLimit > 0 {
		props = append(props, newProp("TasksAccounting", true))
		props = append(props, newProp("TasksMax", uint64(res.PidsLimit)))
	}

	// c.Resources.KernelMemory is intentionally not handled here.

	return props, nil
}
func (m *unifiedManager) Apply(pid int) error {
var (
c = m.cgroups
@ -82,55 +135,13 @@ func (m *unifiedManager) Apply(pid int) error {
properties = append(properties,
newProp("DefaultDependencies", false))
if c.Resources.Memory != 0 {
properties = append(properties,
newProp("MemoryMax", uint64(c.Resources.Memory)))
}
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(c.Resources.MemorySwap, c.Resources.Memory)
resourcesProperties, err := genV2ResourcesProperties(c)
if err != nil {
return err
}
if swap > 0 {
properties = append(properties,
newProp("MemorySwapMax", uint64(swap)))
}
if c.Resources.CpuWeight != 0 {
properties = append(properties,
newProp("CPUWeight", c.Resources.CpuWeight))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
// corresponds to USEC_INFINITY in systemd
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
// always setting a property value ensures we can apply a quota and remove it later
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
if c.Resources.CpuQuota > 0 {
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
}
}
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.PidsLimit > 0 {
properties = append(properties,
newProp("TasksAccounting", true),
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
}
properties = append(properties, resourcesProperties...)
properties = append(properties, c.SystemdProps...)
// ignore c.Resources.KernelMemory
dbusConnection, err := getDbusConnection()
if err != nil {
return err
@ -263,6 +274,18 @@ func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
}
func (m *unifiedManager) Set(container *configs.Config) error {
properties, err := genV2ResourcesProperties(m.cgroups)
if err != nil {
return err
}
dbusConnection, err := getDbusConnection()
if err != nil {
return err
}
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
return err
}
fsMgr, err := m.fsManager()
if err != nil {
return err

View File

@ -163,6 +163,17 @@ function check_cgroup_value() {
[ "$current" = "$expected" ]
}
# Helper to check a property value of a systemd unit.
#
# Usage: check_systemd_value UNIT PROPERTY_PREFIX EXPECTED_LINE
#   UNIT            - unit name handed to `systemctl show`
#   PROPERTY_PREFIX - beginning of the wanted output line, e.g. "MemoryMax="
#   EXPECTED_LINE   - the complete "Name=value" line that must be reported
#
# Prints the current and expected values for diagnostics and succeeds only
# when they are identical.
function check_systemd_value() {
	local unitname=$1
	local source=$2
	local expected=$3
	local current

	# Anchor the pattern at the start of the line so that a property whose
	# name merely ends with the same text (e.g. "StartupMemoryMax=" when
	# looking for "MemoryMax=") is not picked up as well.
	current=$(systemctl show "$unitname" | grep "^${source}")
	echo "current" "$current" "!?" "$expected"
	[ "$current" = "$expected" ]
}
# Helper function to set a resources limit
function set_resources_limit() {
bundle="${1:-.}"

View File

@ -84,10 +84,24 @@ EOF
runc update test_update --memory 67108864
[ "$status" -eq 0 ]
check_cgroup_value $MEM_LIMIT 67108864
if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then
if [ "$CGROUP_UNIFIED" != "yes" ]; then
check_systemd_value "runc-cgroups-integration-test.scope" "MemoryLimit=" "MemoryLimit=67108864"
else
check_systemd_value "runc-cgroups-integration-test.scope" "MemoryMax=" "MemoryMax=67108864"
fi
fi
runc update test_update --memory 50M
[ "$status" -eq 0 ]
check_cgroup_value $MEM_LIMIT 52428800
if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then
if [ "$CGROUP_UNIFIED" != "yes" ]; then
check_systemd_value "runc-cgroups-integration-test.scope" "MemoryLimit=" "MemoryLimit=52428800"
else
check_systemd_value "runc-cgroups-integration-test.scope" "MemoryMax=" "MemoryMax=52428800"
fi
fi
# update memory soft limit
runc update test_update --memory-reservation 33554432
@ -123,6 +137,9 @@ EOF
runc update test_update --pids-limit 10
[ "$status" -eq 0 ]
check_cgroup_value "pids.max" 10
if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then
check_systemd_value "runc-cgroups-integration-test.scope" "TasksMax=" "TasksMax=10"
fi
# Revert to the test initial value via json on stdin
runc update -r - test_update <<EOF