2019-10-26 05:24:14 +08:00
|
|
|
// +build linux
|
2019-09-02 17:10:52 +08:00
|
|
|
|
|
|
|
package systemd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"math"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2020-04-01 09:47:06 +08:00
|
|
|
"strconv"
|
2019-09-02 17:10:52 +08:00
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
|
2020-03-01 21:52:48 +08:00
|
|
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
2020-04-06 11:12:20 +08:00
|
|
|
securejoin "github.com/cyphar/filepath-securejoin"
|
2019-09-02 17:10:52 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
2019-11-07 16:25:49 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
2019-09-02 17:10:52 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
2020-04-01 09:47:06 +08:00
|
|
|
"github.com/pkg/errors"
|
2020-05-10 23:20:26 +08:00
|
|
|
"github.com/sirupsen/logrus"
|
2019-09-02 17:10:52 +08:00
|
|
|
)
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
type unifiedManager struct {
|
2019-09-02 17:10:52 +08:00
|
|
|
mu sync.Mutex
|
2020-04-06 10:06:25 +08:00
|
|
|
cgroups *configs.Cgroup
|
2020-04-05 02:19:05 +08:00
|
|
|
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
|
|
|
path string
|
|
|
|
rootless bool
|
|
|
|
}
|
|
|
|
|
2020-05-08 13:06:12 +08:00
|
|
|
func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
|
2020-04-06 10:06:25 +08:00
|
|
|
return &unifiedManager{
|
|
|
|
cgroups: config,
|
2020-04-05 02:19:05 +08:00
|
|
|
path: path,
|
|
|
|
rootless: rootless,
|
|
|
|
}
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-22 13:47:44 +08:00
|
|
|
func genV2ResourcesProperties(c *configs.Cgroup) ([]systemdDbus.Property, error) {
|
|
|
|
var properties []systemdDbus.Property
|
|
|
|
|
2020-05-10 23:20:26 +08:00
|
|
|
// NOTE: This is of questionable correctness because we insert our own
|
|
|
|
// devices eBPF program later. Two programs with identical rules
|
|
|
|
// aren't the end of the world, but it is a bit concerning. However
|
|
|
|
// it's unclear if systemd removes all eBPF programs attached when
|
|
|
|
// doing SetUnitProperties...
|
|
|
|
deviceProperties, err := generateDeviceProperties(c.Resources.Devices)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
properties = append(properties, deviceProperties...)
|
|
|
|
|
2020-04-22 13:47:44 +08:00
|
|
|
if c.Resources.Memory != 0 {
|
|
|
|
properties = append(properties,
|
|
|
|
newProp("MemoryMax", uint64(c.Resources.Memory)))
|
|
|
|
}
|
2020-05-20 15:55:11 +08:00
|
|
|
if c.Resources.MemoryReservation != 0 {
|
|
|
|
properties = append(properties,
|
|
|
|
newProp("MemoryLow", uint64(c.Resources.MemoryReservation)))
|
|
|
|
}
|
2020-05-21 03:12:44 +08:00
|
|
|
|
|
|
|
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(c.Resources.MemorySwap, c.Resources.Memory)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if swap != 0 {
|
2020-04-22 13:47:44 +08:00
|
|
|
properties = append(properties,
|
|
|
|
newProp("MemorySwapMax", uint64(swap)))
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.Resources.CpuWeight != 0 {
|
|
|
|
properties = append(properties,
|
|
|
|
newProp("CPUWeight", c.Resources.CpuWeight))
|
|
|
|
}
|
|
|
|
|
|
|
|
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
|
|
|
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
|
|
|
// corresponds to USEC_INFINITY in systemd
|
|
|
|
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
|
|
|
// always setting a property value ensures we can apply a quota and remove it later
|
|
|
|
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
|
|
|
if c.Resources.CpuQuota > 0 {
|
|
|
|
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
|
|
|
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
|
|
|
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
|
|
|
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
|
|
|
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
|
|
|
if cpuQuotaPerSecUSec%10000 != 0 {
|
|
|
|
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
|
|
|
}
|
|
|
|
}
|
|
|
|
properties = append(properties,
|
|
|
|
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
|
|
|
}
|
|
|
|
|
2020-05-05 10:19:46 +08:00
|
|
|
if c.Resources.PidsLimit > 0 || c.Resources.PidsLimit == -1 {
|
2020-04-22 13:47:44 +08:00
|
|
|
properties = append(properties,
|
|
|
|
newProp("TasksAccounting", true),
|
|
|
|
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
|
|
|
}
|
|
|
|
|
|
|
|
// ignore c.Resources.KernelMemory
|
|
|
|
|
|
|
|
return properties, nil
|
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) Apply(pid int) error {
|
2019-09-02 17:10:52 +08:00
|
|
|
var (
|
2020-04-06 10:06:25 +08:00
|
|
|
c = m.cgroups
|
2019-09-02 17:10:52 +08:00
|
|
|
unitName = getUnitName(c)
|
|
|
|
properties []systemdDbus.Property
|
|
|
|
)
|
|
|
|
|
|
|
|
if c.Paths != nil {
|
2020-04-05 02:19:05 +08:00
|
|
|
return cgroups.WriteCgroupProc(m.path, pid)
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
slice := "system.slice"
|
|
|
|
if m.rootless {
|
|
|
|
slice = "user.slice"
|
|
|
|
}
|
2019-09-02 17:10:52 +08:00
|
|
|
if c.Parent != "" {
|
|
|
|
slice = c.Parent
|
|
|
|
}
|
|
|
|
|
|
|
|
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
|
|
|
|
|
|
|
// if we create a slice, the parent is defined via a Wants=
|
|
|
|
if strings.HasSuffix(unitName, ".slice") {
|
|
|
|
properties = append(properties, systemdDbus.PropWants(slice))
|
|
|
|
} else {
|
|
|
|
// otherwise, we use Slice=
|
|
|
|
properties = append(properties, systemdDbus.PropSlice(slice))
|
|
|
|
}
|
|
|
|
|
|
|
|
// only add pid if its valid, -1 is used w/ general slice creation.
|
|
|
|
if pid != -1 {
|
|
|
|
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
|
|
|
if !strings.HasSuffix(unitName, ".slice") {
|
|
|
|
// Assume scopes always support delegation.
|
|
|
|
properties = append(properties, newProp("Delegate", true))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
|
|
|
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
|
|
|
properties = append(properties,
|
|
|
|
newProp("MemoryAccounting", true),
|
|
|
|
newProp("CPUAccounting", true),
|
2020-01-15 20:55:22 +08:00
|
|
|
newProp("IOAccounting", true))
|
2019-09-02 17:10:52 +08:00
|
|
|
|
|
|
|
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
|
|
|
properties = append(properties,
|
|
|
|
newProp("DefaultDependencies", false))
|
|
|
|
|
2020-04-22 13:47:44 +08:00
|
|
|
resourcesProperties, err := genV2ResourcesProperties(c)
|
2020-04-02 15:23:09 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-04-22 13:47:44 +08:00
|
|
|
properties = append(properties, resourcesProperties...)
|
Support for setting systemd properties via annotations
In case systemd is used to set cgroups for the container,
it creates a scope unit dedicated to it (usually named
`runc-$ID.scope`).
This patch adds an ability to set arbitrary systemd properties
for the systemd unit via runtime spec annotations.
Initially this was developed as an ability to specify the
`TimeoutStopUSec` property, but later generalized to work with
arbitrary ones.
Example usage: add the following to runtime spec (config.json):
```
"annotations": {
"org.systemd.property.TimeoutStopUSec": "uint64 123456789",
"org.systemd.property.CollectMode":"'inactive-or-failed'"
},
```
and start the container (e.g. `runc --systemd-cgroup run $ID`).
The above will set the following systemd parameters:
* `TimeoutStopSec` to 2 minutes and 3 seconds,
* `CollectMode` to "inactive-or-failed".
The values are in the gvariant format (see [1]). To figure out
which type systemd expects for a particular parameter, see
systemd sources.
In particular, parameters with `USec` suffix require an `uint64`
typed argument, while gvariant assumes int32 for a numeric values,
therefore the explicit type is required.
NOTE that systemd receives the time-typed parameters as *USec
but shows them (in `systemctl show`) as *Sec. For example,
the stop timeout should be set as `TimeoutStopUSec` but
is shown as `TimeoutStopSec`.
[1] https://developer.gnome.org/glib/stable/gvariant-text.html
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-02-07 12:26:06 +08:00
|
|
|
properties = append(properties, c.SystemdProps...)
|
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
dbusConnection, err := getDbusConnection(m.rootless)
|
|
|
|
if err != nil {
|
2019-09-02 17:10:52 +08:00
|
|
|
return err
|
|
|
|
}
|
2020-04-01 09:47:06 +08:00
|
|
|
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
|
|
|
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
|
|
|
|
}
|
2019-09-02 17:10:52 +08:00
|
|
|
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
if err = m.initPath(); err != nil {
|
2019-11-07 16:25:49 +08:00
|
|
|
return err
|
|
|
|
}
|
2020-04-08 18:37:47 +08:00
|
|
|
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
2020-03-27 04:57:55 +08:00
|
|
|
return err
|
|
|
|
}
|
2019-09-02 17:10:52 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) Destroy() error {
|
|
|
|
if m.cgroups.Paths != nil {
|
2019-09-02 17:10:52 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
2020-03-28 06:01:36 +08:00
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
dbusConnection, err := getDbusConnection(m.rootless)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-04-26 07:23:55 +08:00
|
|
|
unitName := getUnitName(m.cgroups)
|
2020-04-01 09:47:06 +08:00
|
|
|
if err := stopUnit(dbusConnection, unitName); err != nil {
|
2020-03-28 06:01:36 +08:00
|
|
|
return err
|
|
|
|
}
|
2020-04-05 02:19:05 +08:00
|
|
|
|
|
|
|
// XXX this is probably not needed, systemd should handle it
|
2020-04-01 09:47:06 +08:00
|
|
|
err = os.Remove(m.path)
|
2020-04-05 02:19:05 +08:00
|
|
|
if err != nil && !os.IsNotExist(err) {
|
2019-09-02 17:10:52 +08:00
|
|
|
return err
|
|
|
|
}
|
2020-04-05 02:19:05 +08:00
|
|
|
|
2019-09-02 17:10:52 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
Simplify cgroup path handling in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
func (m *unifiedManager) Path(_ string) string {
|
|
|
|
return m.path
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
2020-04-05 02:19:05 +08:00
|
|
|
|
Simplify cgroup path handling in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
// getSliceFull value is used in initPath.
|
2020-04-01 09:47:06 +08:00
|
|
|
// The value is incompatible with systemdDbus.PropSlice.
|
|
|
|
func (m *unifiedManager) getSliceFull() (string, error) {
|
|
|
|
c := m.cgroups
|
|
|
|
slice := "system.slice"
|
|
|
|
if m.rootless {
|
|
|
|
slice = "user.slice"
|
|
|
|
}
|
|
|
|
if c.Parent != "" {
|
|
|
|
var err error
|
|
|
|
slice, err = ExpandSlice(c.Parent)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.rootless {
|
|
|
|
dbusConnection, err := getDbusConnection(m.rootless)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
|
|
|
|
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
managerCG, err := strconv.Unquote(managerCGQuoted)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
slice = filepath.Join(managerCG, slice)
|
|
|
|
}
|
|
|
|
|
|
|
|
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
|
|
|
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
|
|
|
return slice, nil
|
|
|
|
}
|
|
|
|
|
Simplify cgroup path handling in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
func (m *unifiedManager) initPath() error {
|
2020-04-05 02:19:05 +08:00
|
|
|
if m.path != "" {
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
return nil
|
2019-10-19 00:40:46 +08:00
|
|
|
}
|
2020-03-27 04:35:51 +08:00
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
sliceFull, err := m.getSliceFull()
|
2020-03-27 04:35:51 +08:00
|
|
|
if err != nil {
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
return err
|
2020-03-27 04:35:51 +08:00
|
|
|
}
|
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
c := m.cgroups
|
|
|
|
path := filepath.Join(sliceFull, getUnitName(c))
|
2020-04-06 11:12:20 +08:00
|
|
|
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
|
|
|
if err != nil {
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
return err
|
2020-04-06 11:12:20 +08:00
|
|
|
}
|
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
// an example of the final path in rootless:
|
|
|
|
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
m.path = path
|
|
|
|
|
|
|
|
return nil
|
2020-03-27 04:35:51 +08:00
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) fsManager() (cgroups.Manager, error) {
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
if err := m.initPath(); err != nil {
|
2019-11-07 16:25:49 +08:00
|
|
|
return nil, err
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
return fs2.NewManager(m.cgroups, m.path, m.rootless)
|
2019-11-07 16:25:49 +08:00
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
|
2019-11-07 16:25:49 +08:00
|
|
|
fsMgr, err := m.fsManager()
|
2019-09-02 17:10:52 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-11-07 16:25:49 +08:00
|
|
|
return fsMgr.Freeze(state)
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) GetPids() ([]int, error) {
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
if err := m.initPath(); err != nil {
|
2019-09-02 17:10:52 +08:00
|
|
|
return nil, err
|
|
|
|
}
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
return cgroups.GetPids(m.path)
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) GetAllPids() ([]int, error) {
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
if err := m.initPath(); err != nil {
|
2019-09-02 17:10:52 +08:00
|
|
|
return nil, err
|
|
|
|
}
|
Simplify cgroup path handing in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- - repeated writing of the PID into the same cgroup for v2;
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
return cgroups.GetAllPids(m.path)
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
2019-11-07 16:25:49 +08:00
|
|
|
fsMgr, err := m.fsManager()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
2019-11-07 16:25:49 +08:00
|
|
|
return fsMgr.GetStats()
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) Set(container *configs.Config) error {
|
2020-04-22 13:47:44 +08:00
|
|
|
properties, err := genV2ResourcesProperties(m.cgroups)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-05-10 23:20:26 +08:00
|
|
|
|
|
|
|
// Figure out the current freezer state, so we can revert to it after we
|
|
|
|
// temporarily freeze the container.
|
|
|
|
targetFreezerState, err := m.GetFreezerState()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if targetFreezerState == configs.Undefined {
|
|
|
|
targetFreezerState = configs.Thawed
|
|
|
|
}
|
|
|
|
|
|
|
|
// We have to freeze the container while systemd sets the cgroup settings.
|
|
|
|
// The reason for this is that systemd's application of DeviceAllow rules
|
|
|
|
// is done disruptively, resulting in spurrious errors to common devices
|
|
|
|
// (unlike our fs driver, they will happily write deny-all rules to running
|
|
|
|
// containers). So we freeze the container to avoid them hitting the cgroup
|
|
|
|
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
|
|
|
if err := m.Freeze(configs.Frozen); err != nil {
|
|
|
|
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
|
|
|
}
|
|
|
|
|
2020-04-01 09:47:06 +08:00
|
|
|
dbusConnection, err := getDbusConnection(m.rootless)
|
2020-04-22 13:47:44 +08:00
|
|
|
if err != nil {
|
2020-05-10 23:20:26 +08:00
|
|
|
_ = m.Freeze(targetFreezerState)
|
2020-04-22 13:47:44 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
|
2020-05-10 23:20:26 +08:00
|
|
|
_ = m.Freeze(targetFreezerState)
|
2020-04-01 09:47:06 +08:00
|
|
|
return errors.Wrap(err, "error while setting unit properties")
|
2020-04-22 13:47:44 +08:00
|
|
|
}
|
|
|
|
|
2020-05-10 23:20:26 +08:00
|
|
|
// Reset freezer state before we apply the configuration, to avoid clashing
|
|
|
|
// with the freezer setting in the configuration.
|
|
|
|
_ = m.Freeze(targetFreezerState)
|
|
|
|
|
2019-11-07 16:25:49 +08:00
|
|
|
fsMgr, err := m.fsManager()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
2019-11-07 16:25:49 +08:00
|
|
|
return fsMgr.Set(container)
|
2019-09-02 17:10:52 +08:00
|
|
|
}
|
2019-12-06 23:43:08 +08:00
|
|
|
|
Simplify cgroup path handling in v2 via unified API
This unties the Gordian Knot of using GetPaths in cgroupv2 code.
The problem is, the current code uses GetPaths for three kinds of things:
1. Get all the paths to cgroup v1 controllers to save its state (see
(*linuxContainer).currentState(), (*LinuxFactory).loadState()
methods).
2. Get all the paths to cgroup v1 controllers to have the setns process
enter the proper cgroups in `(*setnsProcess).start()`.
3. Get the path to a specific controller (for example,
`m.GetPaths()["devices"]`).
Now, for cgroup v2 instead of a set of per-controller paths, we have only
one single unified path, and a dedicated function `GetUnifiedPath()` to get it.
This discrepancy between v1 and v2 cgroupManager API leads to the
following problems with the code:
- multiple if/else code blocks that have to treat v1 and v2 separately;
- backward-compatible GetPaths() methods in v2 controllers;
- repeated writing of the PID into the same cgroup for v2.
Overall, it's hard to write the right code with all this, and the code
that is written is kinda hard to follow.
The solution is to slightly change the API to do the 3 things outlined
above in the same manner for v1 and v2:
1. Use `GetPaths()` for state saving and setns process cgroups entering.
2. Introduce and use Path(subsys string) to obtain a path to a
subsystem. For v2, the argument is ignored and the unified path is
returned.
This commit converts all the controllers to the new API, and modifies
all the users to use it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-05-07 08:36:28 +08:00
|
|
|
func (m *unifiedManager) GetPaths() map[string]string {
|
|
|
|
paths := make(map[string]string, 1)
|
|
|
|
paths[""] = m.path
|
|
|
|
return paths
|
|
|
|
}
|
|
|
|
|
2020-04-06 10:06:25 +08:00
|
|
|
func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
|
|
|
return m.cgroups, nil
|
2019-12-06 23:43:08 +08:00
|
|
|
}
|
2020-05-11 13:19:30 +08:00
|
|
|
|
|
|
|
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
|
|
|
fsMgr, err := m.fsManager()
|
|
|
|
if err != nil {
|
|
|
|
return configs.Undefined, err
|
|
|
|
}
|
|
|
|
return fsMgr.GetFreezerState()
|
|
|
|
}
|