diff --git a/docs/systemd-properties.md b/docs/systemd-properties.md new file mode 100644 index 00000000..737e2415 --- /dev/null +++ b/docs/systemd-properties.md @@ -0,0 +1,27 @@ +## Changing systemd unit properties + +In case runc uses systemd to set cgroup parameters for a container (i.e. +`--systemd-cgroup` CLI flag is set), systemd creates a scope (a.k.a. +transient unit) for the container, usually named like `runc-$ID.scope`. + +The systemd properties of this unit (shown by `systemctl show runc-$ID.scope` +after the container is started) can be modified by adding annotations +to container's runtime spec (`config.json`). For example: + +```json + "annotations": { + "org.systemd.property.TimeoutStopUSec": "uint64 123456789", + "org.systemd.property.CollectMode":"'inactive-or-failed'" + }, +``` + +The above will set the following properties: + +* `TimeoutStopSec` to 2 minutes and 3 seconds; +* `CollectMode` to "inactive-or-failed". + +The values must be in the gvariant format (for details, see +[gvariant documentation](https://developer.gnome.org/glib/stable/gvariant-text.html)). + +To find out which type systemd expects for a particular parameter, please +consult systemd sources. diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index c4b19b3e..bbd0c56e 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -245,6 +245,8 @@ func (m *LegacyManager) Apply(pid int) error { } } + properties = append(properties, c.SystemdProps...) + statusChan := make(chan string, 1) if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil { select { diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go index 66050991..355070a4 100644 --- a/libcontainer/cgroups/systemd/unified_hierarchy.go +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -128,6 +128,8 @@ func (m *UnifiedManager) Apply(pid int) error { newProp("TasksMax", uint64(c.Resources.PidsLimit))) } + properties = append(properties, c.SystemdProps...) + // We have to set kernel memory here, as we can't change it once // processes have been attached to the cgroup. if c.Resources.KernelMemory != 0 { diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index 58ed19c9..1c47c96b 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -1,5 +1,9 @@ package configs +import ( + systemdDbus "github.com/coreos/go-systemd/dbus" +) + type FreezerState string const ( @@ -29,6 +33,11 @@ type Cgroup struct { // Resources contains various cgroups settings to apply *Resources + + // SystemdProps are any additional properties for systemd, + // derived from org.systemd.property.xxx annotations. + // Ignored unless systemd is used for managing cgroups. + SystemdProps []systemdDbus.Property `json:"-"` } type Resources struct { diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 0084e7cd..90731f50 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -5,14 +5,17 @@ package specconv import ( + "errors" "fmt" "os" "path/filepath" + "regexp" "strings" "time" + systemdDbus "github.com/coreos/go-systemd/dbus" + "github.com/godbus/dbus" "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/seccomp" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" @@ -299,6 +302,70 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { } } +// systemd property name check: latin letters only, at least 3 of them +var isValidName = regexp.MustCompile(`^[a-zA-Z]{3,}$`).MatchString + +var isSecSuffix = regexp.MustCompile(`[a-z]Sec$`).MatchString + +// Some systemd properties are documented as having "Sec" suffix +// (e.g. TimeoutStopSec) but are expected to have "USec" suffix +// here, so let's provide conversion to improve compatibility. +func convertSecToUSec(value dbus.Variant) (dbus.Variant, error) { + var sec uint64 + const M = 1000000 + vi := value.Value() + switch value.Signature().String() { + case "y": + sec = uint64(vi.(byte)) * M + case "n": + sec = uint64(vi.(int16)) * M + case "q": + sec = uint64(vi.(uint16)) * M + case "i": + sec = uint64(vi.(int32)) * M + case "u": + sec = uint64(vi.(uint32)) * M + case "x": + sec = uint64(vi.(int64)) * M + case "t": + sec = vi.(uint64) * M + case "d": + sec = uint64(vi.(float64) * M) + default: + return value, errors.New("not a number") + } + return dbus.MakeVariant(sec), nil +} + +func initSystemdProps(spec *specs.Spec) ([]systemdDbus.Property, error) { + const keyPrefix = "org.systemd.property." + var sp []systemdDbus.Property + + for k, v := range spec.Annotations { + name := strings.TrimPrefix(k, keyPrefix) + if len(name) == len(k) { // prefix not there + continue + } + if !isValidName(name) { + return nil, fmt.Errorf("Annotation %s name incorrect: %s", k, name) + } + value, err := dbus.ParseVariant(v, dbus.Signature{}) + if err != nil { + return nil, fmt.Errorf("Annotation %s=%s value parse error: %v", k, v, err) + } + if isSecSuffix(name) { + name = strings.TrimSuffix(name, "Sec") + "USec" + value, err = convertSecToUSec(value) + if err != nil { + return nil, fmt.Errorf("Annotation %s=%s value parse error: %v", k, v, err) + } + } + sp = append(sp, systemdDbus.Property{Name: name, Value: value}) + } + + return sp, nil +} + func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { var ( myCgroupPath string @@ -312,6 +379,14 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { Resources: &configs.Resources{}, } + if useSystemdCgroup { + sp, err := initSystemdProps(spec) + if err != nil { + return nil, err + } + c.SystemdProps = sp + } + if spec.Linux != nil && spec.Linux.CgroupsPath != "" { myCgroupPath = libcontainerUtils.CleanPath(spec.Linux.CgroupsPath) if useSystemdCgroup { diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index da6a43a0..aca5ca5e 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -9,6 +9,7 @@ import ( "golang.org/x/sys/unix" + "github.com/godbus/dbus" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs/validate" "github.com/opencontainers/runtime-spec/specs-go" @@ -450,3 +451,137 @@ func TestNonZeroEUIDCompatibleSpecconvValidate(t *testing.T) { t.Errorf("Expected specconv to produce valid rootless container config: %v", err) } } + +func TestInitSystemdProps(t *testing.T) { + type inT struct { + name, value string + } + type expT struct { + isErr bool + name string + value interface{} + } + + testCases := []struct { + desc string + in inT + exp expT + }{ + { + in: inT{"org.systemd.property.TimeoutStopUSec", "uint64 123456789"}, + exp: expT{false, "TimeoutStopUSec", uint64(123456789)}, + }, + { + desc: "convert USec to Sec (default numeric type)", + in: inT{"org.systemd.property.TimeoutStopSec", "456"}, + exp: expT{false, "TimeoutStopUSec", uint64(456000000)}, + }, + { + desc: "convert USec to Sec (byte)", + in: inT{"org.systemd.property.TimeoutStopSec", "byte 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (int16)", + in: inT{"org.systemd.property.TimeoutStopSec", "int16 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (uint16)", + in: inT{"org.systemd.property.TimeoutStopSec", "uint16 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (int32)", + in: inT{"org.systemd.property.TimeoutStopSec", "int32 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (uint32)", + in: inT{"org.systemd.property.TimeoutStopSec", "uint32 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (int64)", + in: inT{"org.systemd.property.TimeoutStopSec", "int64 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (uint64)", + in: inT{"org.systemd.property.TimeoutStopSec", "uint64 234"}, + exp: expT{false, "TimeoutStopUSec", uint64(234000000)}, + }, + { + desc: "convert USec to Sec (float)", + in: inT{"org.systemd.property.TimeoutStopSec", "234.789"}, + exp: expT{false, "TimeoutStopUSec", uint64(234789000)}, + }, + { + desc: "convert USec to Sec (bool -- invalid value)", + in: inT{"org.systemd.property.TimeoutStopSec", "false"}, + exp: expT{true, "", ""}, + }, + { + desc: "convert USec to Sec (string -- invalid value)", + in: inT{"org.systemd.property.TimeoutStopSec", "'covfefe'"}, + exp: expT{true, "", ""}, + }, + { + in: inT{"org.systemd.property.CollectMode", "'inactive-or-failed'"}, + exp: expT{false, "CollectMode", "inactive-or-failed"}, + }, + { + desc: "unrelated property", + in: inT{"some.other.annotation", "0"}, + exp: expT{false, "", ""}, + }, + { + desc: "too short property name", + in: inT{"org.systemd.property.Xo", "1"}, + exp: expT{true, "", ""}, + }, + { + desc: "invalid character in property name", + in: inT{"org.systemd.property.Number1", "1"}, + exp: expT{true, "", ""}, + }, + { + desc: "invalid property value", + in: inT{"org.systemd.property.ValidName", "invalid-value"}, + exp: expT{true, "", ""}, + }, + } + + spec := &specs.Spec{} + + for _, tc := range testCases { + tc := tc + spec.Annotations = map[string]string{tc.in.name: tc.in.value} + + outMap, err := initSystemdProps(spec) + //t.Logf("input %+v, expected %+v, got err:%v out:%+v", tc.in, tc.exp, err, outMap) + + if tc.exp.isErr != (err != nil) { + t.Errorf("input %+v, expecting error: %v, got %v", tc.in, tc.exp.isErr, err) + } + expLen := 1 // expect a single item + if tc.exp.name == "" { + expLen = 0 // expect nothing + } + if len(outMap) != expLen { + t.Fatalf("input %+v, expected %d, got %d entries: %v", tc.in, expLen, len(outMap), outMap) + } + if expLen == 0 { + continue + } + + out := outMap[0] + if tc.exp.name != out.Name { + t.Errorf("input %+v, expecting name: %q, got %q", tc.in, tc.exp.name, out.Name) + } + expValue := dbus.MakeVariant(tc.exp.value).String() + if expValue != out.Value.String() { + t.Errorf("input %+v, expecting value: %s, got %s", tc.in, expValue, out.Value) + } + } +}