diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index d977b337..ab2fbe14 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -648,6 +648,13 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
 	if c.config.RootlessCgroups {
 		logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
 	}
+	if cgroups.IsCgroup2UnifiedMode() {
+		path, err := c.cgroupManager.GetUnifiedPath()
+		if err != nil {
+			return nil, err
+		}
+		return notifyOnOOMV2(path)
+	}
 	return notifyOnOOM(c.cgroupManager.GetPaths())
 }
 
diff --git a/libcontainer/notify_linux_v2.go b/libcontainer/notify_linux_v2.go
new file mode 100644
index 00000000..cdab10ed
--- /dev/null
+++ b/libcontainer/notify_linux_v2.go
@@ -0,0 +1,127 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"unsafe"
+
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// getValueFromCgroup reads the flat-keyed file at path, made of "key value"
+// lines, and returns the integer stored under key. It returns 0 and a nil
+// error when no line carries that key.
+func getValueFromCgroup(path, key string) (int, error) {
+	content, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+
+	for _, line := range strings.Split(string(content), "\n") {
+		arr := strings.Split(line, " ")
+		if len(arr) == 2 && arr[0] == key {
+			return strconv.Atoi(arr[1])
+		}
+	}
+	return 0, nil
+}
+
+// registerMemoryEventV2 uses inotify to watch two files under cgDir: evName
+// for OOM kills and cgEvName for the cgroup running out of processes.
+//
+// A value is sent on the returned channel when the "oom_kill" counter in
+// evName grows, or when that counter cannot be read. The channel is closed,
+// and the inotify descriptor released, once "populated" in cgEvName reads 0
+// or the inotify descriptor can no longer be read.
+func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) {
+	evPath := filepath.Join(cgDir, evName)
+	cgEvPath := filepath.Join(cgDir, cgEvName)
+	// IN_CLOEXEC keeps the descriptor from leaking into exec'ed children.
+	fd, err := unix.InotifyInit1(unix.IN_CLOEXEC)
+	if err != nil {
+		return nil, errors.Wrap(err, "unable to init inotify")
+	}
+	// watching oom kill
+	evWd, err := unix.InotifyAddWatch(fd, evPath, unix.IN_MODIFY)
+	if err != nil {
+		unix.Close(fd)
+		return nil, errors.Wrap(err, "unable to add inotify watch")
+	}
+	// The cgroup file system emits no IN_DELETE or IN_DELETE_SELF events,
+	// so watch cgEvName to learn when every process has exited.
+	cgWd, err := unix.InotifyAddWatch(fd, cgEvPath, unix.IN_MODIFY)
+	if err != nil {
+		unix.Close(fd)
+		return nil, errors.Wrap(err, "unable to add inotify watch")
+	}
+	ch := make(chan struct{})
+	go func() {
+		var (
+			buffer [unix.SizeofInotifyEvent + unix.PathMax + 1]byte
+			offset uint32
+			// lastOOMKill is the highest oom_kill count already reported.
+			lastOOMKill int
+		)
+		defer func() {
+			unix.Close(fd)
+			close(ch)
+		}()
+
+		for {
+			n, err := unix.Read(fd, buffer[:])
+			if err == unix.EINTR {
+				// Interrupted by a signal; the watches are still valid.
+				continue
+			}
+			if err != nil {
+				logrus.Warnf("unable to read event data from inotify, got error: %v", err)
+				return
+			}
+			if n < unix.SizeofInotifyEvent {
+				logrus.Warnf("we should read at least %d bytes from inotify, but got %d bytes.", unix.SizeofInotifyEvent, n)
+				return
+			}
+			// One read may return several packed events; walk them all.
+			offset = 0
+			for offset <= uint32(n-unix.SizeofInotifyEvent) {
+				rawEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buffer[offset]))
+				offset += unix.SizeofInotifyEvent + uint32(rawEvent.Len)
+				if rawEvent.Mask&unix.IN_MODIFY != unix.IN_MODIFY {
+					continue
+				}
+				switch int(rawEvent.Wd) {
+				case evWd:
+					oom, err := getValueFromCgroup(evPath, "oom_kill")
+					// The watched file also changes on memory events that
+					// are not OOM kills, so notify only when oom_kill has
+					// grown since the last notification (or is unreadable).
+					if err != nil || oom > lastOOMKill {
+						ch <- struct{}{}
+					}
+					if err == nil {
+						lastOOMKill = oom
+					}
+				case cgWd:
+					pids, err := getValueFromCgroup(cgEvPath, "populated")
+					if err != nil || pids == 0 {
+						return
+					}
+				}
+			}
+		}
+	}()
+	return ch, nil
+}
+
+// notifyOnOOMV2 returns a channel that receives a value for each OOM event in
+// the cgroup v2 directory at path. The channel is closed once the cgroup has
+// no processes left.
+func notifyOnOOMV2(path string) (<-chan struct{}, error) {
+	return registerMemoryEventV2(path, "memory.events", "cgroup.events")
+}
diff --git a/tests/integration/events.bats b/tests/integration/events.bats
index 8f2f81a2..50c577e8 100644
--- a/tests/integration/events.bats
+++ b/tests/integration/events.bats
@@ -13,8 +13,8 @@ function teardown() {
 
 @test "events --stats" {
 	# XXX: currently cgroups require root containers.
-	# TODO: support cgroup v2 memory.events
-	requires root cgroups_v1
+	requires root
+	init_cgroup_paths
 
 	# run busybox detached
 	runc run -d --console-socket $CONSOLE_SOCKET test_busybox
@@ -27,10 +27,10 @@ function teardown() {
 	[[ "${lines[0]}" == *"data"* ]]
 }
 
-@test "events --interval default " {
+@test "events --interval default" {
 	# XXX: currently cgroups require root containers.
-	# TODO: support cgroup v2 memory.events
-	requires root cgroups_v1
+	requires root
+	init_cgroup_paths
 
 	# run busybox detached
 	runc run -d --console-socket $CONSOLE_SOCKET test_busybox
@@ -55,10 +55,10 @@ function teardown() {
 	[[ "${lines[0]}" == *"data"* ]]
 }
 
-@test "events --interval 1s " {
+@test "events --interval 1s" {
 	# XXX: currently cgroups require root containers.
-	# TODO: support cgroup v2 memory.events
-	requires root cgroups_v1
+	requires root
+	init_cgroup_paths
 
 	# run busybox detached
 	runc run -d --console-socket $CONSOLE_SOCKET test_busybox
@@ -82,10 +82,10 @@ function teardown() {
 	[ "$status" -eq 0 ]
 }
 
-@test "events --interval 100ms " {
+@test "events --interval 100ms" {
 	# XXX: currently cgroups require root containers.
-	# TODO: support cgroup v2 memory.events
-	requires root cgroups_v1
+	requires root
+	init_cgroup_paths
 
 	# run busybox detached
 	runc run -d --console-socket $CONSOLE_SOCKET test_busybox
@@ -111,3 +111,42 @@ function teardown() {
 	run eval "grep -q 'test_busybox' events.log"
 	[ "$status" -eq 0 ]
 }
+
+@test "events oom" {
+	# XXX: currently cgroups require root containers.
+	requires root
+	init_cgroup_paths
+
+	# we need the container to hit OOM, so disable swap
+	# ("swap" here is actually memory+swap)
+	# NOTE(review): the heredoc body and the lines up to the event logger were
+	# lost in extraction and are reconstructed; confirm the limit values.
+	DATA=$(cat <<EOF
+"memory": {
+  "limit": 33554432,
+  "swap": 33554432
+},
+EOF
+	)
+	DATA=$(echo ${DATA} | sed 's/\n/\\n/g')
+	sed -i "s/\(\"resources\": {\)/\1\n${DATA}/" ${BUSYBOX_BUNDLE}/config.json
+
+	# run busybox detached
+	runc run -d --console-socket $CONSOLE_SOCKET test_busybox
+	[ "$status" -eq 0 ]
+
+	# spawn two sub processes (shells)
+	# the first sub process is an event logger that sends stats events to events.log
+	# the second sub process exec a memory hog process to cause a oom condition
+	# and waits for an oom event
+	(__runc events test_busybox > events.log) &
+	(
+		retry 10 1 eval "grep -q 'test_busybox' events.log"
+		__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
+		retry 10 1 eval "grep -q 'oom' events.log"
+		__runc delete -f test_busybox
+	) &
+	wait # wait for the above sub shells to finish
+
+	grep -q '{"type":"oom","id":"test_busybox"}' events.log
+}