libcontainer: ability to compile without kmem
Commit fe898e7862
(PR #1350) enables kernel memory accounting
for all cgroups created by libcontainer -- even if kmem limit is
not configured.
Kernel memory accounting is known to be broken in some kernels,
specifically the ones from RHEL7 (including RHEL 7.5). Those
kernels do not support kernel memory reclaim, and are prone to
oopses. Unconditionally enabling kmem acct on such kernels lead
to bugs, such as
* https://github.com/opencontainers/runc/issues/1725
* https://github.com/kubernetes/kubernetes/issues/61937
* https://github.com/moby/moby/issues/29638
This commit gives a way to compile runc without kernel memory setting
support. To do so, use something like
make BUILDTAGS="seccomp nokmem"
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
parent
f3ce8221ea
commit
6a2c155968
2
Makefile
2
Makefile
|
@ -12,7 +12,7 @@ GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
|
|||
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
|
||||
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
|
||||
PROJECT := github.com/opencontainers/runc
|
||||
BUILDTAGS := seccomp
|
||||
BUILDTAGS ?= seccomp
|
||||
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
|
||||
COMMIT := $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
|
||||
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
// +build linux,!nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // for Errno type only
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Check if kernel memory is enabled
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// kernel memory is not enabled on the system so we should do nothing
|
||||
return nil
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
// +build linux,nokmem
|
||||
|
||||
package fs
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
return nil
|
||||
}
|
|
@ -5,23 +5,18 @@ package fs
|
|||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall" // only for Errno
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
|
@ -67,44 +62,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
|||
return nil
|
||||
}
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Check if kernel memory is enabled
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// kernel memory is not enabled on the system so we should do nothing
|
||||
return nil
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// If the memory update is set to -1 we should also
|
||||
// set swap to -1, it means unlimited memory.
|
||||
|
|
Loading…
Reference in New Issue