libcontainer: intelrdt: add support for Intel RDT/MBA in runc
Memory Bandwidth Allocation (MBA) is a resource allocation sub-feature of Intel Resource Director Technology (RDT) which is supported on some Intel Xeon platforms. Intel RDT/MBA provides indirect and approximate throttling of memory bandwidth for software. A user controls the resource by indicating the percentage of maximum memory bandwidth. Hardware details of Intel RDT/MBA can be found in section 17.18 of the Intel Software Developer Manual: https://software.intel.com/en-us/articles/intel-sdm In Linux kernel 4.12 and newer, Intel RDT/MBA is enabled by kernel config CONFIG_INTEL_RDT. If the hardware supports it, the CPU flags `rdt_a` and `mba` will be set in /proc/cpuinfo. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | | |-- cbm_mask | | |-- min_cbm_bits | | |-- num_closids | |-- MB | |-- bandwidth_gran | |-- delay_linear | |-- min_bandwidth | |-- num_closids |-- ... |-- schemata |-- tasks |-- <container_id> |-- ... |-- schemata |-- tasks For MBA support in `runc`, we will reuse the infrastructure and code base of Intel RDT/CAT, which was implemented in #1279. We can also make use of the `tasks` and `schemata` configuration for memory bandwidth resource constraints. The file `tasks` has a list of tasks that belong to this group (e.g., the "<container_id>" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. The file `schemata` has a list of all the resources available to this group. Each resource (L3 cache, memory bandwidth) has its own line and format. Memory bandwidth schema: It has allocation values for memory bandwidth on each socket, which contains L3 cache id and memory bandwidth percentage. Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
The minimum bandwidth percentage value for each CPU model is predefined and can be looked up through "info/MB/min_bandwidth". The bandwidth granularity that is allocated is also dependent on the CPU model and can be looked up at "info/MB/bandwidth_gran". The available bandwidth control steps are: min_bw + N * bw_gran. Intermediate values are rounded to the next control step available on the hardware. For more information about the Intel RDT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt An example for runc: Consider a two-socket machine with two L3 caches, a minimum memory bandwidth of 10% and a memory bandwidth granularity of 10%. Tasks inside the container may use a maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. "linux": { "intelRdt": { "memBwSchema": "MB:0=20;1=70" } } Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
This commit is contained in:
parent
c1cece7e23
commit
27560ace2f
38
events.go
38
events.go
|
@ -104,6 +104,13 @@ type l3CacheInfo struct {
|
|||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
// memBwInfo is the events-output form of the read-only memory bandwidth
// (Intel RDT/MBA) parameters; it is filled from intelrdt.MemBwInfo by
// convertMemBwInfo, field for field.
type memBwInfo struct {
|
||||
// Bandwidth granularity, as exposed in "info/MB/bandwidth_gran".
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
// Value exposed in "info/MB/delay_linear".
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
// Minimum bandwidth percentage, as exposed in "info/MB/min_bandwidth".
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
// Value exposed in "info/MB/num_closids".
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type intelRdt struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
@ -113,6 +120,15 @@ type intelRdt struct {
|
|||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
var eventsCommand = cli.Command{
|
||||
|
@ -248,9 +264,16 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
|
|||
}
|
||||
|
||||
if is := ls.IntelRdtStats; is != nil {
|
||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||
if intelrdt.IsCatEnabled() {
|
||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() {
|
||||
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
||||
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
||||
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
||||
}
|
||||
}
|
||||
|
||||
return &s
|
||||
|
@ -293,3 +316,12 @@ func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
|
|||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
||||
|
||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo {
|
||||
return &memBwInfo{
|
||||
BandwidthGran: i.BandwidthGran,
|
||||
DelayLinear: i.DelayLinear,
|
||||
MinBandwidth: i.MinBandwidth,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -186,8 +186,8 @@ type Config struct {
|
|||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache) the container has available
|
||||
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
|
||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||
|
|
|
@ -4,4 +4,8 @@ type IntelRdt struct {
|
|||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth percentage per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
||||
|
|
|
@ -169,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
|||
|
||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
if config.IntelRdt != nil {
|
||||
if !intelrdt.IsEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
|
||||
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||
}
|
||||
if config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
|
||||
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
|
||||
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||
}
|
||||
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
|
||||
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||
}
|
||||
|
||||
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ func RootlessCgroupfs(l *LinuxFactory) error {
|
|||
|
||||
// IntelRdtFs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the Intel RDT "resource control" filesystem to
|
||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
||||
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
|
||||
func IntelRdtFs(l *LinuxFactory) error {
|
||||
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
|
||||
return &intelrdt.IntelRdtManager{
|
||||
|
@ -222,7 +222,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
|
@ -268,7 +268,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
|
||||
}
|
||||
return c, nil
|
||||
|
|
|
@ -16,20 +16,25 @@ import (
|
|||
)
|
||||
|
||||
/*
|
||||
* About Intel RDT/CAT feature:
|
||||
* About Intel RDT features:
|
||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
|
||||
* Cache is the only resource that is supported in RDT.
|
||||
* Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
* two sub-features of RDT.
|
||||
*
|
||||
* This feature provides a way for the software to restrict cache allocation to a
|
||||
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
* The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
* has a capacity bitmask (CBM).
|
||||
* Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
* cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
* with other 'subsets'. The different subsets are identified by class of
|
||||
* service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
*
|
||||
* For more information about Intel RDT/CAT can be found in the section 17.17
|
||||
* of Intel Software Developer Manual.
|
||||
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
* over memory bandwidth for the software. A user controls the resource by
|
||||
* indicating the percentage of maximum memory bandwidth.
|
||||
*
|
||||
* About Intel RDT/CAT kernel interface:
|
||||
* More details about Intel RDT CAT and MBA can be found in the section 17.18
|
||||
* of Intel Software Developer Manual:
|
||||
* https://software.intel.com/en-us/articles/intel-sdm
|
||||
*
|
||||
* About Intel RDT kernel interface:
|
||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||
*
|
||||
|
@ -37,59 +42,86 @@ import (
|
|||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
* filesystem layout.
|
||||
*
|
||||
* CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||
* "resource control" filesystem.
|
||||
*
|
||||
* Intel RDT "resource control" filesystem hierarchy:
|
||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||
* tree /sys/fs/resctrl
|
||||
* /sys/fs/resctrl/
|
||||
* |-- info
|
||||
* | |-- L3
|
||||
* | |-- cbm_mask
|
||||
* | |-- min_cbm_bits
|
||||
* | | |-- cbm_mask
|
||||
* | | |-- min_cbm_bits
|
||||
* | | |-- num_closids
|
||||
* | |-- MB
|
||||
* | |-- bandwidth_gran
|
||||
* | |-- delay_linear
|
||||
* | |-- min_bandwidth
|
||||
* | |-- num_closids
|
||||
* |-- cpus
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <container_id>
|
||||
* |-- cpus
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
*
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
* resource constraints.
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
* cache and memory bandwidth resources constraints.
|
||||
*
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* <container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
* added to the same group as their parent. If a pid is not in any sub group, it is
|
||||
* in root group.
|
||||
* added to the same group as their parent.
|
||||
*
|
||||
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
|
||||
* which contains L3 cache id and capacity bitmask (CBM).
|
||||
* The file `schemata` has a list of all the resources available to this group.
|
||||
* Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
*
|
||||
* L3 cache schema:
|
||||
* It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
* contains L3 cache id and capacity bitmask (CBM).
|
||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
* For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
*
|
||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
||||
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
* supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
* e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
* bits, which mapping to entire L3 cache capacity. Some valid CBM values to
|
||||
* set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
*
|
||||
* For more information about Intel RDT/CAT kernel interface:
|
||||
* Memory bandwidth schema:
|
||||
* It has allocation values for memory bandwidth on each socket, which contains
|
||||
* L3 cache id and memory bandwidth percentage.
|
||||
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
*
|
||||
* The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
* can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
* that is allocated is also dependent on the CPU model and can be looked up at
|
||||
* "info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
* step available on the hardware.
|
||||
*
|
||||
* For more information about Intel RDT kernel interface:
|
||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
*
|
||||
* An example for runc:
|
||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
* the "lower" 50% L3 cache id 1:
|
||||
* 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
* with a memory bandwidth granularity of 10%.
|
||||
*
|
||||
* Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
* on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||
* maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
*
|
||||
* "linux": {
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
* "memBwSchema": "MB:0=20;1=70"
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
@ -129,8 +161,10 @@ var (
|
|||
intelRdtRoot string
|
||||
intelRdtRootLock sync.Mutex
|
||||
|
||||
// The flag to indicate if Intel RDT is supported
|
||||
isEnabled bool
|
||||
// The flag to indicate if Intel RDT/CAT is enabled
|
||||
isCatEnabled bool
|
||||
// The flag to indicate if Intel RDT/MBA is enabled
|
||||
isMbaEnabled bool
|
||||
)
|
||||
|
||||
type intelRdtData struct {
|
||||
|
@ -139,19 +173,22 @@ type intelRdtData struct {
|
|||
pid int
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled in init()
|
||||
// Check if Intel RDT sub-features are enabled in init()
|
||||
func init() {
|
||||
// 1. Check if hardware and kernel support Intel RDT/CAT feature
|
||||
// "cat_l3" flag is set if supported
|
||||
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if !isFlagSet || err != nil {
|
||||
isEnabled = false
|
||||
// 1. Check if hardware and kernel support Intel RDT sub-features
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if err != nil {
|
||||
isCatEnabled = false
|
||||
isMbaEnabled = false
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||
// The user guarantees to mount the filesystem
|
||||
isEnabled = isIntelRdtMounted()
|
||||
isFsMounted := isIntelRdtMounted()
|
||||
isCatEnabled = isCatFlagSet && isFsMounted
|
||||
isMbaEnabled = isMbaFlagSet && isFsMounted
|
||||
}
|
||||
|
||||
// Return the mount point path of Intel RDT "resource control" filesystem
|
||||
|
@ -223,30 +260,40 @@ func isIntelRdtMounted() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// parseCpuInfoFile scans the cpuinfo-style file at path and reports whether
// the "cat_l3" (Intel RDT/CAT) and "mba" (Intel RDT/MBA) CPU flags are set.
//
// Only the first line containing "flags" is inspected; the result is returned
// as soon as that line has been processed. If no such line exists both flags
// are reported as false.
//
// An error is returned when the file cannot be opened or when scanning stops
// early (for example on a read failure or an over-long line). In that case
// both flags are false.
func parseCpuInfoFile(path string) (bool, bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return false, false, err
	}
	defer f.Close()

	isCatFlagSet := false
	isMbaFlagSet := false

	s := bufio.NewScanner(f)
	for s.Scan() {
		line := s.Text()
		if !strings.Contains(line, "flags") {
			continue
		}

		// strings.Fields splits on any run of whitespace, so flags separated
		// by tabs or multiple spaces are still recognized.
		for _, flag := range strings.Fields(line) {
			switch flag {
			case "cat_l3":
				isCatFlagSet = true
			case "mba":
				isMbaFlagSet = true
			}
		}
		return isCatFlagSet, isMbaFlagSet, nil
	}

	// Scan() returning false is either EOF or a failure; Err() tells them
	// apart and must be consulted after the loop, not inside it.
	if err := s.Err(); err != nil {
		return false, false, err
	}
	return false, false, nil
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
|
@ -292,30 +339,6 @@ func getIntelRdtParamString(path, file string) (string, error) {
|
|||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
||||
|
||||
func readTasksFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
|
@ -368,6 +391,41 @@ func getL3CacheInfo() (*L3CacheInfo, error) {
|
|||
return l3CacheInfo, nil
|
||||
}
|
||||
|
||||
// Get the read-only memory bandwidth information
|
||||
func getMemBwInfo() (*MemBwInfo, error) {
|
||||
memBwInfo := &MemBwInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "MB")
|
||||
bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
memBwInfo.BandwidthGran = bandwidthGran
|
||||
memBwInfo.DelayLinear = delayLinear
|
||||
memBwInfo.MinBandwidth = minBandwidth
|
||||
memBwInfo.NumClosids = numClosids
|
||||
|
||||
return memBwInfo, nil
|
||||
}
|
||||
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
|
@ -383,9 +441,14 @@ func WriteIntelRdtTasks(dir string, pid int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled
|
||||
func IsEnabled() bool {
|
||||
return isEnabled
|
||||
// IsCatEnabled reports whether Intel RDT/CAT is enabled, i.e. the value of the
// package-level isCatEnabled flag computed in init().
|
||||
func IsCatEnabled() bool {
|
||||
return isCatEnabled
|
||||
}
|
||||
|
||||
// IsMbaEnabled reports whether Intel RDT/MBA is enabled, i.e. the value of the
// package-level isMbaEnabled flag computed in init().
|
||||
func IsMbaEnabled() bool {
|
||||
return isMbaEnabled
|
||||
}
|
||||
|
||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||
|
@ -452,67 +515,132 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
|||
defer m.mu.Unlock()
|
||||
stats := NewStats()
|
||||
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// The read-only L3 cache and memory bandwidth schemata in root
|
||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
stats.L3CacheSchemaRoot = schemaRootStrings[0]
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
// The L3 cache and memory bandwidth schemata in 'container_id' group
|
||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||
stats.L3CacheSchema = schemaStrings[0]
|
||||
|
||||
if IsCatEnabled() {
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "L3") {
|
||||
stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "L3") {
|
||||
stats.L3CacheSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if IsMbaEnabled() {
|
||||
// The read-only memory bandwidth information
|
||||
memBwInfo, err := getMemBwInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MemBwInfo = memBwInfo
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "MB") {
|
||||
stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "MB") {
|
||||
stats.MemBwSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
||||
path := m.GetPath()
|
||||
|
||||
// About L3 cache schema file:
|
||||
// The schema has allocation masks/values for L3 cache on each socket,
|
||||
// About L3 cache schema:
|
||||
// It has allocation bitmasks/values for L3 cache on each socket,
|
||||
// which contains L3 cache id and capacity bitmask (CBM).
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, L3's schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
|
||||
// is 0xc0.
|
||||
//
|
||||
// About L3 cache CBM validity:
|
||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||
// bits that can be set is less than the max bit. The max bits in the
|
||||
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
|
||||
// "resource control" filesystem layout, the CBM in a group should
|
||||
// be a subset of the CBM in root. Kernel will check if it is valid
|
||||
// when writing.
|
||||
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
|
||||
// which mapping to entire L3 cache capacity. Some valid CBM values
|
||||
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
// CBM is varied among supported Intel CPU models. Kernel will check
|
||||
// if it is valid when writing. e.g., default value 0xfffff in root
|
||||
// indicates the max bits of CBM is 20 bits, which mapping to entire
|
||||
// L3 cache capacity. Some valid CBM values to set in a group:
|
||||
// 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
//
|
||||
//
|
||||
// About memory bandwidth schema:
|
||||
// It has allocation values for memory bandwidth on each socket, which
|
||||
// contains L3 cache id and memory bandwidth percentage.
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// "MB:0=20;1=70"
|
||||
//
|
||||
// The minimum bandwidth percentage value for each CPU model is
|
||||
// predefined and can be looked up through "info/MB/min_bandwidth".
|
||||
// The bandwidth granularity that is allocated is also dependent on
|
||||
// the CPU model and can be looked up at "info/MB/bandwidth_gran".
|
||||
// The available bandwidth control steps are: min_bw + N * bw_gran.
|
||||
// Intermediate values are rounded to the next control step available
|
||||
// on the hardware.
|
||||
if container.IntelRdt != nil {
|
||||
path := m.GetPath()
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
if l3CacheSchema != "" {
|
||||
memBwSchema := container.IntelRdt.MemBwSchema
|
||||
|
||||
// Write a single joint schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only L3 cache schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema == "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only memory bandwidth schema string to schemata file
|
||||
if l3CacheSchema == "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", memBwSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -8,6 +8,13 @@ type L3CacheInfo struct {
|
|||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
// MemBwInfo holds the read-only memory bandwidth (Intel RDT/MBA) parameters
// that getMemBwInfo reads from "info/MB" in the resctrl filesystem.
type MemBwInfo struct {
|
||||
// Bandwidth granularity, read from "info/MB/bandwidth_gran".
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
// Value read from "info/MB/delay_linear".
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
// Minimum bandwidth percentage, read from "info/MB/min_bandwidth".
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
// Value read from "info/MB/num_closids".
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
@ -17,6 +24,15 @@ type Stats struct {
|
|||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
|
|
|
@ -246,6 +246,9 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
|||
if spec.Linux.IntelRdt.L3CacheSchema != "" {
|
||||
config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema
|
||||
}
|
||||
if spec.Linux.IntelRdt.MemBwSchema != "" {
|
||||
config.IntelRdt.MemBwSchema = spec.Linux.IntelRdt.MemBwSchema
|
||||
}
|
||||
}
|
||||
}
|
||||
if spec.Process.SelinuxLabel != "" {
|
||||
|
|
|
@ -262,7 +262,7 @@ other options are ignored.
|
|||
|
||||
// Update Intel RDT/CAT
|
||||
if val := context.String("l3-cache-schema"); val != "" {
|
||||
if !intelrdt.IsEnabled() {
|
||||
if !intelrdt.IsCatEnabled() {
|
||||
return fmt.Errorf("Intel RDT: l3 cache schema is not enabled")
|
||||
}
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
|||
}
|
||||
|
||||
intelRdtManager := libcontainer.IntelRdtFs
|
||||
if !intelrdt.IsEnabled() {
|
||||
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||
intelRdtManager = nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue