libcontainer: intelrdt: add support for Intel RDT/MBA in runc
Memory Bandwidth Allocation (MBA) is a resource allocation sub-feature of Intel Resource Director Technology (RDT) which is supported on some Intel Xeon platforms. Intel RDT/MBA provides indirect and approximate throttle over memory bandwidth for the software. A user controls the resource by indicating the percentage of maximum memory bandwidth. Hardware details of Intel RDT/MBA can be found in section 17.18 of Intel Software Developer Manual: https://software.intel.com/en-us/articles/intel-sdm In Linux 4.12 kernel and newer, Intel RDT/MBA is enabled by kernel config CONFIG_INTEL_RDT. If the hardware supports it, CPU flags `rdt_a` and `mba` will be set in /proc/cpuinfo. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | | |-- cbm_mask | | |-- min_cbm_bits | | |-- num_closids | |-- MB | |-- bandwidth_gran | |-- delay_linear | |-- min_bandwidth | |-- num_closids |-- ... |-- schemata |-- tasks |-- <container_id> |-- ... |-- schemata |-- tasks For MBA support for `runc`, we will reuse the infrastructure and code base of Intel RDT/CAT which was implemented in #1279. We could also make use of `tasks` and `schemata` configuration for memory bandwidth resource constraints. The file `tasks` has a list of tasks that belong to this group (e.g., "<container_id>" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. The file `schemata` has a list of all the resources available to this group. Each resource (L3 cache, memory bandwidth) has its own line and format. Memory bandwidth schema: It has allocation values for memory bandwidth on each socket, which contains L3 cache id and memory bandwidth percentage. Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." 
The minimum bandwidth percentage value for each CPU model is predefined and can be looked up through "info/MB/min_bandwidth". The bandwidth granularity that is allocated is also dependent on the CPU model and can be looked up at "info/MB/bandwidth_gran". The available bandwidth control steps are: min_bw + N * bw_gran. Intermediate values are rounded to the next control step available on the hardware. For more information about Intel RDT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt An example for runc: Consider a two-socket machine with two L3 caches where the minimum memory bandwidth is 10% with a memory bandwidth granularity of 10%. Tasks inside the container may use a maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. "linux": { "intelRdt": { "memBwSchema": "MB:0=20;1=70" } } Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
This commit is contained in:
parent
c1cece7e23
commit
27560ace2f
38
events.go
38
events.go
|
@ -104,6 +104,13 @@ type l3CacheInfo struct {
|
||||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type memBwInfo struct {
|
||||||
|
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||||
|
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||||
|
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||||
|
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type intelRdt struct {
|
type intelRdt struct {
|
||||||
// The read-only L3 cache information
|
// The read-only L3 cache information
|
||||||
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
|
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||||
|
@ -113,6 +120,15 @@ type intelRdt struct {
|
||||||
|
|
||||||
// The L3 cache schema in 'container_id' group
|
// The L3 cache schema in 'container_id' group
|
||||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||||
|
|
||||||
|
// The read-only memory bandwidth information
|
||||||
|
MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"`
|
||||||
|
|
||||||
|
// The read-only memory bandwidth schema in root
|
||||||
|
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||||
|
|
||||||
|
// The memory bandwidth schema in 'container_id' group
|
||||||
|
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var eventsCommand = cli.Command{
|
var eventsCommand = cli.Command{
|
||||||
|
@ -248,9 +264,16 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
|
||||||
}
|
}
|
||||||
|
|
||||||
if is := ls.IntelRdtStats; is != nil {
|
if is := ls.IntelRdtStats; is != nil {
|
||||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
if intelrdt.IsCatEnabled() {
|
||||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||||
|
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||||
|
}
|
||||||
|
if intelrdt.IsMbaEnabled() {
|
||||||
|
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
||||||
|
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
||||||
|
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &s
|
return &s
|
||||||
|
@ -293,3 +316,12 @@ func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
|
||||||
NumClosids: i.NumClosids,
|
NumClosids: i.NumClosids,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo {
|
||||||
|
return &memBwInfo{
|
||||||
|
BandwidthGran: i.BandwidthGran,
|
||||||
|
DelayLinear: i.DelayLinear,
|
||||||
|
MinBandwidth: i.MinBandwidth,
|
||||||
|
NumClosids: i.NumClosids,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -186,8 +186,8 @@ type Config struct {
|
||||||
// callers keyring in this case.
|
// callers keyring in this case.
|
||||||
NoNewKeyring bool `json:"no_new_keyring"`
|
NoNewKeyring bool `json:"no_new_keyring"`
|
||||||
|
|
||||||
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
|
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||||
// to limit the resources (e.g., L3 cache) the container has available
|
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||||
|
|
||||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||||
|
|
|
@ -4,4 +4,8 @@ type IntelRdt struct {
|
||||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||||
|
|
||||||
|
// The schema of memory bandwidth percentage per L3 cache id
|
||||||
|
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||||
|
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -169,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||||
|
|
||||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||||
if config.IntelRdt != nil {
|
if config.IntelRdt != nil {
|
||||||
if !intelrdt.IsEnabled() {
|
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
|
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||||
}
|
}
|
||||||
if config.IntelRdt.L3CacheSchema == "" {
|
|
||||||
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
|
||||||
|
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||||
|
}
|
||||||
|
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
|
||||||
|
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
|
||||||
|
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||||
|
}
|
||||||
|
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
|
||||||
|
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ func RootlessCgroupfs(l *LinuxFactory) error {
|
||||||
|
|
||||||
// IntelRdtFs is an options func to configure a LinuxFactory to return
|
// IntelRdtFs is an options func to configure a LinuxFactory to return
|
||||||
// containers that use the Intel RDT "resource control" filesystem to
|
// containers that use the Intel RDT "resource control" filesystem to
|
||||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
|
||||||
func IntelRdtFs(l *LinuxFactory) error {
|
func IntelRdtFs(l *LinuxFactory) error {
|
||||||
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
|
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
|
||||||
return &intelrdt.IntelRdtManager{
|
return &intelrdt.IntelRdtManager{
|
||||||
|
@ -222,7 +222,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
||||||
newgidmapPath: l.NewgidmapPath,
|
newgidmapPath: l.NewgidmapPath,
|
||||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||||
}
|
}
|
||||||
if intelrdt.IsEnabled() {
|
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||||
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
||||||
}
|
}
|
||||||
c.state = &stoppedState{c: c}
|
c.state = &stoppedState{c: c}
|
||||||
|
@ -268,7 +268,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||||
if err := c.refreshState(); err != nil {
|
if err := c.refreshState(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if intelrdt.IsEnabled() {
|
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||||
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
|
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
|
||||||
}
|
}
|
||||||
return c, nil
|
return c, nil
|
||||||
|
|
|
@ -16,20 +16,25 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* About Intel RDT/CAT feature:
|
* About Intel RDT features:
|
||||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||||
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
|
* Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||||
* Cache is the only resource that is supported in RDT.
|
* two sub-features of RDT.
|
||||||
*
|
*
|
||||||
* This feature provides a way for the software to restrict cache allocation to a
|
* Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||||
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
* cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||||
* The different subsets are identified by class of service (CLOS) and each CLOS
|
* with other 'subsets'. The different subsets are identified by class of
|
||||||
* has a capacity bitmask (CBM).
|
* service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||||
*
|
*
|
||||||
* For more information about Intel RDT/CAT can be found in the section 17.17
|
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||||
* of Intel Software Developer Manual.
|
* over memory bandwidth for the software. A user controls the resource by
|
||||||
|
* indicating the percentage of maximum memory bandwidth.
|
||||||
*
|
*
|
||||||
* About Intel RDT/CAT kernel interface:
|
* More details about Intel RDT CAT and MBA can be found in the section 17.18
|
||||||
|
* of Intel Software Developer Manual:
|
||||||
|
* https://software.intel.com/en-us/articles/intel-sdm
|
||||||
|
*
|
||||||
|
* About Intel RDT kernel interface:
|
||||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||||
*
|
*
|
||||||
|
@ -37,59 +42,86 @@ import (
|
||||||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||||
* filesystem layout.
|
* filesystem layout.
|
||||||
*
|
*
|
||||||
|
* CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||||
|
* "resource control" filesystem.
|
||||||
|
*
|
||||||
* Intel RDT "resource control" filesystem hierarchy:
|
* Intel RDT "resource control" filesystem hierarchy:
|
||||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||||
* tree /sys/fs/resctrl
|
* tree /sys/fs/resctrl
|
||||||
* /sys/fs/resctrl/
|
* /sys/fs/resctrl/
|
||||||
* |-- info
|
* |-- info
|
||||||
* | |-- L3
|
* | |-- L3
|
||||||
* | |-- cbm_mask
|
* | | |-- cbm_mask
|
||||||
* | |-- min_cbm_bits
|
* | | |-- min_cbm_bits
|
||||||
|
* | | |-- num_closids
|
||||||
|
* | |-- MB
|
||||||
|
* | |-- bandwidth_gran
|
||||||
|
* | |-- delay_linear
|
||||||
|
* | |-- min_bandwidth
|
||||||
* | |-- num_closids
|
* | |-- num_closids
|
||||||
* |-- cpus
|
* |-- ...
|
||||||
* |-- schemata
|
* |-- schemata
|
||||||
* |-- tasks
|
* |-- tasks
|
||||||
* |-- <container_id>
|
* |-- <container_id>
|
||||||
* |-- cpus
|
* |-- ...
|
||||||
* |-- schemata
|
* |-- schemata
|
||||||
* |-- tasks
|
* |-- tasks
|
||||||
*
|
*
|
||||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
* For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||||
* resource constraints.
|
* cache and memory bandwidth resources constraints.
|
||||||
*
|
*
|
||||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||||
* "<container_id>" group). Tasks can be added to a group by writing the task ID
|
* "<container_id>" group). Tasks can be added to a group by writing the task ID
|
||||||
* to the "tasks" file (which will automatically remove them from the previous
|
* to the "tasks" file (which will automatically remove them from the previous
|
||||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||||
* added to the same group as their parent. If a pid is not in any sub group, it is
|
* added to the same group as their parent.
|
||||||
* in root group.
|
|
||||||
*
|
*
|
||||||
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
|
* The file `schemata` has a list of all the resources available to this group.
|
||||||
* which contains L3 cache id and capacity bitmask (CBM).
|
* Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||||
|
*
|
||||||
|
* L3 cache schema:
|
||||||
|
* It has allocation bitmasks/values for L3 cache on each socket, which
|
||||||
|
* contains L3 cache id and capacity bitmask (CBM).
|
||||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||||
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
* For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||||
*
|
*
|
||||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||||
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
* supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||||
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
* e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||||
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
* bits, which mapping to entire L3 cache capacity. Some valid CBM values to
|
||||||
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
* set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||||
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
|
||||||
*
|
*
|
||||||
* For more information about Intel RDT/CAT kernel interface:
|
* Memory bandwidth schema:
|
||||||
|
* It has allocation values for memory bandwidth on each socket, which contains
|
||||||
|
* L3 cache id and memory bandwidth percentage.
|
||||||
|
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||||
|
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||||
|
*
|
||||||
|
* The minimum bandwidth percentage value for each CPU model is predefined and
|
||||||
|
* can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||||
|
* that is allocated is also dependent on the CPU model and can be looked up at
|
||||||
|
* "info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||||
|
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||||
|
* step available on the hardware.
|
||||||
|
*
|
||||||
|
* For more information about Intel RDT kernel interface:
|
||||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||||
*
|
*
|
||||||
* An example for runc:
|
* An example for runc:
|
||||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||||
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
* 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||||
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
* with a memory bandwidth granularity of 10%.
|
||||||
* the "lower" 50% L3 cache id 1:
|
*
|
||||||
|
* Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||||
|
* on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||||
|
* maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||||
*
|
*
|
||||||
* "linux": {
|
* "linux": {
|
||||||
* "intelRdt": {
|
* "intelRdt": {
|
||||||
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
|
* "l3CacheSchema": "L3:0=7f0;1=1f",
|
||||||
|
* "memBwSchema": "MB:0=20;1=70"
|
||||||
* }
|
* }
|
||||||
* }
|
* }
|
||||||
*/
|
*/
|
||||||
|
@ -129,8 +161,10 @@ var (
|
||||||
intelRdtRoot string
|
intelRdtRoot string
|
||||||
intelRdtRootLock sync.Mutex
|
intelRdtRootLock sync.Mutex
|
||||||
|
|
||||||
// The flag to indicate if Intel RDT is supported
|
// The flag to indicate if Intel RDT/CAT is enabled
|
||||||
isEnabled bool
|
isCatEnabled bool
|
||||||
|
// The flag to indicate if Intel RDT/MBA is enabled
|
||||||
|
isMbaEnabled bool
|
||||||
)
|
)
|
||||||
|
|
||||||
type intelRdtData struct {
|
type intelRdtData struct {
|
||||||
|
@ -139,19 +173,22 @@ type intelRdtData struct {
|
||||||
pid int
|
pid int
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if Intel RDT is enabled in init()
|
// Check if Intel RDT sub-features are enabled in init()
|
||||||
func init() {
|
func init() {
|
||||||
// 1. Check if hardware and kernel support Intel RDT/CAT feature
|
// 1. Check if hardware and kernel support Intel RDT sub-features
|
||||||
// "cat_l3" flag is set if supported
|
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||||
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||||
if !isFlagSet || err != nil {
|
if err != nil {
|
||||||
isEnabled = false
|
isCatEnabled = false
|
||||||
|
isMbaEnabled = false
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||||
// The user guarantees to mount the filesystem
|
// The user guarantees to mount the filesystem
|
||||||
isEnabled = isIntelRdtMounted()
|
isFsMounted := isIntelRdtMounted()
|
||||||
|
isCatEnabled = isCatFlagSet && isFsMounted
|
||||||
|
isMbaEnabled = isMbaFlagSet && isFsMounted
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the mount point path of Intel RDT "resource control" filesystem
|
// Return the mount point path of Intel RDT "resource control" filesystem
|
||||||
|
@ -223,30 +260,40 @@ func isIntelRdtMounted() bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseCpuInfoFile(path string) (bool, error) {
|
func parseCpuInfoFile(path string) (bool, bool, error) {
|
||||||
|
isCatFlagSet := false
|
||||||
|
isMbaFlagSet := false
|
||||||
|
|
||||||
f, err := os.Open(path)
|
f, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, false, err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
s := bufio.NewScanner(f)
|
s := bufio.NewScanner(f)
|
||||||
for s.Scan() {
|
for s.Scan() {
|
||||||
if err := s.Err(); err != nil {
|
if err := s.Err(); err != nil {
|
||||||
return false, err
|
return false, false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
text := s.Text()
|
line := s.Text()
|
||||||
flags := strings.Split(text, " ")
|
|
||||||
|
|
||||||
// "cat_l3" flag is set if Intel RDT/CAT is supported
|
// Search "cat_l3" and "mba" flags in first "flags" line
|
||||||
for _, flag := range flags {
|
if strings.Contains(line, "flags") {
|
||||||
if flag == "cat_l3" {
|
flags := strings.Split(line, " ")
|
||||||
return true, nil
|
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||||
|
for _, flag := range flags {
|
||||||
|
switch flag {
|
||||||
|
case "cat_l3":
|
||||||
|
isCatFlagSet = true
|
||||||
|
case "mba":
|
||||||
|
isMbaFlagSet = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return isCatFlagSet, isMbaFlagSet, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false, nil
|
return isCatFlagSet, isMbaFlagSet, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||||
|
@ -292,30 +339,6 @@ func getIntelRdtParamString(path, file string) (string, error) {
|
||||||
return strings.TrimSpace(string(contents)), nil
|
return strings.TrimSpace(string(contents)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readTasksFile(dir string) ([]int, error) {
|
|
||||||
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
var (
|
|
||||||
s = bufio.NewScanner(f)
|
|
||||||
out = []int{}
|
|
||||||
)
|
|
||||||
|
|
||||||
for s.Scan() {
|
|
||||||
if t := s.Text(); t != "" {
|
|
||||||
pid, err := strconv.Atoi(t)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
out = append(out, pid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func writeFile(dir, file, data string) error {
|
func writeFile(dir, file, data string) error {
|
||||||
if dir == "" {
|
if dir == "" {
|
||||||
return fmt.Errorf("no such directory for %s", file)
|
return fmt.Errorf("no such directory for %s", file)
|
||||||
|
@ -368,6 +391,41 @@ func getL3CacheInfo() (*L3CacheInfo, error) {
|
||||||
return l3CacheInfo, nil
|
return l3CacheInfo, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get the read-only memory bandwidth information
|
||||||
|
func getMemBwInfo() (*MemBwInfo, error) {
|
||||||
|
memBwInfo := &MemBwInfo{}
|
||||||
|
|
||||||
|
rootPath, err := getIntelRdtRoot()
|
||||||
|
if err != nil {
|
||||||
|
return memBwInfo, err
|
||||||
|
}
|
||||||
|
|
||||||
|
path := filepath.Join(rootPath, "info", "MB")
|
||||||
|
bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
|
||||||
|
if err != nil {
|
||||||
|
return memBwInfo, err
|
||||||
|
}
|
||||||
|
delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
|
||||||
|
if err != nil {
|
||||||
|
return memBwInfo, err
|
||||||
|
}
|
||||||
|
minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
|
||||||
|
if err != nil {
|
||||||
|
return memBwInfo, err
|
||||||
|
}
|
||||||
|
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||||
|
if err != nil {
|
||||||
|
return memBwInfo, err
|
||||||
|
}
|
||||||
|
|
||||||
|
memBwInfo.BandwidthGran = bandwidthGran
|
||||||
|
memBwInfo.DelayLinear = delayLinear
|
||||||
|
memBwInfo.MinBandwidth = minBandwidth
|
||||||
|
memBwInfo.NumClosids = numClosids
|
||||||
|
|
||||||
|
return memBwInfo, nil
|
||||||
|
}
|
||||||
|
|
||||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||||
if dir == "" {
|
if dir == "" {
|
||||||
|
@ -383,9 +441,14 @@ func WriteIntelRdtTasks(dir string, pid int) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if Intel RDT is enabled
|
// Check if Intel RDT/CAT is enabled
|
||||||
func IsEnabled() bool {
|
func IsCatEnabled() bool {
|
||||||
return isEnabled
|
return isCatEnabled
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if Intel RDT/MBA is enabled
|
||||||
|
func IsMbaEnabled() bool {
|
||||||
|
return isMbaEnabled
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||||
|
@ -452,67 +515,132 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
||||||
defer m.mu.Unlock()
|
defer m.mu.Unlock()
|
||||||
stats := NewStats()
|
stats := NewStats()
|
||||||
|
|
||||||
// The read-only L3 cache information
|
|
||||||
l3CacheInfo, err := getL3CacheInfo()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
stats.L3CacheInfo = l3CacheInfo
|
|
||||||
|
|
||||||
// The read-only L3 cache schema in root
|
|
||||||
rootPath, err := getIntelRdtRoot()
|
rootPath, err := getIntelRdtRoot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
// The read-only L3 cache and memory bandwidth schemata in root
|
||||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// L3 cache schema is in the first line
|
|
||||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||||
stats.L3CacheSchemaRoot = schemaRootStrings[0]
|
|
||||||
|
|
||||||
// The L3 cache schema in 'container_id' group
|
// The L3 cache and memory bandwidth schemata in 'container_id' group
|
||||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// L3 cache schema is in the first line
|
|
||||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||||
stats.L3CacheSchema = schemaStrings[0]
|
|
||||||
|
if IsCatEnabled() {
|
||||||
|
// The read-only L3 cache information
|
||||||
|
l3CacheInfo, err := getL3CacheInfo()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
stats.L3CacheInfo = l3CacheInfo
|
||||||
|
|
||||||
|
// The read-only L3 cache schema in root
|
||||||
|
for _, schemaRoot := range schemaRootStrings {
|
||||||
|
if strings.Contains(schemaRoot, "L3") {
|
||||||
|
stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The L3 cache schema in 'container_id' group
|
||||||
|
for _, schema := range schemaStrings {
|
||||||
|
if strings.Contains(schema, "L3") {
|
||||||
|
stats.L3CacheSchema = strings.TrimSpace(schema)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if IsMbaEnabled() {
|
||||||
|
// The read-only memory bandwidth information
|
||||||
|
memBwInfo, err := getMemBwInfo()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
stats.MemBwInfo = memBwInfo
|
||||||
|
|
||||||
|
// The read-only memory bandwidth schema in root
|
||||||
|
for _, schemaRoot := range schemaRootStrings {
|
||||||
|
if strings.Contains(schemaRoot, "MB") {
|
||||||
|
stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The memory bandwidth schema in 'container_id' group
|
||||||
|
for _, schema := range schemaStrings {
|
||||||
|
if strings.Contains(schema, "MB") {
|
||||||
|
stats.MemBwSchema = strings.TrimSpace(schema)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set Intel RDT "resource control" filesystem as configured.
|
// Set Intel RDT "resource control" filesystem as configured.
|
||||||
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
||||||
path := m.GetPath()
|
// About L3 cache schema:
|
||||||
|
// It has allocation bitmasks/values for L3 cache on each socket,
|
||||||
// About L3 cache schema file:
|
|
||||||
// The schema has allocation masks/values for L3 cache on each socket,
|
|
||||||
// which contains L3 cache id and capacity bitmask (CBM).
|
// which contains L3 cache id and capacity bitmask (CBM).
|
||||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||||
// For example, on a two-socket machine, L3's schema line could be:
|
// For example, on a two-socket machine, the schema line could be:
|
||||||
// L3:0=ff;1=c0
|
// L3:0=ff;1=c0
|
||||||
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
|
||||||
|
// is 0xc0.
|
||||||
//
|
//
|
||||||
// About L3 cache CBM validity:
|
|
||||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||||
// bits that can be set is less than the max bit. The max bits in the
|
// bits that can be set is less than the max bit. The max bits in the
|
||||||
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
|
// CBM varies among supported Intel CPU models. The kernel will check
|
||||||
// "resource control" filesystem layout, the CBM in a group should
|
// if it is valid when writing. e.g., default value 0xfffff in root
|
||||||
// be a subset of the CBM in root. Kernel will check if it is valid
|
// indicates the max bits of CBM is 20 bits, which maps to the entire
|
||||||
// when writing.
|
// L3 cache capacity. Some valid CBM values to set in a group:
|
||||||
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
|
// 0xf, 0xf0, 0x3ff, 0x1f00, etc.
|
||||||
// which mapping to entire L3 cache capacity. Some valid CBM values
|
//
|
||||||
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
//
|
||||||
|
// About memory bandwidth schema:
|
||||||
|
// It has allocation values for memory bandwidth on each socket, which
|
||||||
|
// contains L3 cache id and memory bandwidth percentage.
|
||||||
|
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||||
|
// For example, on a two-socket machine, the schema line could be:
|
||||||
|
// "MB:0=20;1=70"
|
||||||
|
//
|
||||||
|
// The minimum bandwidth percentage value for each CPU model is
|
||||||
|
// predefined and can be looked up through "info/MB/min_bandwidth".
|
||||||
|
// The bandwidth granularity that is allocated is also dependent on
|
||||||
|
// the CPU model and can be looked up at "info/MB/bandwidth_gran".
|
||||||
|
// The available bandwidth control steps are: min_bw + N * bw_gran.
|
||||||
|
// Intermediate values are rounded to the next control step available
|
||||||
|
// on the hardware.
|
||||||
if container.IntelRdt != nil {
|
if container.IntelRdt != nil {
|
||||||
|
path := m.GetPath()
|
||||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||||
if l3CacheSchema != "" {
|
memBwSchema := container.IntelRdt.MemBwSchema
|
||||||
|
|
||||||
|
// Write a single joint schema string to schemata file
|
||||||
|
if l3CacheSchema != "" && memBwSchema != "" {
|
||||||
|
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write only L3 cache schema string to schemata file
|
||||||
|
if l3CacheSchema != "" && memBwSchema == "" {
|
||||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write only memory bandwidth schema string to schemata file
|
||||||
|
if l3CacheSchema == "" && memBwSchema != "" {
|
||||||
|
if err := writeFile(path, "schemata", memBwSchema); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -8,6 +8,13 @@ type L3CacheInfo struct {
|
||||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MemBwInfo holds the read-only memory bandwidth information reported in Stats.
// NOTE(review): the JSON tags match the file names under "info/MB" in the
// resctrl filesystem; presumably each field mirrors that file — confirm
// against getMemBwInfo.
type MemBwInfo struct {
|
||||||
|
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` // bandwidth granularity; see "info/MB/bandwidth_gran"
|
||||||
|
DelayLinear uint64 `json:"delay_linear,omitempty"` // presumably the value of "info/MB/delay_linear" — TODO confirm
|
||||||
|
MinBandwidth uint64 `json:"min_bandwidth,omitempty"` // minimum bandwidth percentage; see "info/MB/min_bandwidth"
|
||||||
|
NumClosids uint64 `json:"num_closids,omitempty"` // presumably the value of "info/MB/num_closids" — TODO confirm
|
||||||
|
}
|
||||||
|
|
||||||
type Stats struct {
|
type Stats struct {
|
||||||
// The read-only L3 cache information
|
// The read-only L3 cache information
|
||||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||||
|
@ -17,6 +24,15 @@ type Stats struct {
|
||||||
|
|
||||||
// The L3 cache schema in 'container_id' group
|
// The L3 cache schema in 'container_id' group
|
||||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||||
|
|
||||||
|
// The read-only memory bandwidth information
|
||||||
|
MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
|
||||||
|
|
||||||
|
// The read-only memory bandwidth schema in root
|
||||||
|
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||||
|
|
||||||
|
// The memory bandwidth schema in 'container_id' group
|
||||||
|
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewStats() *Stats {
|
func NewStats() *Stats {
|
||||||
|
|
|
@ -246,6 +246,9 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
||||||
if spec.Linux.IntelRdt.L3CacheSchema != "" {
|
if spec.Linux.IntelRdt.L3CacheSchema != "" {
|
||||||
config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema
|
config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema
|
||||||
}
|
}
|
||||||
|
if spec.Linux.IntelRdt.MemBwSchema != "" {
|
||||||
|
config.IntelRdt.MemBwSchema = spec.Linux.IntelRdt.MemBwSchema
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if spec.Process.SelinuxLabel != "" {
|
if spec.Process.SelinuxLabel != "" {
|
||||||
|
|
|
@ -262,7 +262,7 @@ other options are ignored.
|
||||||
|
|
||||||
// Update Intel RDT/CAT
|
// Update Intel RDT/CAT
|
||||||
if val := context.String("l3-cache-schema"); val != "" {
|
if val := context.String("l3-cache-schema"); val != "" {
|
||||||
if !intelrdt.IsEnabled() {
|
if !intelrdt.IsCatEnabled() {
|
||||||
return fmt.Errorf("Intel RDT: l3 cache schema is not enabled")
|
return fmt.Errorf("Intel RDT: l3 cache schema is not enabled")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -54,7 +54,7 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
intelRdtManager := libcontainer.IntelRdtFs
|
intelRdtManager := libcontainer.IntelRdtFs
|
||||||
if !intelrdt.IsEnabled() {
|
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||||
intelRdtManager = nil
|
intelRdtManager = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue