Merge pull request #1632 from xiaochenshen/rdt-mba
libcontainer: intelrdt: add support for Intel RDT/MBA in runc
This commit is contained in:
commit
78ef28e63b
38
events.go
38
events.go
|
@ -104,6 +104,13 @@ type l3CacheInfo struct {
|
|||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type memBwInfo struct {
|
||||
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type intelRdt struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
@ -113,6 +120,15 @@ type intelRdt struct {
|
|||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
var eventsCommand = cli.Command{
|
||||
|
@ -248,9 +264,16 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
|
|||
}
|
||||
|
||||
if is := ls.IntelRdtStats; is != nil {
|
||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||
if intelrdt.IsCatEnabled() {
|
||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() {
|
||||
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
||||
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
||||
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
||||
}
|
||||
}
|
||||
|
||||
return &s
|
||||
|
@ -293,3 +316,12 @@ func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
|
|||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
||||
|
||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo {
|
||||
return &memBwInfo{
|
||||
BandwidthGran: i.BandwidthGran,
|
||||
DelayLinear: i.DelayLinear,
|
||||
MinBandwidth: i.MinBandwidth,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -156,17 +156,21 @@ init process will block waiting for the parent to finish setup.
|
|||
|
||||
### IntelRdt
|
||||
|
||||
Intel platforms with new Xeon CPU support Intel Resource Director Technology
|
||||
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
|
||||
currently supports L3 cache resource allocation.
|
||||
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
two sub-features of RDT.
|
||||
|
||||
This feature provides a way for the software to restrict cache allocation to a
|
||||
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
has a capacity bitmask (CBM).
|
||||
Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
with other 'subsets'. The different subsets are identified by class of
|
||||
service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
|
||||
It can be used to handle L3 cache resource allocation for containers if
|
||||
hardware and kernel support Intel RDT/CAT.
|
||||
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
over memory bandwidth for the software. A user controls the resource by
|
||||
indicating the percentage of maximum memory bandwidth.
|
||||
|
||||
It can be used to handle L3 cache and memory bandwidth resources allocation
|
||||
for containers if hardware and kernel support Intel RDT CAT and MBA features.
|
||||
|
||||
In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
"resource control" filesystem, which is a "cgroup-like" interface.
|
||||
|
@ -175,6 +179,9 @@ Comparing with cgroups, it has similar process management lifecycle and
|
|||
interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
filesystem layout.
|
||||
|
||||
CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||
"resource control" filesystem.
|
||||
|
||||
Intel RDT "resource control" filesystem hierarchy:
|
||||
```
|
||||
mount -t resctrl resctrl /sys/fs/resctrl
|
||||
|
@ -182,59 +189,85 @@ tree /sys/fs/resctrl
|
|||
/sys/fs/resctrl/
|
||||
|-- info
|
||||
| |-- L3
|
||||
| |-- cbm_mask
|
||||
| |-- min_cbm_bits
|
||||
| | |-- cbm_mask
|
||||
| | |-- min_cbm_bits
|
||||
| | |-- num_closids
|
||||
| |-- MB
|
||||
| |-- bandwidth_gran
|
||||
| |-- delay_linear
|
||||
| |-- min_bandwidth
|
||||
| |-- num_closids
|
||||
|-- cpus
|
||||
|-- ...
|
||||
|-- schemata
|
||||
|-- tasks
|
||||
|-- <container_id>
|
||||
|-- cpus
|
||||
|-- ...
|
||||
|-- schemata
|
||||
|-- tasks
|
||||
|
||||
```
|
||||
|
||||
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
resource constraints.
|
||||
For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
cache and memory bandwidth resources constraints.
|
||||
|
||||
The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
<container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
to the "tasks" file (which will automatically remove them from the previous
|
||||
to the "tasks" file (which will automatically remove them from the previous
|
||||
group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
added to the same group as their parent. If a pid is not in any sub group, it
|
||||
is in root group.
|
||||
added to the same group as their parent.
|
||||
|
||||
The file `schemata` has allocation masks/values for L3 cache on each socket,
|
||||
which contains L3 cache id and capacity bitmask (CBM).
|
||||
The file `schemata` has a list of all the resources available to this group.
|
||||
Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
|
||||
L3 cache schema:
|
||||
It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
contains L3 cache id and capacity bitmask (CBM).
|
||||
```
|
||||
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
```
|
||||
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
|
||||
The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
be set is less than the max bit. The max bits in the CBM is varied among
|
||||
supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
||||
values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
bits, which mapping to entire L3 cache capacity. Some valid CBM values to
|
||||
set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
|
||||
For more information about Intel RDT/CAT kernel interface:
|
||||
Memory bandwidth schema:
|
||||
It has allocation values for memory bandwidth on each socket, which contains
|
||||
L3 cache id and memory bandwidth percentage.
|
||||
```
|
||||
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
```
|
||||
For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
|
||||
The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
that is allocated is also dependent on the CPU model and can be looked up at
|
||||
"info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
step available on the hardware.
|
||||
|
||||
For more information about Intel RDT kernel interface:
|
||||
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
|
||||
An example for runc:
|
||||
```
|
||||
An example for runc:
|
||||
Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
the "lower" 50% L3 cache id 1:
|
||||
0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
with a memory bandwidth granularity of 10%.
|
||||
|
||||
Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||
maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
|
||||
"linux": {
|
||||
"intelRdt": {
|
||||
"l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
}
|
||||
"intelRdt": {
|
||||
"closID": "guaranteed_group",
|
||||
"l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
"memBwSchema": "MB:0=20;1=70"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
@ -186,8 +186,8 @@ type Config struct {
|
|||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache) the container has available
|
||||
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
|
||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||
|
|
|
@ -4,4 +4,8 @@ type IntelRdt struct {
|
|||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth percentage per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
||||
|
|
|
@ -169,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
|||
|
||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
if config.IntelRdt != nil {
|
||||
if !intelrdt.IsEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
|
||||
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||
}
|
||||
if config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
|
||||
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
|
||||
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||
}
|
||||
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
|
||||
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||
}
|
||||
|
||||
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -148,6 +148,7 @@ func TestGetContainerStats(t *testing.T) {
|
|||
intelRdtManager: &mockIntelRdtManager{
|
||||
stats: &intelrdt.Stats{
|
||||
L3CacheSchema: "L3:0=f;1=f0",
|
||||
MemBwSchema: "MB:0=20;1=70",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -161,7 +162,7 @@ func TestGetContainerStats(t *testing.T) {
|
|||
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
|
||||
t.Fatalf("expected memory usage 1024 but received %d", stats.CgroupStats.MemoryStats.Usage.Usage)
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() {
|
||||
if stats.IntelRdtStats == nil {
|
||||
t.Fatal("intel rdt stats are nil")
|
||||
}
|
||||
|
@ -169,6 +170,14 @@ func TestGetContainerStats(t *testing.T) {
|
|||
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but recevied %s", stats.IntelRdtStats.L3CacheSchema)
|
||||
}
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() {
|
||||
if stats.IntelRdtStats == nil {
|
||||
t.Fatal("intel rdt stats are nil")
|
||||
}
|
||||
if stats.IntelRdtStats.MemBwSchema != "MB:0=20;1=70" {
|
||||
t.Fatalf("expected MemBwSchema MB:0=20;1=70 but recevied %s", stats.IntelRdtStats.MemBwSchema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetContainerState(t *testing.T) {
|
||||
|
@ -210,6 +219,7 @@ func TestGetContainerState(t *testing.T) {
|
|||
intelRdtManager: &mockIntelRdtManager{
|
||||
stats: &intelrdt.Stats{
|
||||
L3CacheSchema: "L3:0=f0;1=f",
|
||||
MemBwSchema: "MB:0=70;1=20",
|
||||
},
|
||||
path: expectedIntelRdtPath,
|
||||
},
|
||||
|
@ -232,7 +242,7 @@ func TestGetContainerState(t *testing.T) {
|
|||
if memPath := paths["memory"]; memPath != expectedMemoryPath {
|
||||
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
intelRdtPath := state.IntelRdtPath
|
||||
if intelRdtPath == "" {
|
||||
t.Fatal("intel rdt path should not be empty")
|
||||
|
|
|
@ -92,7 +92,7 @@ func RootlessCgroupfs(l *LinuxFactory) error {
|
|||
|
||||
// IntelRdtfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the Intel RDT "resource control" filesystem to
|
||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
||||
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
|
||||
func IntelRdtFs(l *LinuxFactory) error {
|
||||
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
|
||||
return &intelrdt.IntelRdtManager{
|
||||
|
@ -222,7 +222,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
|
@ -268,7 +268,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
|
||||
}
|
||||
return c, nil
|
||||
|
|
|
@ -16,20 +16,25 @@ import (
|
|||
)
|
||||
|
||||
/*
|
||||
* About Intel RDT/CAT feature:
|
||||
* About Intel RDT features:
|
||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
|
||||
* Cache is the only resource that is supported in RDT.
|
||||
* Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
* two sub-features of RDT.
|
||||
*
|
||||
* This feature provides a way for the software to restrict cache allocation to a
|
||||
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
* The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
* has a capacity bitmask (CBM).
|
||||
* Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
* cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
* with other 'subsets'. The different subsets are identified by class of
|
||||
* service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
*
|
||||
* For more information about Intel RDT/CAT can be found in the section 17.17
|
||||
* of Intel Software Developer Manual.
|
||||
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
* over memory bandwidth for the software. A user controls the resource by
|
||||
* indicating the percentage of maximum memory bandwidth.
|
||||
*
|
||||
* About Intel RDT/CAT kernel interface:
|
||||
* More details about Intel RDT CAT and MBA can be found in the section 17.18
|
||||
* of Intel Software Developer Manual:
|
||||
* https://software.intel.com/en-us/articles/intel-sdm
|
||||
*
|
||||
* About Intel RDT kernel interface:
|
||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||
*
|
||||
|
@ -37,59 +42,86 @@ import (
|
|||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
* filesystem layout.
|
||||
*
|
||||
* CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||
* "resource control" filesystem.
|
||||
*
|
||||
* Intel RDT "resource control" filesystem hierarchy:
|
||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||
* tree /sys/fs/resctrl
|
||||
* /sys/fs/resctrl/
|
||||
* |-- info
|
||||
* | |-- L3
|
||||
* | |-- cbm_mask
|
||||
* | |-- min_cbm_bits
|
||||
* | | |-- cbm_mask
|
||||
* | | |-- min_cbm_bits
|
||||
* | | |-- num_closids
|
||||
* | |-- MB
|
||||
* | |-- bandwidth_gran
|
||||
* | |-- delay_linear
|
||||
* | |-- min_bandwidth
|
||||
* | |-- num_closids
|
||||
* |-- cpus
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <container_id>
|
||||
* |-- cpus
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
*
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
* resource constraints.
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
* cache and memory bandwidth resources constraints.
|
||||
*
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* <container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
* added to the same group as their parent. If a pid is not in any sub group, it is
|
||||
* in root group.
|
||||
* added to the same group as their parent.
|
||||
*
|
||||
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
|
||||
* which contains L3 cache id and capacity bitmask (CBM).
|
||||
* The file `schemata` has a list of all the resources available to this group.
|
||||
* Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
*
|
||||
* L3 cache schema:
|
||||
* It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
* contains L3 cache id and capacity bitmask (CBM).
|
||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
* For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
*
|
||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
||||
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
* supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
* e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
* bits, which mapping to entire L3 cache capacity. Some valid CBM values to
|
||||
* set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
*
|
||||
* For more information about Intel RDT/CAT kernel interface:
|
||||
* Memory bandwidth schema:
|
||||
* It has allocation values for memory bandwidth on each socket, which contains
|
||||
* L3 cache id and memory bandwidth percentage.
|
||||
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
*
|
||||
* The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
* can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
* that is allocated is also dependent on the CPU model and can be looked up at
|
||||
* "info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
* step available on the hardware.
|
||||
*
|
||||
* For more information about Intel RDT kernel interface:
|
||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
*
|
||||
* An example for runc:
|
||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
* the "lower" 50% L3 cache id 1:
|
||||
* 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
* with a memory bandwidth granularity of 10%.
|
||||
*
|
||||
* Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
* on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||
* maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
*
|
||||
* "linux": {
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
* "memBwSchema": "MB:0=20;1=70"
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
@ -129,8 +161,10 @@ var (
|
|||
intelRdtRoot string
|
||||
intelRdtRootLock sync.Mutex
|
||||
|
||||
// The flag to indicate if Intel RDT is supported
|
||||
isEnabled bool
|
||||
// The flag to indicate if Intel RDT/CAT is enabled
|
||||
isCatEnabled bool
|
||||
// The flag to indicate if Intel RDT/MBA is enabled
|
||||
isMbaEnabled bool
|
||||
)
|
||||
|
||||
type intelRdtData struct {
|
||||
|
@ -139,19 +173,35 @@ type intelRdtData struct {
|
|||
pid int
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled in init()
|
||||
// Check if Intel RDT sub-features are enabled in init()
|
||||
func init() {
|
||||
// 1. Check if hardware and kernel support Intel RDT/CAT feature
|
||||
// "cat_l3" flag is set if supported
|
||||
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if !isFlagSet || err != nil {
|
||||
isEnabled = false
|
||||
// 1. Check if hardware and kernel support Intel RDT sub-features
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||
// The user guarantees to mount the filesystem
|
||||
isEnabled = isIntelRdtMounted()
|
||||
if !isIntelRdtMounted() {
|
||||
return
|
||||
}
|
||||
|
||||
// 3. Double check if Intel RDT sub-features are available in
|
||||
// "resource control" filesystem. Intel RDT sub-features can be
|
||||
// selectively disabled or enabled by kernel command line
|
||||
// (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
|
||||
if isCatFlagSet {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil {
|
||||
isCatEnabled = true
|
||||
}
|
||||
}
|
||||
if isMbaFlagSet {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
|
||||
isMbaEnabled = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the mount point path of Intel RDT "resource control" filesysem
|
||||
|
@ -223,30 +273,40 @@ func isIntelRdtMounted() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func parseCpuInfoFile(path string) (bool, error) {
|
||||
func parseCpuInfoFile(path string) (bool, bool, error) {
|
||||
isCatFlagSet := false
|
||||
isMbaFlagSet := false
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
return false, false, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return false, err
|
||||
return false, false, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
flags := strings.Split(text, " ")
|
||||
line := s.Text()
|
||||
|
||||
// "cat_l3" flag is set if Intel RDT/CAT is supported
|
||||
for _, flag := range flags {
|
||||
if flag == "cat_l3" {
|
||||
return true, nil
|
||||
// Search "cat_l3" and "mba" flags in first "flags" line
|
||||
if strings.Contains(line, "flags") {
|
||||
flags := strings.Split(line, " ")
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
for _, flag := range flags {
|
||||
switch flag {
|
||||
case "cat_l3":
|
||||
isCatFlagSet = true
|
||||
case "mba":
|
||||
isMbaFlagSet = true
|
||||
}
|
||||
}
|
||||
return isCatFlagSet, isMbaFlagSet, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
return isCatFlagSet, isMbaFlagSet, nil
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
|
@ -292,30 +352,6 @@ func getIntelRdtParamString(path, file string) (string, error) {
|
|||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
||||
|
||||
func readTasksFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
|
@ -368,6 +404,41 @@ func getL3CacheInfo() (*L3CacheInfo, error) {
|
|||
return l3CacheInfo, nil
|
||||
}
|
||||
|
||||
// Get the read-only memory bandwidth information
|
||||
func getMemBwInfo() (*MemBwInfo, error) {
|
||||
memBwInfo := &MemBwInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "MB")
|
||||
bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
memBwInfo.BandwidthGran = bandwidthGran
|
||||
memBwInfo.DelayLinear = delayLinear
|
||||
memBwInfo.MinBandwidth = minBandwidth
|
||||
memBwInfo.NumClosids = numClosids
|
||||
|
||||
return memBwInfo, nil
|
||||
}
|
||||
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
|
@ -383,9 +454,14 @@ func WriteIntelRdtTasks(dir string, pid int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled
|
||||
func IsEnabled() bool {
|
||||
return isEnabled
|
||||
// Check if Intel RDT/CAT is enabled
|
||||
func IsCatEnabled() bool {
|
||||
return isCatEnabled
|
||||
}
|
||||
|
||||
// Check if Intel RDT/MBA is enabled
|
||||
func IsMbaEnabled() bool {
|
||||
return isMbaEnabled
|
||||
}
|
||||
|
||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||
|
@ -452,67 +528,132 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
|||
defer m.mu.Unlock()
|
||||
stats := NewStats()
|
||||
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// The read-only L3 cache and memory bandwidth schemata in root
|
||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
stats.L3CacheSchemaRoot = schemaRootStrings[0]
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
// The L3 cache and memory bandwidth schemata in 'container_id' group
|
||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||
stats.L3CacheSchema = schemaStrings[0]
|
||||
|
||||
if IsCatEnabled() {
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "L3") {
|
||||
stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "L3") {
|
||||
stats.L3CacheSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if IsMbaEnabled() {
|
||||
// The read-only memory bandwidth information
|
||||
memBwInfo, err := getMemBwInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MemBwInfo = memBwInfo
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "MB") {
|
||||
stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "MB") {
|
||||
stats.MemBwSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
||||
path := m.GetPath()
|
||||
|
||||
// About L3 cache schema file:
|
||||
// The schema has allocation masks/values for L3 cache on each socket,
|
||||
// About L3 cache schema:
|
||||
// It has allocation bitmasks/values for L3 cache on each socket,
|
||||
// which contains L3 cache id and capacity bitmask (CBM).
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, L3's schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
|
||||
// is 0xc0.
|
||||
//
|
||||
// About L3 cache CBM validity:
|
||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||
// bits that can be set is less than the max bit. The max bits in the
|
||||
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
|
||||
// "resource control" filesystem layout, the CBM in a group should
|
||||
// be a subset of the CBM in root. Kernel will check if it is valid
|
||||
// when writing.
|
||||
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
|
||||
// which mapping to entire L3 cache capacity. Some valid CBM values
|
||||
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
// CBM is varied among supported Intel CPU models. Kernel will check
|
||||
// if it is valid when writing. e.g., default value 0xfffff in root
|
||||
// indicates the max bits of CBM is 20 bits, which mapping to entire
|
||||
// L3 cache capacity. Some valid CBM values to set in a group:
|
||||
// 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
//
|
||||
//
|
||||
// About memory bandwidth schema:
|
||||
// It has allocation values for memory bandwidth on each socket, which
|
||||
// contains L3 cache id and memory bandwidth percentage.
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// "MB:0=20;1=70"
|
||||
//
|
||||
// The minimum bandwidth percentage value for each CPU model is
|
||||
// predefined and can be looked up through "info/MB/min_bandwidth".
|
||||
// The bandwidth granularity that is allocated is also dependent on
|
||||
// the CPU model and can be looked up at "info/MB/bandwidth_gran".
|
||||
// The available bandwidth control steps are: min_bw + N * bw_gran.
|
||||
// Intermediate values are rounded to the next control step available
|
||||
// on the hardware.
|
||||
if container.IntelRdt != nil {
|
||||
path := m.GetPath()
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
if l3CacheSchema != "" {
|
||||
memBwSchema := container.IntelRdt.MemBwSchema
|
||||
|
||||
// Write a single joint schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only L3 cache schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema == "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only memory bandwidth schema string to schemata file
|
||||
if l3CacheSchema == "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", memBwSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
)
|
||||
|
||||
func TestIntelRdtSetL3CacheSchema(t *testing.T) {
|
||||
if !IsEnabled() {
|
||||
if !IsCatEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -44,3 +44,41 @@ func TestIntelRdtSetL3CacheSchema(t *testing.T) {
|
|||
t.Fatal("Got the wrong value, set 'schemata' failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntelRdtSetMemBwSchema(t *testing.T) {
|
||||
if !IsMbaEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
helper := NewIntelRdtTestUtil(t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memBwSchemaBefore = "MB:0=20;1=70"
|
||||
memBwSchemeAfter = "MB:0=70;1=20"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"schemata": memBwSchemaBefore + "\n",
|
||||
})
|
||||
|
||||
helper.IntelRdtData.config.IntelRdt.MemBwSchema = memBwSchemeAfter
|
||||
intelrdt := &IntelRdtManager{
|
||||
Config: helper.IntelRdtData.config,
|
||||
Path: helper.IntelRdtPath,
|
||||
}
|
||||
if err := intelrdt.Set(helper.IntelRdtData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse file 'schemata' - %s", err)
|
||||
}
|
||||
values := strings.Split(tmpStrings, "\n")
|
||||
value := values[0]
|
||||
|
||||
if value != memBwSchemeAfter {
|
||||
t.Fatal("Got the wrong value, set 'schemata' failed.")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,13 @@ type L3CacheInfo struct {
|
|||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type MemBwInfo struct {
|
||||
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
@ -17,6 +24,15 @@ type Stats struct {
|
|||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
|
|
|
@ -246,6 +246,9 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
|||
if spec.Linux.IntelRdt.L3CacheSchema != "" {
|
||||
config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema
|
||||
}
|
||||
if spec.Linux.IntelRdt.MemBwSchema != "" {
|
||||
config.IntelRdt.MemBwSchema = spec.Linux.IntelRdt.MemBwSchema
|
||||
}
|
||||
}
|
||||
}
|
||||
if spec.Process.SelinuxLabel != "" {
|
||||
|
|
23
update.go
23
update.go
|
@ -118,6 +118,10 @@ other options are ignored.
|
|||
Name: "l3-cache-schema",
|
||||
Usage: "The string of Intel RDT/CAT L3 cache schema",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "mem-bw-schema",
|
||||
Usage: "The string of Intel RDT/MBA memory bandwidth schema",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
|
@ -260,12 +264,18 @@ other options are ignored.
|
|||
config.Cgroups.Resources.MemorySwap = *r.Memory.Swap
|
||||
config.Cgroups.Resources.PidsLimit = r.Pids.Limit
|
||||
|
||||
// Update Intel RDT/CAT
|
||||
if val := context.String("l3-cache-schema"); val != "" {
|
||||
if !intelrdt.IsEnabled() {
|
||||
return fmt.Errorf("Intel RDT: l3 cache schema is not enabled")
|
||||
}
|
||||
// Update Intel RDT
|
||||
l3CacheSchema := context.String("l3-cache-schema")
|
||||
memBwSchema := context.String("mem-bw-schema")
|
||||
if l3CacheSchema != "" && !intelrdt.IsCatEnabled() {
|
||||
return fmt.Errorf("Intel RDT/CAT: l3 cache schema is not enabled")
|
||||
}
|
||||
|
||||
if memBwSchema != "" && !intelrdt.IsMbaEnabled() {
|
||||
return fmt.Errorf("Intel RDT/MBA: memory bandwidth schema is not enabled")
|
||||
}
|
||||
|
||||
if l3CacheSchema != "" || memBwSchema != "" {
|
||||
// If intelRdt is not specified in original configuration, we just don't
|
||||
// Apply() to create intelRdt group or attach tasks for this container.
|
||||
// In update command, we could re-enable through IntelRdtManager.Apply()
|
||||
|
@ -285,7 +295,8 @@ other options are ignored.
|
|||
return err
|
||||
}
|
||||
}
|
||||
config.IntelRdt.L3CacheSchema = val
|
||||
config.IntelRdt.L3CacheSchema = l3CacheSchema
|
||||
config.IntelRdt.MemBwSchema = memBwSchema
|
||||
}
|
||||
|
||||
return container.Set(config)
|
||||
|
|
|
@ -54,7 +54,7 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
|
|||
}
|
||||
|
||||
intelRdtManager := libcontainer.IntelRdtFs
|
||||
if !intelrdt.IsEnabled() {
|
||||
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||
intelRdtManager = nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# OCI runtime-spec. When updating this, make sure you use a version tag rather
|
||||
# than a commit ID so it's much more obvious what version of the spec we are
|
||||
# using.
|
||||
github.com/opencontainers/runtime-spec v1.0.0
|
||||
github.com/opencontainers/runtime-spec 5684b8af48c1ac3b1451fa499724e30e3c20a294
|
||||
# Core libcontainer functionality.
|
||||
github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08
|
||||
github.com/opencontainers/selinux v1.0.0-rc1
|
||||
|
|
|
@ -22,12 +22,12 @@ To provide context for users the following section gives example use cases for e
|
|||
### Application Bundle Builders
|
||||
|
||||
Application bundle builders can create a [bundle](bundle.md) directory that includes all of the files required for launching an application as a container.
|
||||
The bundle contains an OCI [configuration file](config.md) where the builder can specify host-independent details such as [which executable to launch](config.md#process) and host-specific settings such as [mount](config.md#mounts) locations, [hook](config.md#hooks) paths, Linux [namespaces](config-linux.md#namespaces) and [cgroups](config-linux.md#control-groups).
|
||||
The bundle contains an OCI [configuration file](config.md) where the builder can specify host-independent details such as [which executable to launch](config.md#process) and host-specific settings such as [mount](config.md#mounts) locations, [hook](config.md#posix-platform-hooks) paths, Linux [namespaces](config-linux.md#namespaces) and [cgroups](config-linux.md#control-groups).
|
||||
Because the configuration includes host-specific settings, application bundle directories copied between two hosts may require configuration adjustments.
|
||||
|
||||
### Hook Developers
|
||||
|
||||
[Hook](config.md#hooks) developers can extend the functionality of an OCI-compliant runtime by hooking into a container's lifecycle with an external application.
|
||||
[Hook](config.md#posix-platform-hooks) developers can extend the functionality of an OCI-compliant runtime by hooking into a container's lifecycle with an external application.
|
||||
Example use cases include sophisticated network configuration, volume garbage collection, etc.
|
||||
|
||||
### Runtime Developers
|
||||
|
@ -52,17 +52,12 @@ It also guarantees that the design is sound before code is written; a GitHub pul
|
|||
Typos and grammatical errors can go straight to a pull-request.
|
||||
When in doubt, start on the [mailing-list](#mailing-list).
|
||||
|
||||
### Weekly Call
|
||||
|
||||
The contributors and maintainers of all OCI projects have a weekly meeting on Wednesdays at:
|
||||
|
||||
* 8:00 AM (USA Pacific), during [odd weeks][iso-week].
|
||||
* 2:00 PM (USA Pacific), during [even weeks][iso-week].
|
||||
### Meetings
|
||||
|
||||
The contributors and maintainers of all OCI projects have monthly meetings, which are usually at 2:00 PM (USA Pacific) on the first Wednesday of every month.
|
||||
There is an [iCalendar][rfc5545] format for the meetings [here](meeting.ics).
|
||||
|
||||
Everyone is welcome to participate via [UberConference web][uberconference] or audio-only: +1 415 968 0849 (no PIN needed).
|
||||
An initial agenda will be posted to the [mailing list](#mailing-list) earlier in the week, and everyone is welcome to propose additional topics or suggest other agenda alterations there.
|
||||
An initial agenda will be posted to the [mailing list](#mailing-list) in the week before each meeting, and everyone is welcome to propose additional topics or suggest other agenda alterations there.
|
||||
Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived [here][minutes], with minutes from especially old meetings (September 2015 and earlier) archived [here][runtime-wiki].
|
||||
|
||||
### Mailing List
|
||||
|
|
|
@ -4,7 +4,7 @@ import "os"
|
|||
|
||||
// Spec is the base configuration for the container.
|
||||
type Spec struct {
|
||||
// Version of the Open Container Runtime Specification with which the bundle complies.
|
||||
// Version of the Open Container Initiative Runtime Specification with which the bundle complies.
|
||||
Version string `json:"ociVersion"`
|
||||
// Process configures the container process.
|
||||
Process *Process `json:"process,omitempty"`
|
||||
|
@ -25,6 +25,8 @@ type Spec struct {
|
|||
Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"`
|
||||
// Windows is platform-specific configuration for Windows based containers.
|
||||
Windows *Windows `json:"windows,omitempty" platform:"windows"`
|
||||
// VM specifies configuration for virtual-machine-based containers.
|
||||
VM *VM `json:"vm,omitempty" platform:"vm"`
|
||||
}
|
||||
|
||||
// Process contains information to start a specific application inside the container.
|
||||
|
@ -158,8 +160,8 @@ type Linux struct {
|
|||
ReadonlyPaths []string `json:"readonlyPaths,omitempty"`
|
||||
// MountLabel specifies the selinux context for the mounts in the container.
|
||||
MountLabel string `json:"mountLabel,omitempty"`
|
||||
// IntelRdt contains Intel Resource Director Technology (RDT) information
|
||||
// for handling resource constraints (e.g., L3 cache) for the container
|
||||
// IntelRdt contains Intel Resource Director Technology (RDT) information for
|
||||
// handling resource constraints (e.g., L3 cache, memory bandwidth) for the container
|
||||
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
|
||||
}
|
||||
|
||||
|
@ -194,10 +196,10 @@ const (
|
|||
|
||||
// LinuxIDMapping specifies UID/GID mappings
|
||||
type LinuxIDMapping struct {
|
||||
// HostID is the starting UID/GID on the host to be mapped to 'ContainerID'
|
||||
HostID uint32 `json:"hostID"`
|
||||
// ContainerID is the starting UID/GID in the container
|
||||
ContainerID uint32 `json:"containerID"`
|
||||
// HostID is the starting UID/GID on the host to be mapped to 'ContainerID'
|
||||
HostID uint32 `json:"hostID"`
|
||||
// Size is the number of IDs to be mapped
|
||||
Size uint32 `json:"size"`
|
||||
}
|
||||
|
@ -320,6 +322,14 @@ type LinuxNetwork struct {
|
|||
Priorities []LinuxInterfacePriority `json:"priorities,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11)
|
||||
type LinuxRdma struct {
|
||||
// Maximum number of HCA handles that can be opened. Default is "no limit".
|
||||
HcaHandles *uint32 `json:"hcaHandles,omitempty"`
|
||||
// Maximum number of HCA objects that can be created. Default is "no limit".
|
||||
HcaObjects *uint32 `json:"hcaObjects,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxResources has container runtime resource constraints
|
||||
type LinuxResources struct {
|
||||
// Devices configures the device whitelist.
|
||||
|
@ -336,6 +346,10 @@ type LinuxResources struct {
|
|||
HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"`
|
||||
// Network restriction configuration
|
||||
Network *LinuxNetwork `json:"network,omitempty"`
|
||||
// Rdma resource restriction configuration.
|
||||
// Limits are a set of key value pairs that define RDMA resource limits,
|
||||
// where the key is device name and value is resource limits.
|
||||
Rdma map[string]LinuxRdma `json:"rdma,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxDevice represents the mknod information for a Linux special device file
|
||||
|
@ -419,6 +433,8 @@ type SolarisAnet struct {
|
|||
type Windows struct {
|
||||
// LayerFolders contains a list of absolute paths to directories containing image layers.
|
||||
LayerFolders []string `json:"layerFolders"`
|
||||
// Devices are the list of devices to be mapped into the container.
|
||||
Devices []WindowsDevice `json:"devices,omitempty"`
|
||||
// Resources contains information for handling resource constraints for the container.
|
||||
Resources *WindowsResources `json:"resources,omitempty"`
|
||||
// CredentialSpec contains a JSON object describing a group Managed Service Account (gMSA) specification.
|
||||
|
@ -433,6 +449,14 @@ type Windows struct {
|
|||
Network *WindowsNetwork `json:"network,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsDevice represents information about a host device to be mapped into the container.
|
||||
type WindowsDevice struct {
|
||||
// Device identifier: interface class GUID, etc.
|
||||
ID string `json:"id"`
|
||||
// Device identifier type: "class", etc.
|
||||
IDType string `json:"idType"`
|
||||
}
|
||||
|
||||
// WindowsResources has container runtime resource constraints for containers running on Windows.
|
||||
type WindowsResources struct {
|
||||
// Memory restriction configuration.
|
||||
|
@ -479,6 +503,8 @@ type WindowsNetwork struct {
|
|||
DNSSearchList []string `json:"DNSSearchList,omitempty"`
|
||||
// Name (ID) of the container that we will share with the network stack.
|
||||
NetworkSharedContainerName string `json:"networkSharedContainerName,omitempty"`
|
||||
// name (ID) of the network namespace that will be used for the container.
|
||||
NetworkNamespace string `json:"networkNamespace,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsHyperV contains information for configuring a container to run with Hyper-V isolation.
|
||||
|
@ -487,6 +513,42 @@ type WindowsHyperV struct {
|
|||
UtilityVMPath string `json:"utilityVMPath,omitempty"`
|
||||
}
|
||||
|
||||
// VM contains information for virtual-machine-based containers.
|
||||
type VM struct {
|
||||
// Hypervisor specifies hypervisor-related configuration for virtual-machine-based containers.
|
||||
Hypervisor VMHypervisor `json:"hypervisor,omitempty"`
|
||||
// Kernel specifies kernel-related configuration for virtual-machine-based containers.
|
||||
Kernel VMKernel `json:"kernel"`
|
||||
// Image specifies guest image related configuration for virtual-machine-based containers.
|
||||
Image VMImage `json:"image,omitempty"`
|
||||
}
|
||||
|
||||
// VMHypervisor contains information about the hypervisor to use for a virtual machine.
|
||||
type VMHypervisor struct {
|
||||
// Path is the host path to the hypervisor used to manage the virtual machine.
|
||||
Path string `json:"path"`
|
||||
// Parameters specifies parameters to pass to the hypervisor.
|
||||
Parameters string `json:"parameters,omitempty"`
|
||||
}
|
||||
|
||||
// VMKernel contains information about the kernel to use for a virtual machine.
|
||||
type VMKernel struct {
|
||||
// Path is the host path to the kernel used to boot the virtual machine.
|
||||
Path string `json:"path"`
|
||||
// Parameters specifies parameters to pass to the kernel.
|
||||
Parameters string `json:"parameters,omitempty"`
|
||||
// InitRD is the host path to an initial ramdisk to be used by the kernel.
|
||||
InitRD string `json:"initrd,omitempty"`
|
||||
}
|
||||
|
||||
// VMImage contains information about the virtual machine root image.
|
||||
type VMImage struct {
|
||||
// Path is the host path to the root image that the VM kernel would boot into.
|
||||
Path string `json:"path"`
|
||||
// Format is the root image format type (e.g. "qcow2", "raw", "vhd", etc).
|
||||
Format string `json:"format"`
|
||||
}
|
||||
|
||||
// LinuxSeccomp represents syscall restrictions
|
||||
type LinuxSeccomp struct {
|
||||
DefaultAction LinuxSeccompAction `json:"defaultAction"`
|
||||
|
@ -561,10 +623,16 @@ type LinuxSyscall struct {
|
|||
Args []LinuxSeccompArg `json:"args,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxIntelRdt has container runtime resource constraints
|
||||
// for Intel RDT/CAT which introduced in Linux 4.10 kernel
|
||||
// LinuxIntelRdt has container runtime resource constraints for Intel RDT
|
||||
// CAT and MBA features which introduced in Linux 4.10 and 4.12 kernel
|
||||
type LinuxIntelRdt struct {
|
||||
// The identity for RDT Class of Service
|
||||
ClosID string `json:"closID,omitempty"`
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth percentage per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
||||
|
|
|
@ -8,10 +8,10 @@ const (
|
|||
// VersionMinor is for functionality in a backwards-compatible manner
|
||||
VersionMinor = 0
|
||||
// VersionPatch is for backwards-compatible bug fixes
|
||||
VersionPatch = 0
|
||||
VersionPatch = 1
|
||||
|
||||
// VersionDev indicates development branch. Releases will be empty string.
|
||||
VersionDev = ""
|
||||
VersionDev = "-dev"
|
||||
)
|
||||
|
||||
// Version is the specification version that the package types support.
|
||||
|
|
Loading…
Reference in New Issue