Skip to content

Commit e108084

Browse files
committed
[DO NOT MERGE] Add memory policy support
Implement support for Linux memory policy in OCI spec PR: opencontainers/runtime-spec#1282 TODO: - remove the replace from go.mod when OCI spec is merged Signed-off-by: Antti Kervinen <[email protected]>
1 parent 43f1191 commit e108084

File tree

13 files changed

+316
-4
lines changed

13 files changed

+316
-4
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,5 @@ require (
3939
github.com/russross/blackfriday/v2 v2.1.0 // indirect
4040
github.com/vishvananda/netns v0.0.4 // indirect
4141
)
42+
43+
replace github.com/opencontainers/runtime-spec => github.com/askervin/runtime-spec v1.0.3-0.20250328150043-68936b63f0db

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
2+
github.com/askervin/runtime-spec v1.0.3-0.20250328150043-68936b63f0db h1:vfXC+spYsKfEjaQxvkj2vGbSMuaTy475Z3ctMoe2Uf8=
3+
github.com/askervin/runtime-spec v1.0.3-0.20250328150043-68936b63f0db/go.mod h1:0ccwhiCQXxLwvWvVsdVdxTe+IFfXyJTjr/wNue5fNJY=
24
github.com/checkpoint-restore/go-criu/v7 v7.2.0 h1:qGiWA4App1gGlEfIJ68WR9jbezV9J7yZdjzglezcqKo=
35
github.com/checkpoint-restore/go-criu/v7 v7.2.0/go.mod h1:u0LCWLg0w4yqqu14aXhiB4YD3a1qd8EcCEg7vda5dwo=
46
github.com/cilium/ebpf v0.17.3 h1:FnP4r16PWYSE4ux6zN+//jMcW4nMVRvuTLVTvCjyyjg=
@@ -47,8 +49,6 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm
4749
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
4850
github.com/opencontainers/cgroups v0.0.1 h1:MXjMkkFpKv6kpuirUa4USFBas573sSAY082B4CiHEVA=
4951
github.com/opencontainers/cgroups v0.0.1/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
50-
github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww=
51-
github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
5252
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
5353
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
5454
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

libcontainer/configs/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,9 @@ type Config struct {
208208
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
209209
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
210210

211+
// MemoryPolicy specifies NUMA memory policy for the container.
212+
MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"`
213+
211214
// RootlessEUID is set when the runc was launched with non-zero EUID.
212215
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
213216
// When RootlessEUID is set, runc creates a new userns for the container.

libcontainer/configs/memorypolicy.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package configs
2+
3+
// LinuxMemoryPolicy holds the NUMA memory policy to apply to the container
// process. It is referenced from Config.MemoryPolicy and consumed by
// system.SetMempolicy.
type LinuxMemoryPolicy struct {
4+
// Mode is the numeric policy mode handed to system.SetMempolicy. The spec
// converter ORs the numeric values of any mode flags into this field.
Mode uint
5+
// Nodes lists the node numbers the policy applies to, one entry per node.
Nodes []int
6+
}

libcontainer/init_linux.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,10 @@ func setupIOPriority(config *initConfig) error {
659659
return nil
660660
}
661661

662+
// setupMemoryPolicy applies config.MemoryPolicy by calling
// system.SetMempolicy with its Mode and Nodes.
//
// config.MemoryPolicy is dereferenced unconditionally, so it must be non-nil;
// the Init callers check this before calling.
func setupMemoryPolicy(config *configs.Config) error {
663+
return system.SetMempolicy(config.MemoryPolicy.Mode, config.MemoryPolicy.Nodes)
664+
}
665+
662666
func setupPersonality(config *configs.Config) error {
663667
return system.SetLinuxPersonality(config.Personality.Domain)
664668
}

libcontainer/setns_init_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ func (l *linuxSetnsInit) Init() error {
110110
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
111111
return err
112112
}
113+
if l.config.Config.MemoryPolicy != nil {
114+
if err := setupMemoryPolicy(l.config.Config); err != nil {
115+
return err
116+
}
117+
}
113118
if l.config.Config.Personality != nil {
114119
if err := setupPersonality(l.config.Config); err != nil {
115120
return err

libcontainer/specconv/spec_linux.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"path/filepath"
1111
"sort"
12+
"strconv"
1213
"strings"
1314
"sync"
1415
"time"
@@ -41,6 +42,8 @@ var (
4142
flag int
4243
}
4344
complexFlags map[string]func(*configs.Mount)
45+
mpolModeMap map[specs.MemoryPolicyModeType]uint
46+
mpolModeFMap map[specs.MemoryPolicyFlagType]uint
4447
)
4548

4649
func initMaps() {
@@ -148,6 +151,21 @@ func initMaps() {
148151
m.IDMapping.Recursive = true
149152
},
150153
}
154+
155+
mpolModeMap = map[specs.MemoryPolicyModeType]uint{
156+
specs.MpolDefault: 0,
157+
specs.MpolPreferred: 1,
158+
specs.MpolBind: 2,
159+
specs.MpolInterleave: 3,
160+
specs.MpolLocal: 4,
161+
specs.MpolWeightedInterleave: 6,
162+
}
163+
164+
mpolModeFMap = map[specs.MemoryPolicyFlagType]uint{
165+
specs.MpolFStaticNodes: 1 << 15,
166+
specs.MpolFRelativeNodes: 1 << 14,
167+
specs.MpolFNumaBalancing: 1 << 13,
168+
}
151169
})
152170
}
153171

@@ -467,6 +485,32 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
467485
MemBwSchema: spec.Linux.IntelRdt.MemBwSchema,
468486
}
469487
}
488+
if spec.Linux.MemoryPolicy != nil &&
489+
(spec.Linux.MemoryPolicy.Mode != "" ||
490+
spec.Linux.MemoryPolicy.Nodes != "" ||
491+
len(spec.Linux.MemoryPolicy.Flags) > 0) {
492+
var ok bool
493+
specMp := spec.Linux.MemoryPolicy
494+
confMp := &configs.LinuxMemoryPolicy{}
495+
if confMp.Mode, ok = mpolModeMap[specMp.Mode]; !ok {
496+
return nil, fmt.Errorf("invalid memory policy mode %q", specMp.Mode)
497+
}
498+
// MAX_NODE is a sensibility check to user-provided
499+
// nodes, not reflecting currently onlined nodes.
500+
// set_mempolicy() accepts non-existent nodes.
501+
MAX_NODE := 1023
502+
if confMp.Nodes, err = parseListSet(specMp.Nodes, 0, MAX_NODE); err != nil {
503+
return nil, fmt.Errorf("invalid memory policy nodes %q: %v", specMp.Nodes, err)
504+
}
505+
for _, specFlag := range specMp.Flags {
506+
confModeFlag, ok := mpolModeFMap[specFlag]
507+
if !ok {
508+
return nil, fmt.Errorf("invalid memory policy flag %q", specFlag)
509+
}
510+
confMp.Mode |= confModeFlag
511+
}
512+
config.MemoryPolicy = confMp
513+
}
470514
if spec.Linux.Personality != nil {
471515
if len(spec.Linux.Personality.Flags) > 0 {
472516
logrus.Warnf("ignoring unsupported personality flags: %+v because personality flag has not supported at this time", spec.Linux.Personality.Flags)
@@ -1127,6 +1171,53 @@ func parseMountOptions(options []string) *configs.Mount {
11271171
return &m
11281172
}
11291173

1174+
// parseListSet parses "list set" syntax ("0,61-63,2") into a list ([0, 61, 62, 63, 2]).
//
// listSet is a comma-separated sequence of single decimal values and
// inclusive "start-end" ranges. Empty elements (as in "0,,1" or a trailing
// comma) are skipped, and an empty listSet yields a nil slice. Values are
// returned in the order they appear; duplicates are not removed.
//
// An error is returned if an element is not a decimal integer, if a range is
// malformed or has start > end, or if any value lies outside
// [minValue, maxValue]. Every error names the offending element so that
// inputs such as "-1" (which parses as a range with an empty start) produce
// an understandable message. On error the returned slice is nil.
func parseListSet(listSet string, minValue, maxValue int) ([]int, error) {
	if listSet == "" {
		return nil, nil
	}
	var result []int
	for _, part := range strings.Split(listSet, ",") {
		if part == "" {
			// Tolerate empty elements such as a trailing comma.
			continue
		}
		first, last, isRange := strings.Cut(part, "-")
		if !isRange {
			num, err := strconv.Atoi(part)
			if err != nil {
				return nil, fmt.Errorf("invalid value %q: %w", part, err)
			}
			if num < minValue || num > maxValue {
				return nil, fmt.Errorf("invalid value %d: not in %d-%d", num, minValue, maxValue)
			}
			result = append(result, num)
			continue
		}
		// A second dash ("1-2-3") is never a valid range.
		if strings.Contains(last, "-") {
			return nil, fmt.Errorf("invalid range: %s", part)
		}
		start, err := strconv.Atoi(first)
		if err != nil {
			return nil, fmt.Errorf("invalid range %q: %w", part, err)
		}
		end, err := strconv.Atoi(last)
		if err != nil {
			return nil, fmt.Errorf("invalid range %q: %w", part, err)
		}
		if start > end {
			return nil, fmt.Errorf("invalid range %s: start > end", part)
		}
		// Bounds are checked before expanding so that a huge range cannot
		// trigger a huge allocation.
		if start < minValue || end > maxValue {
			return nil, fmt.Errorf("invalid range %s: not in %d-%d", part, minValue, maxValue)
		}
		for i := start; i <= end; i++ {
			result = append(result, i)
		}
	}
	return result, nil
}
1220+
11301221
func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) {
11311222
if config == nil {
11321223
return nil, nil

libcontainer/standard_init_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,13 @@ func (l *linuxStandardInit) Init() error {
238238
}
239239
}
240240

241+
// Set memory policy if specified.
242+
if l.config.Config.MemoryPolicy != nil {
243+
if err := setupMemoryPolicy(l.config.Config); err != nil {
244+
return err
245+
}
246+
}
247+
241248
// Set personality if specified.
242249
if l.config.Config.Personality != nil {
243250
if err := setupPersonality(l.config.Config); err != nil {

libcontainer/system/linux.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,31 @@ fallback:
151151
return io.Copy(dst, src)
152152
}
153153

154+
// bitmaskFromInts packs the given bit indices into a slice of 64-bit words.
// Index n is stored as bit n%64 of word n/64. The result always holds at
// least one word, so an empty or nil input yields a single zero word, and it
// is just long enough to hold the largest index.
//
// Negative indices cannot be represented and are ignored rather than causing
// a runtime panic (negative shift count).
func bitmaskFromInts(bits []int) []uint64 {
	maxBit := 0
	for _, bit := range bits {
		if bit > maxBit {
			maxBit = bit
		}
	}
	mask := make([]uint64, maxBit/64+1)
	for _, bit := range bits {
		if bit < 0 {
			continue
		}
		mask[bit/64] |= uint64(1) << (uint(bit) % 64)
	}
	return mask
}
167+
168+
// SetMempolicy sets the NUMA memory policy. For more information see the set_mempolicy syscall documentation.
169+
func SetMempolicy(mode uint, nodes []int) error {
170+
nodemask := bitmaskFromInts(nodes)
171+
nodemaskPtr := unsafe.Pointer(&nodemask[0])
172+
_, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(nodemaskPtr), uintptr(len(nodemask)*64))
173+
if errno != 0 {
174+
return &os.SyscallError{Syscall: "set_mempolicy", Err: errno}
175+
}
176+
return nil
177+
}
178+
154179
// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
155180
// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
156181
func SetLinuxPersonality(personality int) error {

tests/integration/memorypolicy.bats

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#!/usr/bin/env bats
2+
3+
load helpers
4+
5+
# Bats per-test setup hook: calls the setup_busybox helper.
function setup() {
6+
setup_busybox
7+
}
8+
9+
# Bats per-test teardown hook: calls the teardown_bundle helper.
function teardown() {
10+
teardown_bundle
11+
}
12+
13+
# MPOL_INTERLEAVE on node 0 with no flags: the container prints the second
# space-separated field of the first line of /proc/self/numa_maps, which is
# expected to be exactly "interleave:0".
@test "runc run memory policy interleave without flags" {
14+
update_config '
15+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
16+
| .linux.memoryPolicy = {
17+
"mode": "MPOL_INTERLEAVE",
18+
"nodes": "0"
19+
}'
20+
runc run test_busybox
21+
[ "$status" -eq 0 ]
22+
[[ "${lines[0]}" == "interleave:0" ]]
23+
}
24+
25+
# MPOL_BIND on node 0 with MPOL_F_STATIC_NODES: the policy field reported by
# numa_maps must contain "bind", "static" and "0", in that order.
@test "runc run memory policy bind static" {
26+
update_config '
27+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
28+
| .linux.memoryPolicy = {
29+
"mode": "MPOL_BIND",
30+
"nodes": "0",
31+
"flags": ["MPOL_F_STATIC_NODES"]
32+
}'
33+
runc run test_busybox
34+
[ "$status" -eq 0 ]
35+
[[ "${lines[0]}" == "bind"*"static"*"0" ]]
36+
}
37+
38+
# MPOL_PREFERRED on node 0 with MPOL_F_RELATIVE_NODES: starts the container
# detached, then checks that a process started with "runc exec" reports a
# policy containing "prefer", "relative" and "0", in that order.
@test "runc run and exec memory policy prefer relative" {
39+
update_config '
40+
.linux.memoryPolicy = {
41+
"mode": "MPOL_PREFERRED",
42+
"nodes": "0",
43+
"flags": ["MPOL_F_RELATIVE_NODES"]
44+
}'
45+
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
46+
[ "$status" -eq 0 ]
47+
48+
runc exec test_busybox /bin/sh -c "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"
49+
[ "$status" -eq 0 ]
50+
[[ "${lines[0]}" == "prefer"*"relative"*"0" ]]
51+
}
52+
53+
# An empty memoryPolicy object must be accepted: the container runs
# successfully and numa_maps reports "default".
@test "runc run empty memory policy" {
54+
update_config '
55+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
56+
| .linux.memoryPolicy = {
57+
}'
58+
runc run test_busybox
59+
[ "$status" -eq 0 ]
60+
[[ "${lines[0]}" == "default" ]]
61+
}
62+
63+
# An unknown mode name ("INTERLEAVE" without the MPOL_ prefix) must make
# runc exit with status 1 and an "invalid memory policy" error.
@test "runc run memory policy with non-existing mode" {
64+
update_config '
65+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
66+
| .linux.memoryPolicy = {
67+
"mode": "INTERLEAVE",
68+
"nodes": "0"
69+
}'
70+
runc run test_busybox
71+
[ "$status" -eq 1 ]
72+
[[ "${lines[0]}" == *"invalid memory policy"* ]]
73+
}
74+
75+
# A valid flag followed by an unknown one ("badflag") must make runc exit
# with status 1 and an "invalid memory policy flag" error.
@test "runc run memory policy with invalid flag" {
76+
update_config '
77+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
78+
| .linux.memoryPolicy = {
79+
"mode": "MPOL_PREFERRED",
80+
"nodes": "0",
81+
"flags": ["MPOL_F_RELATIVE_NODES", "badflag"]
82+
}'
83+
runc run test_busybox
84+
[ "$status" -eq 1 ]
85+
[[ "${lines[0]}" == *"invalid memory policy flag"* ]]
86+
}
87+
88+
# MPOL_DEFAULT without a "nodes" entry must be accepted: the container runs
# successfully and the reported policy contains "default".
@test "runc run memory policy default with missing nodes" {
89+
update_config '
90+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
91+
| .linux.memoryPolicy = {
92+
"mode": "MPOL_DEFAULT"
93+
}'
94+
runc run test_busybox
95+
[ "$status" -eq 0 ]
96+
[[ "${lines[0]}" == *"default"* ]]
97+
}
98+
99+
# Nodes given without a mode must make runc exit with status 1 and an
# "invalid memory policy mode" error.
@test "runc run memory policy with missing mode" {
100+
update_config '
101+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
102+
| .linux.memoryPolicy = {
103+
"nodes": "0-7"
104+
}'
105+
runc run test_busybox
106+
[ "$status" -eq 1 ]
107+
[[ "${lines[0]}" == *"invalid memory policy mode"* ]]
108+
}
109+
110+
# A configuration that passes runc's own validation (known mode, in-range
# nodes, known flags) but is expected to be rejected by the syscall: runc
# must exit with status 1 and report "set_mempolicy ... invalid argument".
@test "runc run memory policy calls syscall with invalid arguments" {
111+
update_config '
112+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
113+
| .linux.memoryPolicy = {
114+
"mode": "MPOL_DEFAULT",
115+
"nodes": "0-7",
116+
"flags": ["MPOL_F_NUMA_BALANCING", "MPOL_F_STATIC_NODES", "MPOL_F_RELATIVE_NODES"]
117+
}'
118+
runc run test_busybox
119+
[ "$status" -eq 1 ]
120+
[[ "${lines[0]}" == *"set_mempolicy"*"invalid argument"* ]]
121+
}
122+
123+
# A node range whose upper bound (9876543210) is far beyond the accepted
# maximum must make runc exit with status 1 and an
# "invalid memory policy node" error.
@test "runc run memory policy bind way too large a node number" {
124+
update_config '
125+
.process.args = ["/bin/sh", "-c", "head -n 1 /proc/self/numa_maps | cut -d \" \" -f 2"]
126+
| .linux.memoryPolicy = {
127+
"mode": "MPOL_BIND",
128+
"nodes": "0-9876543210",
129+
"flags": []
130+
}'
131+
runc run test_busybox
132+
[ "$status" -eq 1 ]
133+
[[ "${lines[0]}" == *"invalid memory policy node"* ]]
134+
}

0 commit comments

Comments
 (0)