diff --git a/libcontainer/cgroups/fs/blkio.go b/libcontainer/cgroups/fs/blkio.go index 8aa8fcdc41e..88012a2f5f1 100644 --- a/libcontainer/cgroups/fs/blkio.go +++ b/libcontainer/cgroups/fs/blkio.go @@ -41,11 +41,15 @@ func (s *BlkioGroup) Set(path string, r *configs.Resources) error { } } for _, wd := range r.BlkioWeightDevice { - if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil { - return err + if wd.Weight != 0 { + if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil { + return err + } } - if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { - return err + if wd.LeafWeight != 0 { + if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } } } for _, td := range r.BlkioThrottleReadBpsDevice { diff --git a/libcontainer/cgroups/fs2/io.go b/libcontainer/cgroups/fs2/io.go index 864a4c1ca6a..fd3f0993ea0 100644 --- a/libcontainer/cgroups/fs2/io.go +++ b/libcontainer/cgroups/fs2/io.go @@ -4,6 +4,8 @@ package fs2 import ( "bufio" + "bytes" + "fmt" "os" "strconv" "strings" @@ -16,32 +18,64 @@ import ( func isIoSet(r *configs.Resources) bool { return r.BlkioWeight != 0 || + len(r.BlkioWeightDevice) > 0 || len(r.BlkioThrottleReadBpsDevice) > 0 || len(r.BlkioThrottleWriteBpsDevice) > 0 || len(r.BlkioThrottleReadIOPSDevice) > 0 || len(r.BlkioThrottleWriteIOPSDevice) > 0 } +// bfqDeviceWeightSupported checks for per-device BFQ weight support (added +// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight". +func bfqDeviceWeightSupported(bfq *os.File) bool { + if bfq == nil { + return false + } + _, _ = bfq.Seek(0, 0) + buf := make([]byte, 32) + _, _ = bfq.Read(buf) + // If only a single number (default weight) if read back, we have older kernel. + _, err := strconv.ParseInt(string(bytes.TrimSpace(buf)), 10, 64) + return err != nil +} + func setIo(dirPath string, r *configs.Resources) error { if !isIoSet(r) { return nil } + // If BFQ IO scheduler is available, use it. + var bfq *os.File + if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 { + var err error + bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR) + if err == nil { + defer bfq.Close() + } else if !os.IsNotExist(err) { + return err + } + } + if r.BlkioWeight != 0 { - filename := "io.bfq.weight" - if err := cgroups.WriteFile(dirPath, filename, - strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { - // if io.bfq.weight does not exist, then bfq module is not loaded. - // Fallback to use io.weight with a conversion scheme - if !os.IsNotExist(err) { + if bfq != nil { // Use BFQ. + if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { return err } + } else { + // Fallback to io.weight with a conversion scheme. v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight) if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil { return err } } } + if bfqDeviceWeightSupported(bfq) { + for _, wd := range r.BlkioWeightDevice { + if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil { + return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err) + } + } + } for _, td := range r.BlkioThrottleReadBpsDevice { if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil { return err diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 5875469c68c..1ea3e07a016 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -150,6 +150,43 @@ function setup() { fi } +@test "runc run (per-device io weight for bfq)" { + requires root # to create a loop device + + dd if=/dev/zero of=backing.img bs=4096 count=1 + dev=$(losetup --find --show backing.img) || skip "unable to create a loop device" + + # See if BFQ scheduler is available. + if ! { grep -qw bfq "/sys/block/${dev#/dev/}/queue/scheduler" && + echo bfq >"/sys/block/${dev#/dev/}/queue/scheduler"; }; then + losetup -d "$dev" + skip "BFQ scheduler not available" + fi + + set_cgroups_path + + IFS=$' \t:' read -r major minor <<<"$(lsblk -nd -o MAJ:MIN "$dev")" + update_config ' .linux.devices += [{path: "'"$dev"'", type: "b", major: '"$major"', minor: '"$minor"'}] + | .linux.resources.blockIO.weight |= 333 + | .linux.resources.blockIO.weightDevice |= [ + { major: '"$major"', minor: '"$minor"', weight: 444 } + ]' + runc run -d --console-socket "$CONSOLE_SOCKET" test_dev_weight + [ "$status" -eq 0 ] + + # The loop device itself is no longer needed. + losetup -d "$dev" + + if [ "$CGROUP_UNIFIED" = "yes" ]; then + file="io.bfq.weight" + else + file="blkio.bfq.weight_device" + fi + weights=$(get_cgroup_value $file) + [[ "$weights" == *"default 333"* ]] + [[ "$weights" == *"$major:$minor 444"* ]] +} + @test "runc run (cgroup v2 resources.unified only)" { requires root cgroups_v2 diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index b92c4169a4f..fe3a324b62f 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -168,10 +168,9 @@ function set_cgroups_path() { update_config '.linux.cgroupsPath |= "'"${OCI_CGROUPS_PATH}"'"' "$bundle" } -# Helper to check a value in cgroups. -function check_cgroup_value() { +# Get a value from a cgroup file. +function get_cgroup_value() { local source=$1 - local expected=$2 local cgroup var current if [ "x$CGROUP_UNIFIED" = "xyes" ]; then @@ -181,9 +180,15 @@ function check_cgroup_value() { var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH) eval cgroup=\$${var}${REL_CGROUPS_PATH} fi + cat $cgroup/$source +} + +# Helper to check a if value in a cgroup file matches the expected one. +function check_cgroup_value() { + local current + current="$(get_cgroup_value $1)" + local expected=$2 - current=$(cat $cgroup/$source) - echo $cgroup/$source echo "current" $current "!?" "$expected" [ "$current" = "$expected" ] }