Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package utils

import (
"bytes"
"errors"
"fmt"
"os"
"path/filepath"
Expand All @@ -30,14 +31,17 @@ var (
sysBusPci = "/sys/bus/pci/devices"
// golangci-lint doesn't see it is used in the testing.go
//nolint: unused
sysBusAux = "/sys/bus/auxiliary/devices"
devDir = "/dev"
sysBusAux = "/sys/bus/auxiliary/devices"
devDir = "/dev"
NetSysDir = "/sys/class/net"
physPortRepRegex = regexp.MustCompile(`^p(\d+)$`)
)

const (
totalVfFile = "sriov_totalvfs"
configuredVfFile = "sriov_numvfs"
eswitchModeSwitchdev = "switchdev"
netdevPhysPortName = "phys_port_name"
classIDBaseInt = 16
classIDBitSize = 64
maxVendorName = 20
Expand Down Expand Up @@ -100,11 +104,52 @@ func GetPfName(pciAddr string) (string, error) {
}
return "", err
} else if len(files) > 0 {
return files[0].Name(), nil
name, err := getPfNameSysFs(pciAddr)
Copy link
Contributor

@adrianchiris adrianchiris Jun 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the description, SFs are used, so I'd expect the device to be in switchdev mode in this case.
Then we should hit L#91 and get the correct netdevice name.

the logic implemented in this PR (getPfNameSysFs) is very similar to GetUplinkRepresentor[1]

[1] https://github.com/k8snetworkplumbingwg/sriovnet/blob/f71b8cb53530e8d51213739e1c24d8551dd497c9/sriovnet_switchdev.go#L116

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it fail to identify the correct PF because you are running this in non-privileged mode (and the devlink command fails)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adrianchiris Yes, we are running in non-priv mode. Our deployment does not allow priv-mode.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

according to the description, SFs are used. so id expect the device is in switchdev mode in this case. then we should hit L#91 and get the correct netdevice name.

the logic implemented in this PR (getPfNameSysFs) is very similar to GetUplinkRepresentor[1]

[1] https://github.com/k8snetworkplumbingwg/sriovnet/blob/f71b8cb53530e8d51213739e1c24d8551dd497c9/sriovnet_switchdev.go#L116

Yes, the logic is similar. In our deployment, which runs in non-privileged mode, it is not possible to determine whether switchdev mode is enabled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We rely on the ability to invoke devlink and query devices (here and in other places in the code, such as getting the DDP profile for Intel NICs).

generally device plugin requires access to system resources to properly advertise devices.

Can you try adding the NET_ADMIN capability to your device plugin deployment?
I think this should do it for your use case.

    securityContext:
      privileged: false
      capabilities:
        add: ["NET_ADMIN"]

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adrianchiris no, currently the usage of NET_ADMIN capabilities is not allowed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adrianchiris Also, the bug is that when executed in non-privileged mode, the wrong device name
is picked as PfName.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adrianchiris Please let me know if there are any other queries or any actions to be taken.

if err != nil {
return "", err
}
glog.Infof("pciAddr and pfName %s. %s", pciAddr, name)
return name, nil
}
return "", fmt.Errorf("the PF name is not found for device %s", pciAddr)
}

// getPfNameSysFs gets a VF or PF PCI address (e.g '0000:03:00.4') and
// returns the PF netdevice name, using sysfs lookups only.
//
// Netdevices are listed from <sysBusPci>/<pciAddress>/physfn/net; when that
// path does not exist (no physfn symlink to a parent PF) the device's own
// <sysBusPci>/<pciAddress>/net directory is listed instead. The first listed
// netdevice is returned, except that a netdevice whose phys_port_name is
// readable and does not match physPortRepRegex (p<port-num>, e.g. p0, p1) is
// skipped.
//
// An error is returned when the net directory cannot be read (the underlying
// error is wrapped, so errors.Is / errors.As work on it) or when no listed
// netdevice qualifies.
func getPfNameSysFs(pciAddress string) (string, error) {
	devicePath := filepath.Join(sysBusPci, pciAddress, "physfn", "net")
	if _, err := os.Stat(devicePath); errors.Is(err, os.ErrNotExist) {
		// If physfn symlink to the parent PF doesn't exist, use the current device's dir
		devicePath = filepath.Join(sysBusPci, pciAddress, "net")
	}

	devices, err := os.ReadDir(devicePath)
	if err != nil {
		return "", fmt.Errorf("failed to lookup %s: %w", pciAddress, err)
	}
	for _, device := range devices {
		// Try to get the phys port name; if it cannot be read, fall back to
		// accepting the device without the check.
		// phys_port_name should be in format p<port-num>, e.g. p0, p1, p2, etc.
		portName, portErr := getNetDevPhysPortName(device.Name())
		if portErr == nil && !physPortRepRegex.MatchString(portName) {
			continue
		}

		return device.Name(), nil
	}
	return "", fmt.Errorf("pfName for %s not found", pciAddress)
}

// getNetDevPhysPortName reads the netdevPhysPortName attribute file of the
// given netdevice under NetSysDir and returns its content with surrounding
// whitespace trimmed. Any read error is returned unchanged.
func getNetDevPhysPortName(netDev string) (string, error) {
	raw, readErr := os.ReadFile(filepath.Join(NetSysDir, netDev, netdevPhysPortName))
	if readErr != nil {
		return "", readErr
	}

	portName := strings.TrimSpace(string(raw))
	return portName, nil
}

// GetPfNameFromAuxDev returns netdevice name of the PF associated with the
// provided auxiliary device name.
func GetPfNameFromAuxDev(auxDevName string) (string, error) {
Expand Down