// Reads the NUMA node id exposed in sysfs for the PCI device `bus_id`
// (file /sys/bus/pci/devices/<bus_id>/numa_node).
//
// Returns the integer stored in that file, or -1 when `bus_id` is null, the
// resulting path does not fit in PATH_MAX, or the file cannot be opened or
// parsed as an integer.
static int readPciNumaNode(const char *bus_id) {
    if (bus_id == nullptr) {
        return -1;
    }

    char path[PATH_MAX];
    const int written = snprintf(path, sizeof(path),
                                 "/sys/bus/pci/devices/%s/numa_node", bus_id);
    // A truncated path would silently refer to some other (or no) file.
    if (written < 0 || written >= static_cast<int>(sizeof(path))) {
        return -1;
    }

    std::ifstream file(path);
    int node = -1;
    // A failed numeric extraction stores 0 in the target (since C++11), which
    // would be indistinguishable from NUMA node 0, so check the stream state
    // instead of relying on the initial value surviving.
    if (!(file >> node)) {
        return -1;
    }
    return node;
}

// Tells whether two PCI devices, identified by their bus ids, report the same
// NUMA node in sysfs.
//
// @param bus1 PCI bus id of the first device.
// @param bus2 PCI bus id of the second device.
// @return true only when both node ids could be read, are not -1, and are
//         equal; false otherwise (including when either read fails).
static bool isSameNumaNode(const char *bus1, const char *bus2) {
    const int numa1 = readPciNumaNode(bus1);
    if (numa1 == -1) {
        // Unknown node for the first device: nothing to compare against.
        return false;
    }
    return numa1 == readPciNumaNode(bus2);
}