Skip to content

Commit 48e09ea

Browse files
committed
prov/efa: dmabuf try / fallback logic
feat: Add a default dmabuf attempt with fallback logic for all efa_hmem_ifaces. Problem: - How to make dmabuf usage the default going forward, while keeping a fallback mechanism when dmabuf is not supported. Solution: - Modified the initial PR from @jiaxiyan at 6aa6708#diff-9b57a9410ed94ed1f1aea837412e68bbe9b49582edce813ba352fffb37dcc007 - Added a dmabuf_supported_by_device_b flag to the efa_hmem_info structure in prov/efa/efa_hmem.h - Updated dmabuf_supported_by_device_b in each fi_hmem_iface type's p2p_support function in prov/efa/efa_hmem.c - Removed the per-fi_hmem_iface-type checks in prov/efa/efa_mr_reg_ibv_mr.c - Testing: - Ran MPI perf tests on 2 nodes on p5en with the dmabuf and fallback options hard-set - Ran MPI perf tests on 16 nodes on p5en with the dmabuf and fallback options hard-set Sim Issue: - N/A Signed-off-by: Nick Mazzilli <[email protected]>
1 parent cbab404 commit 48e09ea

File tree

3 files changed

+87
-70
lines changed

3 files changed

+87
-70
lines changed

prov/efa/src/efa_hmem.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ static inline void efa_hmem_info_check_p2p_support_cuda(struct efa_hmem_info *in
118118
int dmabuf_fd;
119119
uint64_t dmabuf_offset;
120120

121+
info->dmabuf_supported_by_device_b = false;
122+
121123
cuda_ret = ofi_cudaMalloc(&ptr, len);
122124
if (cuda_ret != cudaSuccess) {
123125
info->initialized = false;
@@ -145,6 +147,7 @@ static inline void efa_hmem_info_check_p2p_support_cuda(struct efa_hmem_info *in
145147
"Fall back to ibv_reg_mr\n", fi_strerror(-errno));
146148
ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
147149
}
150+
info->dmabuf_supported_by_device_b = true;
148151
} else {
149152
EFA_INFO(FI_LOG_CORE,
150153
"Unable to retrieve dmabuf fd of CUDA device buffer: %d. "
@@ -193,6 +196,8 @@ static inline void efa_hmem_info_check_p2p_support_neuron(struct efa_hmem_info *
193196
uint64_t offset;
194197
int ret;
195198

199+
info->dmabuf_supported_by_device_b = false;
200+
196201
if (g_efa_selected_device_list[0].device_caps & EFADV_DEVICE_ATTR_CAPS_RDMA_READ) {
197202
ibv_access |= IBV_ACCESS_REMOTE_READ;
198203
}
@@ -222,6 +227,7 @@ static inline void efa_hmem_info_check_p2p_support_neuron(struct efa_hmem_info *
222227
ibv_mr = ibv_reg_dmabuf_mr(
223228
ibv_pd, offset,
224229
len, (uint64_t)ptr, dmabuf_fd, ibv_access);
230+
info->dmabuf_supported_by_device_b = true;
225231
} else if (ret == -FI_EOPNOTSUPP) {
226232
EFA_INFO(FI_LOG_MR,
227233
"Unable to retrieve dmabuf fd of Neuron device buffer, "
@@ -284,9 +290,14 @@ efa_hmem_info_init_iface(enum fi_hmem_iface iface)
284290
}
285291

286292
info->initialized = true;
293+
info->max_medium_msg_size = 0;
294+
info->runt_size = 0;
295+
info->min_read_msg_size = 0;
296+
info->min_read_write_size = 0;
287297

288298
if (iface == FI_HMEM_SYNAPSEAI || iface == FI_HMEM_SYSTEM) {
289299
info->p2p_supported_by_device = true;
300+
info->dmabuf_supported_by_device_b = true;
290301
} else if (ofi_hmem_p2p_disabled()) {
291302
info->p2p_supported_by_device = false;
292303
} else {

prov/efa/src/efa_hmem.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ static const enum fi_hmem_iface efa_hmem_ifaces[] = {
2424
struct efa_hmem_info {
2525
bool initialized; /* do we support it at all */
2626
bool p2p_supported_by_device; /* do we support p2p with this device */
27+
bool dmabuf_supported_by_device_b; /* do we support dmabuf with this device */
2728

2829
size_t max_medium_msg_size;
2930
size_t runt_size;

prov/efa/src/efa_mr.c

Lines changed: 75 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -506,77 +506,82 @@ struct ibv_mr *efa_mr_reg_ibv_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
506506
* @param flags flags in fi_mr_reg/fi_mr_regattr
507507
* @return struct ibv_mr* the ptr to the registered MR
508508
*/
509-
static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr *mr_attr,
510-
int access, const uint64_t flags)
509+
static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr,
510+
struct fi_mr_attr *mr_attr,
511+
int access, const uint64_t flags)
511512
{
512-
int dmabuf_fd;
513-
uint64_t offset;
514-
int ret;
515-
struct ibv_mr *ibv_mr;
516-
517-
if (flags & FI_MR_DMABUF)
518-
return efa_mr_reg_ibv_dmabuf_mr(
519-
efa_mr->domain->ibv_pd,
520-
mr_attr->dmabuf->offset,
521-
mr_attr->dmabuf->len,
522-
(uintptr_t) mr_attr->dmabuf->base_addr + mr_attr->dmabuf->offset,
523-
mr_attr->dmabuf->fd,
524-
access
525-
);
526-
527-
if (efa_mr_is_synapseai(efa_mr)) {
528-
ret = ofi_hmem_get_dmabuf_fd(efa_mr->peer.iface,
529-
mr_attr->mr_iov->iov_base,
530-
(uint64_t) mr_attr->mr_iov->iov_len,
531-
&dmabuf_fd, &offset);
532-
if (ret != FI_SUCCESS) {
533-
EFA_WARN(FI_LOG_MR, "Unable to get dmabuf fd for Gaudi device buffer \n");
534-
return NULL;
535-
}
536-
return efa_mr_reg_ibv_dmabuf_mr(
537-
efa_mr->domain->ibv_pd, offset,
538-
mr_attr->mr_iov->iov_len,
539-
(uint64_t)mr_attr->mr_iov->iov_base,
540-
dmabuf_fd, access);
541-
}
542-
543-
/*
544-
* TODO: need such fallback for cuda as well when
545-
* FI_CUDA_API_PERMITTED is true
546-
*/
547-
if (efa_mr_is_neuron(efa_mr)) {
548-
ret = ofi_hmem_get_dmabuf_fd(
549-
efa_mr->peer.iface,
550-
mr_attr->mr_iov->iov_base,
551-
mr_attr->mr_iov->iov_len,
552-
&dmabuf_fd,
553-
&offset);
554-
555-
if (ret == FI_SUCCESS) {
556-
/* Success => invoke ibv_reg_dmabuf_mr */
557-
ibv_mr = efa_mr_reg_ibv_dmabuf_mr(
558-
efa_mr->domain->ibv_pd, 0,
559-
mr_attr->mr_iov->iov_len,
560-
(uint64_t)mr_attr->mr_iov->iov_base,
561-
dmabuf_fd, access);
562-
(void) ofi_hmem_put_dmabuf_fd(efa_mr->peer.iface, dmabuf_fd);
563-
return ibv_mr;
564-
} else if (ret == -FI_EOPNOTSUPP) {
565-
/* Protocol not available => fallback */
566-
EFA_INFO(FI_LOG_MR,
567-
"Unable to get dmabuf fd for Neuron device buffer, "
568-
"Fall back to ibv_reg_mr\n");
569-
return ibv_reg_mr(
570-
efa_mr->domain->ibv_pd,
571-
(void *)mr_attr->mr_iov->iov_base,
572-
mr_attr->mr_iov->iov_len, access);
573-
}
574-
return NULL;
575-
}
576-
577-
return ibv_reg_mr(efa_mr->domain->ibv_pd,
578-
(void *)mr_attr->mr_iov->iov_base,
579-
mr_attr->mr_iov->iov_len, access);
513+
int dmabuf_fd;
514+
uint64_t offset;
515+
int ret;
516+
517+
/* Explicit dmabuf registration */
518+
if (flags & FI_MR_DMABUF) {
519+
if (!mr_attr->dmabuf) {
520+
EFA_WARN(FI_LOG_MR, "FI_MR_DMABUF set but mr_attr->dmabuf == NULL\n");
521+
return NULL;
522+
}
523+
if (!g_efa_hmem_info[mr_attr->iface].dmabuf_supported_by_device_b) {
524+
EFA_WARN(FI_LOG_MR,
525+
"Requested FI_MR_DMABUF, but dmabuf not supported for iface=%d\n",
526+
mr_attr->iface);
527+
return NULL;
528+
}
529+
530+
EFA_INFO(FI_LOG_MR,
531+
"FI_MR_DMABUF: fd=%d offset=%lu len=%zu\n",
532+
mr_attr->dmabuf->fd, mr_attr->dmabuf->offset,
533+
mr_attr->dmabuf->len);
534+
535+
return efa_mr_reg_ibv_dmabuf_mr(
536+
efa_mr->domain->ibv_pd,
537+
mr_attr->dmabuf->offset,
538+
mr_attr->dmabuf->len,
539+
(uintptr_t) mr_attr->dmabuf->base_addr + mr_attr->dmabuf->offset,
540+
mr_attr->dmabuf->fd,
541+
access);
542+
}
543+
544+
/* Implicit VA path with dmabuf-first */
545+
if (g_efa_hmem_info[mr_attr->iface].dmabuf_supported_by_device_b) {
546+
ret = ofi_hmem_get_dmabuf_fd(
547+
mr_attr->iface,
548+
mr_attr->mr_iov->iov_base,
549+
(uint64_t) mr_attr->mr_iov->iov_len,
550+
&dmabuf_fd, &offset);
551+
552+
if (ret == FI_SUCCESS) {
553+
EFA_INFO(FI_LOG_MR,
554+
"Registering dmabuf MR: fd=%d offset=%lu len=%zu\n",
555+
dmabuf_fd, offset, mr_attr->mr_iov->iov_len);
556+
557+
return efa_mr_reg_ibv_dmabuf_mr(
558+
efa_mr->domain->ibv_pd, offset,
559+
mr_attr->mr_iov->iov_len,
560+
(uint64_t)mr_attr->mr_iov->iov_base,
561+
dmabuf_fd, access);
562+
}
563+
564+
if (ret == -FI_EOPNOTSUPP || ret == -FI_ENOSYS) {
565+
EFA_WARN(FI_LOG_MR,
566+
"dmabuf not supported at runtime for iface=%d, disabling\n",
567+
mr_attr->iface);
568+
g_efa_hmem_info[mr_attr->iface].dmabuf_supported_by_device_b = false;
569+
} else {
570+
EFA_WARN(FI_LOG_MR,
571+
"ofi_hmem_get_dmabuf_fd failed: ret=%d (%s)\n",
572+
ret, fi_strerror(-ret));
573+
}
574+
/* fall through to ibv_reg_mr */
575+
}
576+
577+
/* Fallback: plain ibv_reg_mr */
578+
EFA_WARN(FI_LOG_MR,
579+
"Fallback ibv_reg_mr: addr=%p len=%zu\n",
580+
mr_attr->mr_iov->iov_base, mr_attr->mr_iov->iov_len);
581+
582+
return ibv_reg_mr(efa_mr->domain->ibv_pd,
583+
(void *)mr_attr->mr_iov->iov_base,
584+
mr_attr->mr_iov->iov_len, access);
580585
}
581586

582587
#if HAVE_CUDA

0 commit comments

Comments
 (0)