@@ -72,6 +72,12 @@ int mca_coll_han_alltoall_using_smsc(
72
72
opal_convertor_t convertor ;
73
73
int send_needs_bounce , have_device_buffer ;
74
74
size_t packed_size = 0 ;
75
+ enum {
76
+ BOUNCE_NOT_INITIALIZED = 0 ,
77
+ BOUNCE_IS_FROM_RBUF = 1 ,
78
+ BOUNCE_IS_FROM_FREELIST = 2 ,
79
+ BOUNCE_IS_FROM_MALLOC = 3 ,
80
+ };
75
81
76
82
77
83
OPAL_OUTPUT_VERBOSE ((90 , mca_coll_han_component .han_output ,
@@ -191,7 +197,7 @@ int mca_coll_han_alltoall_using_smsc(
191
197
192
198
If the application buffer is device memory, we'll also need to exchange
193
199
in push mode so that the process which has device registrations can
194
- perform the reads.
200
+ perform the reads. (This mode has been disabled.)
195
201
196
202
In both of these cases, we'll need to use the bounce buffer too.
197
203
*/
@@ -211,19 +217,30 @@ int mca_coll_han_alltoall_using_smsc(
211
217
inter_recv_reqs = malloc (sizeof (* inter_recv_reqs ) * up_size );
212
218
char * * low_bufs = malloc (low_size * sizeof (* low_bufs ));
213
219
void * * sbuf_map_ctx = malloc (low_size * sizeof (& sbuf_map_ctx ));
220
+ opal_free_list_item_t * send_fl_item = NULL ;
214
221
215
222
const int nptrs_gather = 3 ;
216
223
void * * gather_buf_out = calloc (low_size * nptrs_gather , sizeof (void * ));
217
- bool send_bounce_is_allocated = false ;
224
+ int send_bounce_status = BOUNCE_NOT_INITIALIZED ;
218
225
219
226
do {
220
227
start_allgather :
221
228
if ( 0 == send_needs_bounce ) {
222
229
send_bounce = (char * )rbuf + up_rank * send_bytes_per_fan ;
230
+ send_bounce_status = BOUNCE_IS_FROM_RBUF ;
223
231
} else {
224
- if (!send_bounce_is_allocated ) {
225
- send_bounce = malloc (send_bytes_per_fan * fanout );
226
- send_bounce_is_allocated = true;
232
+ if (send_bounce_status == BOUNCE_NOT_INITIALIZED || send_bounce_status == BOUNCE_IS_FROM_RBUF ) {
233
+ if (send_bytes_per_fan * fanout < mca_coll_han_component .han_packbuf_bytes ) {
234
+ send_fl_item = opal_free_list_get (& mca_coll_han_component .pack_buffers );
235
+ if (send_fl_item ) {
236
+ send_bounce_status = BOUNCE_IS_FROM_FREELIST ;
237
+ send_bounce = send_fl_item -> ptr ;
238
+ }
239
+ }
240
+ if (!send_fl_item ) {
241
+ send_bounce = malloc (send_bytes_per_fan * fanout );
242
+ send_bounce_status = BOUNCE_IS_FROM_MALLOC ;
243
+ }
227
244
}
228
245
}
229
246
@@ -409,7 +426,11 @@ int mca_coll_han_alltoall_using_smsc(
409
426
}
410
427
}
411
428
OBJ_DESTRUCT (& convertor );
412
- if (send_bounce_is_allocated ) free (send_bounce );
429
+ if (send_bounce_status == BOUNCE_IS_FROM_FREELIST ) {
430
+ opal_free_list_return (& mca_coll_han_component .pack_buffers , send_fl_item );
431
+ } else if (send_bounce_status == BOUNCE_IS_FROM_MALLOC ) {
432
+ free (send_bounce );
433
+ }
413
434
free (inter_send_reqs );
414
435
free (inter_recv_reqs );
415
436
free (sbuf_map_ctx );
0 commit comments