From db92ff54806ee744aba0a8498a453d8f648b574b Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 5 Nov 2024 10:50:30 -0700 Subject: [PATCH] UCX: fix problem when doing multiple session init/finalize This is another case where the MCA system gets shut down when no more "instances" are present but, unless --enable-mca-dso is used, the UCX common code has retained info about memory allocation, etc. that is no longer correct. Related to #12909 Signed-off-by: Howard Pritchard --- opal/mca/common/ucx/common_ucx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/opal/mca/common/ucx/common_ucx.c b/opal/mca/common/ucx/common_ucx.c index 9f90567c309..3b21213e134 100644 --- a/opal/mca/common/ucx/common_ucx.c +++ b/opal/mca/common/ucx/common_ucx.c @@ -4,7 +4,7 @@ * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2021-2024 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2022 Google, LLC. All rights reserved. * Copyright (c) 2022 IBM Corporation. All rights reserved. @@ -107,6 +107,10 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t * // cleans up the MCA vars. This will cause the string to go // out of scope unless we place the pointer to it on the heap. 
opal_common_ucx.tls = (char **) malloc(sizeof(char *)); + *opal_common_ucx.tls = NULL; + } + + if (NULL == *opal_common_ucx.tls) { *opal_common_ucx.tls = strdup(default_tls); } @@ -122,8 +126,13 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t * if (NULL == opal_common_ucx.devices) { opal_common_ucx.devices = (char**) malloc(sizeof(char*)); + *opal_common_ucx.devices = NULL; + } + + if (NULL == *opal_common_ucx.devices) { *opal_common_ucx.devices = strdup(default_devices); } + devices_index = mca_base_var_register( "opal", "opal_common", "ucx", "devices", "List of device driver pattern names, which, if supported by UCX, will "