diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index 4e7719c73af..1e4ad39d4a3 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -68,6 +68,7 @@ opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX}; * infrastructure that manages its structure (e.g., OpenPMIx). If we setup this * session directory structure, then we shall cleanup after ourselves. */ +static bool destroy_top_session_dir = false; static bool destroy_job_session_dir = false; static bool destroy_proc_session_dir = false; @@ -983,22 +984,25 @@ int ompi_rte_finalize(void) { /* cleanup the session directory we created */ + if (NULL != opal_process_info.top_session_dir && destroy_top_session_dir) { + opal_os_dirpath_destroy(opal_process_info.top_session_dir, + true, check_file); + free(opal_process_info.top_session_dir); + opal_process_info.top_session_dir = NULL; + destroy_top_session_dir = false; + } + if (NULL != opal_process_info.job_session_dir && destroy_job_session_dir) { opal_os_dirpath_destroy(opal_process_info.job_session_dir, - false, check_file); + true, check_file); free(opal_process_info.job_session_dir); opal_process_info.job_session_dir = NULL; destroy_job_session_dir = false; } - if (NULL != opal_process_info.top_session_dir) { - free(opal_process_info.top_session_dir); - opal_process_info.top_session_dir = NULL; - } - if (NULL != opal_process_info.proc_session_dir && destroy_proc_session_dir) { opal_os_dirpath_destroy(opal_process_info.proc_session_dir, - false, check_file); + true, check_file); free(opal_process_info.proc_session_dir); opal_process_info.proc_session_dir = NULL; destroy_proc_session_dir = false; @@ -1165,27 +1169,45 @@ void ompi_rte_wait_for_debugger(void) static int _setup_top_session_dir(char **sdir) { + /* + * Use a session directory structure similar to prrte (create only one + * directory for the top session) so that it can be cleaned up correctly + * when terminated. + */ char *tmpdir; + int rc; + uid_t uid = geteuid(); + pid_t pid = getpid(); if( NULL == (tmpdir = getenv("TMPDIR")) ) if( NULL == (tmpdir = getenv("TEMP")) ) if( NULL == (tmpdir = getenv("TMP")) ) tmpdir = "/tmp"; - *sdir = strdup(tmpdir); + if (0 > opal_asprintf(sdir, "%s/%s.%s.%lu.%lu", + tmpdir, "ompi", + opal_process_info.nodename, + (unsigned long)pid, (unsigned long) uid)) { + opal_process_info.top_session_dir = NULL; + return OPAL_ERR_OUT_OF_RESOURCE; + } + rc = opal_os_dirpath_create(opal_process_info.top_session_dir, 0755); + if (OPAL_SUCCESS != rc) { + // could not create top session dir + free(opal_process_info.top_session_dir); + opal_process_info.top_session_dir = NULL; + return rc; + } + destroy_top_session_dir = true; return OPAL_SUCCESS; } static int _setup_job_session_dir(char **sdir) { int rc; - /* get the effective uid */ - uid_t uid = geteuid(); - if (0 > opal_asprintf(sdir, "%s/ompi.%s.%lu/jf.0/%u", + if (0 > opal_asprintf(sdir, "%s/%u", opal_process_info.top_session_dir, - opal_process_info.nodename, - (unsigned long)uid, opal_process_info.my_name.jobid)) { opal_process_info.job_session_dir = NULL; return OPAL_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index 21e1bcf2a6c..82f04df9466 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -298,6 +298,11 @@ static int mca_btl_sm_deregister_mem_knem(struct mca_btl_base_module_t *btl, return OPAL_SUCCESS; } +static void mca_btl_sm_component_finalize(void *data /*data unused*/) { + opal_shmem_unlink(&mca_btl_sm_component.seg_ds); + opal_shmem_segment_detach(&mca_btl_sm_component.seg_ds); +} + /* * SM component initialization */ @@ -419,6 +424,12 @@ mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab /* set flag indicating btl not inited */ mca_btl_sm.btl_inited = false; + /* + * Use a method similar to `mca_btl_smcuda_component_init` to register segment finalize + * to opal and release it before shmem is closed. + */ + opal_finalize_register_cleanup(mca_btl_sm_component_finalize); + return btls; failed: opal_shmem_unlink(&component->seg_ds); diff --git a/opal/mca/btl/sm/btl_sm_module.c b/opal/mca/btl/sm/btl_sm_module.c index 3cb69c8d3da..27859bb4a2c 100644 --- a/opal/mca/btl/sm/btl_sm_module.c +++ b/opal/mca/btl/sm/btl_sm_module.c @@ -347,9 +347,6 @@ static int sm_finalize(struct mca_btl_base_module_t *btl) free(component->fbox_in_endpoints); component->fbox_in_endpoints = NULL; - opal_shmem_unlink(&mca_btl_sm_component.seg_ds); - opal_shmem_segment_detach(&mca_btl_sm_component.seg_ds); - return OPAL_SUCCESS; }