diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..6e16cc238
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,8 @@
+[submodule "tpls/Caliper"]
+    path = tpls/Caliper
+    url = https://github.com/NexGenAnalytics/Caliper.git
+    branch = feature/make-multitool-safe # Until Caliper gets full support for Kokkos EventSet
+[submodule "tpls/apex"]
+    path = tpls/apex
+    url = https://github.com/NexGenAnalytics/apex.git
+    branch = develop
diff --git a/Build.md b/Build.md
new file mode 100644
index 000000000..2fffc7650
--- /dev/null
+++ b/Build.md
@@ -0,0 +1,22 @@
+# How to Build
+
+# With CMake
+
+1. Create your build directory and go to it
+
+2. (Optional) Type `ccmake ..` and change any options, including tools you want turned on (some are off by default).
+
+3. Type `cmake ..`
+
+4. Type `make`
+
+5. Specify the generated shared library (`.so`, or `.dylib` on macOS) in the environment variable `KOKKOS_TOOLS_LIBS` when running your Kokkos-based application.
+
+
+# With Makefile (recommended)
+
+1. Go into the directory of the particular tool, e.g., `cd debugging/kernel-logger`
+
+2. Type `make`
+
+3. Specify the generated .so file in the environment variable `KOKKOS_TOOLS_LIBS` when running your Kokkos-based application.
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..68d5ea1d0
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,238 @@
+# Top-level build script for Kokkos Tools: configures optional third-party
+# support, adds every tool subdirectory and installs the exported targets.
+# CMake >= 3.20 is needed for cmake_path(), file(REAL_PATH) and policy CMP0111.
+cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
+
+project(KokkosTools CXX)
+
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+endif()
+if(CMAKE_CXX_STANDARD LESS 17)
+  message(FATAL_ERROR "KokkosTools requires C++17")
+endif()
+
+
+# Include utilities
+include(cmake/utils.cmake)
+include(cmake/configure_tpls.cmake)
+
+# Set policies
+cmake_policy(SET CMP0111 NEW) # error if library not found
+
+# Disable in-source builds to prevent source tree corruption.
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
+  message(FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files.")
+endif()
+
+list(INSERT CMAKE_MODULE_PATH 0 ${PROJECT_SOURCE_DIR}/cmake)
+
+message(STATUS)
+message(STATUS "Configuring Kokkos-Tools")
+message(STATUS)
+
+# Common settings
+option(BUILD_SHARED_LIBS "Build shared libraries" ON)
+if(WIN32)
+  set(BUILD_SHARED_LIBS OFF) # We need to add __declspec(dllexport/dllimport) for Windows DLLs
+endif()
+
+# Tools settings
+option(KokkosTools_ENABLE_SINGLE "Build single library interfacing all profilers and dispatching at runtime" OFF)
+if(WIN32)
+  set(KokkosTools_ENABLE_SINGLE ON)
+endif()
+
+option(KokkosTools_ENABLE_PAPI "Enable PAPI support" OFF)
+option(KokkosTools_ENABLE_MPI "Enable MPI support" OFF)
+option(KokkosTools_ENABLE_CALIPER "Enable building Caliper library" OFF)
+option(KokkosTools_ENABLE_APEX "Enable building Apex library" OFF)
+option(KokkosTools_ENABLE_EXAMPLES "Build examples" OFF)
+# Advanced settings
+option(KokkosTools_REUSE_KOKKOS_COMPILER "Set the compiler and flags based on installed Kokkos settings" OFF)
+mark_as_advanced(KokkosTools_REUSE_KOKKOS_COMPILER)
+
+# Fetch Kokkos options (Kokkos_FOUND_MSG is only defined when Kokkos was located):
+acquire_kokkos_config()
+if(DEFINED Kokkos_FOUND_MSG)
+  message(STATUS "${Kokkos_FOUND_MSG}: ${Kokkos_INSTALL_DIR}\n"
+    "\t\tDevices: ${Kokkos_DEVICES}\n"
+    "\t\tArchitecture: ${Kokkos_ARCH}\n"
+    "\t\tTPLs: ${Kokkos_TPLS}\n"
+    "\t\tCompiler: ${Kokkos_CXX_COMPILER} (${Kokkos_CXX_COMPILER_ID})\n"
+    "\t\tCMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}\n"
+    "\t\tOptions: ${Kokkos_OPTIONS}")
+  # Synchronize compiler and flags (only when explicitly requested)
+  if(KokkosTools_REUSE_KOKKOS_COMPILER)
+    set(CMAKE_CXX_COMPILER "${Kokkos_CXX_COMPILER}" CACHE STRING "C++ Compiler")
+    set(CMAKE_CXX_STANDARD "${CMAKE_CXX_STANDARD_DEFAULT}" CACHE STRING "C++ Standard: 98, 11, 14, 17, 20 or 23")
+  endif()
+else()
+  if(KokkosTools_REUSE_KOKKOS_COMPILER)
+    message(FATAL_ERROR "Kokkos not found: can't reuse Kokkos compiler (which was explicitly "
+      "requested with KokkosTools_REUSE_KOKKOS_COMPILER=ON)")
+  endif()
+  message(STATUS "Kokkos NOT found")
+endif()
+
+# Libraries
+if(KokkosTools_ENABLE_PAPI)
+  find_package(PAPI REQUIRED) # TODO: papi-connector requires v6.0 or newer
+  cmake_path(GET PAPI_INCLUDE_DIR PARENT_PATH PAPI_ROOT)
+  message(STATUS "Found PAPI ${PAPI_VERSION_STRING} at ${PAPI_ROOT}")
+  set(KokkosTools_HAS_PAPI ON)
+else()
+  message(STATUS "PAPI support disabled")
+  set(KokkosTools_HAS_PAPI OFF)
+endif()
+
+if(KokkosTools_ENABLE_MPI)
+  find_package(MPI REQUIRED)
+  message(STATUS "Found MPI ${MPI_CXX_VERSION}: ${MPI_CXX_LIBRARIES}")
+  set(KOKKOSTOOLS_HAS_MPI 1)
+else()
+  message(STATUS "MPI not available. MPI disabled.")
+  set(KOKKOSTOOLS_HAS_MPI 0)
+endif()
+
+include(cmake/configure_variorum.cmake)
+
+set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER})
+set(KOKKOSTOOLS_HAS_NVPROF ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvprof should be available
+
+if(DEFINED ENV{VTUNE_HOME})
+  set(VTune_ROOT $ENV{VTUNE_HOME})
+endif()
+if(VTune_ROOT)
+  find_package(ITT REQUIRED)
+  set(KOKKOSTOOLS_HAS_VTUNE ON)
+else()
+  message(WARNING "Set VTUNE_HOME in environment or VTune_ROOT in build options to build VTune connectors")
+  set(VTune_ROOT "" CACHE STRING "Path to VTune Intel compiler")
+  set(KOKKOSTOOLS_HAS_VTUNE OFF)
+endif()
+
+# make Kokkos profiling interface available for native profilers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/profiling/all)
+
+# Config file (generated into the build tree and added to the include path)
+configure_file(common/kp_config.hpp.in common/kp_config.hpp)
+set(COMMON_HEADERS_PATH ${CMAKE_CURRENT_BINARY_DIR}/common)
+include_directories(${COMMON_HEADERS_PATH})
+
+set(SINGLELIB_PROFILERS "" CACHE STRING "" FORCE)
+
+# Export settings
+include(GNUInstallDirs)
+set(EXPORT_NAME KokkosToolsConfig)
+set(EXPORT_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR})
+set(EXPORT_LIB_DIR ${CMAKE_INSTALL_LIBDIR})
+set(EXPORT_TARGETS "" CACHE STRING "" FORCE)
+
+if(WIN32)
+  message(STATUS "Windows target detected - skipping Unix-only tools.")
+endif()
+
+if(APPLE)
+  message(STATUS "Apple OSX target detected.")
+endif()
+
+# Utilities
+if(NOT WIN32)
+  add_subdirectory(common/kernel-filter)
+endif()
+add_subdirectory(debugging/kernel-logger)
+
+# Profilers
+if(NOT WIN32)
+  add_subdirectory(profiling/simple-kernel-timer)
+  add_subdirectory(profiling/memory-hwm)
+  if(KOKKOSTOOLS_HAS_MPI)
+    add_subdirectory(profiling/memory-hwm-mpi)
+  else()
+    message(STATUS "Skipping memory-hwm-mpi (MPI disabled)")
+  endif()
+  add_subdirectory(profiling/memory-events)
+  add_subdirectory(profiling/memory-usage)
+  add_subdirectory(profiling/chrome-tracing)
+  add_subdirectory(profiling/space-time-stack)
+endif()
+
+# External lib connectors
+if(KokkosTools_ENABLE_PAPI)
+  add_subdirectory(profiling/papi-connector)
+endif()
+
+if(NOT WIN32 AND NOT APPLE)
+  add_subdirectory(profiling/systemtap-connector)
+endif()
+
+if(KOKKOSTOOLS_HAS_VARIORUM)
+  add_subdirectory(profiling/variorum-connector)
+endif()
+
+# GPU profilers
+if(Kokkos_ENABLE_CUDA)
+  add_subdirectory(profiling/nvprof-connector)
+  add_subdirectory(profiling/nvprof-focused-connector)
+endif()
+if(Kokkos_ENABLE_HIP)
+  #add_subdirectory(profiling/roctx-connector)
+endif()
+
+if(KOKKOSTOOLS_HAS_VTUNE)
+  add_subdirectory(profiling/vtune-connector)
+  add_subdirectory(profiling/vtune-focused-connector)
+endif()
+
+# Find Caliper (an existing installation is required)
+if(KokkosTools_ENABLE_CALIPER)
+  find_package(caliper QUIET)
+  if(caliper_INCLUDE_DIR)
+    cmake_path(GET caliper_INCLUDE_DIR PARENT_PATH Caliper_INSTALL_DIR)
+    file(REAL_PATH ${Caliper_INSTALL_DIR} Caliper_INSTALL_DIR)
+    message(STATUS "Caliper installation found in: ${Caliper_INSTALL_DIR}")
+    list(APPEND SINGLELIB_PROFILERS caliper)
+  else()
+    # Git submodules are not supported for Caliper. Kokkos Tools users can install Caliper and link it on their own if they don't have it.
+    message(FATAL_ERROR "FATAL: Required Caliper installation not found! Exiting.")
+  endif()
+endif()
+
+# Find Apex (an existing installation is required)
+if(KokkosTools_ENABLE_APEX)
+  find_package(Apex QUIET)
+  if(Apex_FOUND)
+    message(STATUS "Apex installation found in: ${Apex_DIR}")
+    list(APPEND SINGLELIB_PROFILERS "apex")
+  else()
+    # Git submodules are not supported for Apex. Kokkos Tools users can install Apex and link it on their own if they don't have it.
+    message(FATAL_ERROR "FATAL: Required Apex installation not found! Exiting.")
+  endif()
+endif()
+
+# Build single library interface (once we have everything set up)
+if(KokkosTools_ENABLE_SINGLE)
+  message(STATUS "Building Monolithic KokkosTools library with profilers: ${SINGLELIB_PROFILERS}")
+  add_subdirectory(profiling/all)
+else()
+  message(STATUS "Monolithic KokkosTools library skipped")
+endif()
+
+# Build examples
+if(KokkosTools_ENABLE_EXAMPLES)
+  if(NOT KokkosTools_ENABLE_SINGLE)
+    message(WARNING "This example requires KokkosTools built with monolothic library interface (KokkosTools_ENABLE_SINGLE=ON)")
+  else()
+    enable_testing()
+    add_subdirectory(example)
+  endif()
+endif()
+
+# Install exports
+install(TARGETS ${EXPORT_TARGETS} EXPORT ${EXPORT_NAME})
+install(EXPORT ${EXPORT_NAME}
+  NAMESPACE KokkosTools::
+  DESTINATION ${EXPORT_LIB_DIR}/cmake)
+install(CODE "SET(KokkosTools_HAS_MPI ${KOKKOSTOOLS_HAS_MPI})")
+
diff --git a/cmake/FindApex.cmake b/cmake/FindApex.cmake
new file mode 100644
index 000000000..4f6be521d
--- /dev/null
+++ b/cmake/FindApex.cmake
@@ -0,0 +1,34 @@
+find_package(PkgConfig REQUIRED)
+
+# backup current CMAKE_PREFIX_PATH and PKG_CONFIG_USE_CMAKE_PREFIX_PATH
+if(DEFINED CMAKE_PREFIX_PATH)
+  set(_old_def ON)
+  set(_old_val ${CMAKE_PREFIX_PATH})
+else()
+  set(_old_def OFF)
+endif()
+set(_old_use ${PKG_CONFIG_USE_CMAKE_PREFIX_PATH})
+set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ON)
+
+# add Apex_DIR / Apex_ROOT to module search path
+if(Apex_DIR)
+  set(CMAKE_PREFIX_PATH ${Apex_DIR})
+elseif(Apex_ROOT)
+  set(CMAKE_PREFIX_PATH ${Apex_ROOT})
+endif()
+
+# find Apex
+pkg_check_modules(Apex QUIET IMPORTED_TARGET apex)
+if(Apex_FOUND)
+  # create "apex" target like it would be created by Apex setup
+  add_library(apex ALIAS PkgConfig::Apex)
+  file(REAL_PATH "${Apex_PREFIX}" Apex_DIR)
+endif()
+
+# restore original variables
+if(_old_def)
+  set(CMAKE_PREFIX_PATH ${_old_val})
+else()
+  unset(CMAKE_PREFIX_PATH)
+endif()
+set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ${_old_use})
diff --git a/cmake/FindITT.cmake b/cmake/FindITT.cmake
new file mode 100644
index 000000000..753e48878
--- /dev/null
+++ b/cmake/FindITT.cmake
@@ -0,0 +1,61 @@
+# Note: Package is named "ITT" here because we reuse Caliper's FindITTAPI.cmake find module
+# and it calls find_package_handle_standard_args() with "ITT" package name internally, so CMake
+# expects find_package() calls to use "ITT" package name as well.
+
+# is_architecture_x64(<out_var>)
+# Sets <out_var> in the caller's scope to ON when CMAKE_SYSTEM_PROCESSOR ends
+# in "64", and to OFF otherwise.
+function(is_architecture_x64 OUT_ARCH64)
+  # heuristic to catch x86_64 on Unix and AMD64 on Windows
+  # (the processor name is quoted so that an empty or non-matching value
+  # yields OFF instead of a malformed if() expression)
+  if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "64$")
+    set(${OUT_ARCH64} ON PARENT_SCOPE)
+  else()
+    set(${OUT_ARCH64} OFF PARENT_SCOPE)
+  endif()
+endfunction()
+
+#--------------------------------------------------------------------------------#
+# 2022-02-14 On some x64 platforms (encountered on Ubuntu 20.04 in Win11/WSL2)
+# CMake does NOT enable FIND_LIBRARY_USE_LIB64_PATHS as it should, which leads to
+# Intel oneAPI libs not being found in .../lib64 folders.
+# See: https://cmake.org/cmake/help/latest/command/find_library.html
+get_property(USE_LIB32 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS)
+get_property(USE_LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS)
+is_architecture_x64(ARCH64)
+if(ARCH64 AND NOT USE_LIB32)
+  set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
+elseif(NOT USE_LIB64)
+  set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS ON)
+endif()
+#--------------------------------------------------------------------------------#
+
+if(MSVC)
+
+  # 2022-02-14: find_library() can't locate libittnotify.lib on Windows - not sure why...
+  # using find_file() instead as a workaround
+  get_property(USE_LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS)
+  if(USE_LIB64)
+    find_file(ITT_LIBRARY libittnotify.lib ${VTune_ROOT}/lib64)
+  else()
+    find_file(ITT_LIBRARY libittnotify.lib ${VTune_ROOT}/lib32)
+  endif()
+  find_path(ITT_INCLUDE_DIR NAMES ittnotify.h HINTS ${VTune_ROOT}/include)
+  include(FindPackageHandleStandardArgs)
+  find_package_handle_standard_args(ITT DEFAULT_MSG ITT_LIBRARY ITT_INCLUDE_DIR)
+
+else()
+
+  # Just reuse find module implemented in Caliper
+  set(ITT_PREFIX ${VTune_ROOT})
+  include(${PROJECT_SOURCE_DIR}/tpls/Caliper/cmake/FindITTAPI.cmake)
+
+endif()
+
+# Set up imported target
+if(NOT TARGET ittapi) # Note: "ittnotify" is a target created by Apex
+  add_library(ittapi INTERFACE IMPORTED)
+  target_include_directories(ittapi INTERFACE ${ITT_INCLUDE_DIR})
+  target_link_libraries(ittapi INTERFACE ${ITT_LIBRARY})
+endif()
diff --git a/cmake/FindPAPI.cmake b/cmake/FindPAPI.cmake
new file mode 100644
index 000000000..d2573fa8c
--- /dev/null
+++ b/cmake/FindPAPI.cmake
@@ -0,0 +1,82 @@
+#[=======================================================================[.rst:
+FindPAPI
+--------
+
+Find the native PAPI headers and libraries.
+
+IMPORTED Targets
+^^^^^^^^^^^^^^^^
+
+This module defines :prop_tgt:`IMPORTED` target ``PAPI::PAPI``, if PAPI has been found.
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This module defines the following variables:
+
+``PAPI_FOUND``
+  "True" if ``papi`` found.
+
+``PAPI_INCLUDE_DIR``
+  where to find ``papi``/``papi.h``, etc.
+
+``PAPI_LIBRARY``
+  List of libraries when using ``papi``.
+
+``PAPI_VERSION_STRING``
+  The version of ``papi`` found.
+
+This module defines ``PAPI::PAPI`` target for PAPI library.
+
+#]=======================================================================]
+
+# Look for the header file.
+find_path(
+  PAPI_INCLUDE_DIR
+  NAMES papi.h
+  HINTS /usr/include /usr/local/include)
+mark_as_advanced(PAPI_INCLUDE_DIR)
+
+# Look for the library (sorted from most current/relevant entry to least).
+find_library(
+  PAPI_LIBRARY
+  NAMES papi
+  HINTS /usr/lib /usr/local/lib)
+mark_as_advanced(PAPI_LIBRARY)
+
+# Version is parsed from a papi.h line such as: #define PAPI_VERSION PAPI_VERSION_NUMBER(6,0,0,1)
+
+if(PAPI_INCLUDE_DIR AND NOT PAPI_VERSION_STRING AND EXISTS "${PAPI_INCLUDE_DIR}/papi.h")
+  file(
+    STRINGS "${PAPI_INCLUDE_DIR}/papi.h"
+    PAPI_VERSION_STRING
+    REGEX "^#define[\t ]+PAPI_VERSION[\t ]+PAPI_VERSION_NUMBER\\(.*\\)")
+  string(
+    REGEX REPLACE
+    "^#define[\t ]+PAPI_VERSION[\t ]+PAPI_VERSION_NUMBER\\((.*)\\)"
+    "\\1"
+    PAPI_VERSION_STRING
+    "${PAPI_VERSION_STRING}")
+  string(REPLACE "," "." PAPI_VERSION_STRING "${PAPI_VERSION_STRING}")
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+  PAPI
+  REQUIRED_VARS PAPI_LIBRARY PAPI_INCLUDE_DIR
+  VERSION_VAR PAPI_VERSION_STRING)
+
+
+# Skip target if already defined, or if PAPI was not found (nothing to import)
+if(TARGET PAPI::PAPI OR NOT PAPI_FOUND)
+  return()
+endif()
+
+# Set up imported target
+add_library(PAPI::PAPI INTERFACE IMPORTED)
+
+target_include_directories(PAPI::PAPI INTERFACE ${PAPI_INCLUDE_DIR})
+target_link_libraries(PAPI::PAPI INTERFACE ${PAPI_LIBRARY})
+
+set(PAPI_INCLUDE_DIRS ${PAPI_INCLUDE_DIR})
+set(PAPI_LIBRARIES ${PAPI_LIBRARY})
diff --git a/cmake/configure_tpls.cmake b/cmake/configure_tpls.cmake
new file mode 100644
index 000000000..398eabba6
--- /dev/null
+++ b/cmake/configure_tpls.cmake
@@ -0,0 +1,69 @@
+# Alter some Caliper defaults, based on Kokkos and Tools settings
+# see https://software.llnl.gov/Caliper/build.html
+# Reads variables set by the top-level CMakeLists.txt (KOKKOSTOOLS_HAS_*,
+# KokkosTools_HAS_PAPI, PAPI_ROOT, Variorum_ROOT, VTune_ROOT, Kokkos_*).
+macro(configure_caliper)
+  set_cache(CALIPER_OPTION_PREFIX ON)
+  set_cache(CALIPER_WITH_KOKKOS ON)
+  if(KOKKOSTOOLS_HAS_MPI) # 0|1, set by the top-level CMakeLists.txt
+    set_cache(CALIPER_WITH_MPI ON)
+  endif()
+  if(KokkosTools_HAS_PAPI)
+    set(PAPI_PREFIX ${PAPI_ROOT})
+    set_cache(CALIPER_WITH_PAPI ON)
+  endif()
+  if(KOKKOSTOOLS_HAS_VARIORUM)
+    set_cache(CALIPER_WITH_VARIORUM ON)
+    set(VARIORUM_PREFIX ${Variorum_ROOT})
+  endif()
+  if(KOKKOSTOOLS_HAS_VTUNE)
+    set_cache(CALIPER_WITH_VTUNE ON)
+    set(ITT_PREFIX ${VTune_ROOT})
+  endif()
+  if(Kokkos_FOUND)
+    if(Kokkos_ENABLE_CUDA)
+      # TODO: check if this works...
+      set_cache(CALIPER_WITH_NVTX ON)
+      set_cache(CALIPER_WITH_CUPTI ON)
+    endif()
+    if(Kokkos_ENABLE_HIP)
+      # TODO: check if this works...
+      set_cache(CALIPER_WITH_ROCTX ON)
+      set_cache(CALIPER_WITH_ROCTRACER ON)
+    endif()
+  endif()
+endmacro()
+
+# Alter some Apex defaults, based on Kokkos and Tools settings
+# See http://uo-oaciss.github.io/apex/install/#standalone_installation
+macro(configure_apex)
+  if(BUILD_SHARED_LIBS)
+    set_cache(BUILD_STATIC_EXECUTABLES OFF)
+  else()
+    set_cache(BUILD_STATIC_EXECUTABLES ON)
+  endif()
+  set_cache(APEX_WITH_PAPI ${KokkosTools_ENABLE_PAPI})
+  set_cache(APEX_WITH_MPI ${KokkosTools_ENABLE_MPI})
+
+  ## TODO: Build Binutils if not installed (detect?) and the compiler is NOT gcc/clang/icc (check CMake vars)
+  # set(BFD_ROOT /path/to/binutils)
+  # option(APEX_BUILD_BFD "Build Binutils library if not found" ON)
+
+  ## TODO: Build OMPT if compilers >= [gcc/clang/icc] and we're NOT offloading to GPU
+  ## Note: OMPT should work nice with Intel compiler
+  # option(APEX_BUILD_OMPT "Build OpenMP runtime with OMPT if support not found" ON)
+
+  if(Kokkos_ENABLE_CUDA)
+    option(APEX_WITH_CUDA "Enable CUDA (CUPTI) support" ON)
+    # TODO: check if we need to set CUPTI_ROOT and/or NVML_ROOT here
+  endif()
+
+  if(Kokkos_ENABLE_HIP)
+    option(APEX_WITH_HIP "Enable HIP (ROCTRACER) support" ON)
+    ## TODO: check/set paths (we can skip roctracer, rocprofiler, rocm_smi if they're located in ${ROCM_PATH})
+    # set(ROCM_ROOT ${ROCM_PATH})
+    # set(ROCTX_ROOT ${ROCM_PATH}/roctracer)
+    # set(ROCTRACER_ROOT ${ROCM_PATH}/roctracer)
+    # set(RSMI_ROOT ${ROCM_PATH}/rocm_smi)
+  endif()
+endmacro()
diff --git a/cmake/configure_variorum.cmake b/cmake/configure_variorum.cmake
new file mode 100644
index 000000000..b6280dc0a
--- /dev/null
+++ b/cmake/configure_variorum.cmake
@@ -0,0 +1,31 @@
+# Based on Makefile authored by Zachary S. Frye (CASC at LLNL) in July 2020
+
+set(KOKKOSTOOLS_HAS_VARIORUM OFF)
+
+# Set Variorum_ROOT for find_package() based on VARIORUM_ROOT (CMake or environment variable).
+# Precedence: Variorum_ROOT, then the VARIORUM_ROOT CMake variable, then the
+# VARIORUM_ROOT environment variable.
+set(MSG_NOTFOUND "set Variorum_ROOT CMake variable or VARIORUM_ROOT environment variable to build Variorum connector")
+if(NOT DEFINED Variorum_ROOT)
+  if(DEFINED VARIORUM_ROOT)
+    set(Variorum_ROOT ${VARIORUM_ROOT})
+    set(MSG_NOTFOUND "check VARIORUM_ROOT CMake variable (${VARIORUM_ROOT})")
+  elseif(DEFINED ENV{VARIORUM_ROOT})
+    set(Variorum_ROOT $ENV{VARIORUM_ROOT})
+    set(MSG_NOTFOUND "check VARIORUM_ROOT environment variable ($ENV{VARIORUM_ROOT})")
+  endif()
+else()
+  set(MSG_NOTFOUND "check Variorum_ROOT (${Variorum_ROOT})")
+endif()
+
+# Only derive Variorum_DIR when a root is known (avoids a bogus "/lib/cmake")
+if(Variorum_ROOT)
+  set(Variorum_DIR ${Variorum_ROOT}/lib/cmake)
+endif()
+find_package(Variorum QUIET)
+
+if(Variorum_FOUND)
+  set(KOKKOSTOOLS_HAS_VARIORUM TRUE)
+else()
+  message(WARNING "Variorum not found: ${MSG_NOTFOUND}")
+endif()
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
new file mode 100644
index 000000000..549ec8d82
--- /dev/null
+++ b/cmake/utils.cmake
@@ -0,0 +1,54 @@
+# kp_add_library(<target> [<add_library arguments>...])
+# Creates <target> with add_library() and records it in the cached lists
+# SINGLELIB_PROFILERS (linked into the single library) and EXPORT_TARGETS.
+function(kp_add_library TARGET)
+  add_library(${TARGET} ${ARGN}) # SOURCES = ${ARGN}
+
+  # add this library to the list of profilers linked to single library
+  list(APPEND SINGLELIB_PROFILERS ${TARGET})
+  set(SINGLELIB_PROFILERS ${SINGLELIB_PROFILERS} CACHE STRING "" FORCE)
+
+  # add this library to exported targets
+  list(APPEND EXPORT_TARGETS ${TARGET})
+  set(EXPORT_TARGETS ${EXPORT_TARGETS} CACHE STRING "" FORCE)
+endfunction()
+
+# set_cache(<name> <value>)
+# Creates the BOOL cache entry <name> holding <value>. No FORCE is used, so an
+# entry that is already in the cache keeps its current value.
+macro(set_cache NAME VAL)
+  set(${NAME} "${VAL}" CACHE BOOL "")
+endmacro()
+
+# acquire_kokkos_config()
+# Locates Kokkos (an installed package, or one already provided by a
+# superproject) and copies the Kokkos settings listed in the foreach() below
+# into the caller's scope. Kokkos_FOUND_MSG is only set when Kokkos was located.
+function(acquire_kokkos_config)
+  if(NOT TARGET Kokkos::kokkos)
+    find_package(Kokkos QUIET)
+    if(Kokkos_FOUND)
+      set(Kokkos_FOUND_MSG "Found Kokkos installation")
+      get_property(Kokkos_INSTALL_DIR TARGET Kokkos::kokkoscore PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+      cmake_path(GET Kokkos_INSTALL_DIR PARENT_PATH Kokkos_INSTALL_DIR)
+    endif()
+  elseif(DEFINED Kokkos_DEVICES)
+    set(Kokkos_FOUND_MSG "Found Kokkos package already imported by superproject")
+    get_property(Kokkos_INSTALL_DIR TARGET Kokkos::kokkoscore PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+    cmake_path(GET Kokkos_INSTALL_DIR PARENT_PATH Kokkos_INSTALL_DIR)
+  else()
+    set(Kokkos_FOUND_MSG "Found Kokkos included by source in superproject")
+    get_property(Kokkos_INSTALL_DIR TARGET Kokkos::kokkos PROPERTY BINARY_DIR)
+    # Include Kokkos exported settings like we would have them from find_package(Kokkos)
+    set(Kokkos_FIND_QUIETLY ON)
+    include(${Kokkos_INSTALL_DIR}/KokkosConfigCommon.cmake)
+  endif()
+  foreach(VAR_NAME Kokkos_FOUND Kokkos_FOUND_MSG Kokkos_INSTALL_DIR
+    # Settings exported by Kokkos
+    Kokkos_DEVICES Kokkos_ARCH Kokkos_TPLS Kokkos_CXX_COMPILER Kokkos_CXX_COMPILER_ID Kokkos_OPTIONS
+    Kokkos_ENABLE_OPENMP Kokkos_ENABLE_CUDA Kokkos_ENABLE_HIP
+    # Kokkos exports the flags as well
+    CMAKE_CXX_FLAGS)
+    set(${VAR_NAME} ${${VAR_NAME}} PARENT_SCOPE)
+  endforeach()
+endfunction()
diff --git a/common/kernel-filter/CMakeLists.txt b/common/kernel-filter/CMakeLists.txt
new file mode 100644
index 000000000..ae5cad488
--- /dev/null
+++ b/common/kernel-filter/CMakeLists.txt
@@ -0,0 +1 @@
+add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp)
\ No newline at end of file
diff --git a/common/kp_config.hpp.in b/common/kp_config.hpp.in
new file mode 100644
index 000000000..b8533c25b
--- /dev/null
+++ b/common/kp_config.hpp.in
@@ -0,0 +1,8 @@
+// Note: keep legacy 0|1 or update `#if USE_MPI` checks
+
+#define USE_MPI @KOKKOSTOOLS_HAS_MPI@
+
+#cmakedefine KOKKOSTOOLS_HAS_NVPROF
+#cmakedefine KOKKOSTOOLS_HAS_CALIPER
+#cmakedefine KOKKOSTOOLS_HAS_VARIORUM
+#cmakedefine KOKKOSTOOLS_HAS_VTUNE
diff --git a/common/makefile-only/kp_config.hpp b/common/makefile-only/kp_config.hpp
new file mode 100644
index 000000000..4bd8276f3
--- /dev/null
+++ b/common/makefile-only/kp_config.hpp
@@ -0,0 +1 @@
+// empty file
diff --git a/debugging/kernel-logger/CMakeLists.txt b/debugging/kernel-logger/CMakeLists.txt
new file mode 100644
index 000000000..6f95865d0
--- /dev/null
+++ b/debugging/kernel-logger/CMakeLists.txt
@@ -0,0 +1 @@
+add_library(kp_kernel_logger ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_logger.cpp)
\ No newline at end of file
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
new file mode 100644
index 000000000..749b4464d
--- /dev/null
+++ b/example/CMakeLists.txt
@@ -0,0 +1,62 @@
+# Find Kokkos
+find_package(Kokkos QUIET)
+if(NOT Kokkos_FOUND)
+  message(FATAL_ERROR "Kokkos not found, set Kokkos_ROOT properly (current Kokkos_ROOT=${Kokkos_ROOT})")
+endif()
+# Derive the install prefix in a scratch variable so that Kokkos_DIR itself
+# (the location find_package() reported) is left unmodified.
+set(kokkos_prefix "${Kokkos_DIR}")
+foreach(_i RANGE 1 3) # cut .../lib/cmake/Kokkos suffix
+  cmake_path(GET kokkos_prefix PARENT_PATH kokkos_prefix)
+endforeach()
+message(STATUS "Found installed Kokkos at ${kokkos_prefix}")
+
+# Create target executable
+set(TEST_APP kp_example)
+add_executable(${TEST_APP} main.cpp)
+set(LIBS "Kokkos::kokkos;kokkostools")
+if(KOKKOSTOOLS_HAS_MPI) # 0|1, set by the top-level CMakeLists.txt
+  list(APPEND LIBS MPI::MPI_CXX)
+endif()
+target_link_libraries(${TEST_APP} PRIVATE ${LIBS})
+
+# Create tests
+# add_kp_test(<name> <profiler> [<profiler config>])
+# Registers the CTest test test_kokkos_tools_<name>, which runs the example
+# executable with the remaining arguments as its command line.
+macro(add_kp_test NAME)
+  add_test(NAME test_kokkos_tools_${NAME}
+    COMMAND ${TEST_APP} ${ARGN}
+    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/..")
+endmacro()
+# TODO: Read profiling results and check if the profiler had successfully run
+# and exported output in expected format, fail the test otherwise.
+if(NOT WIN32)
+  add_kp_test(kernel_timer "kernel-timer")
+  add_kp_test(kernel_timer_json "kernel-timer-json")
+  add_kp_test(memory_events "memory-events")
+  add_kp_test(memory_usage "memory-usage")
+  add_kp_test(chrome_tracing "chrome-tracing")
+  add_kp_test(space_time_stack "space-time-stack")
+  if(NOT APPLE) # the top-level CMakeLists.txt skips systemtap-connector on Apple
+    add_kp_test(systemtap_connector "systemtap-connector")
+  endif()
+  add_kp_test(highwater_mark "highwater-mark")
+  if(KOKKOSTOOLS_HAS_MPI)
+    add_kp_test(highwater_mark_mpi "highwater-mark-mpi")
+  endif()
+endif()
+if(KokkosTools_ENABLE_CALIPER)
+  add_kp_test(caliper "caliper" "runtime-report(profile.kokkos)")
+endif()
+if(KOKKOSTOOLS_HAS_VARIORUM)
+  add_kp_test(variorum "variorum")
+endif()
+if(KOKKOSTOOLS_HAS_VTUNE)
+  add_kp_test(vtune_connector "vtune-connector")
+  add_kp_test(vtune_focused_connector "vtune-focused-connector")
+endif()
+if(KOKKOSTOOLS_HAS_NVPROF)
+  add_kp_test(nvprof_connector "nvprof-connector")
+  add_kp_test(nvprof_focused_connector "nvprof-focused-connector")
+endif()
diff --git a/example/kernels.hpp b/example/kernels.hpp
new file mode 100644
index 000000000..924a0e19e
--- /dev/null
+++ b/example/kernels.hpp
@@ -0,0 +1,37 @@
+#pragma once
+//-------------------------------------------------------------------------------------//
+
+// Sample computation: S(N) = 1 + 2 + 3 + ... + N
+// Tests: regions, allocation, parallel for and reduction
+// Returns 0 if the sum matches the closed form (SIZE + 1) * SIZE / 2, else 1.
+template <typename data_type>
+int run_calculation(const data_type SIZE) {
+  Kokkos::Profiling::pushRegion("Computation");
+
+  Kokkos::View<data_type*> data(Kokkos::ViewAllocateWithoutInitializing("data"),
+                                SIZE);
+  Kokkos::parallel_for(
+      "initialize()", SIZE, KOKKOS_LAMBDA(data_type i) { data(i) = i; });
+  Kokkos::fence();
+
+  data_type sum = 0;
+  Kokkos::parallel_reduce(
+      "accumulate()", SIZE,
+      KOKKOS_LAMBDA(data_type i, data_type & lsum) { lsum += 1 + data(i); },
+      sum);
+  Kokkos::fence();
+
+  Kokkos::Profiling::popRegion();
+
+  // check results
+  const data_type check = (SIZE + 1) * SIZE / 2;
+  if (sum != check) {
+    std::cout << "BAD result, got S(" << SIZE << ") = " << sum << " (expected "
+              << check << ")" << std::endl;
+    return 1;
+  }
+  std::cout << "Result OK: S(" << SIZE << ") = " << sum << std::endl;
+  return 0;
+}
+
+//-------------------------------------------------------------------------------------//
diff --git a/example/main.cpp b/example/main.cpp
new file mode 100644
index 000000000..c702f8a44
--- /dev/null
+++ b/example/main.cpp
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <Kokkos_Core.hpp>
+#include "kp_all.hpp"
+#include "kernels.hpp"
+
+#if USE_MPI
+#include <mpi.h>
+#endif
+
+//-------------------------------------------------------------------------------------//
+
+int main(int argc, char *argv[]) {
+#if USE_MPI
+  MPI_Init(&argc, &argv);
+#endif
+
+  const char *profiler_name   = argc >= 2 ? argv[1] : "";
+  const char *profiler_config = argc >= 3 ? argv[2] : "";
+
+  auto eventSet = KokkosTools::get_event_set(profiler_name, profiler_config);
+
+  // Note: callbacks must be set before Kokkos::initialize()
+  Kokkos::Tools::Experimental::set_callbacks(eventSet);
+  Kokkos::initialize(argc, argv);
+
+  Kokkos::print_configuration(std::cout);
+
+  std::cout << std::endl;
+  // 64-bit element type: S(100000) = 5000050000 does not fit in a 32-bit int
+  int ret_code = run_calculation<long long>(100000);
+  std::cout << std::endl;
+
+  Kokkos::finalize();
+#if USE_MPI
+  MPI_Finalize();
+#endif
+
+  return ret_code;
+}
+
+//-------------------------------------------------------------------------------------//
diff --git a/profiling/all/CMakeLists.txt b/profiling/all/CMakeLists.txt
new file mode 100644
index 000000000..ce8b13e27
--- /dev/null
+++ b/profiling/all/CMakeLists.txt
@@ -0,0 +1,25 @@
+set(LIBNAME kokkostools)
+
+#if(NOT SINGLELIB_PROFILERS)
+#  message(FATAL_ERROR "Can't build ${kokkostools}: no profilers enabled")
+#  return()
+#endif()
+
+add_library(${LIBNAME} ${KOKKOSTOOLS_LIBRARY_MODE} kp_all.cpp)
+
+# NOTE(review): the three generator expressions below were unreadable in the
+# reviewed copy (only "$ $ $" remained). They are reconstructed from variables
+# defined by the top-level CMakeLists.txt - confirm against the original file.
+target_include_directories(${LIBNAME}
+  PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+         $<BUILD_INTERFACE:${COMMON_HEADERS_PATH}>
+         $<INSTALL_INTERFACE:${EXPORT_INCLUDE_DIR}>)
+
+if(SINGLELIB_PROFILERS)
+  target_link_libraries(${LIBNAME} PUBLIC ${SINGLELIB_PROFILERS})
+endif()
+
+file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS kp_all.hpp "${COMMON_HEADERS_PATH}/*.hpp")
+
+install(FILES ${HEADER_FILES} DESTINATION ${EXPORT_INCLUDE_DIR})
+install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME})
\ No newline at end of file
diff --git a/profiling/all/impl/Kokkos_Profiling_C_Interface.h b/profiling/all/impl/Kokkos_Profiling_C_Interface.h
new file mode 100644
index 000000000..33eaa3920
--- /dev/null
+++ b/profiling/all/impl/Kokkos_Profiling_C_Interface.h
@@ -0,0 +1,268 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 4.0
+//       Copyright (2022) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_PROFILING_C_INTERFACE_HPP +#define KOKKOS_PROFILING_C_INTERFACE_HPP + +#ifdef __cplusplus +#include +#include +#else +#include +#include +#include +#endif + +#define KOKKOSP_INTERFACE_VERSION 20210623 + +// Profiling + +struct Kokkos_Profiling_KokkosPDeviceInfo { + size_t deviceID; +}; + +struct Kokkos_Profiling_SpaceHandle { + char name[64]; +}; + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_initFunction)( + const int, const uint64_t, const uint32_t, + struct Kokkos_Profiling_KokkosPDeviceInfo*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_finalizeFunction)(); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_parseArgsFunction)(int, char**); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_printHelpFunction)(char*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_beginFunction)(const char*, const uint32_t, + uint64_t*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_endFunction)(uint64_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_pushFunction)(const char*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_popFunction)(); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_allocateDataFunction)( + const struct Kokkos_Profiling_SpaceHandle, const char*, const void*, + const uint64_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_deallocateDataFunction)( + const struct Kokkos_Profiling_SpaceHandle, const char*, const 
void*, + const uint64_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_createProfileSectionFunction)(const char*, + uint32_t*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_startProfileSectionFunction)(const uint32_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_stopProfileSectionFunction)(const uint32_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_destroyProfileSectionFunction)(const uint32_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_profileEventFunction)(const char*); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_beginDeepCopyFunction)( + struct Kokkos_Profiling_SpaceHandle, const char*, const void*, + struct Kokkos_Profiling_SpaceHandle, const char*, const void*, uint64_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_endDeepCopyFunction)(); +typedef void (*Kokkos_Profiling_beginFenceFunction)(const char*, const uint32_t, + uint64_t*); +typedef void (*Kokkos_Profiling_endFenceFunction)(uint64_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_dualViewSyncFunction)(const char*, + const void* const, bool); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_dualViewModifyFunction)(const char*, + const void* const, + bool); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_declareMetadataFunction)(const char*, + const char*); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_toolInvokedFenceFunction)(const uint32_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_functionPointer)(); +struct Kokkos_Tools_ToolProgrammingInterface { + 
Kokkos_Tools_toolInvokedFenceFunction fence; + // allow addition of more actions + Kokkos_Tools_functionPointer padding[31]; +}; + +struct Kokkos_Tools_ToolSettings { + bool requires_global_fencing; + bool padding[255]; +}; + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_provideToolProgrammingInterfaceFunction)( + const uint32_t, struct Kokkos_Tools_ToolProgrammingInterface); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_requestToolSettingsFunction)( + const uint32_t, struct Kokkos_Tools_ToolSettings*); + +// Tuning + +#define KOKKOS_TOOLS_TUNING_STRING_LENGTH 64 +typedef char Kokkos_Tools_Tuning_String[KOKKOS_TOOLS_TUNING_STRING_LENGTH]; +union Kokkos_Tools_VariableValue_ValueUnion { + int64_t int_value; + double double_value; + Kokkos_Tools_Tuning_String string_value; +}; + +union Kokkos_Tools_VariableValue_ValueUnionSet { + int64_t* int_value; + double* double_value; + Kokkos_Tools_Tuning_String* string_value; +}; + +struct Kokkos_Tools_ValueSet { + size_t size; + union Kokkos_Tools_VariableValue_ValueUnionSet values; +}; + +enum Kokkos_Tools_OptimizationType { + Kokkos_Tools_Minimize, + Kokkos_Tools_Maximize +}; + +struct Kokkos_Tools_OptimzationGoal { + size_t type_id; + enum Kokkos_Tools_OptimizationType goal; +}; + +struct Kokkos_Tools_ValueRange { + union Kokkos_Tools_VariableValue_ValueUnion lower; + union Kokkos_Tools_VariableValue_ValueUnion upper; + union Kokkos_Tools_VariableValue_ValueUnion step; + bool openLower; + bool openUpper; +}; + +enum Kokkos_Tools_VariableInfo_ValueType { + kokkos_value_double, + kokkos_value_int64, + kokkos_value_string, +}; + +enum Kokkos_Tools_VariableInfo_StatisticalCategory { + kokkos_value_categorical, // unordered distinct objects + kokkos_value_ordinal, // ordered distinct objects + kokkos_value_interval, // ordered distinct objects for which distance matters + kokkos_value_ratio // ordered distinct objects for which distance matters, + // 
division matters, and the concept of zero exists +}; + +enum Kokkos_Tools_VariableInfo_CandidateValueType { + kokkos_value_set, // I am one of [2,3,4,5] + kokkos_value_range, // I am somewhere in [2,12) + kokkos_value_unbounded // I am [text/int/float], but we don't know at + // declaration time what values are appropriate. Only + // valid for Context Variables +}; + +union Kokkos_Tools_VariableInfo_SetOrRange { + struct Kokkos_Tools_ValueSet set; + struct Kokkos_Tools_ValueRange range; +}; + +struct Kokkos_Tools_VariableInfo { + enum Kokkos_Tools_VariableInfo_ValueType type; + enum Kokkos_Tools_VariableInfo_StatisticalCategory category; + enum Kokkos_Tools_VariableInfo_CandidateValueType valueQuantity; + union Kokkos_Tools_VariableInfo_SetOrRange candidates; + void* toolProvidedInfo; +}; + +struct Kokkos_Tools_VariableValue { + size_t type_id; + union Kokkos_Tools_VariableValue_ValueUnion value; + struct Kokkos_Tools_VariableInfo* metadata; +}; + +typedef void (*Kokkos_Tools_outputTypeDeclarationFunction)( + const char*, const size_t, struct Kokkos_Tools_VariableInfo* info); +typedef void (*Kokkos_Tools_inputTypeDeclarationFunction)( + const char*, const size_t, struct Kokkos_Tools_VariableInfo* info); + +typedef void (*Kokkos_Tools_requestValueFunction)( + const size_t, const size_t, const struct Kokkos_Tools_VariableValue*, + const size_t count, struct Kokkos_Tools_VariableValue*); +typedef void (*Kokkos_Tools_contextBeginFunction)(const size_t); +typedef void (*Kokkos_Tools_contextEndFunction)( + const size_t, struct Kokkos_Tools_VariableValue); +typedef void (*Kokkos_Tools_optimizationGoalDeclarationFunction)( + const size_t, const struct Kokkos_Tools_OptimzationGoal goal); + +struct Kokkos_Profiling_EventSet { + Kokkos_Profiling_initFunction init; + Kokkos_Profiling_finalizeFunction finalize; + Kokkos_Profiling_parseArgsFunction parse_args; + Kokkos_Profiling_printHelpFunction print_help; + Kokkos_Profiling_beginFunction begin_parallel_for; + 
Kokkos_Profiling_endFunction end_parallel_for; + Kokkos_Profiling_beginFunction begin_parallel_reduce; + Kokkos_Profiling_endFunction end_parallel_reduce; + Kokkos_Profiling_beginFunction begin_parallel_scan; + Kokkos_Profiling_endFunction end_parallel_scan; + Kokkos_Profiling_pushFunction push_region; + Kokkos_Profiling_popFunction pop_region; + Kokkos_Profiling_allocateDataFunction allocate_data; + Kokkos_Profiling_deallocateDataFunction deallocate_data; + Kokkos_Profiling_createProfileSectionFunction create_profile_section; + Kokkos_Profiling_startProfileSectionFunction start_profile_section; + Kokkos_Profiling_stopProfileSectionFunction stop_profile_section; + Kokkos_Profiling_destroyProfileSectionFunction destroy_profile_section; + Kokkos_Profiling_profileEventFunction profile_event; + Kokkos_Profiling_beginDeepCopyFunction begin_deep_copy; + Kokkos_Profiling_endDeepCopyFunction end_deep_copy; + Kokkos_Profiling_beginFenceFunction begin_fence; + Kokkos_Profiling_endFenceFunction end_fence; + Kokkos_Profiling_dualViewSyncFunction sync_dual_view; + Kokkos_Profiling_dualViewModifyFunction modify_dual_view; + Kokkos_Profiling_declareMetadataFunction declare_metadata; + Kokkos_Tools_provideToolProgrammingInterfaceFunction + provide_tool_programming_interface; + Kokkos_Tools_requestToolSettingsFunction request_tool_settings; + char profiling_padding[9 * sizeof(Kokkos_Tools_functionPointer)]; + Kokkos_Tools_outputTypeDeclarationFunction declare_output_type; + Kokkos_Tools_inputTypeDeclarationFunction declare_input_type; + Kokkos_Tools_requestValueFunction request_output_values; + Kokkos_Tools_contextBeginFunction begin_tuning_context; + Kokkos_Tools_contextEndFunction end_tuning_context; + Kokkos_Tools_optimizationGoalDeclarationFunction declare_optimization_goal; + char padding[232 * + sizeof( + Kokkos_Tools_functionPointer)]; // allows us to add another + // 256 events to the Tools + // interface without + // changing struct layout +}; + +#endif // 
KOKKOS_PROFILING_C_INTERFACE_HPP diff --git a/profiling/all/impl/Kokkos_Profiling_DeviceInfo.hpp b/profiling/all/impl/Kokkos_Profiling_DeviceInfo.hpp new file mode 100644 index 000000000..1db44815c --- /dev/null +++ b/profiling/all/impl/Kokkos_Profiling_DeviceInfo.hpp @@ -0,0 +1,28 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSP_DEVICE_INFO_HPP +#define KOKKOSP_DEVICE_INFO_HPP + +#include +#include +namespace Kokkos { +namespace Profiling { +using KokkosPDeviceInfo = Kokkos_Profiling_KokkosPDeviceInfo; +} // namespace Profiling +} // namespace Kokkos + +#endif diff --git a/profiling/all/impl/Kokkos_Profiling_Interface.hpp b/profiling/all/impl/Kokkos_Profiling_Interface.hpp new file mode 100644 index 000000000..37acc23b6 --- /dev/null +++ b/profiling/all/impl/Kokkos_Profiling_Interface.hpp @@ -0,0 +1,240 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSP_INTERFACE_HPP +#define KOKKOSP_INTERFACE_HPP + +#include +#include + +#include + +// NOTE: in this Kokkos::Profiling block, do not define anything that shouldn't +// exist should Profiling be disabled + +namespace Kokkos { +namespace Tools { +namespace Experimental { + +constexpr const uint32_t NumReservedDeviceIDs = 1; + +enum SpecialSynchronizationCases : int { + GlobalDeviceSynchronization = 1, + DeepCopyResourceSynchronization = 2, +}; + +enum struct DeviceType { + Serial, + OpenMP, + Cuda, + HIP, + OpenMPTarget, + HPX, + Threads, + SYCL, + Unknown +}; + +struct ExecutionSpaceIdentifier { + DeviceType type; + uint32_t device_id; + uint32_t instance_id; +}; +inline DeviceType devicetype_from_uint32t(const uint32_t in) { + switch (in) { + case 0: return DeviceType::Serial; + case 1: return DeviceType::OpenMP; + case 2: return DeviceType::Cuda; + case 3: return DeviceType::HIP; + case 4: return DeviceType::OpenMPTarget; + case 5: return DeviceType::HPX; + case 6: return DeviceType::Threads; + case 7: return DeviceType::SYCL; + default: return DeviceType::Unknown; // TODO: error out? 
+ } +} + +inline ExecutionSpaceIdentifier identifier_from_devid(const uint32_t in) { + // ExecutionSpaceIdentifier out; + // out.type = in >> 24; + // out.device_id = in >> 17; + // out.instance_id = ((uint32_t(-1)) << 17 ) & in; + return {devicetype_from_uint32t(in >> 24), + (~((uint32_t(-1)) << 24)) & (in >> 17), + (~((uint32_t(-1)) << 17)) & in}; +} + +template +struct DeviceTypeTraits; + +constexpr const size_t device_type_bits = 8; +constexpr const size_t instance_bits = 24; +template +constexpr uint32_t device_id_root() { + /** uncomment when C++14 is enabled + constexpr auto device_id = + static_cast(DeviceTypeTraits::id); + return (device_id << instance_bits); + */ + return 0; +} +template +inline uint32_t device_id(ExecutionSpace const& space) noexcept { + return device_id_root() + space.impl_instance_id(); +} +} // namespace Experimental +} // namespace Tools +} // end namespace Kokkos + +#if defined(KOKKOS_ENABLE_LIBDL) +// We check at configure time that libdl is available. +#include +#endif + +#include +#include + +namespace Kokkos { +namespace Tools { + +using SpaceHandle = Kokkos_Profiling_SpaceHandle; + +} // namespace Tools + +namespace Tools { + +namespace Experimental { +using EventSet = Kokkos_Profiling_EventSet; +static_assert(sizeof(EventSet) / sizeof(Kokkos_Tools_functionPointer) == 275, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); +static_assert(sizeof(Kokkos_Tools_ToolSettings) / sizeof(bool) == 256, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); +static_assert(sizeof(Kokkos_Tools_ToolProgrammingInterface) / + sizeof(Kokkos_Tools_functionPointer) == + 32, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); + +using toolInvokedFenceFunction = Kokkos_Tools_toolInvokedFenceFunction; +using provideToolProgrammingInterfaceFunction = + Kokkos_Tools_provideToolProgrammingInterfaceFunction; +using 
requestToolSettingsFunction = Kokkos_Tools_requestToolSettingsFunction; +using ToolSettings = Kokkos_Tools_ToolSettings; +using ToolProgrammingInterface = Kokkos_Tools_ToolProgrammingInterface; +} // namespace Experimental +using initFunction = Kokkos_Profiling_initFunction; +using finalizeFunction = Kokkos_Profiling_finalizeFunction; +using parseArgsFunction = Kokkos_Profiling_parseArgsFunction; +using printHelpFunction = Kokkos_Profiling_printHelpFunction; +using beginFunction = Kokkos_Profiling_beginFunction; +using endFunction = Kokkos_Profiling_endFunction; +using pushFunction = Kokkos_Profiling_pushFunction; +using popFunction = Kokkos_Profiling_popFunction; +using allocateDataFunction = Kokkos_Profiling_allocateDataFunction; +using deallocateDataFunction = Kokkos_Profiling_deallocateDataFunction; +using createProfileSectionFunction = + Kokkos_Profiling_createProfileSectionFunction; +using startProfileSectionFunction = + Kokkos_Profiling_startProfileSectionFunction; +using stopProfileSectionFunction = Kokkos_Profiling_stopProfileSectionFunction; +using destroyProfileSectionFunction = + Kokkos_Profiling_destroyProfileSectionFunction; +using profileEventFunction = Kokkos_Profiling_profileEventFunction; +using beginDeepCopyFunction = Kokkos_Profiling_beginDeepCopyFunction; +using endDeepCopyFunction = Kokkos_Profiling_endDeepCopyFunction; +using beginFenceFunction = Kokkos_Profiling_beginFenceFunction; +using endFenceFunction = Kokkos_Profiling_endFenceFunction; +using dualViewSyncFunction = Kokkos_Profiling_dualViewSyncFunction; +using dualViewModifyFunction = Kokkos_Profiling_dualViewModifyFunction; +using declareMetadataFunction = Kokkos_Profiling_declareMetadataFunction; + +} // namespace Tools + +} // namespace Kokkos + +// Profiling + +namespace Kokkos { + +namespace Profiling { + +/** The Profiling namespace is being renamed to Tools. 
+ * This is reexposing the contents of what used to be the Profiling + * Interface with their original names, to avoid breaking old code + */ + +namespace Experimental { + +using Kokkos::Tools::Experimental::device_id; +using Kokkos::Tools::Experimental::DeviceType; +using Kokkos::Tools::Experimental::DeviceTypeTraits; + +} // namespace Experimental + +using Kokkos::Tools::allocateDataFunction; +using Kokkos::Tools::beginDeepCopyFunction; +using Kokkos::Tools::beginFunction; +using Kokkos::Tools::createProfileSectionFunction; +using Kokkos::Tools::deallocateDataFunction; +using Kokkos::Tools::destroyProfileSectionFunction; +using Kokkos::Tools::endDeepCopyFunction; +using Kokkos::Tools::endFunction; +using Kokkos::Tools::finalizeFunction; +using Kokkos::Tools::initFunction; +using Kokkos::Tools::parseArgsFunction; +using Kokkos::Tools::popFunction; +using Kokkos::Tools::printHelpFunction; +using Kokkos::Tools::profileEventFunction; +using Kokkos::Tools::pushFunction; +using Kokkos::Tools::SpaceHandle; +using Kokkos::Tools::startProfileSectionFunction; +using Kokkos::Tools::stopProfileSectionFunction; + +} // namespace Profiling +} // namespace Kokkos + +// Tuning + +namespace Kokkos { +namespace Tools { +namespace Experimental { +using ValueSet = Kokkos_Tools_ValueSet; +using ValueRange = Kokkos_Tools_ValueRange; +using StatisticalCategory = Kokkos_Tools_VariableInfo_StatisticalCategory; +using ValueType = Kokkos_Tools_VariableInfo_ValueType; +using CandidateValueType = Kokkos_Tools_VariableInfo_CandidateValueType; +using SetOrRange = Kokkos_Tools_VariableInfo_SetOrRange; +using VariableInfo = Kokkos_Tools_VariableInfo; +using OptimizationGoal = Kokkos_Tools_OptimzationGoal; +using TuningString = Kokkos_Tools_Tuning_String; +using VariableValue = Kokkos_Tools_VariableValue; + +using outputTypeDeclarationFunction = + Kokkos_Tools_outputTypeDeclarationFunction; +using inputTypeDeclarationFunction = Kokkos_Tools_inputTypeDeclarationFunction; +using 
requestValueFunction = Kokkos_Tools_requestValueFunction; +using contextBeginFunction = Kokkos_Tools_contextBeginFunction; +using contextEndFunction = Kokkos_Tools_contextEndFunction; +using optimizationGoalDeclarationFunction = + Kokkos_Tools_optimizationGoalDeclarationFunction; +} // end namespace Experimental +} // end namespace Tools + +} // end namespace Kokkos + +#endif diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp new file mode 100644 index 000000000..e41e7a06c --- /dev/null +++ b/profiling/all/kp_all.cpp @@ -0,0 +1,109 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include "kp_all.hpp" + +#define KOKKOSTOOLS_EXTERN_EVENT_SET(NAMESPACE) \ + namespace KokkosTools { \ + namespace NAMESPACE { \ + extern Kokkos::Tools::Experimental::EventSet get_event_set(); \ + } \ + } + +#ifndef WIN32 +KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimer) +KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimerJSON) +KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryEvents) +KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryUsage) +KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMark) +KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMarkMPI) +KOKKOSTOOLS_EXTERN_EVENT_SET(ChromeTracing) +KOKKOSTOOLS_EXTERN_EVENT_SET(SpaceTimeStack) +KOKKOSTOOLS_EXTERN_EVENT_SET(SystemtapConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_VTUNE +KOKKOSTOOLS_EXTERN_EVENT_SET(VTuneConnector) +KOKKOSTOOLS_EXTERN_EVENT_SET(VTuneFocusedConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_VARIORUM +KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_NVPROF +KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfConnector) +KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfFocusedConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_CALIPER +namespace cali { +extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set( + const char* config_str); +} +#endif + +using EventSet = Kokkos::Tools::Experimental::EventSet; + +namespace KokkosTools { + +EventSet get_event_set(const char* profiler, const char* config_str) { + std::map handlers; +#ifndef WIN32 + handlers["kernel-timer"] = KernelTimer::get_event_set(); + handlers["kernel-timer-json"] = KernelTimerJSON::get_event_set(); + handlers["memory-events"] = MemoryEvents::get_event_set(); + handlers["memory-usage"] = MemoryUsage::get_event_set(); +#if USE_MPI + handlers["highwater-mark-mpi"] = HighwaterMarkMPI::get_event_set(); +#endif + handlers["highwater-mark"] = HighwaterMark::get_event_set(); + handlers["chrome-tracing"] = ChromeTracing::get_event_set(); + handlers["space-time-stack"] = 
SpaceTimeStack::get_event_set(); + handlers["systemtap-connector"] = SystemtapConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_VARIORUM + handlers["variorum"] = VariorumConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_VTUNE + handlers["vtune-connector"] = VTuneConnector::get_event_set(); + handlers["vtune-focused-connector"] = VTuneFocusedConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_CALIPER + handlers["caliper"] = cali::get_kokkos_event_set(config_str); +#endif +#ifdef KOKKOSTOOLS_HAS_NVPROF + handlers["nvprof-connector"] = NVProfConnector::get_event_set(); + handlers["nvprof-focused-connector"] = + NVProfFocusedConnector::get_event_set(); +#endif + auto e = handlers.find(profiler); + if (e != handlers.end()) return e->second; + + if (strlen(profiler) > 0) { + const auto msg = + std::string("Profiler not supported: ") + profiler + " (unknown tool)"; + throw std::runtime_error(msg); + } + + // default = no profiling + EventSet eventSet; + memset(&eventSet, 0, sizeof(eventSet)); + return eventSet; +} + +} // namespace KokkosTools diff --git a/profiling/all/kp_all.hpp b/profiling/all/kp_all.hpp new file mode 100644 index 000000000..ecda733fa --- /dev/null +++ b/profiling/all/kp_all.hpp @@ -0,0 +1,30 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSTOOLS_ALL_HPP +#define KOKKOSTOOLS_ALL_HPP + +#include "kp_config.hpp" +#include "impl/Kokkos_Profiling_Interface.hpp" // Note: impl/... 
is used inside the header + +namespace KokkosTools { + +Kokkos::Tools::Experimental::EventSet get_event_set(const char *profiler, + const char *options); + +} + +#endif diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp new file mode 100644 index 000000000..f8b59b164 --- /dev/null +++ b/profiling/all/kp_core.hpp @@ -0,0 +1,169 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSTOOLS_KOKKOSINTERFACE_HPP +#define KOKKOSTOOLS_KOKKOSINTERFACE_HPP + +#include + +#include "kp_config.hpp" +#include "impl/Kokkos_Profiling_Interface.hpp" // Note: impl/... 
is used inside the header + +using Kokkos::Tools::SpaceHandle; + +#ifdef WIN32 + +#define EXPOSE_INIT(FUNC_NAME) +#define EXPOSE_NOARGFUNCTION(HANDLER_NAME, FUNC_NAME) +#define EXPOSE_FINALIZE(FUNC_NAME) +#define EXPOSE_ALLOCATE(FUNC_NAME) +#define EXPOSE_DEALLOCATE(FUNC_NAME) +#define EXPOSE_PUSH_REGION(FUNC_NAME) +#define EXPOSE_POP_REGION(FUNC_NAME) +#define EXPOSE_BEGIN_PARALLEL_FOR(FUNC_NAME) +#define EXPOSE_END_PARALLEL_FOR(FUNC_NAME) +#define EXPOSE_BEGIN_PARALLEL_SCAN(FUNC_NAME) +#define EXPOSE_END_PARALLEL_SCAN(FUNC_NAME) +#define EXPOSE_BEGIN_PARALLEL_REDUCE(FUNC_NAME) +#define EXPOSE_END_PARALLEL_REDUCE(FUNC_NAME) +#define EXPOSE_BEGIN_DEEP_COPY(FUNC_NAME) +#define EXPOSE_END_DEEP_COPY(FUNC_NAME) +#define EXPOSE_CREATE_PROFILE_SECTION(FUNC_NAME) +#define EXPOSE_START_PROFILE_SECTION(FUNC_NAME) +#define EXPOSE_STOP_PROFILE_SECTION(FUNC_NAME) +#define EXPOSE_DESTROY_PROFILE_SECTION(FUNC_NAME) +#define EXPOSE_PROFILE_EVENT(FUNC_NAME) + +#else + +#define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_request_tool_settings( \ + const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \ + FUNC_NAME(num_actions, settings); \ + } + +#define EXPOSE_INIT(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_init_library( \ + const int loadSeq, const uint64_t interfaceVer, \ + const uint32_t devInfoCount, \ + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { \ + FUNC_NAME(loadSeq, interfaceVer, devInfoCount, deviceInfo); \ + } + +#define EXPOSE_NOARGFUNCTION(HANDLER_NAME, FUNC_NAME) \ + __attribute__((weak)) void HANDLER_NAME() { FUNC_NAME(); } + +#define EXPOSE_FINALIZE(FUNC_NAME) \ + EXPOSE_NOARGFUNCTION(kokkosp_finalize_library, FUNC_NAME) + +#define EXPOSE_ALLOCATE(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_allocate_data( \ + const SpaceHandle space, const char* label, const void* const ptr, \ + const uint64_t size) { \ + FUNC_NAME(space, label, ptr, size); \ + } + +#define EXPOSE_DEALLOCATE(FUNC_NAME) \ + 
__attribute__((weak)) void kokkosp_deallocate_data( \ + const SpaceHandle space, const char* label, const void* const ptr, \ + const uint64_t size) { \ + FUNC_NAME(space, label, ptr, size); \ + } + +#define EXPOSE_PUSH_REGION(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_push_profile_region(const char* name) { \ + FUNC_NAME(name); \ + } + +#define EXPOSE_POP_REGION(FUNC_NAME) \ + EXPOSE_NOARGFUNCTION(kokkosp_pop_profile_region, FUNC_NAME) + +#define EXPOSE_BEGIN_PARALLEL_FOR(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_begin_parallel_for( \ + const char* name, const uint32_t devID, uint64_t* kID) { \ + FUNC_NAME(name, devID, kID); \ + } + +#define EXPOSE_END_PARALLEL_FOR(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_end_parallel_for(const uint64_t kID) { \ + FUNC_NAME(kID); \ + } + +#define EXPOSE_BEGIN_PARALLEL_SCAN(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_begin_parallel_scan( \ + const char* name, const uint32_t devID, uint64_t* kID) { \ + FUNC_NAME(name, devID, kID); \ + } + +#define EXPOSE_END_PARALLEL_SCAN(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_end_parallel_scan(const uint64_t kID) { \ + FUNC_NAME(kID); \ + } + +#define EXPOSE_BEGIN_PARALLEL_REDUCE(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_begin_parallel_reduce( \ + const char* name, const uint32_t devID, uint64_t* kID) { \ + FUNC_NAME(name, devID, kID); \ + } + +#define EXPOSE_END_PARALLEL_REDUCE(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_end_parallel_reduce(const uint64_t kID) { \ + FUNC_NAME(kID); \ + } + +#define EXPOSE_BEGIN_DEEP_COPY(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_begin_deep_copy( \ + SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, \ + SpaceHandle src_handle, const char* src_name, const void* src_ptr, \ + uint64_t size) { \ + FUNC_NAME(dst_handle, dst_name, dst_ptr, src_handle, src_name, src_ptr, \ + size); \ + } + +#define EXPOSE_END_DEEP_COPY(FUNC_NAME) \ + EXPOSE_NOARGFUNCTION(kokkosp_end_deep_copy, FUNC_NAME) + 
+#define EXPOSE_CREATE_PROFILE_SECTION(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_create_profile_section( \ + const char* name, uint32_t* sec_id) { \ + FUNC_NAME(name, sec_id); \ + } + +#define EXPOSE_START_PROFILE_SECTION(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_start_profile_section( \ + const uint32_t sec_id) { \ + FUNC_NAME(sec_id); \ + } + +#define EXPOSE_STOP_PROFILE_SECTION(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_stop_profile_section( \ + const uint32_t sec_id) { \ + FUNC_NAME(sec_id); \ + } + +#define EXPOSE_DESTROY_PROFILE_SECTION(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_destroy_profile_section( \ + const uint32_t sec_id) { \ + FUNC_NAME(sec_id); \ + } + +#define EXPOSE_PROFILE_EVENT(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_profile_event(const char* name) { \ + FUNC_NAME(name); \ + } +#endif + +#endif // KOKKOSTOOLS_KOKKOSINTERFACE_HPP diff --git a/profiling/chrome-tracing/CMakeLists.txt b/profiling/chrome-tracing/CMakeLists.txt new file mode 100644 index 000000000..2653affd3 --- /dev/null +++ b/profiling/chrome-tracing/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_library(kp_chrome_tracing kp_chrome_tracing.cpp) + +if(USE_MPI) + target_link_libraries(kp_chrome_tracing PRIVATE MPI::MPI_CXX) +endif() diff --git a/profiling/chrome-tracing/Makefile b/profiling/chrome-tracing/Makefile index d6cfa32a2..afe419376 100644 --- a/profiling/chrome-tracing/Makefile +++ b/profiling/chrome-tracing/Makefile @@ -1,15 +1,15 @@ CXX=mpicxx -CFLAGS=-shared -O3 -g -fPIC -std=c++11 -Wall -Wextra +CXXFLAGS=-shared -O3 -g -fPIC -std=c++11 -Wall -Wextra #Turn MPI support off: -#CFLAGS += -DUSE_MPI=0 +#CXXFLAGS += -DUSE_MPI=0 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_chrome_tracing.so: ${MAKEFILE_PATH}kp_chrome_tracing.cpp - $(CXX) $(CFLAGS) -o $@ $< + $(CXX) $(CXXFLAGS) -o $@ $< 
clean: rm *.so diff --git a/profiling/chrome-tracing/kp_chrome_tracing.cpp b/profiling/chrome-tracing/kp_chrome_tracing.cpp index e024c2422..ca6291b25 100644 --- a/profiling/chrome-tracing/kp_chrome_tracing.cpp +++ b/profiling/chrome-tracing/kp_chrome_tracing.cpp @@ -31,9 +31,7 @@ #include #include -#ifndef USE_MPI -#define USE_MPI 1 -#endif +#include "kp_core.hpp" #if USE_MPI #include @@ -41,14 +39,8 @@ #include -namespace { - -struct SpaceHandle { - char name[64]; -}; -struct KokkosPDeviceInfo { - std::uint32_t deviceID; -}; +namespace KokkosTools { +namespace ChromeTracing { enum Space { SPACE_HOST, SPACE_CUDA }; @@ -198,76 +190,111 @@ struct State { State *global_state = nullptr; -} // end anonymous namespace - -extern "C" void kokkosp_init_library(int loadseq, uint64_t, uint32_t ndevinfos, - KokkosPDeviceInfo *devinfos) { +void kokkosp_init_library(int loadseq, uint64_t, uint32_t ndevinfos, + Kokkos_Profiling_KokkosPDeviceInfo *devinfos) { (void)loadseq; (void)ndevinfos; (void)devinfos; global_state = new State(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { delete global_state; global_state = nullptr; } -extern "C" void kokkosp_begin_parallel_for(const char *name, - std::uint32_t devid, - std::uint64_t *kernid) { +void kokkosp_begin_parallel_for(const char *name, std::uint32_t devid, + std::uint64_t *kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_FOR); } -extern "C" void kokkosp_begin_parallel_reduce(const char *name, - std::uint32_t devid, - std::uint64_t *kernid) { +void kokkosp_begin_parallel_reduce(const char *name, std::uint32_t devid, + std::uint64_t *kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_REDUCE); } -extern "C" void kokkosp_begin_parallel_scan(const char *name, - std::uint32_t devid, - std::uint64_t *kernid) { +void kokkosp_begin_parallel_scan(const char *name, std::uint32_t devid, + std::uint64_t *kernid) { (void)devid; *kernid = 
global_state->begin_kernel(name, STACK_SCAN); } -extern "C" void kokkosp_end_parallel_for(std::uint64_t kernid) { +void kokkosp_end_parallel_for(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_reduce(std::uint64_t kernid) { +void kokkosp_end_parallel_reduce(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_scan(std::uint64_t kernid) { +void kokkosp_end_parallel_scan(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_push_profile_region(const char *name) { +void kokkosp_push_profile_region(const char *name) { global_state->push_region(name); } -extern "C" void kokkosp_pop_profile_region() { global_state->pop_region(); } +void kokkosp_pop_profile_region() { global_state->pop_region(); } -extern "C" void kokkosp_allocate_data(SpaceHandle, const char *, void *, - uint64_t) {} +void kokkosp_allocate_data(SpaceHandle, const char *, const void *, uint64_t) {} -extern "C" void kokkosp_deallocate_data(SpaceHandle, const char *, void *, - uint64_t) {} +void kokkosp_deallocate_data(SpaceHandle, const char *, const void *, + uint64_t) {} -extern "C" void kokkosp_begin_deep_copy(SpaceHandle dst_handle, - const char *dst_name, - const void *dst_ptr, - SpaceHandle src_handle, - const char *src_name, - const void *src_ptr, uint64_t size) { +void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char *dst_name, + const void *dst_ptr, SpaceHandle src_handle, + const char *src_name, const void *src_ptr, + uint64_t size) { auto dst_space = get_space(dst_handle); auto src_space = get_space(src_handle); global_state->begin_deep_copy(dst_space, dst_name, dst_ptr, src_space, src_name, src_ptr, size); } -extern "C" void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } +void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + 
memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + return my_event_set; +} + +} // namespace ChromeTracing +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::ChromeTracing; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" diff --git a/profiling/memory-events/CMakeLists.txt b/profiling/memory-events/CMakeLists.txt new file mode 100644 index 000000000..db88966b4 --- /dev/null +++ b/profiling/memory-events/CMakeLists.txt @@ -0,0 +1 @@ +kp_add_library(kp_memory_events 
kp_memory_events.cpp) \ No newline at end of file diff --git a/profiling/memory-events/Makefile b/profiling/memory-events/Makefile index 5b274b81b..05e88b7e6 100644 --- a/profiling/memory-events/Makefile +++ b/profiling/memory-events/Makefile @@ -6,7 +6,7 @@ all: kp_memory_events.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_memory_events.so: ${MAKEFILE_PATH}kp_memory_events.cpp ${MAKEFILE_PATH}kp_memory_events.hpp ${MAKEFILE_PATH}kp_timer.hpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ ${MAKEFILE_PATH}kp_memory_events.cpp diff --git a/profiling/memory-events/kp_memory_events.cpp b/profiling/memory-events/kp_memory_events.cpp index c0912737c..cf52b0d3d 100644 --- a/profiling/memory-events/kp_memory_events.cpp +++ b/profiling/memory-events/kp_memory_events.cpp @@ -24,9 +24,15 @@ #include #include +#include "kp_core.hpp" #include "kp_memory_events.hpp" #include "kp_timer.hpp" +namespace KokkosTools { +namespace MemoryEvents { + +char space_name[16][64]; + std::vector events; int num_spaces; @@ -44,10 +50,9 @@ double max_mem_usage() { return max_rssKB * 1024; } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { num_spaces = 0; for (int i = 0; i < 16; i++) space_size[i] = 0; @@ -57,7 +62,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, timer.reset(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { char* hostname = (char*)malloc(sizeof(char) * 256); gethostname(hostname, 256); int pid = getpid(); @@ -103,9 +108,8 @@ extern "C" void kokkosp_finalize_library() { free(hostname); } -extern "C" void 
kokkosp_allocate_data(const SpaceHandle space, - const char* label, const void* const ptr, - const uint64_t size) { +void kokkosp_allocate_data(const SpaceHandle space, const char* label, + const void* const ptr, const uint64_t size) { std::lock_guard lock(m); double time = timer.seconds(); @@ -127,10 +131,8 @@ extern "C" void kokkosp_allocate_data(const SpaceHandle space, EventRecord(ptr, size, MEMOP_ALLOCATE, space_i, time, label)); } -extern "C" void kokkosp_deallocate_data(const SpaceHandle space, - const char* label, - const void* const ptr, - const uint64_t size) { +void kokkosp_deallocate_data(const SpaceHandle space, const char* label, + const void* const ptr, const uint64_t size) { std::lock_guard lock(m); double time = timer.seconds(); @@ -154,14 +156,43 @@ extern "C" void kokkosp_deallocate_data(const SpaceHandle space, EventRecord(ptr, size, MEMOP_DEALLOCATE, space_i, time, label)); } -extern "C" void kokkosp_push_profile_region(const char* name) { +void kokkosp_push_profile_region(const char* name) { std::lock_guard lock(m); double time = timer.seconds(); events.push_back(EventRecord(nullptr, 0, MEMOP_PUSH_REGION, 0, time, name)); } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { std::lock_guard lock(m); double time = timer.seconds(); events.push_back(EventRecord(nullptr, 0, MEMOP_POP_REGION, 0, time, "")); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + +} // namespace MemoryEvents +} // namespace KokkosTools + +extern 
"C" { + +namespace impl = KokkosTools::MemoryEvents; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) + +} // extern "C" diff --git a/profiling/memory-events/kp_memory_events.hpp b/profiling/memory-events/kp_memory_events.hpp index 2b964da0f..fd756579d 100644 --- a/profiling/memory-events/kp_memory_events.hpp +++ b/profiling/memory-events/kp_memory_events.hpp @@ -18,15 +18,14 @@ #define MEMOP_PUSH_REGION 3 #define MEMOP_POP_REGION 4 -#include #include #include -struct SpaceHandle { - char name[64]; -}; +#include "kp_core.hpp" + +namespace KokkosTools::MemoryEvents { -char space_name[16][64]; +extern char space_name[16][64]; struct EventRecord { const void* ptr; @@ -60,3 +59,5 @@ struct EventRecord { fprintf(ofile, "%lf } PopRegion %s\n", time, name); } }; + +} // namespace KokkosTools::MemoryEvents diff --git a/profiling/memory-hwm-mpi/CMakeLists.txt b/profiling/memory-hwm-mpi/CMakeLists.txt new file mode 100644 index 000000000..7deaa5ebf --- /dev/null +++ b/profiling/memory-hwm-mpi/CMakeLists.txt @@ -0,0 +1,7 @@ +if(NOT MPI_FOUND OR NOT TARGET MPI::MPI_CXX) + message(FATAL_ERROR "kp_hwm_mpi requires MPI") +endif() + +kp_add_library(kp_hwm_mpi kp_hwm_mpi.cpp) + +target_link_libraries(kp_hwm_mpi PRIVATE MPI::MPI_CXX) diff --git a/profiling/memory-hwm-mpi/Makefile b/profiling/memory-hwm-mpi/Makefile index 1c875895d..4386d5462 100644 --- a/profiling/memory-hwm-mpi/Makefile +++ b/profiling/memory-hwm-mpi/Makefile @@ -3,7 +3,7 @@ CXXFLAGS = -shared -O3 -fPIC -std=c++11 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_hwm_mpi.so: ${MAKEFILE_PATH}kp_hwm_mpi.cpp $(CXX) 
$(CXXFLAGS) -o $@ $< diff --git a/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp b/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp index fea97a5bd..95dd86f25 100644 --- a/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp +++ b/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp @@ -20,6 +20,11 @@ #include #include +#include "kp_core.hpp" + +namespace KokkosTools { +namespace HighwaterMarkMPI { + static int world_rank = 0; static int world_size = 1; @@ -30,10 +35,9 @@ static int world_size = 1; #define RU_MAXRSS_UNITS 1 #endif -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { int mpi_is_initialized; MPI_Initialized(&mpi_is_initialized); if (!mpi_is_initialized) { @@ -52,7 +56,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, } } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { if (world_rank == 0) { printf("\n"); printf("KokkosP: Finalization of profiling library.\n"); @@ -81,3 +85,24 @@ extern "C" void kokkosp_finalize_library() { printf("\n"); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + return my_event_set; +} + +} // namespace HighwaterMarkMPI +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::HighwaterMarkMPI; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) + +} // extern "C" diff --git a/profiling/memory-hwm/CMakeLists.txt b/profiling/memory-hwm/CMakeLists.txt new file mode 100644 index 000000000..b5c362e5f --- /dev/null +++ b/profiling/memory-hwm/CMakeLists.txt @@ -0,0 +1 @@ 
+kp_add_library(kp_hwm kp_hwm.cpp) \ No newline at end of file diff --git a/profiling/memory-hwm/Makefile b/profiling/memory-hwm/Makefile index 1a6473c76..dcf193d1f 100644 --- a/profiling/memory-hwm/Makefile +++ b/profiling/memory-hwm/Makefile @@ -1,12 +1,14 @@ + + CXX=g++ -CFLAGS=-shared -O3 -fPIC -std=c++11 +CXXFLAGS=-shared -O3 -fPIC -std=c++11 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_hwm.so: ${MAKEFILE_PATH}kp_hwm.cpp - $(CXX) $(CFLAGS) -o $@ $< + $(CXX) $(CXXFLAGS) -o $@ $< clean: rm *.so diff --git a/profiling/memory-hwm/kp_hwm.cpp b/profiling/memory-hwm/kp_hwm.cpp index ba48227ce..7b0bf7a41 100644 --- a/profiling/memory-hwm/kp_hwm.cpp +++ b/profiling/memory-hwm/kp_hwm.cpp @@ -28,12 +28,16 @@ #include #include +#include "kp_core.hpp" + +namespace KokkosTools { +namespace HighwaterMark { + static uint64_t uniqID = 0; -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf( "KokkosP: High Water Mark Library Initialized (sequence is %d, version: " "%llu)\n", @@ -47,7 +51,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, #define RU_MAXRSS_UNITS 1 #endif -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("\n"); printf("KokkosP: Finalization of profiling library.\n"); @@ -58,3 +62,28 @@ extern "C" void kokkosp_finalize_library() { (long)sys_resources.ru_maxrss * RU_MAXRSS_UNITS); printf("\n"); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + 
my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + return my_event_set; +} + +// static auto event_set = get_event_set(); + +} // namespace HighwaterMark +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::HighwaterMark; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) + +// EXPOSE_KOKKOS_INTERFACE(KokkosTools::HighwaterMark::event_set) + +} // extern "C" diff --git a/profiling/memory-usage/CMakeLists.txt b/profiling/memory-usage/CMakeLists.txt new file mode 100644 index 000000000..e3d0969f1 --- /dev/null +++ b/profiling/memory-usage/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_library(kp_memory_usage kp_memory_usage.cpp) + +# enable headers from memory-events (kp_timer.hpp) +target_include_directories(kp_memory_usage + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../memory-events) \ No newline at end of file diff --git a/profiling/memory-usage/Makefile b/profiling/memory-usage/Makefile index 34d206a5f..8b1378917 100644 --- a/profiling/memory-usage/Makefile +++ b/profiling/memory-usage/Makefile @@ -1,15 +1 @@ -CXX=g++ -CXXFLAGS=-O3 -std=c++11 -g -fopenmp -SHARED_CXXFLAGS=-shared -fPIC -all: kp_memory_usage.so - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -CXXFLAGS+=-I${MAKEFILE_PATH} - -kp_memory_usage.so: ${MAKEFILE_PATH}kp_memory_usage.cpp ${MAKEFILE_PATH}kp_memory_events.hpp ${MAKEFILE_PATH}kp_timer.hpp - $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ ${MAKEFILE_PATH}kp_memory_usage.cpp - -clean: - rm *.so diff --git a/profiling/memory-usage/kp_memory_events.hpp b/profiling/memory-usage/kp_memory_events.hpp deleted file mode 100644 index 9bc6d6ee2..000000000 --- a/profiling/memory-usage/kp_memory_events.hpp +++ /dev/null @@ -1,62 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#define MEMOP_ALLOCATE 1 -#define MEMOP_DEALLOCATE 2 -#include - -struct SpaceHandle { - char name[64]; -}; - -char space_name[16][64]; - -struct EventRecord { - const void* ptr; - uint64_t size; - int operation; - int space; - double time; - char name[256]; - - EventRecord(const void* const ptr_, const uint64_t size_, - const int operation_, const int space_, const double time_, - const char* const name_) { - ptr = ptr_; - size = size_; - operation = operation_; - space = space_; - time = time_; - strncpy(name, name_, 256); - } - - void print_record() const { - if (operation == MEMOP_ALLOCATE) - printf("%lf %16p %14d %16s Allocate %s\n", time, ptr, size, - space < 0 ? "" : space_name[space], name); - if (operation == MEMOP_DEALLOCATE) - printf("%lf %16p %14d %16s DeAllocate %s\n", time, ptr, -size, - space < 0 ? "" : space_name[space], name); - } - void print_record(FILE* ofile) const { - if (operation == MEMOP_ALLOCATE) - fprintf(ofile, "%lf %16p %14d %16s Allocate %s\n", time, ptr, size, - space < 0 ? "" : space_name[space], name); - if (operation == MEMOP_DEALLOCATE) - fprintf(ofile, "%lf %16p %14d %16s DeAllocate %s\n", time, ptr, -size, - space < 0 ? 
"" : space_name[space], name); - } -}; diff --git a/profiling/memory-usage/kp_memory_usage.cpp b/profiling/memory-usage/kp_memory_usage.cpp index aab1046b1..b5390c14b 100644 --- a/profiling/memory-usage/kp_memory_usage.cpp +++ b/profiling/memory-usage/kp_memory_usage.cpp @@ -24,9 +24,14 @@ #include #include -#include "kp_memory_events.hpp" +#include "kp_core.hpp" #include "kp_timer.hpp" +namespace KokkosTools { +namespace MemoryUsage { + +char space_name[16][64]; + int num_spaces; std::vector > space_size_track[16]; uint64_t space_size[16]; @@ -42,17 +47,16 @@ double max_mem_usage() { return max_rssKB * 1024; } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { num_spaces = 0; for (int i = 0; i < 16; i++) space_size[i] = 0; timer.reset(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { char* hostname = (char*)malloc(sizeof(char) * 256); gethostname(hostname, 256); int pid = getpid(); @@ -83,9 +87,8 @@ extern "C" void kokkosp_finalize_library() { free(hostname); } -extern "C" void kokkosp_allocate_data(const SpaceHandle space, - const char* label, const void* const ptr, - const uint64_t size) { +void kokkosp_allocate_data(const SpaceHandle space, const char* label, + const void* const ptr, const uint64_t size) { std::lock_guard lock(m); double time = timer.seconds(); @@ -103,10 +106,8 @@ extern "C" void kokkosp_allocate_data(const SpaceHandle space, std::make_tuple(time, space_size[space_i], max_mem_usage())); } -extern "C" void kokkosp_deallocate_data(const SpaceHandle space, - const char* label, - const void* const ptr, - const uint64_t size) { +void kokkosp_deallocate_data(const SpaceHandle space, const char* label, + const void* const ptr, const uint64_t size) { std::lock_guard 
lock(m); double time = timer.seconds(); @@ -125,3 +126,28 @@ extern "C" void kokkosp_deallocate_data(const SpaceHandle space, std::make_tuple(time, space_size[space_i], max_mem_usage())); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + return my_event_set; +} + +} // namespace MemoryUsage +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::MemoryUsage; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) + +} // extern "C" diff --git a/profiling/memory-usage/kp_timer.hpp b/profiling/memory-usage/kp_timer.hpp deleted file mode 100644 index 79cf77d70..000000000 --- a/profiling/memory-usage/kp_timer.hpp +++ /dev/null @@ -1,48 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_TIMER_HPP -#define KOKKOS_TIMER_HPP - -#include - -namespace Kokkos { - -/** \brief Time since construction */ - -class Timer { - private: - std::chrono::high_resolution_clock::time_point m_old; - Timer(const Timer&); - Timer& operator=(const Timer&); - - public: - void reset() { m_old = std::chrono::high_resolution_clock::now(); } - - Timer() { reset(); } - - double seconds() const { - std::chrono::high_resolution_clock::time_point m_new = - std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast>(m_new - - m_old) - .count(); - } -}; - -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_TIMER_HPP */ diff --git a/profiling/nvprof-connector/CMakeLists.txt b/profiling/nvprof-connector/CMakeLists.txt new file mode 100644 index 000000000..eae33dc2d --- /dev/null +++ b/profiling/nvprof-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvprof_connector kp_nvprof_connector.cpp) + +target_link_libraries(kp_nvprof_connector PRIVATE CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvprof-connector/Makefile b/profiling/nvprof-connector/Makefile index 53828f3bc..bff0a30cf 100644 --- a/profiling/nvprof-connector/Makefile +++ b/profiling/nvprof-connector/Makefile @@ -8,7 +8,7 @@ all: kp_nvprof_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_nvprof_connector.so: ${MAKEFILE_PATH}kp_nvprof_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ diff --git a/profiling/nvprof-connector/kp_nvprof_connector.cpp b/profiling/nvprof-connector/kp_nvprof_connector.cpp index 28a22d9b7..2db81ff29 100644 --- a/profiling/nvprof-connector/kp_nvprof_connector.cpp +++ b/profiling/nvprof-connector/kp_nvprof_connector.cpp @@ -21,22 +21,21 @@
#include "nvToolsExt.h" -struct Kokkos_Tools_ToolSettings { - bool requires_global_fencing; - bool padding[255]; -}; +#include "kp_core.hpp" + +namespace KokkosTools { +namespace NVProfConnector { -extern "C" void kokkosp_request_tool_settings( - const uint32_t, Kokkos_Tools_ToolSettings* settings) { +void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings* settings) { settings->requires_global_fencing = false; } static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: NVTX Analyzer Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -47,7 +46,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, nvtxMarkA("Kokkos::Initialization Complete"); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of NVTX Connector. 
Complete.\n"); printf("-----------------------------------------------------------\n"); @@ -55,40 +54,34 @@ extern "C" void kokkosp_finalize_library() { nvtxMarkA("Kokkos::Finalization Complete"); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { nvtxRangePush(name); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { nvtxRangePop(); } +void kokkosp_end_parallel_for(const uint64_t kID) { nvtxRangePop(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { nvtxRangePush(name); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { - nvtxRangePop(); -} +void kokkosp_end_parallel_scan(const uint64_t kID) { nvtxRangePop(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { nvtxRangePush(name); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { - nvtxRangePop(); -} +void kokkosp_end_parallel_reduce(const uint64_t kID) { nvtxRangePop(); } -extern "C" void kokkosp_push_profile_region(char* regionName) { +void kokkosp_push_profile_region(const char* regionName) { nvtxRangePush(regionName); } -extern "C" void kokkosp_pop_profile_region() { nvtxRangePop(); } +void kokkosp_pop_profile_region() { nvtxRangePop(); } +// TODO: move this to kp_core? namespace { struct Section { std::string label; @@ -97,19 +90,57 @@ struct Section { std::vector
kokkosp_sections; } // namespace -extern "C" void kokkosp_create_profile_section(const char* name, - uint32_t* sID) { +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +void kokkosp_create_profile_section(const char* name, uint32_t* sID) { *sID = kokkosp_sections.size(); kokkosp_sections.push_back( {std::string(name), static_cast(-1)}); } -extern "C" void kokkosp_start_profile_section(const uint32_t sID) { +void kokkosp_start_profile_section(const uint32_t sID) { auto& section = kokkosp_sections[sID]; section.id = nvtxRangeStartA(section.label.c_str()); } -extern "C" void kokkosp_stop_profile_section(const uint32_t sID) { +void kokkosp_stop_profile_section(const uint32_t sID) { auto const& section = kokkosp_sections[sID]; nvtxRangeEnd(section.id); } + +} // namespace NVProfConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::NVProfConnector; + +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) 
+EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +// TODO: expose section stuff +} // extern "C" diff --git a/profiling/nvprof-focused-connector/CMakeLists.txt b/profiling/nvprof-focused-connector/CMakeLists.txt new file mode 100644 index 000000000..072198bf5 --- /dev/null +++ b/profiling/nvprof-focused-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvprof_focused_connector kp_nvprof_focused_connector.cpp) + +target_link_libraries(kp_nvprof_focused_connector PRIVATE CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvprof-focused-connector/Makefile b/profiling/nvprof-focused-connector/Makefile index c66311d66..06628279d 100644 --- a/profiling/nvprof-focused-connector/Makefile +++ b/profiling/nvprof-focused-connector/Makefile @@ -8,7 +8,7 @@ all: kp_nvprof_focused_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_nvprof_focused_connector.so: ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp index ecfc66432..dba824257 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp +++ b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp @@ -22,17 +22,22 @@ #include #include #include + #include "kp_nvprof_focused_connector_domain.h" +#include "kp_core.hpp" + +namespace KokkosTools { +namespace
NVProfFocusedConnector { + static KernelNVProfFocusedConnectorInfo* currentKernel; static std::unordered_map domain_map; static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library( + const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, + struct Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf( "KokkosP: NVProf Analyzer Focused Connector (sequence is %d, version: " @@ -71,47 +76,77 @@ void focusedConnectorExecuteEnd() { currentKernel = NULL; } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of NVProf Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const 
uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { focusedConnectorExecuteEnd(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace NVProfFocusedConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::NVProfFocusedConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h index 6ccf21f3c..dc2d3cabb 100644 --- 
a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h +++ b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h @@ -23,6 +23,9 @@ #include "nvToolsExt.h" +namespace KokkosTools { +namespace NVProfFocusedConnector { + enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, @@ -77,3 +80,5 @@ class KernelNVProfFocusedConnectorInfo { }; +} // namespace NVProfFocusedConnector +} // namespace KokkosTools #endif diff --git a/profiling/papi-connector/CMakeLists.txt b/profiling/papi-connector/CMakeLists.txt new file mode 100644 index 000000000..478e996b1 --- /dev/null +++ b/profiling/papi-connector/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(kp_papi_connector SHARED kp_papi_connector.cpp) + +target_link_libraries(kp_papi_connector PRIVATE PAPI::PAPI) \ No newline at end of file diff --git a/profiling/papi-connector/Makefile b/profiling/papi-connector/Makefile index ed599f05b..2b83d8f79 100644 --- a/profiling/papi-connector/Makefile +++ b/profiling/papi-connector/Makefile @@ -8,7 +8,7 @@ all: kp_papi_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_papi_connector.so: ${MAKEFILE_PATH}kp_papi_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ diff --git a/profiling/roctx-connector/Makefile b/profiling/roctx-connector/Makefile index e0505551c..52d36d129 100644 --- a/profiling/roctx-connector/Makefile +++ b/profiling/roctx-connector/Makefile @@ -8,7 +8,7 @@ all: kp_roctx_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_roctx_connector.so: ${MAKEFILE_PATH}kp_roctx_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ diff --git a/profiling/simple-kernel-timer-json/Makefile
b/profiling/simple-kernel-timer-json/Makefile deleted file mode 100644 index e676b3581..000000000 --- a/profiling/simple-kernel-timer-json/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -CXX=g++ -CXXFLAGS=-O3 -std=c++11 -g -SHARED_CXXFLAGS=-shared -fPIC - -all: kp_kernel_timer_json.so - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -CXXFLAGS+=-I${MAKEFILE_PATH} - -kp_kernel_timer_json.so: ${MAKEFILE_PATH}kp_kernel_timer.cpp ${MAKEFILE_PATH}kp_kernel_info.h - $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ ${MAKEFILE_PATH}kp_kernel_timer.cpp - -clean: - rm *.so diff --git a/profiling/simple-kernel-timer-json/kp_kernel_info.h b/profiling/simple-kernel-timer-json/kp_kernel_info.h deleted file mode 100644 index 97c396d1c..000000000 --- a/profiling/simple-kernel-timer-json/kp_kernel_info.h +++ /dev/null @@ -1,189 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef _H_KOKKOSP_KERNEL_INFO -#define _H_KOKKOSP_KERNEL_INFO - -#include -#include -#include - -#if defined(__GXX_ABI_VERSION) -#define HAVE_GCC_ABI_DEMANGLE -#endif - -#if defined(HAVE_GCC_ABI_DEMANGLE) -#include -#endif // HAVE_GCC_ABI_DEMANGLE - -char* demangleName(char* kernelName) { -#if defined(HAVE_GCC_ABI_DEMANGLE) - int status = -1; - char* demangledKernelName = - abi::__cxa_demangle(kernelName, NULL, NULL, &status); - if (status == 0) { - free(kernelName); - kernelName = demangledKernelName; - } -#endif // HAVE_GCC_ABI_DEMANGLE - return kernelName; -} - -double seconds() { - struct timeval now; - gettimeofday(&now, NULL); - - return (double)(now.tv_sec + (now.tv_usec * 1.0e-6)); -} - -enum KernelExecutionType { - PARALLEL_FOR = 0, - PARALLEL_REDUCE = 1, - PARALLEL_SCAN = 2 -}; - -class KernelPerformanceInfo { - public: - KernelPerformanceInfo(std::string kName, KernelExecutionType kernelType) - : kType(kernelType) { - kernelName = (char*)malloc(sizeof(char) * (kName.size() + 1)); - regionName = ""; - strcpy(kernelName, kName.c_str()); - - callCount = 0; - time = 0; - } - - ~KernelPerformanceInfo() { free(kernelName); } - - KernelExecutionType getKernelType() { return kType; } - - void incrementCount() { callCount++; } - - void addTime(double t) { - time += t; - timeSq += (t * t); - } - - void addFromTimer() { - addTime(seconds() - startTime); - - incrementCount(); - } - - void startTimer() { startTime = seconds(); } - - uint64_t getCallCount() { return callCount; } - - double getTime() { return time; } - - double getTimeSq() { return timeSq; } - - char* getName() { return kernelName; } - - void addCallCount(const uint64_t newCalls) { callCount += newCalls; } - - bool readFromFile(FILE* input) { - uint32_t recordLen = 0; - uint32_t actual_read = fread(&recordLen, sizeof(recordLen), 1, input); - if (actual_read != 1) return false; - - char* entry = (char*)malloc(recordLen); - 
fread(entry, recordLen, 1, input); - - uint32_t nextIndex = 0; - uint32_t kernelNameLength; - copy((char*)&kernelNameLength, &entry[nextIndex], sizeof(kernelNameLength)); - nextIndex += sizeof(kernelNameLength); - - if (strlen(kernelName) > 0) { - free(kernelName); - } - - kernelName = (char*)malloc(sizeof(char) * (kernelNameLength + 1)); - copy(kernelName, &entry[nextIndex], kernelNameLength); - kernelName[kernelNameLength] = '\0'; - - kernelName = demangleName(kernelName); - - nextIndex += kernelNameLength; - - copy((char*)&callCount, &entry[nextIndex], sizeof(callCount)); - nextIndex += sizeof(callCount); - - copy((char*)&time, &entry[nextIndex], sizeof(time)); - nextIndex += sizeof(time); - - copy((char*)&timeSq, &entry[nextIndex], sizeof(timeSq)); - nextIndex += sizeof(timeSq); - - uint32_t kernelT = 0; - copy((char*)&kernelT, &entry[nextIndex], sizeof(kernelT)); - nextIndex += sizeof(kernelT); - - if (kernelT == 0) { - kType = PARALLEL_FOR; - } else if (kernelT == 1) { - kType = PARALLEL_REDUCE; - } else if (kernelT == 2) { - kType = PARALLEL_SCAN; - } - - free(entry); - return true; - } - - void writeToFile(FILE* output, char* indent) { - fprintf(output, "%s{\n", indent); - - char* indentBuffer = (char*)malloc(sizeof(char) * 256); - sprintf(indentBuffer, "%s ", indent); - - fprintf(output, "%s\"kernel-name\" : \"%s\",\n", indentBuffer, - kernelName); - fprintf(output, "%s\"region\" : \"%s\",\n", indentBuffer, - regionName); - fprintf(output, "%s\"call-count\" : %lu,\n", indentBuffer, callCount); - fprintf(output, "%s\"total-time\" : %f,\n", indentBuffer, time); - fprintf(output, "%s\"time-per-call\" : %16.8f,\n", indentBuffer, - (time / static_cast( - std::max(static_cast(1), callCount)))); - fprintf( - output, "%s\"kernel-type\" : \"%s\"\n", indentBuffer, - (kType == PARALLEL_FOR) - ? "PARALLEL-FOR" - : (kType == PARALLEL_REDUCE) ? 
"PARALLEL-REDUCE" : "PARALLEL-SCAN"); - - fprintf(output, "%s}", indent); - } - - private: - void copy(char* dest, const char* src, uint32_t len) { - for (uint32_t i = 0; i < len; i++) { - dest[i] = src[i]; - } - } - - char* kernelName; - char* regionName; - uint64_t callCount; - double time; - double timeSq; - double startTime; - KernelExecutionType kType; -}; - -#endif diff --git a/profiling/simple-kernel-timer/CMakeLists.txt b/profiling/simple-kernel-timer/CMakeLists.txt new file mode 100644 index 000000000..74518bedd --- /dev/null +++ b/profiling/simple-kernel-timer/CMakeLists.txt @@ -0,0 +1,23 @@ +# shared global objects +add_library(kp_kernel_shared STATIC kp_shared.cpp) +list(APPEND EXPORT_TARGETS kp_kernel_shared) +set(EXPORT_TARGETS ${EXPORT_TARGETS} CACHE STRING "" FORCE) + +if(NOT MSVC) + set_property(TARGET kp_kernel_shared PROPERTY POSITION_INDEPENDENT_CODE ON) +endif() + +# Add JSON kernel-timer +kp_add_library(kp_kernel_timer_json kp_kernel_timer_json.cpp) +target_link_libraries(kp_kernel_timer_json PRIVATE kp_kernel_shared) + +# Add binary kernel-timer +kp_add_library(kp_kernel_timer kp_kernel_timer.cpp) +target_link_libraries(kp_kernel_timer PRIVATE kp_kernel_shared) + +# Add binary utilities +add_executable(kp_reader kp_reader.cpp) +target_link_libraries(kp_reader PRIVATE kp_kernel_timer) + +add_executable(kp_json_writer kp_json_writer.cpp) +target_link_libraries(kp_json_writer PRIVATE kp_kernel_timer) diff --git a/profiling/simple-kernel-timer/Makefile b/profiling/simple-kernel-timer/Makefile index b73472832..8917e0384 100644 --- a/profiling/simple-kernel-timer/Makefile +++ b/profiling/simple-kernel-timer/Makefile @@ -6,7 +6,7 @@ all: kp_kernel_timer.so kp_reader kp_json_writer MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_reader: ${MAKEFILE_PATH}kp_reader.cpp kp_kernel_timer.so $(CXX) 
$(CXXFLAGS) -o kp_reader ${MAKEFILE_PATH}kp_reader.cpp diff --git a/profiling/simple-kernel-timer/kp_json_writer.cpp b/profiling/simple-kernel-timer/kp_json_writer.cpp index 905292780..cb5144053 100644 --- a/profiling/simple-kernel-timer/kp_json_writer.cpp +++ b/profiling/simple-kernel-timer/kp_json_writer.cpp @@ -24,7 +24,9 @@ #include #include -#include "kp_kernel_info.h" +#include "kp_shared.h" + +using namespace KokkosTools::KernelTimer; // clang-format on bool is_region(KernelPerformanceInfo const& kp) { @@ -55,10 +57,6 @@ inline void write_json(std::ostream& os, KernelPerformanceInfo const& kp, } // clang-format off -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; - int find_index(std::vector& kernels, const char* kernelName) { diff --git a/profiling/simple-kernel-timer/kp_kernel_info.h b/profiling/simple-kernel-timer/kp_kernel_info.h index 025f19fba..d4dec827c 100644 --- a/profiling/simple-kernel-timer/kp_kernel_info.h +++ b/profiling/simple-kernel-timer/kp_kernel_info.h @@ -30,7 +30,9 @@ #include #endif // HAVE_GCC_ABI_DEMANGLE -char* demangleName(char* kernelName) { +namespace KokkosTools::KernelTimer { + +inline char* demangleName(char* kernelName) { #if defined(HAVE_GCC_ABI_DEMANGLE) int status = -1; char* demangledKernelName = @@ -43,7 +45,7 @@ char* demangleName(char* kernelName) { return kernelName; } -double seconds() { +inline double seconds() { struct timeval now; gettimeofday(&now, NULL); @@ -149,9 +151,8 @@ class KernelPerformanceInfo { return true; } - void writeToFile(FILE* output) { + void writeToBinaryFile(FILE* output) { const uint32_t kernelNameLen = (uint32_t)strlen(kernelName); - const uint32_t recordLen = sizeof(uint32_t) + sizeof(char) * kernelNameLen + sizeof(uint64_t) + sizeof(double) + sizeof(double) + sizeof(uint32_t); @@ -183,6 +184,30 @@ class KernelPerformanceInfo { free(entry); } + void writeToJSONFile(FILE* output, const char* 
indent) { + fprintf(output, "%s{\n", indent); + + char indentBuffer[256]; // stack buffer: no leak, bounded write + snprintf(indentBuffer, sizeof(indentBuffer), "%s ", indent); + + fprintf(output, "%s\"kernel-name\" : \"%s\",\n", indentBuffer, + kernelName); + // fprintf(output, "%s\"region\" : \"%s\",\n", indentBuffer, + // regionName); + fprintf(output, "%s\"call-count\" : %lu,\n", indentBuffer, callCount); + fprintf(output, "%s\"total-time\" : %f,\n", indentBuffer, time); + fprintf(output, "%s\"time-per-call\" : %16.8f,\n", indentBuffer, + (time / static_cast<double>( + std::max(static_cast<uint64_t>(1), callCount)))); + fprintf( + output, "%s\"kernel-type\" : \"%s\"\n", indentBuffer, + (kType == PARALLEL_FOR) + ? "PARALLEL-FOR" + : (kType == PARALLEL_REDUCE) ? "PARALLEL-REDUCE" : "PARALLEL-SCAN"); + + fprintf(output, "%s}", indent); + } + private: void copy(char* dest, const char* src, uint32_t len) { for (uint32_t i = 0; i < len; i++) { @@ -191,6 +216,7 @@ class KernelPerformanceInfo { } char* kernelName; + // const char* regionName; uint64_t callCount; double time; double timeSq; @@ -198,4 +224,6 @@ class KernelPerformanceInfo { KernelExecutionType kType; }; +} // namespace KokkosTools::KernelTimer + #endif diff --git a/profiling/simple-kernel-timer/kp_kernel_timer.cpp b/profiling/simple-kernel-timer/kp_kernel_timer.cpp index dbe327a30..02da6d4c8 100644 --- a/profiling/simple-kernel-timer/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer.cpp @@ -14,73 +14,20 @@ // //@HEADER -#include -#include -#include -#include -#include -#include #include -#include #include -#include #include - #include -#include "kp_kernel_info.h" - -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, - KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; - -static uint64_t uniqID = 0; -static KernelPerformanceInfo* currentEntry; -static std::map count_map; -static double initTime; -static char* outputDelimiter; -static int current_region_level = 0; -static 
KernelPerformanceInfo* regions[512]; - -#define MAX_STACK_SIZE 128 - -void increment_counter(const char* name, KernelExecutionType kType) { - std::string nameStr(name); - - if (count_map.find(name) == count_map.end()) { - KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); - count_map.insert( - std::pair(nameStr, info)); - currentEntry = info; - } else { - currentEntry = count_map[nameStr]; - } - - currentEntry->startTimer(); -} - -void increment_counter_region(const char* name, KernelExecutionType kType) { - std::string nameStr(name); +#include "kp_core.hpp" +#include "kp_shared.h" - if (count_map.find(name) == count_map.end()) { - KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); - count_map.insert( - std::pair(nameStr, info)); +namespace KokkosTools { +namespace KernelTimer { - regions[current_region_level] = info; - } else { - regions[current_region_level] = count_map[nameStr]; - } - - regions[current_region_level]->startTimer(); - current_region_level++; -} - -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); if (NULL == output_delim_env) { outputDelimiter = (char*)malloc(sizeof(char) * 2); @@ -102,7 +49,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, initTime = seconds(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { double finishTime = seconds(); double kernelTimes = 0; @@ -122,7 +69,7 @@ extern "C" void kokkosp_finalize_library() { for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) { - kernel_itr->second->writeToFile(output_data); + kernel_itr->second->writeToBinaryFile(output_data); } fclose(output_data); @@ -244,9 +191,8 
@@ extern "C" void kokkosp_finalize_library() { }*/ } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = uniqID++; if ((NULL == name) || (strcmp("", name) == 0)) { @@ -257,13 +203,12 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, increment_counter(name, PARALLEL_FOR); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = uniqID++; if ((NULL == name) || (strcmp("", name) == 0)) { @@ -274,13 +219,12 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, increment_counter(name, PARALLEL_SCAN); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = uniqID++; if ((NULL == name) || (strcmp("", name) == 0)) { @@ -291,15 +235,15 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, increment_counter(name, PARALLEL_REDUCE); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_push_profile_region(char* regionName) { +void kokkosp_push_profile_region(const char* regionName) { increment_counter_region(regionName, REGION); } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { current_region_level--; // current_region_level is out of 
bounds, inform the user they @@ -328,3 +272,41 @@ extern "C" void kokkosp_pop_profile_region() { regions[current_region_level]->addFromTimer(); } } + +// Gathers this tool's callbacks; keep in sync with the EXPOSE_* list below. +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace KernelTimer +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::KernelTimer; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/simple-kernel-timer-json/kp_kernel_timer.cpp b/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp similarity index 61% rename from profiling/simple-kernel-timer-json/kp_kernel_timer.cpp rename to profiling/simple-kernel-timer/kp_kernel_timer_json.cpp index 942415701..9a16def1b 100644 --- a/profiling/simple-kernel-timer-json/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp @@ -14,53 +14,22 @@ // //@HEADER -#include -#include -#include -#include -#include -#include #include 
#include #include -#include - #include -#include "kp_kernel_info.h" - -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, - KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; - -static uint64_t uniqID = 0; -static KernelPerformanceInfo* currentEntry; -static std::map count_map; -static double initTime; -static char* outputDelimiter; - -#define MAX_STACK_SIZE 128 -void increment_counter(const char* name, KernelExecutionType kType) { - std::string nameStr(name); +#include "kp_core.hpp" +#include "kp_shared.h" - if (count_map.find(name) == count_map.end()) { - KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); - count_map.insert( - std::pair(nameStr, info)); +using namespace KokkosTools::KernelTimer; - currentEntry = info; - } else { - currentEntry = count_map[nameStr]; - } - - currentEntry->startTimer(); -} +namespace KokkosTools { +namespace KernelTimerJSON { -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); if (NULL == output_delim_env) { outputDelimiter = (char*)malloc(sizeof(char) * 2); @@ -74,12 +43,12 @@ extern "C" void kokkosp_init_library(const int loadSeq, printf( "KokkosP: LDMS JSON Connector Initialized (sequence is %d, version: " "%llu)\n", - loadSeq, interfaceVer); + loadSeq, (long long unsigned int)interfaceVer); initTime = seconds(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { double finishTime = seconds(); double kernelTimes = 0; @@ -130,7 +99,7 @@ extern "C" void kokkosp_finalize_library() { bool print_comma = false; for (auto const& kernel : count_map) { if (print_comma) fprintf(output_data, ",\n"); - kernel.second->writeToFile(output_data, 
KERNEL_INFO_INDENT); + kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); print_comma = true; } @@ -140,9 +109,8 @@ extern "C" void kokkosp_finalize_library() { fclose(output_data); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = uniqID++; if ((NULL == name) || (strcmp("", name) == 0)) { @@ -153,13 +121,12 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, increment_counter(name, PARALLEL_FOR); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = uniqID++; if ((NULL == name) || (strcmp("", name) == 0)) { @@ -170,13 +137,12 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, increment_counter(name, PARALLEL_SCAN); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = uniqID++; if ((NULL == name) || (strcmp("", name) == 0)) { @@ -187,6 +153,39 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, increment_counter(name, PARALLEL_REDUCE); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { currentEntry->addFromTimer(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // 
zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace KernelTimerJSON +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::KernelTimerJSON; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/simple-kernel-timer/kp_reader.cpp b/profiling/simple-kernel-timer/kp_reader.cpp index f05135dc7..84495a5eb 100644 --- a/profiling/simple-kernel-timer/kp_reader.cpp +++ b/profiling/simple-kernel-timer/kp_reader.cpp @@ -21,12 +21,9 @@ #include #include -#include "kp_kernel_info.h" +#include "kp_shared.h" -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, - KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; +using namespace KokkosTools::KernelTimer; int find_index(std::vector& kernels, const char* kernelName) { diff --git a/profiling/simple-kernel-timer/kp_shared.cpp b/profiling/simple-kernel-timer/kp_shared.cpp new file mode 100644 index 000000000..43f306d81 --- /dev/null +++ b/profiling/simple-kernel-timer/kp_shared.cpp @@ -0,0 +1,31 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "kp_shared.h" + +namespace KokkosTools { +namespace KernelTimer { + +uint64_t uniqID = 0; +KernelPerformanceInfo* currentEntry; +std::map count_map; +double initTime; +char* outputDelimiter; +int current_region_level = 0; +KernelPerformanceInfo* regions[512]; + +} // namespace KernelTimer +} // namespace KokkosTools diff --git a/profiling/simple-kernel-timer/kp_shared.h b/profiling/simple-kernel-timer/kp_shared.h new file mode 100644 index 000000000..990fee2a7 --- /dev/null +++ b/profiling/simple-kernel-timer/kp_shared.h @@ -0,0 +1,75 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef _H_KOKKOSP_KERNEL_SHARED +#define _H_KOKKOSP_KERNEL_SHARED + +#include +#include +#include "kp_kernel_info.h" + +namespace KokkosTools::KernelTimer { + +extern uint64_t uniqID; +extern KernelPerformanceInfo* currentEntry; +extern std::map count_map; +extern double initTime; +extern char* outputDelimiter; +extern int current_region_level; +extern KernelPerformanceInfo* regions[512]; + +inline void increment_counter(const char* name, KernelExecutionType kType) { + std::string nameStr(name); + + if (count_map.find(name) == count_map.end()) { + KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); + count_map.insert( + std::pair(nameStr, info)); + + currentEntry = info; + } else { + currentEntry = count_map[nameStr]; + } + + currentEntry->startTimer(); +} + +inline void increment_counter_region(const char* name, + KernelExecutionType kType) { + std::string nameStr(name); + + if (count_map.find(name) == count_map.end()) { + KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); + count_map.insert( + std::pair(nameStr, info)); + + regions[current_region_level] = info; + } else { + regions[current_region_level] = count_map[nameStr]; + } + + regions[current_region_level]->startTimer(); + current_region_level++; +} + +inline bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, + KernelPerformanceInfo* right) { + return left->getTime() > right->getTime(); +}; + +} // namespace KokkosTools::KernelTimer + +#endif // _H_KOKKOSP_KERNEL_SHARED diff --git a/profiling/space-time-stack/CMakeLists.txt b/profiling/space-time-stack/CMakeLists.txt new file mode 100644 index 000000000..2cc44373d --- /dev/null +++ b/profiling/space-time-stack/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_library(kp_space_time_stack kp_space_time_stack.cpp) + +if(USE_MPI) + target_link_libraries(kp_space_time_stack PRIVATE MPI::MPI_CXX) +endif() \ No newline at end of file diff 
--git a/profiling/space-time-stack/Makefile b/profiling/space-time-stack/Makefile index ea0f1114c..6ed5971ea 100644 --- a/profiling/space-time-stack/Makefile +++ b/profiling/space-time-stack/Makefile @@ -1,15 +1,15 @@ CXX=mpicxx -CFLAGS=-shared -O3 -g -fPIC -std=c++11 -Wall -Wextra +CXXFLAGS=-shared -O3 -g -fPIC -std=c++11 -Wall -Wextra #Turn MPI support off: -#CFLAGS += -DUSE_MPI=0 +#CXXFLAGS += -DUSE_MPI=0 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_space_time_stack.so: ${MAKEFILE_PATH}kp_space_time_stack.cpp - $(CXX) $(CFLAGS) -o $@ $< + $(CXX) $(CXXFLAGS) -o $@ $< clean: rm *.so diff --git a/profiling/space-time-stack/kp_space_time_stack.cpp b/profiling/space-time-stack/kp_space_time_stack.cpp index 435ab6786..e1fa85da4 100644 --- a/profiling/space-time-stack/kp_space_time_stack.cpp +++ b/profiling/space-time-stack/kp_space_time_stack.cpp @@ -31,9 +31,7 @@ #include #include -#ifndef USE_MPI -#define USE_MPI 1 -#endif +#include "kp_core.hpp" #if USE_MPI #include @@ -41,15 +39,8 @@ #include -namespace { - -struct KokkosPDeviceInfo { - std::uint32_t deviceID; -}; - -struct SpaceHandle { - char name[64]; -}; +namespace KokkosTools { +namespace SpaceTimeStack { enum Space { SPACE_HOST, SPACE_CUDA, SPACE_HIP, SPACE_SYCL, SPACE_OMPT }; @@ -493,10 +484,10 @@ struct StackNode { struct Allocation { std::string name; - void* ptr; + const void* ptr; std::uint64_t size; StackNode* frame; - Allocation(std::string&& name_in, void* ptr_in, std::uint64_t size_in, + Allocation(std::string&& name_in, const void* ptr_in, std::uint64_t size_in, StackNode* frame_in) : name(std::move(name_in)), ptr(ptr_in), size(size_in), frame(frame_in) {} bool operator<(Allocation const& other) const { @@ -509,13 +500,13 @@ struct Allocations { std::uint64_t total_size; std::set alloc_set; Allocations() : total_size(0) {} - void 
allocate(std::string&& name, void* ptr, std::uint64_t size, + void allocate(std::string&& name, const void* ptr, std::uint64_t size, StackNode* frame) { auto res = alloc_set.emplace(Allocation(std::move(name), ptr, size, frame)); assert(res.second); total_size += size; } - void deallocate(std::string&& name, void* ptr, std::uint64_t size, + void deallocate(std::string&& name, const void* ptr, std::uint64_t size, StackNode* frame) { auto key = Allocation(std::move(name), ptr, size, frame); auto it = alloc_set.find(key); @@ -706,7 +697,8 @@ struct State { } void push_region(const char* name) { begin_frame(name, STACK_REGION); } void pop_region() { end_frame(now()); } - void allocate(Space space, const char* name, void* ptr, std::uint64_t size) { + void allocate(Space space, const char* name, const void* ptr, + std::uint64_t size) { current_allocations[space].allocate(std::string(name), ptr, size, stack_frame); if (current_allocations[space].total_size > @@ -714,7 +706,7 @@ struct State { hwm_allocations[space] = current_allocations[space]; } } - void deallocate(Space space, const char* name, void* ptr, + void deallocate(Space space, const char* name, const void* ptr, std::uint64_t size) { current_allocations[space].deallocate(std::string(name), ptr, size, stack_frame); @@ -734,82 +726,116 @@ struct State { State* global_state = nullptr; -} // end anonymous namespace - -extern "C" void kokkosp_init_library(int loadseq, uint64_t, uint32_t ndevinfos, - KokkosPDeviceInfo* devinfos) { - (void)loadseq; - (void)ndevinfos; - (void)devinfos; +void kokkosp_init_library(int /* loadseq */, uint64_t /* interfaceVer */, + uint32_t /* ndevinfos */, + Kokkos_Profiling_KokkosPDeviceInfo* /* devinfos */) { global_state = new State(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { delete global_state; global_state = nullptr; } -extern "C" void kokkosp_begin_parallel_for(const char* name, - std::uint32_t devid, - std::uint64_t* kernid) { +void 
kokkosp_begin_parallel_for(const char* name, std::uint32_t devid, + std::uint64_t* kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_FOR); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - std::uint32_t devid, - std::uint64_t* kernid) { +void kokkosp_begin_parallel_reduce(const char* name, std::uint32_t devid, + std::uint64_t* kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_REDUCE); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - std::uint32_t devid, - std::uint64_t* kernid) { +void kokkosp_begin_parallel_scan(const char* name, std::uint32_t devid, + std::uint64_t* kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_SCAN); } -extern "C" void kokkosp_end_parallel_for(std::uint64_t kernid) { +void kokkosp_end_parallel_for(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_reduce(std::uint64_t kernid) { +void kokkosp_end_parallel_reduce(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_scan(std::uint64_t kernid) { +void kokkosp_end_parallel_scan(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_push_profile_region(const char* name) { +void kokkosp_push_profile_region(const char* name) { global_state->push_region(name); } -extern "C" void kokkosp_pop_profile_region() { global_state->pop_region(); } +void kokkosp_pop_profile_region() { global_state->pop_region(); } -extern "C" void kokkosp_allocate_data(SpaceHandle handle, const char* name, - void* ptr, uint64_t size) { +void kokkosp_allocate_data(SpaceHandle handle, const char* name, + const void* ptr, uint64_t size) { auto space = get_space(handle); global_state->allocate(space, name, ptr, size); } -extern "C" void kokkosp_deallocate_data(SpaceHandle handle, const char* name, - void* ptr, uint64_t size) { +void kokkosp_deallocate_data(SpaceHandle handle, const char* name, + const void* 
ptr, uint64_t size) { auto space = get_space(handle); global_state->deallocate(space, name, ptr, size); } -extern "C" void kokkosp_begin_deep_copy(SpaceHandle dst_handle, - const char* dst_name, - const void* dst_ptr, - SpaceHandle src_handle, - const char* src_name, - const void* src_ptr, uint64_t size) { +void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char* dst_name, + const void* dst_ptr, SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { auto dst_space = get_space(dst_handle); auto src_space = get_space(src_handle); global_state->begin_deep_copy(dst_space, dst_name, dst_ptr, src_space, src_name, src_ptr, size); } -extern "C" void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } +void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace SpaceTimeStack +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::SpaceTimeStack; + 
+EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/systemtap-connector/CMakeLists.txt b/profiling/systemtap-connector/CMakeLists.txt new file mode 100644 index 000000000..6fac2e459 --- /dev/null +++ b/profiling/systemtap-connector/CMakeLists.txt @@ -0,0 +1,21 @@ +# probes.h and probes.o are generated from probes.d by dtrace (commands below). +set(PROBES_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/probes.d) +set(PROBES_HEADER ${CMAKE_CURRENT_BINARY_DIR}/probes.h) +set(PROBES_OBJECT ${CMAKE_CURRENT_BINARY_DIR}/probes.o) + +# Note: connect external/generated object file via imported object library +add_custom_command(OUTPUT ${PROBES_OBJECT} + COMMAND dtrace -C -G -s ${PROBES_SOURCE} -o ${PROBES_OBJECT} + DEPENDS ${PROBES_SOURCE} VERBATIM) +add_library(kp_systemtap_probe OBJECT IMPORTED) +set_property(TARGET kp_systemtap_probe PROPERTY IMPORTED_OBJECTS ${PROBES_OBJECT}) + +kp_add_library(kp_systemtap_connector kp_systemtap_connector.cpp ${PROBES_HEADER} + $<TARGET_OBJECTS:kp_systemtap_probe>) +set_property(SOURCE ${PROBES_HEADER} PROPERTY HEADER_FILE_ONLY ON) + +# Note: connect generated header +target_include_directories(kp_systemtap_connector PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +add_custom_command(OUTPUT ${PROBES_HEADER} + COMMAND dtrace -C -h -s ${PROBES_SOURCE} -o ${PROBES_HEADER} + DEPENDS ${PROBES_SOURCE} VERBATIM) diff --git a/profiling/systemtap-connector/Makefile b/profiling/systemtap-connector/Makefile index 03caf5296..fd10632b7 100644 
--- a/profiling/systemtap-connector/Makefile +++ b/profiling/systemtap-connector/Makefile @@ -12,7 +12,7 @@ ${MAKEFILE_PATH}probes.h: ${MAKEFILE_PATH}probes.d probes.o: ${MAKEFILE_PATH}probes.d dtrace -C -G -s $< -o $@ -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_systemtap_connector.so: ${MAKEFILE_PATH}kp_systemtap_connector.cpp ${MAKEFILE_PATH}probes.h probes.o $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) probes.o \ diff --git a/profiling/systemtap-connector/kp_systemtap_connector.cpp b/profiling/systemtap-connector/kp_systemtap_connector.cpp index def7cf1ae..ea5e93573 100644 --- a/profiling/systemtap-connector/kp_systemtap_connector.cpp +++ b/profiling/systemtap-connector/kp_systemtap_connector.cpp @@ -25,145 +25,192 @@ #include #include "probes.h" +#include "kp_core.hpp" -struct SpaceHandle { - char name[64]; -}; +namespace KokkosTools { +namespace SystemtapConnector { static uint64_t next_kernid; static uint32_t next_sec_id; -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devid, - uint64_t* kernid) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devid, + uint64_t* kernid) { *kernid = next_kernid++; - if (KOKKOS_END_PARALLEL_FOR_ENABLED()) { + if (KOKKOS_BEGIN_PARALLEL_FOR_ENABLED()) { KOKKOS_BEGIN_PARALLEL_FOR(name, devid, kernid); } } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devid, - uint64_t* kernid) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devid, + uint64_t* kernid) { *kernid = next_kernid++; if (KOKKOS_BEGIN_PARALLEL_SCAN_ENABLED()) { KOKKOS_BEGIN_PARALLEL_SCAN(name, devid, kernid); } } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t devid, - uint64_t* kernid) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devid, + uint64_t* kernid) { *kernid = next_kernid++; if (KOKKOS_BEGIN_PARALLEL_REDUCE_ENABLED()) { KOKKOS_BEGIN_PARALLEL_REDUCE(name, 
devid, kernid); } } -extern "C" void kokkosp_end_parallel_scan(uint64_t kernid) { +void kokkosp_end_parallel_scan(uint64_t kernid) { if (KOKKOS_END_PARALLEL_SCAN_ENABLED()) { KOKKOS_END_PARALLEL_SCAN(kernid); } } -extern "C" void kokkosp_end_parallel_for(uint64_t kernid) { +void kokkosp_end_parallel_for(uint64_t kernid) { if (KOKKOS_END_PARALLEL_FOR_ENABLED()) { KOKKOS_END_PARALLEL_FOR(kernid); } } -extern "C" void kokkosp_end_parallel_reduce(uint64_t kernid) { +void kokkosp_end_parallel_reduce(uint64_t kernid) { if (KOKKOS_END_PARALLEL_REDUCE_ENABLED()) { KOKKOS_END_PARALLEL_REDUCE(kernid); } } -extern "C" void kokkosp_init_library(const int loadseq, const uint64_t version, - const uint32_t ndevinfos, - void* deviceinfos) { +void kokkosp_init_library(const int loadseq, const uint64_t version, + const uint32_t ndevinfos, + Kokkos_Profiling_KokkosPDeviceInfo* deviceinfos) { if (KOKKOS_INIT_LIBRARY_ENABLED()) { KOKKOS_INIT_LIBRARY(loadseq, version, ndevinfos, deviceinfos); } } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { if (KOKKOS_FINALIZE_LIBRARY_ENABLED()) { KOKKOS_FINALIZE_LIBRARY(); } } -extern "C" void kokkosp_push_profile_region(const char* name) { +void kokkosp_push_profile_region(const char* name) { if (KOKKOS_PUSH_PROFILE_REGION_ENABLED()) { KOKKOS_PUSH_PROFILE_REGION(name); } } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { if (KOKKOS_POP_PROFILE_REGION_ENABLED()) { KOKKOS_POP_PROFILE_REGION(); } } -extern "C" void kokkosp_allocate_data(SpaceHandle handle, const char* name, - void* ptr, uint64_t size) { +void kokkosp_allocate_data(SpaceHandle handle, const char* name, + const void* ptr, uint64_t size) { if (KOKKOS_ALLOCATE_DATA_ENABLED()) { KOKKOS_ALLOCATE_DATA(handle, name, ptr, size); } } -extern "C" void kokkosp_deallocate_data(SpaceHandle handle, const char* name, - void* ptr, uint64_t size) { +void kokkosp_deallocate_data(SpaceHandle handle, const char* name, + const void* ptr, 
uint64_t size) { if (KOKKOS_DEALLOCATE_DATA_ENABLED()) { KOKKOS_DEALLOCATE_DATA(handle, name, ptr, size); } } -extern "C" void kokkosp_begin_deep_copy(SpaceHandle dst_handle, - const char* dst_name, - const void* dst_ptr, - SpaceHandle src_handle, - const char* src_name, - const void* src_ptr, uint64_t size) { +void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char* dst_name, + const void* dst_ptr, SpaceHandle src_handle, + const char* src_name, const void* src_ptr, + uint64_t size) { if (KOKKOS_BEGIN_DEEP_COPY_ENABLED()) { KOKKOS_BEGIN_DEEP_COPY(dst_handle, dst_name, dst_ptr, src_handle, src_name, src_ptr, size); } } -extern "C" void kokkosp_end_deep_copy() { +void kokkosp_end_deep_copy() { if (KOKKOS_END_DEEP_COPY_ENABLED()) { KOKKOS_END_DEEP_COPY(); } } -extern "C" void kokkosp_create_profile_section(const char* name, - uint32_t* sec_id) { +void kokkosp_create_profile_section(const char* name, uint32_t* sec_id) { *sec_id = next_sec_id++; if (KOKKOS_CREATE_PROFILE_SECTION_ENABLED()) { KOKKOS_CREATE_PROFILE_SECTION(name, sec_id); } } -extern "C" void kokkosp_start_profile_section(const uint32_t sec_id) { +void kokkosp_start_profile_section(const uint32_t sec_id) { if (KOKKOS_START_PROFILE_SECTION_ENABLED()) { KOKKOS_START_PROFILE_SECTION(sec_id); } } -extern "C" void kokkosp_stop_profile_section(const uint32_t sec_id) { +void kokkosp_stop_profile_section(const uint32_t sec_id) { if (KOKKOS_STOP_PROFILE_SECTION_ENABLED()) { KOKKOS_STOP_PROFILE_SECTION(sec_id); } } -extern "C" void kokkosp_destroy_profile_section(const uint32_t sec_id) { +void kokkosp_destroy_profile_section(const uint32_t sec_id) { if (KOKKOS_DESTROY_PROFILE_SECTION_ENABLED()) { KOKKOS_DESTROY_PROFILE_SECTION(sec_id); } } -extern "C" void kokkosp_profile_event(const char* name) { +void kokkosp_profile_event(const char* name) { if (KOKKOS_PROFILE_EVENT_ENABLED()) { KOKKOS_PROFILE_EVENT(name); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + 
Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.create_profile_section = kokkosp_create_profile_section; + my_event_set.start_profile_section = kokkosp_start_profile_section; + my_event_set.stop_profile_section = kokkosp_stop_profile_section; + my_event_set.destroy_profile_section = kokkosp_destroy_profile_section; + my_event_set.profile_event = kokkosp_profile_event; + return my_event_set; +} + +} // namespace SystemtapConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::SystemtapConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) 
+EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section) +EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section) +EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section) +EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section) +EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event) + +} // extern "C" diff --git a/profiling/variorum-connector/CMakeLists.txt b/profiling/variorum-connector/CMakeLists.txt new file mode 100644 index 000000000..ce74690a2 --- /dev/null +++ b/profiling/variorum-connector/CMakeLists.txt @@ -0,0 +1,9 @@ +# Based on Makefile authored by Zachary S. Frye (CASC at LLNL) in July 2020 + +kp_add_library(kp_variorum_connector variorum-connector.cpp) + +target_link_libraries(kp_variorum_connector PRIVATE variorum::variorum) + +if(USE_MPI) + target_link_libraries(kp_variorum_connector PRIVATE MPI::MPI_CXX) +endif() diff --git a/profiling/variorum-connector/Makefile b/profiling/variorum-connector/Makefile index 8018ba6c5..bdb7b4987 100644 --- a/profiling/variorum-connector/Makefile +++ b/profiling/variorum-connector/Makefile @@ -1,4 +1,4 @@ -#Author: Zachary S. Frye +# Author: Zachary S. 
Frye #Organization: CASC at LLNL #Date: July 2020 #Description: This is a simple makefile for testing and developing the Kokkos-variorum connector @@ -12,13 +12,11 @@ LINK_FLAG=-lvariorum CXX=mpicxx CXXFLAGS=-O3 -std=c++11 -g SHARED_CXXFLAGS=-shared -fPIC -CFLAGS=-std=c++11 -Wall -g all: variorum_connector.so - - MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all variorum_connector.so: ${MAKEFILE_PATH}variorum-connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(VAR_LIB) $(VAR_INC) -o $@ ${MAKEFILE_PATH}variorum-connector.cpp $(LINK_FLAG) diff --git a/profiling/variorum-connector/variorum-connector.cpp b/profiling/variorum-connector/variorum-connector.cpp index 0b323873e..14c44314c 100644 --- a/profiling/variorum-connector/variorum-connector.cpp +++ b/profiling/variorum-connector/variorum-connector.cpp @@ -39,14 +39,15 @@ extern "C" { #include } -#ifndef USE_MPI -#define USE_MPI 1 -#endif +#include "kp_core.hpp" #if USE_MPI #include #endif +namespace KokkosTools { +namespace VariorumConnector { + bool filterKernels; uint64_t nextKernelID; std::vector kernelNames; @@ -160,6 +161,7 @@ char* variorum_json_call() { // Post: An output message if variourum returned an error or if it functioned // correctly void variorum_call_mpi() { +#if USE_MPI if (usingMPI == true) { int rank; std::string output; @@ -217,6 +219,7 @@ void variorum_call_mpi() { } file.close(); } +#endif } // Function: variorum_call @@ -240,10 +243,9 @@ void variorum_call() { } } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { char* outputPathChar; try { outputPathChar = getenv("VARIORUM_OUTPUT_PATH"); @@ -316,6 +318,7 @@ extern "C" 
void kokkosp_init_library(const int loadSeq, throw 20; } if (strcmp(usingMPIstr, "true") == 0) { +#if USE_MPI usingMPI = true; try { char* perRankOutput = getenv("RANKED_OUTPUT"); @@ -333,6 +336,12 @@ extern "C" void kokkosp_init_library(const int loadSeq, << std::endl; mpiOutPut = false; } +#else + usingMPI = false; + std::cout << "Ignoring MPI enabled in Variorum: the connector was built " + "without MPI support" + << std::endl; +#endif } } catch (int e) { std::cout << "No MPI Option provided, not using per rank output" @@ -347,7 +356,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, // variorum_call(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { if (usingMPI) { variorum_call_mpi(); } else { @@ -364,9 +373,8 @@ extern "C" void kokkosp_finalize_library() { << std::endl; } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { std::cout << "Device ID: " << devID << "\n"; if (usingMPI) { variorum_call_mpi(); @@ -375,7 +383,7 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, } } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { if (usingMPI) { variorum_call_mpi(); } else { @@ -383,9 +391,8 @@ extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { } } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { std::cout << "Device ID: " << devID << "\n"; if (usingMPI) { variorum_call_mpi(); @@ -394,7 +401,7 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, } } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { if (usingMPI) { variorum_call_mpi(); } else { @@ -402,9 +409,8 @@ 
extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { } } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { std::cout << "Device ID: " << devID << "\n"; if (usingMPI) { variorum_call_mpi(); @@ -413,10 +419,43 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, } } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { if (usingMPI) { variorum_call_mpi(); } else { variorum_call(); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace VariorumConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::VariorumConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git 
a/profiling/vtune-connector/CMakeLists.txt b/profiling/vtune-connector/CMakeLists.txt new file mode 100644 index 000000000..8a795e518 --- /dev/null +++ b/profiling/vtune-connector/CMakeLists.txt @@ -0,0 +1,5 @@ +find_package(ITT REQUIRED) + +kp_add_library(kp_vtune_connector kp_vtune_connector.cpp) + +target_link_libraries(kp_vtune_connector PRIVATE ittapi) diff --git a/profiling/vtune-connector/Makefile b/profiling/vtune-connector/Makefile index b92fee015..a0c356779 100644 --- a/profiling/vtune-connector/Makefile +++ b/profiling/vtune-connector/Makefile @@ -8,7 +8,7 @@ all: kp_vtune_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_vtune_connector.so: ${MAKEFILE_PATH}kp_vtune_connector.cpp ${MAKEFILE_PATH}kp_vtune_connector_domain.h $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ diff --git a/profiling/vtune-connector/kp_vtune_connector.cpp b/profiling/vtune-connector/kp_vtune_connector.cpp index 7df439823..ef4550846 100644 --- a/profiling/vtune-connector/kp_vtune_connector.cpp +++ b/profiling/vtune-connector/kp_vtune_connector.cpp @@ -20,18 +20,20 @@ #include #include #include -#include +#include "kp_core.hpp" #include "kp_vtune_connector_domain.h" +namespace KokkosTools { +namespace VTuneConnector { + static KernelVTuneConnectorInfo* currentKernel; static std::unordered_map domain_map; static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: VTune Analyzer Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -44,7 +46,7 @@ 
extern "C" void kokkosp_init_library(const int loadSeq, __itt_event_start(startEv); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of VTune Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); @@ -55,9 +57,8 @@ extern "C" void kokkosp_finalize_library() { __itt_event_start(finalEv); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; std::string nameStr(name); @@ -75,14 +76,13 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, __itt_frame_begin_v3(currentKernel->getDomain(), NULL); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { __itt_frame_end_v3(currentKernel->getDomain(), NULL); currentKernel = NULL; } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; std::string nameStr(name); @@ -100,14 +100,13 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, __itt_frame_begin_v3(currentKernel->getDomain(), NULL); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { __itt_frame_end_v3(currentKernel->getDomain(), NULL); currentKernel = NULL; } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; std::string nameStr(name); @@ -125,7 +124,40 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, 
__itt_frame_begin_v3(currentKernel->getDomain(), NULL); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { __itt_frame_end_v3(currentKernel->getDomain(), NULL); currentKernel = NULL; } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace VTuneConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::VTuneConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/vtune-connector/kp_vtune_connector_domain.h b/profiling/vtune-connector/kp_vtune_connector_domain.h index 692281964..8d7cc9216 100644 --- a/profiling/vtune-connector/kp_vtune_connector_domain.h +++ b/profiling/vtune-connector/kp_vtune_connector_domain.h @@ -18,11 +18,13 @@ #define _H_KOKKOSP_KERNEL_VTUNE_CONNECTOR_INFO #include -#include #include #include "ittnotify.h" +namespace KokkosTools { 
+namespace VTuneConnector { + enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, @@ -62,4 +64,7 @@ class KernelVTuneConnectorInfo { __itt_string_handle* domainNameHandle; }; -#endif +} // namespace VTuneConnector +} // namespace KokkosTools + +#endif // _H_KOKKOSP_KERNEL_VTUNE_CONNECTOR_INFO diff --git a/profiling/vtune-focused-connector/CMakeLists.txt b/profiling/vtune-focused-connector/CMakeLists.txt new file mode 100644 index 000000000..192a43add --- /dev/null +++ b/profiling/vtune-focused-connector/CMakeLists.txt @@ -0,0 +1,5 @@ +find_package(ITT REQUIRED) + +kp_add_library(kp_vtune_focused_connector kp_vtune_focused_connector.cpp) + +target_link_libraries(kp_vtune_focused_connector PRIVATE ittapi) diff --git a/profiling/vtune-focused-connector/Makefile b/profiling/vtune-focused-connector/Makefile index 076b25aff..536dd2fdd 100644 --- a/profiling/vtune-focused-connector/Makefile +++ b/profiling/vtune-focused-connector/Makefile @@ -8,7 +8,7 @@ all: kp_vtune_focused_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all kp_vtune_focused_connector.so: ${MAKEFILE_PATH}kp_vtune_focused_connector.cpp ${MAKEFILE_PATH}kp_vtune_focused_connector_domain.h $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ diff --git a/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp b/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp index cf93dbca9..604411303 100644 --- a/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp +++ b/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp @@ -20,19 +20,21 @@ #include #include #include -#include +#include "kp_core.hpp" #include "kp_vtune_focused_connector_domain.h" +namespace KokkosTools { +namespace VTuneFocusedConnector { + static KernelVTuneFocusedConnectorInfo* currentKernel; static 
std::unordered_map domain_map; static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: VTune Analyzer Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -71,7 +73,7 @@ void focusedConnectorExecuteEnd() { currentKernel = NULL; } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of VTune Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); @@ -79,41 +81,71 @@ extern "C" void kokkosp_finalize_library() { __itt_detach(); } -extern "C" void kokkosp_begin_parallel_for(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, - const uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, - const 
uint32_t devID, - uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, + uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { focusedConnectorExecuteEnd(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, + sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +} // namespace VTuneFocusedConnector +} // namespace KokkosTools + +extern "C" { + +namespace impl = KokkosTools::VTuneFocusedConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h b/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h index 9a2fcdaee..b8ff635a9 100644 --- 
a/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h +++ b/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h @@ -18,11 +18,13 @@ #define _H_KOKKOSP_KERNEL_VTUNE_CONNECTOR_INFO #include -#include #include #include "ittnotify.h" +namespace KokkosTools { +namespace VTuneFocusedConnector { + enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, @@ -62,5 +64,7 @@ class KernelVTuneFocusedConnectorInfo { __itt_domain* domain; __itt_string_handle* domainNameHandle; }; +} // namespace VTuneFocusedConnector +} // namespace KokkosTools #endif diff --git a/tpls/Caliper b/tpls/Caliper new file mode 160000 index 000000000..b4314be9d --- /dev/null +++ b/tpls/Caliper @@ -0,0 +1 @@ +Subproject commit b4314be9dcdfcc1c28854e545a7cf1bcd34141d4 diff --git a/tpls/apex b/tpls/apex new file mode 160000 index 000000000..48b7831c3 --- /dev/null +++ b/tpls/apex @@ -0,0 +1 @@ +Subproject commit 48b7831c30c4202bc3c655c2bc4f552217b1eb00