Skip to content

Commit

Permalink
kp_sampler_skip.cpp: include probability sampling
Browse files Browse the repository at this point in the history
This commit allows sampling based on probability as an alternative to periodicity. Tool-invoked fencing is needed by the tool utility. The environment variable KOKKOS_TOOLS_SAMPLER_PROB is introduced in this PR.
  • Loading branch information
vlkale authored Apr 1, 2024
1 parent 141747d commit 95b02e2
Showing 1 changed file with 162 additions and 76 deletions.
238 changes: 162 additions & 76 deletions common/kokkos-sampler/kp_sampler_skip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
#include <dlfcn.h>
#include "../../profiling/all/kp_core.hpp"
#include "kp_config.hpp"
#include <ctime>
#include <limits>
#include <iostream>

namespace KokkosTools {
namespace Sampler {
static uint64_t uniqID = 0;
static uint64_t kernelSampleSkip = 101;
static uint64_t kernelSampleSkip = std::numeric_limits<uint64_t>::max();
static double tool_prob_num = -1.0;
static int tool_verbosity = 0;
static int tool_globFence = 0;
static int tool_seed = -1;

// a hash table mapping kID to nestedkID
static std::unordered_map<uint64_t, uint64_t> infokIDSample;
Expand All @@ -33,7 +38,7 @@ static endFunction endReduceCallee = NULL;

void kokkosp_request_tool_settings(const uint32_t,
Kokkos_Tools_ToolSettings* settings) {
settings->requires_global_fencing = false;
settings->requires_global_fencing = 0;
}

// set of functions from Kokkos ToolProgrammingInterface (includes fence)
Expand All @@ -48,36 +53,38 @@ uint32_t getDeviceID(uint32_t devid_in) {

void invoke_ktools_fence(uint32_t devID) {
if (tpi_funcs.fence != nullptr) {
tpi_funcs.fence(devID);
if (tool_verbosity > 1) {
printf(
"KokkosP: Sampler utility sucessfully invoked "
" tool-induced fence on device %d\n",
getDeviceID(devID));
std::cout << "KokkosP: Sampler attempting to invoke tool-induced fence "
"on device "
<< getDeviceID(devID) << '\n';
}
(*(tpi_funcs.fence))(devID);
if (tool_verbosity > 1) {
std::cout << "KokkosP: Sampler sucessfully invoked tool-induced fence on "
"device "
<< getDeviceID(devID) << '\n';
}
} else {
printf(
"KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
"Fence is NULL!\n");
exit(-1);
std::cout << "KokkosP: FATAL: Kokkos Tools Programming Interface's "
"tool-invoked Fence is NULL!\n";
}
}

void kokkosp_provide_tool_programming_interface(
uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface funcsFromTPI) {
if (!num_funcs) {
if (tool_verbosity > 0)
printf(
"KokkosP: Note: Number of functions in Tools Programming Interface "
"is 0!\n");
std::cout << "KokkosP: Note: Number of functions in Tools Programming "
"Interface is 0!\n";
}
tpi_funcs = *funcsFromTPI;
tpi_funcs = funcsFromTPI;
}

void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t devInfoCount, void* deviceInfo) {
const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE");
const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES");
const char* tool_seed_str = getenv("KOKKOS_TOOLS_RANDOM_SEED");
if (NULL != tool_verbose_str) {
tool_verbosity = atoi(tool_verbose_str);
} else {
Expand All @@ -88,15 +95,17 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
} else {
tool_globFence = 0;
}
if (NULL != tool_seed_str) {
tool_seed = atoi(tool_seed_str);
}

char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS");
if (NULL == profileLibrary) {
printf(
"Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a depreciated "
"variable. Please use KOKKOS_TOOLS_LIBS\n");
std::cout << "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a "
"deprecated variable. Please use KOKKOS_TOOLS_LIBS\n";
profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY");
if (NULL == profileLibrary) {
printf("KokkosP: No library to call in %s\n", profileLibrary);
std::cout << "KokkosP: No library to call in " << profileLibrary << '\n';
exit(-1);
}
}
Expand All @@ -113,19 +122,20 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
nextLibrary = strtok(NULL, ";");

if (NULL == nextLibrary) {
printf("KokkosP: No child library to call in %s\n", profileLibrary);
std::cout << "KokkosP: No child library to call in " << profileLibrary
<< '\n';
exit(-1);
} else {
if (tool_verbosity > 0) {
printf("KokkosP: Next library to call: %s\n", nextLibrary);
printf("KokkosP: Loading child library ..\n");
std::cout << "KokkosP: Next library to call: " << nextLibrary << '\n';
std::cout << "KokkosP: Loading child library ..\n";
}

void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL);

if (NULL == childLibrary) {
fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n",
nextLibrary, dlerror());
std::cerr << "KokkosP: Error: Unable to load: " << nextLibrary
<< " (Error=" << dlerror() << ")\n";
exit(-1);
} else {
beginForCallee =
Expand Down Expand Up @@ -153,19 +163,19 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
}

if (tool_verbosity > 0) {
printf("KokkosP: Function Status:\n");
printf("KokkosP: begin-parallel-for: %s\n",
(beginForCallee == NULL) ? "no" : "yes");
printf("KokkosP: begin-parallel-scan: %s\n",
(beginScanCallee == NULL) ? "no" : "yes");
printf("KokkosP: begin-parallel-reduce: %s\n",
(beginReduceCallee == NULL) ? "no" : "yes");
printf("KokkosP: end-parallel-for: %s\n",
(endForCallee == NULL) ? "no" : "yes");
printf("KokkosP: end-parallel-scan: %s\n",
(endScanCallee == NULL) ? "no" : "yes");
printf("KokkosP: end-parallel-reduce: %s\n",
(endReduceCallee == NULL) ? "no" : "yes");
std::cout << "KokkosP: Function Status:\n";
std::cout << "KokkosP: begin-parallel-for: "
<< ((beginForCallee == NULL) ? "no" : "yes") << '\n';
std::cout << "KokkosP: begin-parallel-scan: "
<< ((beginScanCallee == NULL) ? "no" : "yes") << '\n';
std::cout << "KokkosP: begin-parallel-reduce: "
<< ((beginReduceCallee == NULL) ? "no" : "yes") << '\n';
std::cout << "KokkosP: end-parallel-for: "
<< ((endForCallee == NULL) ? "no" : "yes") << '\n';
std::cout << "KokkosP: end-parallel-scan: "
<< ((endScanCallee == NULL) ? "no" : "yes") << '\n';
std::cout << "KokkosP: end-parallel-reduce: "
<< ((endReduceCallee == NULL) ? "no" : "yes") << '\n';
}
}
}
Expand All @@ -174,14 +184,71 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,

uniqID = 1;

if (0 > tool_seed) {
srand(time(NULL));
if (tool_verbosity > 0) {
std::cout << "KokkosP: Seeding random number generator using clock for "
"random sampling.\n";
}
} else {
srand(tool_seed);
if (tool_verbosity > 0) {
std::cout << "KokkosP: Seeding random number generator using seed "
<< tool_seed << " for random sampling.\n";
}
}

const char* tool_probability = getenv("KOKKOS_TOOLS_SAMPLER_PROB");

if (NULL != tool_probability) {
// read sampling probability as a float between 0 and 100, representing
// a percentage that data should be gathered.
// Connector reasons about probability as a double between 0.0 and 1.0.
tool_prob_num = atof(tool_probability);
if (tool_prob_num > 100.0) {
std::cout << "KokkosP: The sampling probability value is set to be "
"greater than 100.0. The probability for the sampler will "
"be set to 100 percent; all of the invocations of a Kokkos "
"kernel will be profiled.\n";
tool_prob_num = 100.0;
} else if (tool_prob_num < 0.0) {
std::cout
<< "KokkosP: The sampling probability value is set to be a negative "
"number. The sampler's probability will be set to 0 percent; none "
"of the invocations of a Kokkos kernel will be profiled.\n";
tool_prob_num = 0.0;
}
if (tool_verbosity > 0) {
std::cout << "KokkosP: Probability for the sampler set to: "
<< tool_prob_num << '\n';
}
kernelSampleSkip = 1;
return;
}

const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP");
if (NULL != tool_sample) {
tool_prob_num = 100.0;
kernelSampleSkip = atoi(tool_sample) + 1;
if (tool_verbosity > 0) {
std::cout << "KokkosP: Sampling rate set to: " << tool_sample << '\n';
}
return;
}

if (tool_verbosity > 0) {
printf("KokkosP: Sampling rate set to: %s\n", tool_sample);
std::cout << "KokkosP: Neither the probability nor the skip rate for "
"sampling were set...\n";
}
tool_prob_num = 10.0;
kernelSampleSkip = 1;
if (tool_verbosity > 0) {
std::cout
<< "KokkosP: The probability for the sampler is set to the default of "
<< tool_prob_num
<< " percent. The skip rate for sampler will not be used.\n";
}
kernelSampleSkip = 1;
}

void kokkosp_finalize_library() {
Expand All @@ -194,17 +261,23 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
static uint64_t invocationNum = 0;
++invocationNum;
if ((invocationNum % kernelSampleSkip) == 0) {
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
if (NULL != beginForCallee) {
uint64_t nestedkID = 0;
(*beginForCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) {
if (tool_verbosity > 0) {
std::cout << "KokkosP: sample " << *kID
<< " calling child-begin function...\n";
}
if (NULL != beginForCallee) {
if (tool_globFence) {
invoke_ktools_fence(0);
}
uint64_t nestedkID = 0;
(*beginForCallee)(name, devID, &nestedkID);
if (tool_verbosity > 0) {
std::cout << "KokkosP: sample " << *kID
<< " finished with child-begin function.\n";
}
infokIDSample.insert({*kID, nestedkID});
}
}
}
}
Expand All @@ -214,8 +287,8 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
if (!(infokIDSample.find(kID) == infokIDSample.end())) {
uint64_t retrievedNestedkID = infokIDSample[kID];
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
std::cout << "KokkosP: sample " << kID
<< " calling child-end function...\n";
}
if (tool_globFence) {
invoke_ktools_fence(0);
Expand All @@ -232,17 +305,23 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
static uint64_t invocationNum = 0;
++invocationNum;
if ((invocationNum % kernelSampleSkip) == 0) {
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}
if (NULL != beginScanCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) {
if (tool_verbosity > 0) {
std::cout << "KokkosP: sample " << *kID
<< " calling child-begin function...\n";
}
if (NULL != beginScanCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginScanCallee)(name, devID, &nestedkID);
if (tool_verbosity > 0) {
std::cout << "KokkosP: sample " << *kID
<< " finished with child-begin function.\n";
}
infokIDSample.insert({*kID, nestedkID});
}
(*beginScanCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
}
}
Expand All @@ -252,8 +331,8 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
if (!(infokIDSample.find(kID) == infokIDSample.end())) {
uint64_t retrievedNestedkID = infokIDSample[kID];
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
std::cout << "KokkosP: sample " << kID
<< " calling child-end function...\n";
}
if (tool_globFence) {
invoke_ktools_fence(0);
Expand All @@ -270,17 +349,23 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
static uint64_t invocationNum = 0;
++invocationNum;
if ((invocationNum % kernelSampleSkip) == 0) {
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}
if (NULL != beginReduceCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) {
if (tool_verbosity > 0) {
std::cout << "KokkosP: sample " << *kID
<< " calling child-begin function...\n";
}
if (NULL != beginReduceCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginReduceCallee)(name, devID, &nestedkID);
if (tool_verbosity > 0) {
std::cout << "KokkosP: sample " << *kID
<< " finished with child-begin function.\n";
}
infokIDSample.insert({*kID, nestedkID});
}
(*beginReduceCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
}
}
Expand All @@ -290,12 +375,13 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
if (!(infokIDSample.find(kID) == infokIDSample.end())) {
uint64_t retrievedNestedkID = infokIDSample[kID];
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
std::cout << "KokkosP: sample " << kID
<< " calling child-end function...\n";
}
if (tool_globFence) {
invoke_ktools_fence(0);
}

(*endScanCallee)(retrievedNestedkID);
infokIDSample.erase(kID);
}
Expand Down

0 comments on commit 95b02e2

Please sign in to comment.