Skip to content

Commit aefd848

Browse files
authored
Merge pull request #63 from RadeonOpenCompute/rbtCpuTimerOpt
Construct and collect CPU timer only if requested by user
2 parents 2c6c202 + ffbc28b commit aefd848

File tree

4 files changed

+49
-13
lines changed

4 files changed

+49
-13
lines changed

hsatimer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
#define NANOSECONDS_PER_SECOND 1000000000
4646

4747
PerfTimer::PerfTimer() {
48-
freq_in_100mhz = MeasureTSCFreqHz();
4948
}
5049

5150
PerfTimer::~PerfTimer() {
@@ -56,6 +55,10 @@ PerfTimer::~PerfTimer() {
5655
}
5756
}
5857

58+
// Measures the TSC frequency via MeasureTSCFreqHz() and caches the result in
// freq_in_100mhz. The measurement lives here rather than in the constructor so
// that a PerfTimer can be constructed without performing it; RunCopyBenchmark
// calls InitTimer() only when CPU timing was requested.
// NOTE(review): presumably this must run before any timer reading is converted
// to seconds -- confirm against ReadTimer().
void PerfTimer::InitTimer() {
59+
freq_in_100mhz = MeasureTSCFreqHz();
60+
}
61+
5962
// Create a new timer instance and return its index
6063
int PerfTimer::CreateTimer() {
6164

hsatimer.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class PerfTimer {
8282

8383
PerfTimer();
8484
~PerfTimer();
85+
void InitTimer();
8586

8687
private:
8788

rocm_bandwidth_test.cpp

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
#include <cmath>
5252
#include <sstream>
5353
#include <limits>
54+
#include <chrono>
55+
#include <thread>
5456

5557
// Initialize the variable used to capture validation failure
5658
const double RocmBandwidthTest::VALIDATE_COPY_OP_FAILURE = std::numeric_limits<double>::max();
@@ -587,12 +589,21 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
587589
hsa_signal_store_relaxed(signal_start_bidir, 1);
588590
}
589591

590-
// Create a timer object and reset signals
592+
// Temporary code for testing
593+
if (sleep_time_ > 0) {
594+
std::this_thread::sleep_for(sleep_usecs_);
595+
}
596+
597+
// Create a timer object and start it
591598
PerfTimer timer;
592-
uint32_t index = timer.CreateTimer();
599+
uint32_t cpuTimerIdx = 0;
600+
if (print_cpu_time_) {
601+
timer.InitTimer();
602+
cpuTimerIdx = timer.CreateTimer();
603+
timer.StartTimer(cpuTimerIdx);
604+
}
593605

594-
// Start the timer and launch forward copy operation
595-
timer.StartTimer(index);
606+
// Launch the copy operation
596607
if (bidir == false) {
597608
err_ = hsa_amd_memory_async_copy(buf_dst_fwd, dst_agent_fwd,
598609
buf_src_fwd, src_agent_fwd,
@@ -621,11 +632,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
621632

622633
WaitForCopyCompletion(signal_list);
623634

624-
// Stop the timer object
625-
timer.StopTimer(index);
626-
627-
// Push the time taken for copy into a vector of copy times
628-
cpu_time.push_back(timer.ReadTimer(index));
635+
// Stop the timer object and extract time taken
636+
if (print_cpu_time_) {
637+
timer.StopTimer(cpuTimerIdx);
638+
cpu_time.push_back(timer.ReadTimer(cpuTimerIdx));
639+
}
629640

630641
// Collect time from the signal(s)
631642
if (print_cpu_time_ == false) {
@@ -667,7 +678,9 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
667678
verify = true;
668679

669680
// Clear the stack of cpu times
670-
cpu_time.clear();
681+
if (print_cpu_time_) {
682+
cpu_time.clear();
683+
}
671684
gpu_time.clear();
672685
}
673686

@@ -803,10 +816,24 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
803816

804817
// Initialize version of the test
805818
version_.major_id = 2;
806-
version_.minor_id = 3;
807-
version_.step_id = 11;
819+
version_.minor_id = 4;
820+
version_.step_id = 0;
808821
version_.reserved = 0;
809822

823+
// Test impact of sleep, temp code
824+
sleep_time_ = 0;
825+
bw_sleep_time_ = getenv("ROCM_BW_SLEEP_TIME");
826+
if (bw_sleep_time_ != NULL) {
827+
sleep_time_ = atoi(bw_sleep_time_);
828+
if ((sleep_time_ < 0) || (sleep_time_ > 60000)) {
829+
std::cout << "Value of ROCM_BW_SLEEP_TIME must be between [1, 60000)" << sleep_time_ << std::endl;
830+
exit(1);
831+
}
832+
sleep_time_ *= 100;
833+
std::chrono::duration<uint32_t, std::micro> temp(sleep_time_);
834+
sleep_usecs_ = temp;
835+
}
836+
810837
bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT");
811838
bw_default_run_ = getenv("ROCM_BW_DEFAULT_RUN");
812839
bw_blocking_run_ = getenv("ROCR_BW_RUN_BLOCKING");
@@ -817,6 +844,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
817844
int32_t num = atoi(bw_iter_cnt_);
818845
if (num < 0) {
819846
std::cout << "Value of ROCM_BW_ITER_CNT can't be negative: " << num << std::endl;
847+
exit(1);
820848
}
821849
set_num_iteration(num);
822850
}

rocm_bandwidth_test.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "common.hpp"
5050

5151
#include <vector>
52+
#include <chrono>
5253

5354
using namespace std;
5455

@@ -505,6 +506,9 @@ class RocmBandwidthTest : public BaseTest {
505506

506507
// Env key to specify iteration count
507508
char* bw_iter_cnt_;
509+
char* bw_sleep_time_;
510+
uint32_t sleep_time_;
511+
std::chrono::duration<uint32_t, std::micro> sleep_usecs_;
508512

509513
// Variable to store argument number
510514

0 commit comments

Comments
 (0)