Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
#include <string>
#include <stdint.h>
#include <stdlib.h>

#include "resultfilename.h"
#include "timer.h"
#include "test.h"
#include "setupScheduler.h"

const unsigned kIterationPerDigit = 100000;
const unsigned kIterationForRandom = 100;
Expand Down Expand Up @@ -135,7 +137,7 @@ void BenchSequential(void(*f)(T, char*), const char* type, const char* fname, FI
for (int digit = 1; digit <= Traits<T>::kMaxDigit; digit++) {
T end = (digit == Traits<T>::kMaxDigit) ? std::numeric_limits<T>::max() : start * 10;

double duration = std::numeric_limits<double>::max();
uint64_t duration = std::numeric_limits<uint64_t>::max();
for (unsigned trial = 0; trial < kTrial; trial++) {
T v = start;
T sign = 1;
Expand All @@ -148,14 +150,14 @@ void BenchSequential(void(*f)(T, char*), const char* type, const char* fname, FI
v = start;
}
timer.Stop();
duration = std::min(duration, timer.GetElapsedMilliseconds());
duration = std::min(duration, timer.GetElapsedNanoseconds());
}

duration *= 1e6 / kIterationPerDigit; // convert to nano second per operation
double duration_per_op = (double)duration / kIterationPerDigit; // convert to nano second per operation

minDuration = std::min(minDuration, duration);
maxDuration = std::max(maxDuration, duration);
fprintf(fp, "%s_sequential,%s,%d,%f\n", type, fname, digit, duration);
minDuration = std::min(minDuration, duration_per_op);
maxDuration = std::max(maxDuration, duration_per_op);
fprintf(fp, "%s_sequential,%s,%d,%f\n", type, fname, digit, duration_per_op);
start = end;
}

Expand Down Expand Up @@ -209,7 +211,7 @@ void BenchRandom(void(*f)(T, char*), const char* type, const char* fname, FILE*
T* data = RandomData<T>::GetData();
size_t n = RandomData<T>::kCount;

double duration = std::numeric_limits<double>::max();
uint64_t duration = std::numeric_limits<uint64_t>::max();
for (unsigned trial = 0; trial < kTrial; trial++) {
Timer timer;
timer.Start();
Expand All @@ -219,12 +221,12 @@ void BenchRandom(void(*f)(T, char*), const char* type, const char* fname, FILE*
f(data[i], buffer);

timer.Stop();
duration = std::min(duration, timer.GetElapsedMilliseconds());
duration = std::min(duration, timer.GetElapsedNanoseconds());
}
duration *= 1e6 / (kIterationForRandom * n); // convert to nano second per operation
fprintf(fp, "%s_random,%s,0,%f\n", type, fname, duration);
double duration_per_op = (double)duration / (kIterationForRandom * n); // convert to nano second per operation
fprintf(fp, "%s_random,%s,0,%f\n", type, fname, duration_per_op);

printf("%8.3fns\n", duration);
printf("%8.3fns\n", duration_per_op);
}

template <typename T>
Expand Down Expand Up @@ -283,5 +285,6 @@ int main() {
});

VerifyAll();
setupScheduler();
BenchAll();
}
85 changes: 85 additions & 0 deletions src/setupScheduler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>
#include <err.h>

/**
 * Reports whether a debugger/tracer is attached to the current process.
 *
 * - Windows: forwards to IsDebuggerPresent().
 * - Linux:   reads /proc/self/status, finds the "TracerPid:" field and
 *            returns true when the first non-blank character after it is a
 *            digit other than '0'.
 * - Other:   not implemented; always returns false.
 *
 * Any failure to open, read or parse the status file is treated as
 * "no debugger" (returns false).
 */
static bool debuggerIsAttached(void)
{
#if defined(_WIN32) || defined(_WIN64)
    return IsDebuggerPresent() != 0;
#elif defined(__linux)
    char buf[4096];

    const int status_fd = open("/proc/self/status", O_RDONLY);
    if (status_fd == -1)
        return false;

    // Leave room for the terminator added below.
    const ssize_t num_read = read(status_fd, buf, sizeof(buf) - 1);
    close(status_fd);

    if (num_read <= 0)
        return false;

    buf[num_read] = '\0';
    constexpr char tracerPidString[] = "TracerPid:";
    const char* cursor = std::strstr(buf, tracerPidString);
    if (!cursor)
        return false;
    cursor += sizeof(tracerPidString) - 1; // step over the field name

    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); passing a plain, possibly negative, char is undefined behaviour.
    // The scan cannot run past the buffer: '\0' is not whitespace.
    while (std::isspace(static_cast<unsigned char>(*cursor)))
        ++cursor;

    const unsigned char first = static_cast<unsigned char>(*cursor);
    return std::isdigit(first) != 0 && first != '0';
#else
#warning "debugger detection not implemented for this platform"
    return false;
#endif
}

/**
 * Prepares the calling process/thread for low-jitter benchmarking by raising
 * its scheduling priority and pinning it to a single CPU core (index 3).
 *
 * Does nothing when a debugger is attached.
 *
 * - Windows: sets the realtime priority class, pins the current thread to the
 *            chosen core and sets time-critical thread priority.
 * - Linux:   switches to SCHED_FIFO at priority 99, then restricts the CPU
 *            affinity to the chosen core. If the priority is out of range or
 *            either system call fails, the process is terminated with
 *            EXIT_FAILURE via errx()/err().
 * - Other:   not implemented.
 */
void setupScheduler(void)
{
    const int fixToCPU = 3;

    // This has to be a call: the bare function name converts to a non-null
    // pointer, which is always true and would skip the whole setup.
    if (debuggerIsAttached())
        return;
#if defined(_WIN32) || defined(_WIN64)
    {
        SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
        SetThreadAffinityMask(GetCurrentThread(), 1 << fixToCPU);
        SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
    }
#elif defined(__linux)
    {
        struct sched_param param = {};
        param.sched_priority = 99; // Highest real-time priority
        const int policy = SCHED_FIFO;

        // Check if the priority is valid for the chosen policy.
        // errx() rather than err(): this is not a failed system call, so
        // appending strerror(errno) would only print an unrelated message.
        if (param.sched_priority < sched_get_priority_min(policy) ||
            param.sched_priority > sched_get_priority_max(policy)) {
            errx(EXIT_FAILURE, "Priority %d is not valid for policy %d", param.sched_priority, policy);
        }

        if (sched_setscheduler(0, policy, &param) == -1) {
            err(EXIT_FAILURE, "sched_setscheduler");
        }

        { // Set CPU affinity to the specified CPU core
            cpu_set_t set;
            CPU_ZERO(&set); // the set starts out indeterminate; clear it before adding a CPU
            CPU_SET(fixToCPU, &set);
            if (sched_setaffinity(0, sizeof(set), &set) == -1)
                err(EXIT_FAILURE, "sched_setaffinity");
        }
    }
#else
#warning "Real-time scheduling not implemented for this platform"
    (void)fixToCPU;
#endif
}
6 changes: 6 additions & 0 deletions src/setupScheduler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Include guard. Identifiers containing a double underscore are reserved for
// the implementation, so the guard uses a plain project-style name.
#ifndef SETUPSCHEDULER_H
#define SETUPSCHEDULER_H

// Configures scheduling for benchmarking; defined in setupScheduler.cpp.
void setupScheduler(void);

#endif /* SETUPSCHEDULER_H */
127 changes: 123 additions & 4 deletions src/timer.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,91 @@
#pragma once

#ifdef _WIN32
#include <cstdint>

// #define USE_TSC

#ifdef USE_TSC

#if defined(__i386) || defined(__x86_64)

#if defined(__GNUC__)
#include <cpuid.h>
#include <x86intrin.h>
#define _ReadWriteBarrier __sync_synchronize
#else
#include <intrin.h>
#endif

// Interval timer based on the x86 time-stamp counter (TSC).
//
// The TSC frequency is derived once, in the constructor, from CPUID leaf 0x15
// (EAX = ratio denominator, EBX = ratio numerator, ECX = crystal clock in Hz),
// i.e. frequency = ECX * EBX / EAX. When the leaf is unavailable or any of the
// three values is zero, a fixed 2.5 GHz is assumed instead, so reported times
// are then only as accurate as that assumption.
class Timer {
public:
    Timer() : start_(), end_(), tsc_frequency_(2500000000ULL) {  // 2.5 GHz fallback
        uint32_t denominator = 0;
        uint32_t numerator = 0;
        uint32_t crystal_hz = 0;

#if defined(__GNUC__)
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
        // __get_cpuid() itself verifies that the leaf is supported.
        if (__get_cpuid(0x15, &eax, &ebx, &ecx, &edx)) {
            denominator = eax;
            numerator = ebx;
            crystal_hz = ecx;
        }
#else
        int info[4] = { 0, 0, 0, 0 };
        // Leaf 0 reports the highest supported basic leaf in EAX; querying an
        // unsupported leaf would return unrelated data.
        __cpuid(info, 0);
        if (static_cast<uint32_t>(info[0]) >= 0x15u) {
            __cpuidex(info, 0x15, 0);
            // Registers are raw 32-bit values; read them as unsigned.
            denominator = static_cast<uint32_t>(info[0]);
            numerator = static_cast<uint32_t>(info[1]);
            crystal_hz = static_cast<uint32_t>(info[2]);
        }
#endif

        // ECX may legitimately be 0 (crystal frequency not enumerated). Using
        // it would yield a zero frequency and a division by zero later on.
        if (denominator != 0 && numerator != 0 && crystal_hz != 0) {
            tsc_frequency_ = static_cast<uint64_t>(crystal_hz) * numerator / denominator;
        }
    }

    // Records the starting TSC value. CPUID is executed before RDTSC.
    void Start() {
        int info[4];

        _ReadWriteBarrier();
        __cpuidex(info, 0, 0);
        start_ = __rdtsc();
        _ReadWriteBarrier();
    }

    // Records the ending TSC value. RDTSCP is followed by CPUID.
    void Stop() {
        int info[4];
        unsigned int aux;
        _ReadWriteBarrier();
        end_ = __rdtscp(&aux);
        __cpuidex(info, 0, 0);
        _ReadWriteBarrier();
    }

    // Elapsed time between Start() and Stop() in milliseconds.
    double GetElapsedMilliseconds() {
        return (end_ - start_) * 1000.0 / tsc_frequency_;
    }

    // Elapsed time between Start() and Stop() in whole nanoseconds.
    // The tick count is split into whole seconds and a remainder so that the
    // multiplication by 1e9 cannot overflow 64 bits for long intervals; the
    // result equals floor(ticks * 1e9 / frequency).
    uint64_t GetElapsedNanoseconds() {
        const uint64_t ticks = end_ - start_;
        const uint64_t whole_seconds = ticks / tsc_frequency_;
        const uint64_t leftover_ticks = ticks % tsc_frequency_;
        return whole_seconds * 1000000000ULL
            + leftover_ticks * 1000000000ULL / tsc_frequency_;
    }

private:
    uint64_t start_;          // TSC value captured by Start()
    uint64_t end_;            // TSC value captured by Stop()
    uint64_t tsc_frequency_;  // TSC ticks per second; never zero
};

#else
#error "TSC timer is only supported on x86/x86_64 architectures"
#endif

#else // USE_TSC

#if defined(_WIN32) || defined(_WIN64)

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

class Timer {
public:
Timer() : start_(), end_() {
QueryPerformanceFrequency(&freq_);
}

void Start() {
Expand All @@ -19,20 +97,55 @@ class Timer {
}

double GetElapsedMilliseconds() {
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
return (end_.QuadPart - start_.QuadPart) * 1000.0 / freq.QuadPart;
return (end_.QuadPart - start_.QuadPart) * 1000.0 / freq_.QuadPart;
}

uint64_t GetElapsedNanoseconds() {
return (end_.QuadPart - start_.QuadPart) * 1000000000ULL / freq.QuadPart;
}

private:
LARGE_INTEGER start_;
LARGE_INTEGER end_;
LARGE_INTEGER freq_;
};

// Undefine Windows bad macros
#undef min
#undef max

#elif defined(__linux)

#include <time.h>

// Interval timer backed by clock_gettime(CLOCK_MONOTONIC_RAW).
// A freshly constructed timer holds zeroed timestamps, so it reports an
// elapsed time of zero until Start() and Stop() have been called.
class Timer {
public:
    Timer() : start_(), end_() {}

    // Captures the starting timestamp.
    void Start() { clock_gettime(CLOCK_MONOTONIC_RAW, &start_); }

    // Captures the ending timestamp.
    void Stop() { clock_gettime(CLOCK_MONOTONIC_RAW, &end_); }

    // Elapsed time between Start() and Stop() in milliseconds.
    double GetElapsedMilliseconds() {
        const auto sec_delta = end_.tv_sec - start_.tv_sec;
        const auto nsec_delta = end_.tv_nsec - start_.tv_nsec;
        return sec_delta * 1000.0 + nsec_delta / 1000000.0;
    }

    // Elapsed time between Start() and Stop() in whole nanoseconds.
    uint64_t GetElapsedNanoseconds() {
        const auto sec_delta = end_.tv_sec - start_.tv_sec;
        const auto nsec_delta = end_.tv_nsec - start_.tv_nsec;
        // nsec_delta may be negative; the unsigned sum still wraps to the
        // correct total because sec_delta accounts for the borrowed second.
        return sec_delta * 1000000000ULL + nsec_delta;
    }

private:
    timespec start_;  // written by Start()
    timespec end_;    // written by Stop()
};

#else

#include <sys/time.h>
Expand All @@ -55,9 +168,15 @@ class Timer {
+ (end_.tv_usec - start_.tv_usec) / 1000.0;
}

// Elapsed time between the two recorded timestamps in nanoseconds.
// The underlying values have microsecond resolution, hence the factor 1000.
uint64_t GetElapsedNanoseconds() {
    const auto sec_delta = end_.tv_sec - start_.tv_sec;
    const auto usec_delta = end_.tv_usec - start_.tv_usec;
    return sec_delta * 1000000000ULL + usec_delta * 1000ULL;
}

private:
struct timeval start_;
struct timeval end_;
};

#endif
#endif // USE_TSC