diff --git a/src/main.cpp b/src/main.cpp
index 4867e2d..068e59c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -7,9 +7,11 @@
 #include
 #include
 #include
+
 #include "resultfilename.h"
 #include "timer.h"
 #include "test.h"
+#include "setupScheduler.h"
 
 const unsigned kIterationPerDigit = 100000;
 const unsigned kIterationForRandom = 100;
@@ -135,7 +137,7 @@ void BenchSequential(void(*f)(T, char*), const char* type, const char* fname, FI
     for (int digit = 1; digit <= Traits::kMaxDigit; digit++) {
         T end = (digit == Traits::kMaxDigit) ? std::numeric_limits::max() : start * 10;
 
-        double duration = std::numeric_limits::max();
+        uint64_t duration = std::numeric_limits<uint64_t>::max();
         for (unsigned trial = 0; trial < kTrial; trial++) {
             T v = start;
             T sign = 1;
@@ -148,14 +150,14 @@ void BenchSequential(void(*f)(T, char*), const char* type, const char* fname, FI
                     v = start;
             }
             timer.Stop();
-            duration = std::min(duration, timer.GetElapsedMilliseconds());
+            duration = std::min(duration, timer.GetElapsedNanoseconds());
         }
 
-        duration *= 1e6 / kIterationPerDigit; // convert to nano second per operation
+        double duration_per_op = static_cast<double>(duration) / kIterationPerDigit; // convert to nano second per operation
 
-        minDuration = std::min(minDuration, duration);
-        maxDuration = std::max(maxDuration, duration);
-        fprintf(fp, "%s_sequential,%s,%d,%f\n", type, fname, digit, duration);
+        minDuration = std::min(minDuration, duration_per_op);
+        maxDuration = std::max(maxDuration, duration_per_op);
+        fprintf(fp, "%s_sequential,%s,%d,%f\n", type, fname, digit, duration_per_op);
         start = end;
     }
 
@@ -209,7 +211,7 @@ void BenchRandom(void(*f)(T, char*), const char* type, const char* fname, FILE*
     T* data = RandomData::GetData();
     size_t n = RandomData::kCount;
 
-    double duration = std::numeric_limits::max();
+    uint64_t duration = std::numeric_limits<uint64_t>::max();
     for (unsigned trial = 0; trial < kTrial; trial++) {
         Timer timer;
         timer.Start();
@@ -219,12 +221,12 @@ void BenchRandom(void(*f)(T, char*), const char* type, const char* fname, FILE*
                 f(data[i], buffer);
         timer.Stop();
-        duration = std::min(duration, timer.GetElapsedMilliseconds());
+        duration = std::min(duration, timer.GetElapsedNanoseconds());
     }
 
-    duration *= 1e6 / (kIterationForRandom * n); // convert to nano second per operation
-    fprintf(fp, "%s_random,%s,0,%f\n", type, fname, duration);
+    double duration_per_op = static_cast<double>(duration) / (kIterationForRandom * n); // convert to nano second per operation
+    fprintf(fp, "%s_random,%s,0,%f\n", type, fname, duration_per_op);
 
-    printf("%8.3fns\n", duration);
+    printf("%8.3fns\n", duration_per_op);
 }
 
 template
@@ -283,5 +285,6 @@ int main() {
     });
 
     VerifyAll();
+    setupScheduler();
     BenchAll();
 }
diff --git a/src/setupScheduler.cpp b/src/setupScheduler.cpp
new file mode 100644
index 0000000..f461607
--- /dev/null
+++ b/src/setupScheduler.cpp
@@ -0,0 +1,108 @@
+#include "setupScheduler.h"
+
+// NOTE(review): the system header names were lost when this patch was
+// extracted; the list below is reconstructed from the identifiers this file
+// uses and should be confirmed against the original commit.
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#elif defined(__linux)
+#include <err.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <unistd.h>
+#endif
+
+// Reports whether a debugger is attached to the current process.
+// Windows asks IsDebuggerPresent(); Linux looks for a non-zero TracerPid in
+// /proc/self/status; every other platform reports false.
+static bool debuggerIsAttached(void)
+{
+#if defined(_WIN32) || defined(_WIN64)
+    return IsDebuggerPresent() != 0;
+#elif defined(__linux)
+    char buf[4096];
+
+    const int status_fd = open("/proc/self/status", O_RDONLY);
+    if (status_fd == -1)
+        return false;
+
+    const ssize_t num_read = read(status_fd, buf, sizeof(buf) - 1);
+    close(status_fd);
+
+    if (num_read <= 0)
+        return false;
+
+    buf[num_read] = '\0';
+    constexpr char tracerPidString[] = "TracerPid:";
+    const auto tracer_pid_ptr = strstr(buf, tracerPidString);
+    if (!tracer_pid_ptr)
+        return false;
+
+    // Skip the whitespace after the label; the first other character decides.
+    for (const char* characterPtr = tracer_pid_ptr + sizeof(tracerPidString) - 1; characterPtr <= buf + num_read; ++characterPtr)
+    {
+        // <ctype.h> classifiers need a value representable as unsigned char.
+        const unsigned char c = static_cast<unsigned char>(*characterPtr);
+        if (isspace(c))
+            continue;
+        return isdigit(c) != 0 && c != '0';
+    }
+#else
+    #warning "debugger detection not implemented for this platform"
+#endif
+    return false;
+}
+
+// Raises the process to real-time priority and pins it to CPU core fixToCPU.
+// Does nothing when a debugger is attached. On Linux any failure terminates
+// the process through err()/errx().
+void setupScheduler(void)
+{
+    int fixToCPU = 3;
+
+    // Call the function: the bare name decays to a non-null function pointer,
+    // which is always true and would skip the setup unconditionally.
+    if (debuggerIsAttached())
+        return;
+#if defined(_WIN32) || defined(_WIN64)
+    {
+        SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
+        // Build the mask in DWORD_PTR so the shift is done at mask width.
+        SetThreadAffinityMask(GetCurrentThread(), static_cast<DWORD_PTR>(1) << fixToCPU);
+        SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
+    }
+#elif defined(__linux)
+    {
+        struct sched_param param = {
+            .sched_priority = 99, // Highest real-time priority
+        };
+        int policy = SCHED_FIFO;
+
+        // Check if the priority is valid for the chosen policy
+        if (param.sched_priority < sched_get_priority_min(policy) || //
+            param.sched_priority > sched_get_priority_max(policy)) {
+            // errx, not err: errno is meaningless for this range check.
+            errx(EXIT_FAILURE, "Priority %d is not valid for policy %d", param.sched_priority, policy);
+        }
+
+        if (sched_setscheduler(0, policy, &param) == -1) {
+            err(EXIT_FAILURE, "sched_setscheduler");
+        }
+
+        { // Set CPU affinity to the specified CPU core
+            cpu_set_t set;
+            CPU_ZERO(&set); // cpu_set_t starts uninitialised; clear it first
+            CPU_SET(fixToCPU, &set);
+            if (sched_setaffinity(0, sizeof(set), &set) == -1)
+                err(EXIT_FAILURE, "sched_setaffinity");
+        }
+    }
+#else
+    (void)fixToCPU;
+    #warning "Real-time scheduling not implemented for this platform"
+#endif
+}
diff --git a/src/setupScheduler.h b/src/setupScheduler.h
new file mode 100644
index 0000000..52bd9f0
--- /dev/null
+++ b/src/setupScheduler.h
@@ -0,0 +1,8 @@
+#ifndef SETUPSCHEDULER_H
+#define SETUPSCHEDULER_H
+
+// Raises the process to real-time priority and pins it to one CPU core
+// (Windows and Linux only); does nothing when a debugger is attached.
+void setupScheduler(void);
+
+#endif /* SETUPSCHEDULER_H */
diff --git a/src/timer.h b/src/timer.h
index c7ad22f..4cfe8e8 100644
--- a/src/timer.h
+++ b/src/timer.h
@@ -1,6 +1,83 @@
 #pragma once
 
-#ifdef _WIN32
+#include <stdint.h>
+
+// #define USE_TSC
+
+#ifdef USE_TSC
+
+#if defined(__i386) || defined(__x86_64)
+
+#if defined(__GNUC__)
+#include <cpuid.h>
+#include <x86intrin.h>
+#define _ReadWriteBarrier __sync_synchronize
+#else
+#include <intrin.h>
+#endif
+
+class Timer {
+public:
+    Timer() : start_(), end_() {
+
+#if defined(__GNUC__)
+        unsigned int eax, ebx, ecx, edx;
+        if (!__get_cpuid(0x15, &eax, &ebx, &ecx, &edx) || eax == 0 || ebx == 0 || ecx == 0) {
+            // Fallback to 2.5 GHz if CPUID leaf 0x15 is missing or incomplete (a zero field would give a zero divisor)
+            tsc_frequency_ = 2500000000ULL;
+        } else {
+            tsc_frequency_ = (uint64_t)ecx * ebx / eax;
+        }
+#else
+        int info[4];
+        __cpuidex(info, 0x15, 0);
+        if (info[0] == 0 || info[1] == 0 || info[2] == 0) {
+            // Fallback to 2.5 GHz if CPUID leaf 0x15 is missing or incomplete (a zero field would give a zero divisor)
+            tsc_frequency_ = 2500000000ULL;
+        } else {
+            tsc_frequency_ = (uint64_t)info[2] * info[1] / info[0];
+        }
+#endif
+    }
+
+    void Start() {
+        int info[4];
+
+        _ReadWriteBarrier();
+        __cpuidex(info, 0, 0);
+        start_ = __rdtsc();
+        _ReadWriteBarrier();
+    }
+
+    void Stop() {
+        int info[4];
+        unsigned int aux;
+        _ReadWriteBarrier();
+        end_ = __rdtscp(&aux);
+        __cpuidex(info, 0, 0);
+        _ReadWriteBarrier();
+    }
+
+    double GetElapsedMilliseconds() {
+        return (end_ - start_) * 1000.0 / tsc_frequency_;
+    }
+    uint64_t GetElapsedNanoseconds() {
+        return (end_ - start_) * 1000000000ULL / tsc_frequency_;
+    }
+
+private:
+    uint64_t start_;
+    uint64_t end_;
+    uint64_t tsc_frequency_;
+};
+
+#else
+#error "TSC timer is only supported on x86/x86_64 architectures"
+#endif
+
+#else // USE_TSC
+
+#if defined(_WIN32) || defined(_WIN64)
 
 #define WIN32_LEAN_AND_MEAN
 #include
@@ -8,6 +85,7 @@
 class Timer {
 public:
     Timer() : start_(), end_() {
+        QueryPerformanceFrequency(&freq_);
     }
 
     void Start() {
@@ -19,20 +97,55 @@ class Timer {
     }
 
     double GetElapsedMilliseconds() {
-        LARGE_INTEGER freq;
-        QueryPerformanceFrequency(&freq);
-        return (end_.QuadPart - start_.QuadPart) * 1000.0 / freq.QuadPart;
+        return (end_.QuadPart - start_.QuadPart) * 1000.0 / freq_.QuadPart;
+    }
+
+    uint64_t GetElapsedNanoseconds() {
+        return (end_.QuadPart - start_.QuadPart) * 1000000000ULL / freq_.QuadPart;
     }
 
 private:
     LARGE_INTEGER start_;
     LARGE_INTEGER end_;
+    LARGE_INTEGER freq_;
 };
 
 // Undefine Windows bad macros
 #undef min
 #undef max
 
+#elif defined(__linux)
+
+#include <time.h>
+
+class Timer {
+public:
+    Timer() : start_(), end_() {
+    }
+
+    void Start() {
+        clock_gettime(CLOCK_MONOTONIC_RAW, &start_);
+    }
+
+    void Stop() {
+        clock_gettime(CLOCK_MONOTONIC_RAW, &end_);
+    }
+
+    double GetElapsedMilliseconds() {
+        return (end_.tv_sec - start_.tv_sec) * 1000.0
+            + (end_.tv_nsec - start_.tv_nsec) / 1000000.0;
+    }
+
+    uint64_t GetElapsedNanoseconds() {
+        return (end_.tv_sec - start_.tv_sec) * 1000000000ULL
+            + (end_.tv_nsec - start_.tv_nsec);
+    }
+
+private:
+    struct timespec start_;
+    struct timespec end_;
+};
+
 #else
 
 #include
@@ -55,9 +168,15 @@ class Timer {
             + (end_.tv_usec - start_.tv_usec) / 1000.0;
     }
 
+    uint64_t GetElapsedNanoseconds() {
+        return (end_.tv_sec - start_.tv_sec) * 1000000000ULL
+            + (end_.tv_usec - start_.tv_usec) * 1000ULL;
+    }
+
 private:
     struct timeval start_;
     struct timeval end_;
 };
 
 #endif
+#endif // USE_TSC