Skip to content

Commit 50c9788

Browse files
committed
completely rewrite memory ordering
- remove memory bariers - add missing `std::atomic` - make more operations with `std::memory_order_relaxed`
1 parent 0c60365 commit 50c9788

File tree

2 files changed

+38
-35
lines changed

2 files changed

+38
-35
lines changed

ScanFile.cpp

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
#include <assert.h>
44

55
#include <algorithm>
6-
#include <atomic> // for std::atomic_thread_fence
76

8-
#include <windows.h>
97

108
namespace
119
{
@@ -251,14 +249,14 @@ bool CScanFile::LockFreecInit()
251249
return false;
252250
}
253251

254-
this->_threadFinishSpinlock = false;
255-
this->_threadOperationReadStartSpinlock = false;
256-
this->_threadOperationReadCompletedSpinlock = false;
252+
this->_threadFinishSpinlock.store(false, std::memory_order_relaxed);
253+
this->_threadOperationReadStartSpinlock.store(false, std::memory_order_relaxed);
254+
this->_threadOperationReadCompletedSpinlock.store(false, std::memory_order_relaxed);
257255
this->_pThreadReadBuffer = nullptr;
258256
this->_threadReadBufferSize = 0;
259257
this->_threadActuallyReadBytes = 0;
260258
this->_threadReadSucceeded = false;
261-
std::atomic_thread_fence(std::memory_order_release);
259+
// no need to synchronize before worker thread is started
262260

263261
// This is a dirty hack for speedup inter-thread communication on hyperthreading CPUs
264262
#if ENABLE_THREAD_PREFERRED_AFFINITY
@@ -305,7 +303,7 @@ void CScanFile::LockFreecClean()
305303
{
306304
if (this->_hThread != nullptr)
307305
{
308-
this->_threadFinishSpinlock = true;
306+
this->_threadFinishSpinlock.store(true, std::memory_order_relaxed); // we won't reorder after WaitForSingleObject
309307
WaitForSingleObject(this->_hThread, INFINITE); // ignore return value in this case
310308
this->_hThread = nullptr;
311309
}
@@ -318,17 +316,17 @@ bool CScanFile::LockFreeReadStart(char* const buffer, const size_t bufferLength)
318316
{
319317
return false;
320318
}
321-
322319
this->_threadOperationInProgress = true;
323-
this->_threadOperationReadCompletedSpinlock = false;
320+
321+
assert(this->_threadOperationReadStartSpinlock.load(std::memory_order_relaxed) == false);
322+
assert(this->_threadOperationReadCompletedSpinlock.load(std::memory_order_relaxed) == false);
323+
324324
this->_pThreadReadBuffer = buffer;
325325
this->_threadReadBufferSize = bufferLength;
326326
this->_threadActuallyReadBytes = 0;
327327
this->_threadReadSucceeded = false;
328-
std::atomic_thread_fence(std::memory_order_release);
329328

330-
this->_threadOperationReadStartSpinlock = true;
331-
std::atomic_thread_fence(std::memory_order_release);
329+
this->_threadOperationReadStartSpinlock.store(true, std::memory_order_release);
332330

333331
return true;
334332
}
@@ -340,18 +338,18 @@ bool CScanFile::LockFreeReadWait(size_t& readBytes)
340338
{
341339
return false;
342340
}
343-
344341
this->_threadOperationInProgress = false;
345-
std::atomic_thread_fence(std::memory_order_release);
346342

347-
std::atomic_thread_fence(std::memory_order_acquire);
348-
while (!this->_threadOperationReadCompletedSpinlock)
343+
// Wait for _threadOperationReadCompletedSpinlock == True and reset it:
344+
while (true)
349345
{
350-
// nothing
351-
std::atomic_thread_fence(std::memory_order_acquire);
346+
bool operationCompleted = true;
347+
if (this->_threadOperationReadCompletedSpinlock.compare_exchange_weak(operationCompleted, false, std::memory_order_acquire, std::memory_order_relaxed))
348+
{
349+
break;
350+
}
352351
}
353352

354-
std::atomic_thread_fence(std::memory_order_acquire);
355353
readBytes = this->_threadActuallyReadBytes;
356354
if (!this->_threadReadSucceeded)
357355
{
@@ -365,31 +363,27 @@ void CScanFile::LockFreeThreadProc()
365363
{
366364
while (true)
367365
{
368-
std::atomic_thread_fence(std::memory_order_acquire);
369-
if (this->_threadFinishSpinlock)
366+
if (this->_threadFinishSpinlock.load(std::memory_order_relaxed))
370367
{
371368
// thread exit signal is caught
372369
break;
373370
}
374371

375-
if (!this->_threadOperationReadStartSpinlock)
372+
// Check for _threadOperationReadStartSpinlock == True and reset it:
373+
bool operationStarted = true;
374+
if (!this->_threadOperationReadStartSpinlock.compare_exchange_weak(operationStarted, false, std::memory_order_acquire, std::memory_order_relaxed))
376375
{
377376
// nothing to do
378377
continue;
379378
}
380379

381-
std::atomic_thread_fence(std::memory_order_acquire);
382-
383380
size_t readBytes = 0;
384381
const bool readOk = this->Read(this->_pThreadReadBuffer, this->_threadReadBufferSize, readBytes);
385382

386-
this->_threadOperationReadStartSpinlock = false;
387383
this->_threadActuallyReadBytes = readBytes;
388384
this->_threadReadSucceeded = readOk;
389-
std::atomic_thread_fence(std::memory_order_release);
390385

391-
this->_threadOperationReadCompletedSpinlock = true;
392-
std::atomic_thread_fence(std::memory_order_release);
386+
this->_threadOperationReadCompletedSpinlock.store(true, std::memory_order_release);
393387
}
394388
}
395389

ScanFile.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class CScanFile
3636
void LockFreeThreadProc();
3737

3838
protected:
39+
alignas(std::hardware_constructive_interference_size) // small speedup to get a bunch of variables into single cache line
3940
HANDLE _hFile = nullptr;
4041

4142
// For memory mapping:
@@ -50,16 +51,24 @@ class CScanFile
5051

5152
// Separate thread + Lock free:
5253
HANDLE _hThread = nullptr;
53-
bool _threadOperationInProgress = false; // for protection against wrong API usage, not for use in a worker thread, no memory protection
54-
// Spin lock variables:
55-
// We could use kernel-mode events instead of spin locks, it will save CPU, but will loose time during synchronization (switch to kernel).
56-
alignas(std::hardware_constructive_interference_size) // small speedup to get a bunch of variables into single cache line
57-
bool _threadFinishSpinlock = false;
58-
bool _threadOperationReadStartSpinlock = false;
59-
bool _threadOperationReadCompletedSpinlock = false;
54+
55+
// for protection against wrong API usage, not for use in a worker thread, no memory protection:
56+
// this is not about synchronization, but about correct class method call sequence
57+
bool _threadOperationInProgress = false;
58+
6059
// other data protected by spin locks:
6160
char* _pThreadReadBuffer = nullptr;
6261
size_t _threadReadBufferSize = 0;
6362
size_t _threadActuallyReadBytes = 0;
6463
bool _threadReadSucceeded = false;
64+
65+
// Spin lock variables:
66+
67+
// We could use kernel-mode events instead of spin locks, it will save CPU, but will loose time during synchronization (switch to kernel).
68+
alignas(std::hardware_destructive_interference_size)
69+
std::atomic<bool> _threadFinishSpinlock = ATOMIC_VAR_INIT(false);
70+
alignas(std::hardware_destructive_interference_size)
71+
std::atomic<bool> _threadOperationReadStartSpinlock = ATOMIC_VAR_INIT(false);
72+
alignas(std::hardware_destructive_interference_size)
73+
std::atomic<bool> _threadOperationReadCompletedSpinlock = ATOMIC_VAR_INIT(false);
6574
};

0 commit comments

Comments
 (0)