From c6994b92984b37ad3da2258e034dd3b7cc1f2cc8 Mon Sep 17 00:00:00 2001 From: Julian Lenz Date: Wed, 5 Feb 2025 17:16:21 +0100 Subject: [PATCH] Add memfence before unsetting bit --- .../FlatterScatter/PageInterpretation.hpp | 10 ++++++++++ include/mallocMC/creationPolicies/Scatter.hpp | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp b/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp index 3f0bf82c..6f5de188 100644 --- a/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp +++ b/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp @@ -166,6 +166,16 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc "to a valid chunk or it is not marked as allocated."}; } #endif // NDEBUG + + // CAUTION: This memfence is of utmost importance! As we are allowing a re-use of the chunk we're about to + // free, we need to make sure that any memory operation from the previous thread is executed before we can + // safely consider it free. If this is missing, an extended (non-atomic) write operation might not yet have + // finished when we unset the bit. In such a case, another thread might start using the memory while we're + // still writing to it, thus corrupting the new thread's data. It might even lead to us overwriting the + // bitmask itself, if the chunk size (and thereby the extent of the bitmask) changes before we finish. + // (The latter scenario might be excluded by other mem_fences in the code.) If a read is pending, the old + // thread might read data from the new thread leading to inconsistent information in the first thread. + alpaka::mem_fence(acc, alpaka::memory_scope::Device{}); bitField().unset(acc, chunkIndex); } diff --git a/include/mallocMC/creationPolicies/Scatter.hpp b/include/mallocMC/creationPolicies/Scatter.hpp index 0f1aea5f..6153e429 100644 --- a/include/mallocMC/creationPolicies/Scatter.hpp +++ b/include/mallocMC/creationPolicies/Scatter.hpp @@ -739,6 +739,16 @@ namespace mallocMC template ALPAKA_FN_ACC void deallocChunked(AlpakaAcc const& acc, void* mem, uint32 page, uint32 chunksize) { + // CAUTION: This memfence is of utmost importance! As we are allowing a re-use of the chunk we're about + // to free, we need to make sure that any memory operation from the previous thread is executed before + // we can safely consider it free. If this is missing, an extended (non-atomic) write operation might + // not yet have finished when we unset the bit. In such a case, another thread might start using the + // memory while we're still writing to it, thus corrupting the new thread's data. It might even lead to + // us overwriting the bitmask itself, if the chunk size (and thereby the extent of the bitmask) changes + // before we finish. (The latter scenario might be excluded by other mem_fences in the code.) If a read + // is pending, the old thread might read data from the new thread leading to inconsistent information + // in the first thread. + alpaka::mem_fence(acc, alpaka::memory_scope::Device{}); auto const inpage_offset = static_cast((char*) mem - _page[page].data); if(chunksize <= HierarchyThreshold) {