diff --git a/include/mallocMC/creationPolicies/FlatterScatter/AccessBlock.hpp b/include/mallocMC/creationPolicies/FlatterScatter/AccessBlock.hpp index 70e60bf7..64493b04 100644 --- a/include/mallocMC/creationPolicies/FlatterScatter/AccessBlock.hpp +++ b/include/mallocMC/creationPolicies/FlatterScatter/AccessBlock.hpp @@ -251,6 +251,16 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc template ALPAKA_FN_INLINE ALPAKA_FN_ACC auto destroy(TAcc const& acc, void* const pointer) -> void { + // CAUTION: This memfence is of utmost importance! As we are allowing a re-use of the chunk we're about to + // free, we need to make sure that any memory operation from the previous thread is executed before we can + // safely consider it free. If this is missing, an extended (non-atomic) write operation might not yet have + // finished when we unset the bit. In such a case, another thread might start using the memory while we're + // still writing to it, thus corrupting the new thread's data. It might even lead to us overwriting the + // bitmask itself, if the chunk size (and thereby the extent of the bitmask) changes before we finish. + // (The latter scenario might be excluded by other mem_fences in the code.) If a read is pending, the old + // thread might read data from the new thread leading to inconsistent information in the first thread. + alpaka::mem_fence(acc, alpaka::memory_scope::Device{}); + auto const index = pageIndex(pointer); if(index >= static_cast(numPages()) || index < 0) { diff --git a/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp b/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp index 3f0bf82c..8b359964 100644 --- a/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp +++ b/include/mallocMC/creationPolicies/FlatterScatter/PageInterpretation.hpp @@ -166,6 +166,7 @@ namespace mallocMC::CreationPolicies::FlatterScatterAlloc "to a valid chunk or it is not marked as allocated."}; } #endif // NDEBUG + bitField().unset(acc, chunkIndex); } diff --git a/include/mallocMC/creationPolicies/Scatter.hpp b/include/mallocMC/creationPolicies/Scatter.hpp index 0f1aea5f..d9779256 100644 --- a/include/mallocMC/creationPolicies/Scatter.hpp +++ b/include/mallocMC/creationPolicies/Scatter.hpp @@ -969,6 +969,18 @@ namespace mallocMC { if(mem == 0) return; + + // CAUTION: This memfence is of utmost importance! As we are allowing a re-use of the chunk we're about + // to free, we need to make sure that any memory operation from the previous thread is executed before + // we can safely consider it free. If this is missing, an extended (non-atomic) write operation might + // not yet have finished when we unset the bit. In such a case, another thread might start using the + // memory while we're still writing to it, thus corrupting the new thread's data. It might even lead to + // us overwriting the bitmask itself, if the chunk size (and thereby the extent of the bitmask) changes + // before we finish. (The latter scenario might be excluded by other mem_fences in the code.) If a read + // is pending, the old thread might read data from the new thread leading to inconsistent information + // in the first thread. + alpaka::mem_fence(acc, alpaka::memory_scope::Device{}); + // lets see on which page we are on auto const page = static_cast(((char*) mem - (char*) _page) / pagesize); /* Emulate atomic read.