diff --git a/CMakeLists.txt b/CMakeLists.txt
index 076717ce6..67b2ef09d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,12 @@ option(EXPOSE_EXTERNAL_RESERVE "Expose an interface to reserve memory using the
 option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF)
 set(CACHE_FRIENDLY_OFFSET OFF CACHE STRING "Base offset to place linked-list nodes.")
 
+if ((CMAKE_BUILD_TYPE STREQUAL "Release") AND (NOT SNMALLOC_CI_BUILD))
+  option(USE_POSIX_COMMIT_CHECKS "Instrument Posix PAL to check for access to unused blocks of memory." Off)
+else ()
+  option(USE_POSIX_COMMIT_CHECKS "Instrument Posix PAL to check for access to unused blocks of memory." On)
+endif ()
+
 # Provide as macro so other projects can reuse
 macro(warnings_high)
   if(MSVC)
@@ -111,6 +117,11 @@ if(CACHE_FRIENDLY_OFFSET)
   target_compile_definitions(snmalloc_lib INTERFACE -DCACHE_FRIENDLY_OFFSET=${CACHE_FRIENDLY_OFFSET})
 endif()
 
+if(USE_POSIX_COMMIT_CHECKS)
+  target_compile_definitions(snmalloc_lib INTERFACE -DUSE_POSIX_COMMIT_CHECKS)
+endif()
+
+
 # To build with just the header library target define SNMALLOC_ONLY_HEADER_LIBRARY
 # in containing Cmake file.
 if(NOT DEFINED SNMALLOC_ONLY_HEADER_LIBRARY)
diff --git a/README.md b/README.md
index a3d639572..8b39dc9ec 100644
--- a/README.md
+++ b/README.md
@@ -163,12 +163,12 @@ pages, rather than zeroing them synchronously in this call
 
 ```c++
 template<bool committed>
-void* reserve(size_t* size, size_t align);
+void* reserve(size_t size, size_t align);
 template<bool committed>
-void* reserve(const size_t* size) noexcept;
+void* reserve(size_t size) noexcept;
 ```
 Only one of these needs to be implemented, depending on whether the underlying
-system can provide strongly (e.g. 16MiB) aligned memory regions.
+system can provide strongly aligned memory regions.
 If the system guarantees only page alignment, implement the second and snmalloc
 will over-allocate and then trim the requested region.
 If the system provides strong alignment, implement the first to return memory
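
As context for the API change above (an illustration, not part of the patch): a PAL that can only hand back page-aligned memory now implements just the single-argument overload, roughly like the hypothetical `toy_reserve` below. Over-allocation and trimming to the stronger alignment is handled by the caller, as the largealloc.h changes later in this patch show.

```c++
#include <sys/mman.h>
#include <cstddef>

// Hypothetical sketch of the value-based contract: return at least `size`
// bytes of page-aligned memory, or nullptr on failure.
template<bool committed>
void* toy_reserve(std::size_t size) noexcept
{
  void* p = mmap(
    nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return (p == MAP_FAILED) ? nullptr : p;
}
```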
diff --git a/src/mem/alloc.h b/src/mem/alloc.h
index 8d1ae1228..76434e6c0 100644
--- a/src/mem/alloc.h
+++ b/src/mem/alloc.h
@@ -1070,8 +1070,7 @@ namespace snmalloc
       bool was_full = super->is_full();
       SlabList* sl = &small_classes[sizeclass];
       Slab* slab = Metaslab::get_slab(p);
-      Superslab::Action a =
-        slab->dealloc_slow(sl, super, p);
+      Superslab::Action a = slab->dealloc_slow(sl, super, p);
       if (likely(a == Superslab::NoSlabReturn))
         return;
       stats().sizeclass_dealloc_slab(sizeclass);
diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h
index 93c5cffba..164382cf5 100644
--- a/src/mem/largealloc.h
+++ b/src/mem/largealloc.h
@@ -63,16 +63,12 @@ namespace snmalloc
 
     void new_block()
     {
-      size_t size = SUPERSLAB_SIZE;
-      void* r = reserve<false>(&size, SUPERSLAB_SIZE);
-
-      if (size < SUPERSLAB_SIZE)
-        error("out of memory");
-
+      // Reserve the smallest large_class which is SUPERSLAB_SIZE
+      void* r = reserve<false>(0);
       PAL::template notify_using<NoZero>(r, OS_PAGE_SIZE);
 
       bump = r;
-      remaining = size;
+      remaining = SUPERSLAB_SIZE;
     }
 
     /**
@@ -126,6 +122,24 @@ namespace snmalloc
       lazy_decommit_guard.clear();
     }
 
+    void push_space(address_t start, size_t large_class)
+    {
+      void* p = pointer_cast<void>(start);
+      if (large_class > 0)
+        PAL::template notify_using<NoZero>(p, OS_PAGE_SIZE);
+      else
+      {
+        if (decommit_strategy == DecommitSuperLazy)
+        {
+          PAL::template notify_using<NoZero>(p, OS_PAGE_SIZE);
+          p = new (p) Decommittedslab();
+        }
+        else
+          PAL::template notify_using<NoZero>(p, SUPERSLAB_SIZE);
+      }
+      large_stack[large_class].push(reinterpret_cast<Largeslab*>(p));
+    }
+
   public:
     /**
      * Stack of large allocations that have been returned for reuse.
@@ -202,35 +216,68 @@ namespace snmalloc
     }
 
     template<bool committed>
-    void* reserve(size_t* size, size_t align) noexcept
+    void* reserve(size_t large_class) noexcept
     {
+      size_t size = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
+      size_t align = size;
+
       if constexpr (pal_supports<AlignedAllocation, PAL>)
       {
         return PAL::template reserve<committed>(size, align);
       }
       else
       {
-        size_t request = *size;
-        // Add align, so we can guarantee to provide at least size.
-        request += align;
-        // Alignment must be a power of 2.
-        assert(align == bits::next_pow2(align));
+        // Reserve 4 times the amount, and put aligned leftovers into the
+        // large_stack
+        size_t request = bits::max(size * 4, SUPERSLAB_SIZE * 8);
+        void* p = PAL::template reserve<false>(request);
 
-        void* p = PAL::template reserve<committed>(&request);
+        address_t p0 = address_cast(p);
+        address_t start = bits::align_up(p0, align);
+        address_t p1 = p0 + request;
+        address_t end = start + size;
 
-        *size = request;
-        auto p0 = address_cast(p);
-        auto start = bits::align_up(p0, align);
+        for (; end < bits::align_down(p1, align); end += size)
+        {
+          push_space(end, large_class);
+        }
+
+        // Put offcuts before alignment into the large stack
+        address_t offcut_end = start;
+        address_t offcut_start;
+        for (size_t i = large_class; i > 0;)
+        {
+          i--;
+          size_t offcut_align = bits::one_at_bit(SUPERSLAB_BITS) << i;
+          offcut_start = bits::align_up(p0, offcut_align);
+          if (offcut_start != offcut_end)
+          {
+            push_space(offcut_start, i);
+            offcut_end = offcut_start;
+          }
+        }
 
-        if (start > p0)
+        // Put offcuts after returned block into the large stack
+        offcut_start = end;
+        for (size_t i = large_class; i > 0;)
         {
-          uintptr_t end = bits::align_down(p0 + request, align);
-          *size = end - start;
-          PAL::notify_not_using(p, start - p0);
-          PAL::notify_not_using(pointer_cast<void>(end), (p0 + request) - end);
-          p = pointer_cast<void>(start);
+          i--;
+          auto offcut_align = bits::one_at_bit(SUPERSLAB_BITS) << i;
+          offcut_end = bits::align_down(p1, offcut_align);
+          if (offcut_start != offcut_end)
+          {
+            push_space(offcut_start, i);
+            offcut_start = offcut_end;
+          }
         }
-        return p;
+
+        // printf("Alloc %zx (size = %zx)\n", start, size);
+
+        void* result = pointer_cast<void>(start);
+        if (committed)
+          PAL::template notify_using<NoZero>(result, size);
+
+        return result;
       }
     }
@@ -281,9 +328,6 @@ namespace snmalloc
   template<class MemoryProvider>
   class LargeAlloc
   {
-    void* reserved_start = nullptr;
-    void* reserved_end = nullptr;
-
   public:
     // This will be a zero-size structure if stats are not enabled.
     Stats stats;
@@ -292,38 +336,6 @@ namespace snmalloc
 
     LargeAlloc(MemoryProvider& mp) : memory_provider(mp) {}
 
-    template<AllowReserve allow_reserve>
-    bool reserve_memory(size_t need, size_t add)
-    {
-      assert(reserved_start <= reserved_end);
-
-      /*
-       * Spell this comparison in terms of pointer subtraction like this,
-       * rather than "reserved_start + need < reserved_end" because the
-       * sum might not be representable on CHERI.
-       */
-      if (pointer_diff(reserved_start, reserved_end) < need)
-      {
-        if constexpr (allow_reserve == YesReserve)
-        {
-          stats.segment_create();
-          reserved_start =
-            memory_provider.template reserve<false>(&add, SUPERSLAB_SIZE);
-          reserved_end = pointer_offset(reserved_start, add);
-          reserved_start = pointer_align_up<SUPERSLAB_SIZE>(reserved_start);
-
-          if (add < need)
-            return false;
-        }
-        else
-        {
-          return false;
-        }
-      }
-
-      return true;
-    }
-
     template<ZeroMem zero_mem, AllowReserve allow_reserve>
     void* alloc(size_t large_class, size_t size)
     {
@@ -337,23 +349,8 @@ namespace snmalloc
 
       if (p == nullptr)
       {
-        assert(reserved_start <= reserved_end);
-        size_t add;
-
-        if ((rsize + SUPERSLAB_SIZE) < RESERVE_SIZE)
-          add = RESERVE_SIZE;
-        else
-          add = rsize + SUPERSLAB_SIZE;
-
-        if (!reserve_memory<allow_reserve>(rsize, add))
-          return nullptr;
-
-        p = reserved_start;
-        reserved_start = pointer_offset(p, rsize);
-
-        stats.superslab_fresh();
-        // All memory is zeroed since it comes from reserved space.
-        memory_provider.template notify_using<NoZero>(p, size);
+        p = memory_provider.template reserve<false>(large_class);
+        memory_provider.template notify_using<zero_mem>(p, size);
       }
       else
       {
@@ -389,6 +386,7 @@ namespace snmalloc
         }
       }
 
+      assert(p == pointer_align_up(p, rsize));
       return p;
     }
 
@@ -403,13 +401,14 @@ namespace snmalloc
       }
 
       // Cross-reference largealloc's alloc() decommitted condition.
-      if ((decommit_strategy != DecommitNone)
-          && (large_class != 0 || decommit_strategy == DecommitSuper))
+      if (
+        (decommit_strategy != DecommitNone) &&
+        (large_class != 0 || decommit_strategy == DecommitSuper))
       {
         size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
 
         memory_provider.notify_not_using(
-            pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
+          pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
       }
 
       stats.superslab_push();
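
To make the offcut logic above concrete, here is a standalone sketch, not part of the patch, using a hypothetical base address and assuming `SUPERSLAB_BITS = 24` (16MiB superslabs) purely for illustration. A 4x over-reservation is carved into one aligned block that is returned, whole aligned blocks pushed to `large_stack[large_class]`, and misaligned offcuts split into smaller power-of-two classes.

```c++
#include <cstddef>
#include <cstdint>
#include <cstdio>

static constexpr std::size_t SUPERSLAB_BITS = 24; // illustrative value only

static std::uintptr_t align_up(std::uintptr_t p, std::size_t a)
{
  return (p + a - 1) & ~(a - 1); // a must be a power of two
}

int main()
{
  std::size_t large_class = 2;
  std::size_t size = (std::size_t(1) << SUPERSLAB_BITS) << large_class; // 64MiB
  std::size_t request = size * 4; // the over-reservation made above
  std::uintptr_t p0 = 0x123450000;          // hypothetical unaligned base
  std::uintptr_t start = align_up(p0, size); // first size-aligned block
  // [p0, start) and [start + size, p0 + request) become offcuts, pushed
  // onto progressively smaller large_stack classes by push_space().
  std::printf(
    "reserved [%#zx, %#zx), returning [%#zx, %#zx)\n",
    (std::size_t)p0,
    (std::size_t)(p0 + request),
    (std::size_t)start,
    (std::size_t)(start + size));
}
```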
diff --git a/src/mem/sizeclass.h b/src/mem/sizeclass.h
index 825ebe023..a8b821bcc 100644
--- a/src/mem/sizeclass.h
+++ b/src/mem/sizeclass.h
@@ -179,8 +179,6 @@ namespace snmalloc
   {
     // Client responsible for checking alignment is not zero
     assert(alignment != 0);
-    // Client responsible for checking alignment is not above SUPERSLAB_SIZE
-    assert(alignment <= SUPERSLAB_SIZE);
     // Client responsible for checking alignment is a power of two
     assert(bits::next_pow2(alignment) == alignment);
diff --git a/src/mem/slab.h b/src/mem/slab.h
index ce71f7636..00058aed8 100644
--- a/src/mem/slab.h
+++ b/src/mem/slab.h
@@ -178,8 +178,8 @@ namespace snmalloc
     // This does not need to remove the "use" as done by the fast path.
     // Returns a complex return code for managing the superslab meta data.
     // i.e. This deallocation could make an entire superslab free.
-    SNMALLOC_SLOW_PATH typename Superslab::Action dealloc_slow(
-      SlabList* sl, Superslab* super, void* p)
+    SNMALLOC_SLOW_PATH typename Superslab::Action
+    dealloc_slow(SlabList* sl, Superslab* super, void* p)
     {
       Metaslab& meta = super->get_meta(this);
diff --git a/src/mem/superslab.h b/src/mem/superslab.h
index b0a036a98..8e3a7fc5b 100644
--- a/src/mem/superslab.h
+++ b/src/mem/superslab.h
@@ -84,7 +84,7 @@ namespace snmalloc
     {
       if (kind != Fresh)
       {
-      // If this wasn't previously Fresh, we need to zero some things.
+        // If this wasn't previously Fresh, we need to zero some things.
         used = 0;
         for (size_t i = 0; i < SLAB_COUNT; i++)
         {
@@ -105,7 +105,8 @@ namespace snmalloc
       {
         curr = (curr + meta[curr].next + 1) & (SLAB_COUNT - 1);
       }
-      if (curr != 0) abort();
+      if (curr != 0)
+        abort();
 
       for (size_t i = 0; i < SLAB_COUNT; i++)
       {
diff --git a/src/override/malloc.cc b/src/override/malloc.cc
index 2248cbc9f..9c238d3d8 100644
--- a/src/override/malloc.cc
+++ b/src/override/malloc.cc
@@ -107,9 +107,7 @@ extern "C"
   SNMALLOC_EXPORT void*
     SNMALLOC_NAME_MANGLE(memalign)(size_t alignment, size_t size)
   {
-    if (
-      (alignment == 0) || (alignment == size_t(-1)) ||
-      (alignment > SUPERSLAB_SIZE))
+    if ((alignment == 0) || (alignment == size_t(-1)))
     {
       errno = EINVAL;
       return nullptr;
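
With the SUPERSLAB_SIZE cap removed from `memalign` above, an alignment larger than a superslab is no longer rejected with EINVAL; the memory provider can now mint a block of the matching large size class instead. A usage sketch (hypothetical sizes, assuming the snmalloc override is linked in):

```c++
#include <malloc.h>
#include <cstddef>
#include <cstdio>

int main()
{
  // 128MiB alignment, previously above the SUPERSLAB_SIZE cutoff.
  void* p = memalign(std::size_t(1) << 27, 128);
  std::printf("memalign(1 << 27, 128) = %p\n", p); // non-null after this patch
  free(p);
}
```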
diff --git a/src/pal/pal_apple.h b/src/pal/pal_apple.h
index 87c3eb878..2e7554f6c 100644
--- a/src/pal/pal_apple.h
+++ b/src/pal/pal_apple.h
@@ -56,11 +56,11 @@ namespace snmalloc
      * See comment below.
      */
     template<bool committed>
-    void* reserve(const size_t* size)
+    void* reserve(size_t size)
     {
       void* p = mmap(
         nullptr,
-        *size,
+        size,
         PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANONYMOUS,
         pal_anon_id,
diff --git a/src/pal/pal_bsd_aligned.h b/src/pal/pal_bsd_aligned.h
index 61abcfb2b..17e97ab4c 100644
--- a/src/pal/pal_bsd_aligned.h
+++ b/src/pal/pal_bsd_aligned.h
@@ -27,7 +27,7 @@ namespace snmalloc
      * Reserve memory at a specific alignment.
      */
     template<bool committed>
-    void* reserve(const size_t* size, size_t align) noexcept
+    void* reserve(size_t size, size_t align) noexcept
     {
       // Alignment must be a power of 2.
       assert(align == bits::next_pow2(align));
@@ -38,7 +38,7 @@ namespace snmalloc
 
       void* p = mmap(
         nullptr,
-        *size,
+        size,
         PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(log2align),
         -1,
diff --git a/src/pal/pal_freebsd_kernel.h b/src/pal/pal_freebsd_kernel.h
index 3af8d14d4..914b73618 100644
--- a/src/pal/pal_freebsd_kernel.h
+++ b/src/pal/pal_freebsd_kernel.h
@@ -61,13 +61,12 @@ namespace snmalloc
     }
 
     template<bool committed>
-    void* reserve(size_t* size, size_t align)
+    void* reserve(size_t size, size_t align)
     {
-      size_t request = *size;
       vm_offset_t addr;
       if (vmem_xalloc(
             kernel_arena,
-            request,
+            size,
             align,
             0,
             0,
@@ -81,10 +80,10 @@ namespace snmalloc
       if (committed)
       {
         if (
-          kmem_back(kernel_object, addr, request, M_ZERO | M_WAITOK) !=
+          kmem_back(kernel_object, addr, size, M_ZERO | M_WAITOK) !=
           KERN_SUCCESS)
         {
-          vmem_xfree(kernel_arena, addr, request);
+          vmem_xfree(kernel_arena, addr, size);
           return nullptr;
         }
       }
diff --git a/src/pal/pal_open_enclave.h b/src/pal/pal_open_enclave.h
index 9af523594..0361d5378 100644
--- a/src/pal/pal_open_enclave.h
+++ b/src/pal/pal_open_enclave.h
@@ -18,7 +18,7 @@ namespace snmalloc
      * Bitmap of PalFeatures flags indicating the optional features that this
      * PAL supports.
      */
-    static constexpr uint64_t pal_features = AlignedAllocation;
+    static constexpr uint64_t pal_features = 0;
 
     static void error(const char* const str)
     {
@@ -26,7 +26,7 @@ namespace snmalloc
       UNUSED(str);
     }
 
     template<bool committed>
-    void* reserve(size_t* size, size_t align) noexcept
+    void* reserve(size_t size) noexcept
     {
       if (oe_base == 0)
       {
@@ -36,21 +36,18 @@ namespace snmalloc
       }
 
       void* old_base = oe_base;
-      void* old_base2 = old_base;
       void* next_base;
       auto end = __oe_get_heap_end();
       do
       {
-        old_base2 = old_base;
-        auto new_base = pointer_align_up(old_base, align);
-        next_base = pointer_offset(new_base, *size);
+        auto new_base = old_base;
+        next_base = pointer_offset(new_base, size);
 
         if (next_base > end)
           error("Out of memory");
 
       } while (!oe_base.compare_exchange_strong(old_base, next_base));
 
-      *size = pointer_diff(old_base2, next_base);
       return old_base;
     }
diff --git a/src/pal/pal_posix.h b/src/pal/pal_posix.h
index b484cf566..6ff5dc547 100644
--- a/src/pal/pal_posix.h
+++ b/src/pal/pal_posix.h
@@ -54,8 +54,12 @@ namespace snmalloc
     void notify_not_using(void* p, size_t size) noexcept
     {
       assert(is_aligned_block<OS_PAGE_SIZE>(p, size));
+#ifdef USE_POSIX_COMMIT_CHECKS
+      mprotect(p, size, PROT_NONE);
+#else
       UNUSED(p);
       UNUSED(size);
+#endif
     }
 
     /**
@@ -72,11 +76,13 @@ namespace snmalloc
       if constexpr (zero_mem == YesZero)
         static_cast<OS*>(this)->template zero<true>(p, size);
-      else
-      {
-        UNUSED(p);
-        UNUSED(size);
-      }
+
+#ifdef USE_POSIX_COMMIT_CHECKS
+      mprotect(p, size, PROT_READ | PROT_WRITE);
+#else
+      UNUSED(p);
+      UNUSED(size);
+#endif
     }
 
     /**
@@ -121,11 +127,11 @@ namespace snmalloc
      * greater than a page.
      */
     template<bool committed>
-    void* reserve(const size_t* size) noexcept
+    void* reserve(size_t size) noexcept
     {
       void* p = mmap(
         nullptr,
-        *size,
+        size,
         PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANONYMOUS,
         -1,
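
The effect of the USE_POSIX_COMMIT_CHECKS instrumentation above can be reproduced in isolation: once `notify_not_using` has mprotect'ed a range `PROT_NONE`, any stray access to notionally decommitted memory faults immediately instead of silently succeeding. A minimal standalone sketch (not part of the patch):

```c++
#include <sys/mman.h>
#include <cassert>
#include <cstddef>

int main()
{
  std::size_t len = std::size_t(1) << 16;
  void* mem = mmap(
    nullptr, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(mem != MAP_FAILED);
  char* p = static_cast<char*>(mem);

  mprotect(p, len, PROT_NONE);              // what notify_not_using() now does
  // *p = 1;                                // would SIGSEGV: use-after-decommit
  mprotect(p, len, PROT_READ | PROT_WRITE); // what notify_using() now does
  *p = 1;                                   // valid again
}
```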
diff --git a/src/pal/pal_windows.h b/src/pal/pal_windows.h
index 2a98e490e..75f87d1f3 100644
--- a/src/pal/pal_windows.h
+++ b/src/pal/pal_windows.h
@@ -183,7 +183,7 @@ namespace snmalloc
     }
 #  elif defined(PLATFORM_HAS_VIRTUALALLOC2)
     template<bool committed>
-    void* reserve(size_t* size, size_t align) noexcept
+    void* reserve(size_t size, size_t align) noexcept
     {
       DWORD flags = MEM_RESERVE;
 
@@ -209,7 +209,7 @@ namespace snmalloc
       param.Pointer = &addressReqs;
 
       void* ret = VirtualAlloc2FromApp(
-        nullptr, nullptr, *size, flags, PAGE_READWRITE, &param, 1);
+        nullptr, nullptr, size, flags, PAGE_READWRITE, &param, 1);
       if (ret == nullptr)
       {
         error("Failed to allocate memory\n");
@@ -218,14 +218,14 @@ namespace snmalloc
     }
 #  else
     template<bool committed>
-    void* reserve(size_t* size) noexcept
+    void* reserve(size_t size) noexcept
     {
       DWORD flags = MEM_RESERVE;
 
       if (committed)
         flags |= MEM_COMMIT;
 
-      void* ret = VirtualAlloc(nullptr, *size, flags, PAGE_READWRITE);
+      void* ret = VirtualAlloc(nullptr, size, flags, PAGE_READWRITE);
       if (ret == nullptr)
       {
         error("Failed to allocate memory\n");
diff --git a/src/test/func/fixed_region/fixed_region.cc b/src/test/func/fixed_region/fixed_region.cc
index fe1ae3927..b3205ed52 100644
--- a/src/test/func/fixed_region/fixed_region.cc
+++ b/src/test/func/fixed_region/fixed_region.cc
@@ -32,8 +32,12 @@ int main()
 {
   MemoryProviderStateMixin<DefaultPal> mp;
 
-  size_t size = 1ULL << 28;
-  oe_base = mp.reserve<true>(&size, 0);
+  // 28 is large enough to produce a nested allocator.
+  // It is also large enough for the example to run in.
+  // For 1MiB superslabs, SUPERSLAB_BITS + 4 is not big enough for the example.
+  size_t large_class = 28 - SUPERSLAB_BITS;
+  size_t size = 1ULL << (SUPERSLAB_BITS + large_class);
+  oe_base = mp.reserve<true>(large_class);
   oe_end = (uint8_t*)oe_base + size;
   std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl;
diff --git a/src/test/func/malloc/malloc.cc b/src/test/func/malloc/malloc.cc
index 446edf3af..3ed01ed37 100644
--- a/src/test/func/malloc/malloc.cc
+++ b/src/test/func/malloc/malloc.cc
@@ -108,7 +108,8 @@ int main(int argc, char** argv)
   test_posix_memalign((size_t)-1, 0, EINVAL, true);
   test_posix_memalign(OS_PAGE_SIZE, sizeof(uintptr_t) / 2, EINVAL, true);
 
-  for (size_t align = sizeof(uintptr_t); align <= SUPERSLAB_SIZE; align <<= 1)
+  for (size_t align = sizeof(uintptr_t); align <= SUPERSLAB_SIZE * 8;
+       align <<= 1)
   {
     for (snmalloc::sizeclass_t sc = 0; sc < NUM_SIZECLASSES; sc++)
     {
diff --git a/src/test/func/two_alloc_types/main.cc b/src/test/func/two_alloc_types/main.cc
index b8ee2ffff..74ae32f2b 100644
--- a/src/test/func/two_alloc_types/main.cc
+++ b/src/test/func/two_alloc_types/main.cc
@@ -45,8 +45,12 @@ int main()
 
   MemoryProviderStateMixin<DefaultPal> mp;
 
-  size_t size = 1ULL << 26;
-  oe_base = mp.reserve<true>(&size, 1);
+  // 26 is large enough to produce a nested allocator.
+  // It is also large enough for the example to run in.
+  // For 1MiB superslabs, SUPERSLAB_BITS + 2 is not big enough for the example.
+  size_t large_class = 26 - SUPERSLAB_BITS;
+  size_t size = 1ULL << (SUPERSLAB_BITS + large_class);
+  oe_base = mp.reserve<true>(large_class);
   oe_end = (uint8_t*)oe_base + size;
   std::cout << "Allocated region " << oe_base << " - " << oe_end << std::endl;
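
The size arithmetic the updated tests rely on, checked standalone (assuming `SUPERSLAB_BITS = 24` for illustration; the real value depends on the build): `reserve(large_class)` hands back exactly `1 << (SUPERSLAB_BITS + large_class)` bytes, so picking `large_class = 28 - SUPERSLAB_BITS` reproduces the old `1ULL << 28` region.

```c++
#include <cassert>
#include <cstddef>

static constexpr std::size_t SUPERSLAB_BITS = 24; // illustrative value only

int main()
{
  std::size_t large_class = 28 - SUPERSLAB_BITS; // 4 under this assumption
  std::size_t size = std::size_t(1) << (SUPERSLAB_BITS + large_class);
  assert(size == (std::size_t(1) << 28)); // same 256MiB region as before
}
```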