Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add native lock-free dynamic heap allocator #4749

Draft
wants to merge 25 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
8504094
Add `get_current_thread_id` to `base`
Feoramund Jan 20, 2025
3dc9b10
Add virtual memory procedures to `base`
Feoramund Jan 20, 2025
754e5a2
Add native heap allocator
Feoramund Jan 21, 2025
4f3c518
Add heap allocator tests to CI
Feoramund Jan 21, 2025
3a24df1
Fix indentation
Feoramund Jan 27, 2025
3326d6c
Strengthen `Consume` to `Acquire` in heap allocator
Feoramund Jan 28, 2025
781ef8f
Hoist reused calculations out into variables
Feoramund Jan 28, 2025
4fe98cb
Rearrange slab iteration conditionals
Feoramund Jan 28, 2025
3e450ad
Restrict superpage allocation on Darwin to AMD64 and 2MiB
Feoramund Jan 28, 2025
fe3bf9d
Use `VM_INHERIT_COPY` instead
Feoramund Jan 28, 2025
b66734e
Add heap allocator exception for Orca
Feoramund Jan 28, 2025
8c19793
Don't build feoramalloc on web platforms
Feoramund Jan 28, 2025
7a3e547
Simplify loop
Feoramund Feb 2, 2025
4a77f2d
Strengthen order to prevent reordering
Feoramund Feb 4, 2025
7b739f4
Unify heap allocators
Feoramund Feb 7, 2025
a08fef0
Optimize virtual memory resizing on Darwin
Feoramund Feb 10, 2025
5f00ecc
Favor rescheduling superpage cache entry instead
Feoramund Feb 10, 2025
06ef457
Use `mach_task_self_` global instead
Feoramund Feb 10, 2025
21c6116
Ensure remapped memory has desired protection on Darwin
Feoramund Feb 10, 2025
c625bc9
Revert to unoptimized behavior on Darwin
Feoramund Feb 10, 2025
6e0f518
Remove `HEAP_PANIC_ON_FREE_NIL`
Feoramund Feb 11, 2025
149dc4c
Fix slab iteration bug in `heap_cache_register_superpage`
Feoramund Mar 24, 2025
225549c
Fix in-place expansion of wide slabs
Feoramund Mar 28, 2025
49ef42a
Remove unneeded call to `min`
Feoramund Mar 30, 2025
9c863d0
Fix slab iteration bug in the remote free collection phase
Feoramund Mar 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:netbsd_amd64 -no-entry-point
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:netbsd_arm64 -no-entry-point
./odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
Expand Down Expand Up @@ -65,6 +66,7 @@ jobs:
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:freebsd_amd64 -no-entry-point
./odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
Expand Down Expand Up @@ -123,6 +125,8 @@ jobs:
run: ./odin check examples/all -strict-style -vet -disallow-do
- name: Odin check vendor/sdl3
run: ./odin check vendor/sdl3 -strict-style -vet -disallow-do -no-entry-point
- name: Odin heap allocator tests
run: ./odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -sanitize:thread -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Optimized Core library tests
Expand Down Expand Up @@ -211,6 +215,11 @@ jobs:
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point
- name: Odin heap allocator tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
- name: Core library tests
shell: cmd
run: |
Expand Down Expand Up @@ -305,6 +314,9 @@ jobs:
- name: Odin run -debug
run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

- name: Odin heap allocator tests
run: ./odin run tests/heap_allocator -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests

- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

Expand Down
3 changes: 0 additions & 3 deletions base/runtime/default_allocators_general.odin
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
} else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
default_allocator_proc :: panic_allocator_proc
default_allocator :: panic_allocator
} else when ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
default_allocator :: default_wasm_allocator
default_allocator_proc :: wasm_allocator_proc
} else {
default_allocator :: heap_allocator
default_allocator_proc :: heap_allocator_proc
Expand Down
137 changes: 47 additions & 90 deletions base/runtime/heap_allocator.odin
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#+build !js
#+build !orca
#+build !wasi
package runtime

import "base:intrinsics"
Expand All @@ -9,111 +12,65 @@ heap_allocator :: proc() -> Allocator {
}
}

heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
//
// NOTE(tetra, 2020-01-14): The heap doesn't respect alignment.
// Instead, we overallocate by `alignment + size_of(rawptr) - 1`, and insert
// padding. We also store the original pointer returned by heap_alloc right before
// the pointer we return to the user.
//

aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr, old_size: int, zero_memory := true) -> ([]byte, Allocator_Error) {
// Not(flysand): We need to reserve enough space for alignment, which
// includes the user data itself, the space to store the pointer to
// allocation start, as well as the padding required to align both
// the user data and the pointer.
a := max(alignment, align_of(rawptr))
space := a-1 + size_of(rawptr) + size
allocated_mem: rawptr

force_copy := old_ptr != nil && alignment > align_of(rawptr)

if old_ptr != nil && !force_copy {
original_old_ptr := ([^]rawptr)(old_ptr)[-1]
allocated_mem = heap_resize(original_old_ptr, space)
} else {
allocated_mem = heap_alloc(space, zero_memory)
}
aligned_mem := rawptr(([^]u8)(allocated_mem)[size_of(rawptr):])

ptr := uintptr(aligned_mem)
aligned_ptr := (ptr + uintptr(a)-1) & ~(uintptr(a)-1)
if allocated_mem == nil {
aligned_free(old_ptr)
aligned_free(allocated_mem)
heap_allocator_proc :: proc(
allocator_data: rawptr,
mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr,
old_size: int,
loc := #caller_location,
) -> ([]byte, Allocator_Error) {
assert(alignment <= HEAP_MAX_ALIGNMENT, "Heap allocation alignment beyond HEAP_MAX_ALIGNMENT bytes is not supported.", loc = loc)
assert(alignment >= 0, "Alignment must be greater than or equal to zero.", loc = loc)
switch mode {
case .Alloc:
// All allocations are aligned to at least their size up to
// `HEAP_MAX_ALIGNMENT`, and by virtue of binary arithmetic, any
// address aligned to N will also be aligned to N>>1.
//
// Therefore, we have no book-keeping costs for alignment.
ptr := heap_alloc(max(size, alignment))
if ptr == nil {
return nil, .Out_Of_Memory
}

aligned_mem = rawptr(aligned_ptr)
([^]rawptr)(aligned_mem)[-1] = allocated_mem

if force_copy {
mem_copy_non_overlapping(aligned_mem, old_ptr, min(old_size, size))
aligned_free(old_ptr)
}

return byte_slice(aligned_mem, size), nil
}

aligned_free :: proc(p: rawptr) {
if p != nil {
heap_free(([^]rawptr)(p)[-1])
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Alloc_Non_Zeroed:
ptr := heap_alloc(max(size, alignment), zero_memory = false)
if ptr == nil {
return nil, .Out_Of_Memory
}
}

aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
if p == nil {
return aligned_alloc(new_size, new_alignment, nil, old_size, zero_memory)
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Resize:
ptr := heap_resize(old_memory, old_size, max(size, alignment))
if ptr == nil {
return nil, .Out_Of_Memory
}

new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return

// NOTE: heap_resize does not zero the new memory, so we do it
if zero_memory && new_size > old_size {
new_region := raw_data(new_memory[old_size:])
intrinsics.mem_zero(new_region, new_size - old_size)
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Resize_Non_Zeroed:
ptr := heap_resize(old_memory, old_size, max(size, alignment), zero_memory = false)
if ptr == nil {
return nil, .Out_Of_Memory
}
return
}

switch mode {
case .Alloc, .Alloc_Non_Zeroed:
return aligned_alloc(size, alignment, nil, 0, mode == .Alloc)

return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Free:
aligned_free(old_memory)

heap_free(old_memory)
case .Free_All:
return nil, .Mode_Not_Implemented

case .Resize, .Resize_Non_Zeroed:
return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)

case .Query_Features:
set := (^Allocator_Mode_Set)(old_memory)
if set != nil {
set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features}
set^ = {
.Alloc,
.Alloc_Non_Zeroed,
.Resize,
.Resize_Non_Zeroed,
.Free,
.Query_Features,
}
}
return nil, nil

case .Query_Info:
return nil, .Mode_Not_Implemented
}

return nil, nil
}


heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
return _heap_alloc(size, zero_memory)
}

heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
return _heap_resize(ptr, new_size)
}

heap_free :: proc "contextless" (ptr: rawptr) {
_heap_free(ptr)
}
92 changes: 92 additions & 0 deletions base/runtime/heap_allocator_control.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#+build !js
#+build !orca
#+build !wasi
package runtime

import "base:intrinsics"

/*
Merge the remote frees of every slab in `superpage`, then release each slab
that ends up with all of its bins free.

No retention heuristic is consulted here: a slab whose bins are all free is
always released.

Inputs:
- superpage: The superpage to compact.

Returns:
- freed: true if the superpage had every slab free afterwards, its cache block
  was not in use, and it was therefore handed to `heap_free_superpage`.
*/
@(private)
compact_superpage :: proc "contextless" (superpage: ^Heap_Superpage) -> (freed: bool) {
	index := 0
	for index < HEAP_SLAB_COUNT {
		slab := heap_superpage_index_slab(superpage, index)

		// Advance the cursor up front so that every `continue` below resumes
		// at the next slab to inspect.
		if slab.bin_size <= HEAP_MAX_BIN_SIZE {
			index += 1
			if slab.bin_size == 0 {
				// Nothing to merge or release for a slab with a zero bin size.
				continue
			}
		} else {
			// An oversized slab occupies several contiguous slab slots; step
			// over all of them at once.
			index += heap_slabs_needed_for_size(slab.bin_size)
		}

		// Sampled before the merge: the cache removal below is keyed on the
		// slab's state prior to collecting remote frees.
		was_cached := slab.free_bins > 0
		heap_merge_remote_frees(slab)

		if slab.free_bins != slab.max_bins {
			// Still has live bins; leave it alone.
			continue
		}

		if slab.bin_size > HEAP_MAX_BIN_SIZE {
			heap_free_wide_slab(superpage, slab)
			continue
		}

		if was_cached {
			heap_cache_remove_slab(slab, heap_bin_size_to_rank(slab.bin_size))
		}
		heap_free_slab(superpage, slab)
	}

	if superpage.free_slabs != HEAP_SLAB_COUNT || superpage.cache_block.in_use {
		return false
	}

	heap_free_superpage(superpage)
	return true
}

/*
Compact every superpage reachable from `local_heap` by following the `next`
links, merging remote frees and releasing as many slabs and superpages as
possible.

No retention heuristic is consulted; see `compact_superpage`.
*/
compact_heap :: proc "contextless" () {
	current := local_heap
	for current != nil {
		// Read the link before compacting: `compact_superpage` may free the
		// superpage, after which `current.next` must not be touched.
		following := current.next
		compact_superpage(current)
		current = following
	}
}

/*
Free any empty superpages in the orphanage.

Superpages are popped from the orphanage into a fixed, stack-resident buffer
(so that this procedure never allocates from the heap), compacted, and pushed
back unless compaction freed them.

At most 128 superpages are examined per call, because that is the capacity of
the staging buffer. A nil result from `heap_pop_orphan` is treated as "no more
orphans" and ends the collection phase early.
*/
compact_heap_orphanage :: proc "contextless" () {
	// First, try to empty the orphanage so that we can evaluate each superpage.
	buffer: [128]^Heap_Superpage
	count := 0
	for count < len(buffer) {
		orphan := heap_pop_orphan()
		if orphan == nil {
			break
		}
		buffer[count] = orphan
		count += 1
	}

	// Next, compact each superpage and push it back to the orphanage if it was
	// not freed.
	//
	// Only the `count` entries that were actually popped are visited. The rest
	// of the buffer is still nil, and `compact_superpage` dereferences its
	// argument, so iterating the whole array would crash whenever the
	// orphanage held fewer than 128 superpages.
	for superpage in buffer[:count] {
		if !compact_superpage(superpage) {
			heap_push_orphan(superpage)
		}
	}
}
Loading
Loading