diff --git a/config/llvm_header.inc b/config/llvm_header.inc
index 5d665bb99..ec8d6917e 100644
--- a/config/llvm_header.inc
+++ b/config/llvm_header.inc
@@ -202,7 +202,7 @@ declare void @write_configuration_to_proof_trace(ptr, ptr, i1)
 @current_interval = thread_local global i64 0
 @GC_THRESHOLD = thread_local global i64 @GC_THRESHOLD@
 
-@gc_roots = global [256 x ptr] zeroinitializer
+@gc_roots = thread_local global [256 x ptr] zeroinitializer
 
 define i64 @get_gc_threshold() {
   %threshold = load i64, ptr @GC_THRESHOLD
diff --git a/include/runtime/arena.h b/include/runtime/arena.h
index 1c32a0fc5..6248b243d 100644
--- a/include/runtime/arena.h
+++ b/include/runtime/arena.h
@@ -30,12 +30,19 @@ using memory_block_header = struct {
 // Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
 // 127.
 #define REGISTER_ARENA(name, id) \
-  static struct arena name = {.allocation_semispace_id = (id)}
+  static thread_local struct arena name = {.allocation_semispace_id = (id)}
 
 #define MEM_BLOCK_START(ptr) \
   ((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))
 
+#ifdef __MACH__
+//
+// thread_local disabled for Apple
+//
 extern bool time_for_collection;
+#else
+extern thread_local bool time_for_collection;
+#endif
 
 size_t get_gc_threshold();
diff --git a/include/runtime/collect.h b/include/runtime/collect.h
index a4ef7d2b0..1d448fcd4 100644
--- a/include/runtime/collect.h
+++ b/include/runtime/collect.h
@@ -26,8 +26,8 @@ using set_node = set::iterator::node_t;
 using set_impl = set::iterator::tree_t;
 
 extern "C" {
-extern size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
-extern bool collect_old;
+extern thread_local size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
+extern thread_local bool collect_old;
 size_t get_size(uint64_t, uint16_t);
 void migrate_static_roots(void);
 void migrate(block **block_ptr);
diff --git a/include/runtime/header.h b/include/runtime/header.h
index 000ec7cd2..d82cae004 100644
--- a/include/runtime/header.h
+++ b/include/runtime/header.h
@@ -47,8 +47,14 @@ size_t hash_k(block *);
 void k_hash(block *, void *);
 bool hash_enter(void);
 void hash_exit(void);
-
+#ifdef __MACH__
+//
+// thread_local disabled for Apple
+//
 extern bool gc_enabled;
+#else
+extern thread_local bool gc_enabled;
+#endif
 }
 
 class k_elem {
diff --git a/lib/codegen/CreateTerm.cpp b/lib/codegen/CreateTerm.cpp
index 201c615f6..3bb71c900 100644
--- a/lib/codegen/CreateTerm.cpp
+++ b/lib/codegen/CreateTerm.cpp
@@ -782,10 +782,25 @@ llvm::Value *create_term::disable_gc() {
   llvm::Constant *global = module_->getOrInsertGlobal(
       "gc_enabled", llvm::Type::getInt1Ty(ctx_));
   auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
+#ifdef __MACH__
+  //
+  // thread_local disabled for Apple
+  //
+  /*
+  global_var->setThreadLocal(true);
+  llvm::IRBuilder b(current_block_);
+  auto *global_var_address = b.CreateThreadLocalAddress(global_var);
+  */
+  auto *global_var_address = global_var;
+#else
+  global_var->setThreadLocal(true);
+  auto *global_var_address = global_var;
+#endif
   auto *old_val = new llvm::LoadInst(
-      llvm::Type::getInt1Ty(ctx_), global_var, "was_enabled", current_block_);
+      llvm::Type::getInt1Ty(ctx_), global_var_address, "was_enabled",
+      current_block_);
   new llvm::StoreInst(
-      llvm::ConstantInt::getFalse(ctx_), global_var, current_block_);
+      llvm::ConstantInt::getFalse(ctx_), global_var_address, current_block_);
   return old_val;
 }
 
@@ -793,7 +808,21 @@ void create_term::enable_gc(llvm::Value *was_enabled) {
   llvm::Constant *global = module_->getOrInsertGlobal(
       "gc_enabled", llvm::Type::getInt1Ty(ctx_));
   auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
-  new llvm::StoreInst(was_enabled, global_var, current_block_);
+#ifdef __MACH__
+  //
+  // thread_local disabled for Apple
+  //
+  /*
+  global_var->setThreadLocal(true);
+  llvm::IRBuilder b(current_block_);
+  auto *global_var_address = b.CreateThreadLocalAddress(global_var);
+  */
+  auto *global_var_address = global_var;
+#else
+  global_var->setThreadLocal(true);
+  auto *global_var_address = global_var;
+#endif
+  new llvm::StoreInst(was_enabled, global_var_address, current_block_);
 }
 
 // We use tailcc calling convention for apply_rule_* and eval_* functions to
diff --git a/lib/codegen/Decision.cpp b/lib/codegen/Decision.cpp
index 3ac8dd2d9..6ac04460a 100644
--- a/lib/codegen/Decision.cpp
+++ b/lib/codegen/Decision.cpp
@@ -5,6 +5,7 @@
 #include "kllvm/codegen/ProofEvent.h"
 #include "kllvm/codegen/Util.h"
 
+#include "llvm/IR/IRBuilder.h"
 #include 
 #include 
 #include 
@@ -1006,9 +1007,25 @@ std::pair<std::vector<llvm::Value *>, llvm::BasicBlock *> step_function_header(
   auto *collection = module->getOrInsertGlobal(
       "time_for_collection", llvm::Type::getInt1Ty(module->getContext()));
+
+#ifdef __MACH__
+  //
+  // thread_local disabled for Apple
+  //
+  /*
+  llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
+  llvm::IRBuilder b(check_collect);
+  auto *collection_address = b.CreateThreadLocalAddress(collection);
+  */
+  auto *collection_address = collection;
+#else
+  llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
+  auto *collection_address = collection;
+#endif
+
   auto *is_collection = new llvm::LoadInst(
-      llvm::Type::getInt1Ty(module->getContext()), collection, "is_collection",
-      check_collect);
+      llvm::Type::getInt1Ty(module->getContext()), collection_address,
+      "is_collection", check_collect);
   set_debug_loc(is_collection);
   auto *collect = llvm::BasicBlock::Create(
       module->getContext(), "isCollect", block->getParent());
diff --git a/runtime/alloc/arena.cpp b/runtime/alloc/arena.cpp
index dd0f6b2e6..0e1e4de15 100644
--- a/runtime/alloc/arena.cpp
+++ b/runtime/alloc/arena.cpp
@@ -3,6 +3,8 @@
 #include 
 #include 
 #include 
+#include <memory>
+#include <sys/mman.h>
 
 #include "runtime/alloc.h"
 #include "runtime/arena.h"
@@ -47,36 +49,68 @@ get_arena_semispace_id_of_object(void *ptr) {
   return mem_block_header(ptr)->semispace;
 }
 
-static void *first_superblock_ptr = nullptr;
-static void *superblock_ptr = nullptr;
-static char **next_superblock_ptr = nullptr;
-static unsigned blocks_left = 0;
+//
+// We will reserve enough address space for 1 million 1MB blocks. We might want to increase this on a > 1TB server.
+//
+size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
+static thread_local void *hyperblock_ptr = nullptr; // only needed for munmap()
 
 static void *megabyte_malloc() {
-  if (blocks_left == 0) {
-    blocks_left = 15;
-    if (int result
-        = posix_memalign(&superblock_ptr, BLOCK_SIZE, BLOCK_SIZE * 15)) {
-      errno = result;
-      perror("posix_memalign");
-    }
-    if (!first_superblock_ptr) {
-      first_superblock_ptr = superblock_ptr;
-    }
-    if (next_superblock_ptr) {
-      *next_superblock_ptr = (char *)superblock_ptr;
+  //
+  // Return a pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment.
+  //
+  static thread_local char *currentblock_ptr
+      = nullptr; // char* rather than void* to permit pointer arithmetic
+  if (currentblock_ptr) {
+    //
+    // We expect a page fault due to not being able to map physical memory to this block, or the
+    // process to be killed by the OOM killer, long before we run off the end of our address space.
+    //
+    currentblock_ptr += BLOCK_SIZE;
+  } else {
+    //
+    // First call - need to reserve the address space.
+    //
+    size_t request = HYPERBLOCK_SIZE;
+    void *addr = mmap(
+        nullptr, // let OS choose the address
+        request, // Linux and macOS both allow up to 64TB
+        PROT_READ | PROT_WRITE, // read, write but not execute
+        MAP_ANONYMOUS | MAP_PRIVATE
+            | MAP_NORESERVE, // allocate address space only
+        -1, // no file backing
+        0); // no offset
+    if (addr == MAP_FAILED) {
+      perror("mmap()");
+      abort();
     }
-    auto *hdr = (memory_block_header *)superblock_ptr;
-    next_superblock_ptr = &hdr->next_superblock;
-    hdr->next_superblock = nullptr;
+    hyperblock_ptr = addr;
+    //
+    // We ask for one block's worth of address space less than we allocated so alignment will always succeed.
+    // We don't worry about unused address space on either side of our aligned address space because there will be no
+    // memory mapped to it.
+    //
+    currentblock_ptr = reinterpret_cast<char *>(
+        std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request));
   }
-  blocks_left--;
-  void *result = superblock_ptr;
-  superblock_ptr = (char *)superblock_ptr + BLOCK_SIZE;
-  return result;
+  return currentblock_ptr;
 }
 
+void free_all_memory() {
+  //
+  // Frees all memory that was demand-paged into this address range.
+  //
+  munmap(hyperblock_ptr, HYPERBLOCK_SIZE);
+}
+
+#ifdef __MACH__
+//
+// thread_local disabled for Apple
+//
 bool time_for_collection;
+#else
+thread_local bool time_for_collection;
+#endif
 
 static void fresh_block(struct arena *arena) {
   char *next_block = nullptr;
@@ -122,7 +156,14 @@ static void fresh_block(struct arena *arena) {
       BLOCK_SIZE - sizeof(memory_block_header));
 }
 
+#ifdef __MACH__
+//
+// thread_local disabled for Apple
+//
 bool gc_enabled = true;
+#else
+thread_local bool gc_enabled = true;
+#endif
 
 __attribute__((noinline)) void *
 do_alloc_slow(size_t requested, struct arena *arena) {
@@ -229,16 +270,3 @@ size_t arena_size(const struct arena *arena) {
                 : arena->num_collection_blocks)
          * (BLOCK_SIZE - sizeof(memory_block_header));
 }
-
-void free_all_memory() {
-  auto *superblock = (memory_block_header *)first_superblock_ptr;
-  while (superblock) {
-    auto *next_superblock = (memory_block_header *)superblock->next_superblock;
-    free(superblock);
-    superblock = next_superblock;
-  }
-  first_superblock_ptr = nullptr;
-  superblock_ptr = nullptr;
-  next_superblock_ptr = nullptr;
-  blocks_left = 0;
-}
diff --git a/runtime/alloc/register_gc_roots_enum.cpp b/runtime/alloc/register_gc_roots_enum.cpp
index bbb4b2269..2c1a3165a 100644
--- a/runtime/alloc/register_gc_roots_enum.cpp
+++ b/runtime/alloc/register_gc_roots_enum.cpp
@@ -3,7 +3,7 @@
 #include "runtime/collect.h"
 #include "runtime/header.h"
 
-std::vector<BlockEnumerator> block_enumerators;
+thread_local std::vector<BlockEnumerator> block_enumerators;
 
 void register_gc_roots_enumerator(BlockEnumerator f) {
   block_enumerators.push_back(f);
diff --git a/runtime/arithmetic/int.cpp b/runtime/arithmetic/int.cpp
index fcb4feec6..e76333fb6 100644
--- a/runtime/arithmetic/int.cpp
+++ b/runtime/arithmetic/int.cpp
@@ -373,8 +373,8 @@ void int_hash(mpz_t i, void *hasher) {
   }
 }
 
-gmp_randstate_t kllvm_rand_state;
-bool kllvm_rand_state_initialized = false;
+thread_local gmp_randstate_t kllvm_rand_state;
+thread_local bool kllvm_rand_state_initialized = false;
 
 SortK hook_INT_srand(SortInt seed) {
   if (!kllvm_rand_state_initialized) {
diff --git a/runtime/collect/collect.cpp b/runtime/collect/collect.cpp
index 31b8c4b77..b519bc15b 100644
--- a/runtime/collect/collect.cpp
+++ b/runtime/collect/collect.cpp
@@ -16,15 +16,15 @@ char **old_alloc_ptr(void);
 char *youngspace_ptr(void);
 char *oldspace_ptr(void);
 
-static bool is_gc = false;
-bool collect_old = false;
+static thread_local bool is_gc = false;
+bool thread_local collect_old = false;
 #ifndef GC_DBG
-static uint8_t num_collection_only_young = 0;
+static thread_local uint8_t num_collection_only_young = 0;
 #else
-static char *last_alloc_ptr;
+static thread_local char *last_alloc_ptr;
 #endif
-size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
+size_t thread_local numBytesLiveAtCollection[1 << AGE_WIDTH];
 
 bool during_gc() {
   return is_gc;
diff --git a/runtime/collect/migrate_static_roots.cpp b/runtime/collect/migrate_static_roots.cpp
index d162f0bb8..3474e83ee 100644
--- a/runtime/collect/migrate_static_roots.cpp
+++ b/runtime/collect/migrate_static_roots.cpp
@@ -2,10 +2,10 @@
 
 #include "runtime/collect.h"
 
-extern std::vector<BlockEnumerator> block_enumerators;
+extern thread_local std::vector<BlockEnumerator> block_enumerators;
 
-extern gmp_randstate_t kllvm_rand_state;
-extern bool kllvm_rand_state_initialized;
+extern thread_local gmp_randstate_t kllvm_rand_state;
+extern thread_local bool kllvm_rand_state_initialized;
 
 extern "C" {
diff --git a/runtime/lto/alloc.cpp b/runtime/lto/alloc.cpp
index 86fa11dfc..bcb2601e2 100644
--- a/runtime/lto/alloc.cpp
+++ b/runtime/lto/alloc.cpp
@@ -159,8 +159,8 @@ static inline void *kore_alloc_collection(kllvm::sort_category cat) {
   void *mem
       = kore_alloc(sizeof(blockheader) + sizeof(collection) + sizeof(uint64_t));
   auto *hdr = (blockheader *)mem;
-  static std::string name = get_raw_symbol_name(cat) + "{}";
-  static blockheader hdr_val
+  static thread_local std::string name = get_raw_symbol_name(cat) + "{}";
+  static thread_local blockheader hdr_val
       = get_block_header_for_symbol(get_tag_for_symbol_name(name.c_str()));
   *hdr = hdr_val;
   auto *offset = (uint64_t *)(hdr + 1);
diff --git a/unittests/runtime-collections/lists.cpp b/unittests/runtime-collections/lists.cpp
index d4d2a20d9..1aa13a4e0 100644
--- a/unittests/runtime-collections/lists.cpp
+++ b/unittests/runtime-collections/lists.cpp
@@ -62,7 +62,15 @@ block D1 = {{1}};
 block *DUMMY1 = &D1;
 }
 
+#ifdef __MACH__
+//
+// thread_local disabled for Apple
+//
 bool gc_enabled;
+#else
+thread_local bool gc_enabled;
+#endif
+
 size_t get_gc_threshold() {
   return SIZE_MAX;
 }
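
For reviewers, here is a minimal standalone sketch (not part of the patch) of the reserve-then-bump scheme that megabyte_malloc() now uses: each thread reserves a large range of address space once with mmap(MAP_NORESERVE), aligns it to the block size with std::align, and afterwards hands out blocks by bumping a thread_local pointer; physical pages are only committed when a block is first touched. The names demo_block_alloc, DEMO_BLOCK_SIZE, and DEMO_HYPERBLOCK_SIZE are illustrative and do not appear in the repository, and the sketch assumes a POSIX system with mmap().

// Sketch only: a self-contained model of the reserve-then-bump allocation scheme.
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <sys/mman.h>

#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0 // not defined on every platform; 0 is a harmless no-op
#endif

constexpr std::size_t DEMO_BLOCK_SIZE = 1024 * 1024;                 // 1 MiB blocks
constexpr std::size_t DEMO_HYPERBLOCK_SIZE = DEMO_BLOCK_SIZE * 1024; // 1 GiB reservation

static void *demo_block_alloc() {
  static thread_local char *current = nullptr;
  if (current) {
    // Later calls: just bump within the reservation; touching the new block
    // is what actually commits physical pages.
    current += DEMO_BLOCK_SIZE;
    return current;
  }
  // First call on this thread: reserve address space only.
  std::size_t request = DEMO_HYPERBLOCK_SIZE;
  void *addr = mmap(
      nullptr, request, PROT_READ | PROT_WRITE,
      MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
  if (addr == MAP_FAILED) {
    perror("mmap()");
    abort();
  }
  // Ask std::align for one block less than was reserved so alignment cannot fail.
  current = static_cast<char *>(std::align(
      DEMO_BLOCK_SIZE, DEMO_HYPERBLOCK_SIZE - DEMO_BLOCK_SIZE, addr, request));
  return current;
}

int main() {
  char *a = static_cast<char *>(demo_block_alloc());
  char *b = static_cast<char *>(demo_block_alloc());
  a[0] = 1; // first touch commits a physical page for this block
  std::printf("blocks are %zu bytes apart\n", static_cast<std::size_t>(b - a));
}

This is also why free_all_memory() in the patch reduces to a single munmap() of the reserved range: releasing the mapping returns every demand-paged block at once, so the old superblock free list is no longer needed.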