diff --git a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBSCachingAllocator.cpp b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBSCachingAllocator.cpp
index bb923a7d8..669cfa57a 100644
--- a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBSCachingAllocator.cpp
+++ b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBSCachingAllocator.cpp
@@ -67,10 +67,16 @@ class BSCachingAllocator : public CacheAllocator {
 
   static size_t getAllocateSize(size_t nbytes) {
     nbytes = getMemoryAlignmentStrategy()->roundBytes(nbytes);
-    static bool less_fragmentation =
-        std::getenv("DIPU_BS_MORE_ADAPTABLE") == nullptr;
-    return less_fragmentation ? getAllocateSizeLessFragmentation(nbytes)
-                              : getAllocateSizeMoreAdaptable(nbytes);
+    constexpr size_t kMinBlockSize = 1 << 20;  // 1M
+    if (nbytes <= kMinBlockSize) {
+      return kMinBlockSize;
+    }
+    // Round up to the next power of two. __builtin_clzll counts leading zeros
+    // of an unsigned long long, so the exponent is that type's bit width
+    // (not its byte size) minus clz; shifting a size_t avoids int overflow.
+    constexpr int kBitWidth = 8 * sizeof(unsigned long long);
+    const int clz = __builtin_clzll(nbytes - 1);
+    return size_t{1} << (kBitWidth - clz);
   }
 
   c10::DataPtr allocate(size_t size) const override {
@@ -78,8 +84,8 @@ class BSCachingAllocator : public CacheAllocator {
                               << size << ",allocator:" << this
                               << ", memory-usage" << memory_allocated() << "/"
                               << memory_reserved());
-    std::lock_guard lk(mutex_);
     flush_mem_pool();
+    std::lock_guard lk(mutex_);
     size_t nbytes = getAllocateSize(size);
     void* ptr = nullptr;
     auto& idel_blocks = impl->idel_blocks_[nbytes];
@@ -138,6 +144,7 @@ class BSCachingAllocator : public CacheAllocator {
   }
 
   void empty_resource_pool() const {
+    std::lock_guard lk(mutex_);
     DIPU_DEBUG_ALLOCATOR(
         8, "BSCachingAllocator::empty_resource_pool ,allocator:" << this);
     while (!async_mem_pool()->empty()) {
@@ -180,6 +187,7 @@ class BSCachingAllocator : public CacheAllocator {
   void release_all_memory() const override { release_all_memory_impl(); }
 
   void flush_mem_pool() const {
+    std::lock_guard lk(mutex_);
     DIPU_DEBUG_ALLOCATOR(
         8, "BSCachingAllocator::flush_mem_pool allocator:" << this);
     while (async_mem_pool()->ready()) {